|
20 | 20 | #include <algorithm> |
21 | 21 | #include <vector> |
22 | 22 | #include <array> |
| 23 | +#include <thread> |
23 | 24 |
|
24 | 25 | #include "Rtypes.h" |
25 | 26 | #include "TLinearFitter.h" |
@@ -69,9 +70,9 @@ TFitResultPtr fit(const size_t nBins, const T* arr, const T xMin, const T xMax, |
69 | 70 | // create an empty TFitResult |
70 | 71 | std::shared_ptr<TFitResult> tfr(new TFitResult()); |
71 | 72 | // create the fitter from an empty fit result |
72 | | - //std::shared_ptr<ROOT::Fit::Fitter> fitter(new ROOT::Fit::Fitter(std::static_pointer_cast<ROOT::Fit::FitResult>(tfr) ) ); |
| 73 | + // std::shared_ptr<ROOT::Fit::Fitter> fitter(new ROOT::Fit::Fitter(std::static_pointer_cast<ROOT::Fit::FitResult>(tfr) ) ); |
73 | 74 | ROOT::Fit::Fitter fitter(tfr); |
74 | | - //ROOT::Fit::FitConfig & fitConfig = fitter->Config(); |
| 75 | + // ROOT::Fit::FitConfig & fitConfig = fitter->Config(); |
75 | 76 |
|
76 | 77 | const double binWidth = double(xMax - xMin) / double(nBins); |
77 | 78 |
|
@@ -225,8 +226,8 @@ bool medmadGaus(size_t nBins, const T* arr, const T xMin, const T xMax, std::arr |
225 | 226 | /// -1: only one point has been used for the calculation - center of gravity was uesed for calculation |
226 | 227 | /// -4: invalid result!! |
227 | 228 | /// |
228 | | -//template <typename T> |
229 | | -//Double_t fitGaus(const size_t nBins, const T *arr, const T xMin, const T xMax, std::vector<T>& param); |
| 229 | +// template <typename T> |
| 230 | +// Double_t fitGaus(const size_t nBins, const T *arr, const T xMin, const T xMax, std::vector<T>& param); |
230 | 231 | template <typename T> |
231 | 232 | Double_t fitGaus(const size_t nBins, const T* arr, const T xMin, const T xMax, std::vector<T>& param) |
232 | 233 | { |
@@ -301,7 +302,7 @@ Double_t fitGaus(const size_t nBins, const T* arr, const T xMin, const T xMax, s |
301 | 302 | Double_t chi2 = 0; |
302 | 303 | if (npoints >= 3) { |
303 | 304 | if (npoints == 3) { |
304 | | - //analytic calculation of the parameters for three points |
| 305 | + // analytic calculation of the parameters for three points |
305 | 306 | A.Invert(); |
306 | 307 | TMatrixD res(1, 3); |
307 | 308 | res.Mult(A, b); |
@@ -334,7 +335,7 @@ Double_t fitGaus(const size_t nBins, const T* arr, const T xMin, const T xMax, s |
334 | 335 | } |
335 | 336 |
|
336 | 337 | if (npoints == 2) { |
337 | | - //use center of gravity for 2 points |
| 338 | + // use center of gravity for 2 points |
338 | 339 | meanCOG /= sumCOG; |
339 | 340 | rms2COG /= sumCOG; |
340 | 341 | param[0] = max; |
@@ -524,7 +525,7 @@ R median(std::vector<T> v) |
524 | 525 | auto n = v.size() / 2; |
525 | 526 | nth_element(v.begin(), v.begin() + n, v.end()); |
526 | 527 | auto med = R{v[n]}; |
527 | | - if (!(v.size() & 1)) { //If the set size is even |
| 528 | + if (!(v.size() & 1)) { // If the set size is even |
528 | 529 | auto max_it = max_element(v.begin(), v.begin() + n); |
529 | 530 | med = R{(*max_it + med) / 2.0}; |
530 | 531 | } |
@@ -788,6 +789,169 @@ T MAD2Sigma(int np, T* y) |
788 | 789 | return median * 1.4826; // convert to Gaussian sigma |
789 | 790 | } |
790 | 791 |
|
| 792 | +/// \return returns the index of the closest timestamps to the left and right of the given timestamp |
| 793 | +/// \param timestamps vector of timestamps |
| 794 | +/// \param timestamp the timestamp to find the closest timestamps for |
| 795 | +template <typename DataTimeType, typename DataTime> |
| 796 | +std::optional<std::pair<size_t, size_t>> findClosestIndices(const std::vector<DataTimeType>& timestamps, DataTime timestamp) |
| 797 | +{ |
| 798 | + if (timestamps.empty()) { |
| 799 | + LOGP(warning, "Timestamp vector is empty!"); |
| 800 | + return std::nullopt; |
| 801 | + } |
| 802 | + |
| 803 | + if (timestamp <= timestamps.front()) { |
| 804 | + return std::pair{0, 0}; |
| 805 | + } else if (timestamp >= timestamps.back()) { |
| 806 | + return std::pair{timestamps.size() - 1, timestamps.size() - 1}; |
| 807 | + } |
| 808 | + |
| 809 | + const auto it = std::lower_bound(timestamps.begin(), timestamps.end(), timestamp); |
| 810 | + const size_t idx = std::distance(timestamps.begin(), it); |
| 811 | + const auto prevTimestamp = timestamps[idx - 1]; |
| 812 | + const auto nextTimestamp = timestamps[idx]; |
| 813 | + return std::pair{(idx - 1), idx}; |
| 814 | +} |
| 815 | + |
| 816 | +struct RollingStats { |
| 817 | + RollingStats() = default; |
| 818 | + RollingStats(const int nValues) |
| 819 | + { |
| 820 | + median.resize(nValues); |
| 821 | + std.resize(nValues); |
| 822 | + nPoints.resize(nValues); |
| 823 | + closestDistanceL.resize(nValues); |
| 824 | + closestDistanceR.resize(nValues); |
| 825 | + } |
| 826 | + |
| 827 | + std::vector<float> median; ///< median of rolling data |
| 828 | + std::vector<float> std; ///< std of rolling data |
| 829 | + std::vector<int> nPoints; ///< number of points used for the calculation |
| 830 | + std::vector<float> closestDistanceL; ///< distance of closest point to the left |
| 831 | + std::vector<float> closestDistanceR; ///< distance of closest point to the right |
| 832 | + |
| 833 | + ClassDefNV(RollingStats, 1); |
| 834 | +}; |
| 835 | + |
| 836 | +/// \brief calculates the rolling statistics of the input data |
| 837 | +/// \return returns the rolling statistics |
| 838 | +/// \param timeData times of the input data (assumed to be sorted) |
| 839 | +/// \param data values of the input data |
| 840 | +/// \param times times for which to calculate the rolling statistics |
| 841 | +/// \param deltaMax time range for which the rolling statistics is calculated |
| 842 | +/// \param mNthreads number of threads to use for the calculation |
| 843 | +/// \param minPoints minimum number of points to use for the calculation of the statistics - otherwise use nearest nClosestPoints points weighted with distance |
| 844 | +/// \param nClosestPoints number of closest points in case of number of points in given range is smaller than minPoints |
| 845 | +template <typename DataTimeType, typename DataType, typename DataTime> |
| 846 | +RollingStats getRollingStatistics(const DataTimeType& timeData, const DataType& data, const DataTime& times, const double deltaMax, const int mNthreads, const size_t minPoints = 4, const size_t nClosestPoints = 4) |
| 847 | +{ |
| 848 | + // output statistics |
| 849 | + const size_t vecSize = times.size(); |
| 850 | + RollingStats stats(vecSize); |
| 851 | + |
| 852 | + if (!std::is_sorted(timeData.begin(), timeData.end())) { |
| 853 | + LOGP(error, "Input data is NOT sorted!"); |
| 854 | + return stats; |
| 855 | + } |
| 856 | + |
| 857 | + if (timeData.empty()) { |
| 858 | + LOGP(error, "Input data is empty!"); |
| 859 | + return stats; |
| 860 | + } |
| 861 | + |
| 862 | + const size_t dataSize = data.size(); |
| 863 | + const size_t timeDataSize = timeData.size(); |
| 864 | + if (timeDataSize != dataSize) { |
| 865 | + LOGP(error, "Input data has different sizes {}!={}", timeDataSize, dataSize); |
| 866 | + return stats; |
| 867 | + } |
| 868 | + |
| 869 | + auto myThread = [&](int iThread) { |
| 870 | + // data in given time window for median calculation |
| 871 | + DataType window; |
| 872 | + for (size_t i = iThread; i < vecSize; i += mNthreads) { |
| 873 | + const double timeI = times[i]; |
| 874 | + |
| 875 | + // lower index |
| 876 | + const double timeStampLower = timeI - deltaMax; |
| 877 | + const auto lower = std::lower_bound(timeData.begin(), timeData.end(), timeStampLower); |
| 878 | + size_t idxStart = std::distance(timeData.begin(), lower); |
| 879 | + |
| 880 | + // upper index |
| 881 | + const double timeStampUpper = timeI + deltaMax; |
| 882 | + const auto upper = std::lower_bound(timeData.begin(), timeData.end(), timeStampUpper); |
| 883 | + size_t idxEnd = std::distance(timeData.begin(), upper); |
| 884 | + |
| 885 | + // closest data point |
| 886 | + if (auto idxClosest = findClosestIndices(timeData, timeI)) { |
| 887 | + auto [idxLeft, idxRight] = *idxClosest; |
| 888 | + const auto closestL = std::abs(timeData[idxLeft] - timeI); |
| 889 | + const auto closestR = std::abs(timeData[idxRight] - timeI); |
| 890 | + stats.closestDistanceL[i] = closestL; |
| 891 | + stats.closestDistanceR[i] = closestR; |
| 892 | + |
| 893 | + // if no points are in the range use the n closest points - n from the left and n from the right |
| 894 | + const size_t reqSize = idxEnd - idxStart; |
| 895 | + if (reqSize < minPoints) { |
| 896 | + // calculate weighted average |
| 897 | + idxStart = (idxRight > nClosestPoints) ? (idxRight - nClosestPoints) : 0; |
| 898 | + idxEnd = std::min(data.size(), idxRight + nClosestPoints); |
| 899 | + constexpr float epsilon = 1e-6f; |
| 900 | + double weightedSum = 0.0; |
| 901 | + double weightTotal = 0.0; |
| 902 | + for (size_t j = idxStart; j < idxEnd; ++j) { |
| 903 | + const double dist = std::abs(timeI - timeData[j]); |
| 904 | + const double weight = 1.0 / (dist + epsilon); |
| 905 | + weightedSum += weight * data[j]; |
| 906 | + weightTotal += weight; |
| 907 | + } |
| 908 | + stats.median[i] = (weightTotal > 0.) ? (weightedSum / weightTotal) : 0.0f; |
| 909 | + } else { |
| 910 | + // calculate statistics |
| 911 | + stats.nPoints[i] = reqSize; |
| 912 | + |
| 913 | + if (idxStart >= data.size()) { |
| 914 | + stats.median[i] = data.back(); |
| 915 | + continue; |
| 916 | + } |
| 917 | + |
| 918 | + if (reqSize <= 1) { |
| 919 | + stats.median[i] = data[idxStart]; |
| 920 | + continue; |
| 921 | + } |
| 922 | + |
| 923 | + // calculate median |
| 924 | + window.clear(); |
| 925 | + if (reqSize > window.capacity()) { |
| 926 | + window.reserve(static_cast<size_t>(reqSize * 1.5)); |
| 927 | + } |
| 928 | + window.insert(window.end(), data.begin() + idxStart, data.begin() + idxEnd); |
| 929 | + const size_t middle = window.size() / 2; |
| 930 | + std::nth_element(window.begin(), window.begin() + middle, window.end()); |
| 931 | + stats.median[i] = (window.size() % 2 == 1) ? window[middle] : ((window[middle - 1] + window[middle]) / 2.0); |
| 932 | + |
| 933 | + // calculate the stdev |
| 934 | + const float mean = std::accumulate(window.begin(), window.end(), 0.0f) / window.size(); |
| 935 | + std::transform(window.begin(), window.end(), window.begin(), [mean](const float val) { return val - mean; }); |
| 936 | + const float sqsum = std::inner_product(window.begin(), window.end(), window.begin(), 0.0f); |
| 937 | + const float stdev = std::sqrt(sqsum / window.size()); |
| 938 | + stats.std[i] = stdev; |
| 939 | + } |
| 940 | + } |
| 941 | + } |
| 942 | + }; |
| 943 | + |
| 944 | + std::vector<std::thread> threads(mNthreads); |
| 945 | + for (int i = 0; i < mNthreads; i++) { |
| 946 | + threads[i] = std::thread(myThread, i); |
| 947 | + } |
| 948 | + |
| 949 | + for (auto& th : threads) { |
| 950 | + th.join(); |
| 951 | + } |
| 952 | + return stats; |
| 953 | +} |
| 954 | + |
791 | 955 | } // namespace math_utils |
792 | 956 | } // namespace o2 |
793 | 957 | #endif |
0 commit comments