5959#include " SimulationDataFormat/MCCompLabel.h"
6060#endif
6161
62- namespace o2 ::gpu::internal
63- {
64- }
// File-scope compile-time limits. kMaxClusters takes its value from GPUCA_MERGER_MAX_TRACK_CLUSTERS.
// NOTE(review): the code that consumes these limits is not visible in this excerpt.
62+ static constexpr int32_t kMaxParts = 400 ;
63+ static constexpr int32_t kMaxClusters = GPUCA_MERGER_MAX_TRACK_CLUSTERS;
64+
6565using namespace o2 ::gpu;
66- using namespace o2 ::gpu::internal;
6766using namespace o2 ::tpc;
6867using namespace gputpcgmmergertypes ;
6968
70- static constexpr int32_t kMaxParts = 400 ;
71- static constexpr int32_t kMaxClusters = GPUCA_MERGER_MAX_TRACK_CLUSTERS;
72-
7369namespace o2 ::gpu::internal
7470{
7571struct MergeLooperParam {
@@ -78,8 +74,79 @@ struct MergeLooperParam {
7874 float y;
7975 uint32_t id;
8076};
77+
78+ struct MergeBorderTracks_compMax {
79+ GPUd () bool operator ()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b)
80+ {
81+ return GPUCA_DETERMINISTIC_CODE ((a.fMax != b.fMax ) ? (a.fMax < b.fMax ) : (a.fId < b.fId ), a.fMax < b.fMax );
82+ }
83+ };
84+ struct MergeBorderTracks_compMin {
85+ GPUd () bool operator ()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b)
86+ {
87+ return GPUCA_DETERMINISTIC_CODE ((a.fMin != b.fMin ) ? (a.fMin < b.fMin ) : (a.fId < b.fId ), a.fMin < b.fMin );
88+ }
89+ };
90+
91+ struct GPUTPCGMMergerSortTracks_comp {
92+ const GPUTPCGMMergedTrack* const mCmp ;
93+ GPUhd () GPUTPCGMMergerSortTracks_comp(GPUTPCGMMergedTrack* cmp) : mCmp (cmp) {}
94+ GPUd () bool operator ()(const int32_t aa, const int32_t bb)
95+ {
96+ const GPUTPCGMMergedTrack& GPUrestrict () a = mCmp [aa];
97+ const GPUTPCGMMergedTrack& GPUrestrict () b = mCmp [bb];
98+ if (a.CCE () != b.CCE ()) {
99+ return a.CCE () > b.CCE ();
100+ }
101+ if (a.Legs () != b.Legs ()) {
102+ return a.Legs () > b.Legs ();
103+ }
104+ GPUCA_DETERMINISTIC_CODE ( // clang-format off
105+ if (a.NClusters () != b.NClusters ()) {
106+ return a.NClusters () > b.NClusters ();
107+ } if (CAMath::Abs (a.GetParam ().GetQPt ()) != CAMath::Abs (b.GetParam ().GetQPt ())) {
108+ return CAMath::Abs (a.GetParam ().GetQPt ()) > CAMath::Abs (b.GetParam ().GetQPt ());
109+ } if (a.GetParam ().GetY () != b.GetParam ().GetY ()) {
110+ return a.GetParam ().GetY () > b.GetParam ().GetY ();
111+ }
112+ return aa > bb;
113+ , // !GPUCA_DETERMINISTIC_CODE
114+ return a.NClusters () > b.NClusters ();
115+ ) // clang-format on
116+ }
117+ };
118+
119+ struct GPUTPCGMMergerSortTracksQPt_comp {
120+ const GPUTPCGMMergedTrack* const mCmp ;
121+ GPUhd () GPUTPCGMMergerSortTracksQPt_comp(GPUTPCGMMergedTrack* cmp) : mCmp (cmp) {}
122+ GPUd () bool operator ()(const int32_t aa, const int32_t bb)
123+ {
124+ const GPUTPCGMMergedTrack& GPUrestrict () a = mCmp [aa];
125+ const GPUTPCGMMergedTrack& GPUrestrict () b = mCmp [bb];
126+ GPUCA_DETERMINISTIC_CODE ( // clang-format off
127+ if (CAMath::Abs (a.GetParam ().GetQPt ()) != CAMath::Abs (b.GetParam ().GetQPt ())) {
128+ return CAMath::Abs (a.GetParam ().GetQPt ()) > CAMath::Abs (b.GetParam ().GetQPt ());
129+ } if (a.GetParam ().GetY () != b.GetParam ().GetY ()) {
130+ return a.GetParam ().GetY () > b.GetParam ().GetY ();
131+ }
132+ return a.GetParam ().GetZ () > b.GetParam ().GetZ ();
133+ , // !GPUCA_DETERMINISTIC_CODE
134+ return CAMath::Abs (a.GetParam ().GetQPt ()) > CAMath::Abs (b.GetParam ().GetQPt ());
135+ ) // clang-format on
136+ }
137+ };
138+
139+ struct GPUTPCGMMergerMergeLoopers_comp {
140+ GPUd () bool operator ()(const MergeLooperParam& a, const MergeLooperParam& b)
141+ {
142+ return GPUCA_DETERMINISTIC_CODE (CAMath::Abs (a.refz ) != CAMath::Abs (b.refz ) ? CAMath::Abs (a.refz ) < CAMath::Abs (b.refz ) : a.id < b.id , CAMath::Abs (a.refz ) < CAMath::Abs (b.refz ));
143+ }
144+ };
145+
81146} // namespace o2::gpu::internal
82147
148+ using namespace o2 ::gpu::internal;
149+
83150#ifndef GPUCA_GPUCODE
84151
85152#include " GPUQA.h"
@@ -742,11 +809,11 @@ template <>
742809GPUd () void GPUTPCGMMerger::MergeBorderTracks<3>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUTPCGMBorderRange* range, int32_t N, int32_t cmpMax)
743810{
744811#ifndef GPUCA_SPECIALIZE_THRUST_SORTS
745- if (iThread == 0 ) {
812+ if (iThread == 0 && iBlock == 0 ) {
746813 if (cmpMax) {
747- GPUCommonAlgorithm::sortDeviceDynamic (range, range + N, []( const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return GPUCA_DETERMINISTIC_CODE ((a. fMax != b. fMax ) ? (a. fMax < b. fMax ) : (a. fId < b. fId ), a. fMax < b. fMax ); } );
814+ GPUCommonAlgorithm::sortDeviceDynamic (range, range + N, MergeBorderTracks_compMax () );
748815 } else {
749- GPUCommonAlgorithm::sortDeviceDynamic (range, range + N, []( const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return GPUCA_DETERMINISTIC_CODE ((a. fMin != b. fMin ) ? (a. fMin < b. fMin ) : (a. fId < b. fId ), a. fMin < b. fMin ); } );
816+ GPUCommonAlgorithm::sortDeviceDynamic (range, range + N, MergeBorderTracks_compMin () );
750817 }
751818 }
752819#endif
@@ -1757,60 +1824,18 @@ GPUd() void GPUTPCGMMerger::PrepareClustersForFit0(int32_t nBlocks, int32_t nThr
// SortTracks: when GPUCA_SPECIALIZE_THRUST_SORTS is not defined, the invocation with iThread == 0 and
// iBlock == 0 sorts the range [mTrackOrderProcess, mTrackOrderProcess + mMemory->nMergedTracks) via
// GPUCommonAlgorithm::sortDeviceDynamic, ordered by GPUTPCGMMergerSortTracks_comp(mOutputTracks).
// All other (iBlock, iThread) combinations do nothing; with the macro defined the body is empty.
// nBlocks and nThreads are not read in this function.
// NOTE(review): this span is a diff view - lines marked '-' look like the superseded lambda-based body,
// lines marked '+' the replacement using the named functor; confirm against the checked-in file.
17571824GPUd () void GPUTPCGMMerger::SortTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
17581825{
17591826#ifndef GPUCA_SPECIALIZE_THRUST_SORTS
1760- if (iThread || iBlock) {
1761- return ;
1827+ if (iThread == 0 && iBlock == 0 ) {
1828+ GPUCommonAlgorithm::sortDeviceDynamic ( mTrackOrderProcess , mTrackOrderProcess + mMemory -> nMergedTracks , GPUTPCGMMergerSortTracks_comp ( mOutputTracks )) ;
17621829 }
1763- // TODO: Fix this: Have to duplicate sort comparison: Thrust cannot use the Lambda but OpenCL cannot use the object
1764- auto comp = [cmp = mOutputTracks ](const int32_t aa, const int32_t bb) {
1765- const GPUTPCGMMergedTrack& GPUrestrict () a = cmp[aa];
1766- const GPUTPCGMMergedTrack& GPUrestrict () b = cmp[bb];
1767- if (a.CCE () != b.CCE ()) {
1768- return a.CCE () > b.CCE ();
1769- }
1770- if (a.Legs () != b.Legs ()) {
1771- return a.Legs () > b.Legs ();
1772- }
1773- GPUCA_DETERMINISTIC_CODE ( // clang-format off
1774- if (a.NClusters () != b.NClusters ()) {
1775- return a.NClusters () > b.NClusters ();
1776- } if (CAMath::Abs (a.GetParam ().GetQPt ()) != CAMath::Abs (b.GetParam ().GetQPt ())) {
1777- return CAMath::Abs (a.GetParam ().GetQPt ()) > CAMath::Abs (b.GetParam ().GetQPt ());
1778- } if (a.GetParam ().GetY () != b.GetParam ().GetY ()) {
1779- return a.GetParam ().GetY () > b.GetParam ().GetY ();
1780- }
1781- return aa > bb;
1782- , // !GPUCA_DETERMINISTIC_CODE
1783- return a.NClusters () > b.NClusters ();
1784- ) // clang-format on
1785- };
1786-
1787- GPUCommonAlgorithm::sortDeviceDynamic (mTrackOrderProcess , mTrackOrderProcess + mMemory ->nMergedTracks , comp);
17881830#endif
17891831}
17901832
// SortTracksQPt: when GPUCA_SPECIALIZE_THRUST_SORTS is not defined, the invocation with iThread == 0 and
// iBlock == 0 sorts the range [mTrackSort, mTrackSort + mMemory->nMergedTracks) via
// GPUCommonAlgorithm::sortDeviceDynamic, ordered by GPUTPCGMMergerSortTracksQPt_comp(mOutputTracks).
// All other (iBlock, iThread) combinations do nothing; with the macro defined the body is empty.
// nBlocks and nThreads are not read in this function.
// NOTE(review): this span is a diff view - lines marked '-' look like the superseded lambda-based body,
// lines marked '+' the replacement using the named functor; confirm against the checked-in file.
17911833GPUd () void GPUTPCGMMerger::SortTracksQPt(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
17921834{
17931835#ifndef GPUCA_SPECIALIZE_THRUST_SORTS
1794- if (iThread || iBlock) {
1795- return ;
1836+ if (iThread == 0 && iBlock == 0 ) {
1837+ GPUCommonAlgorithm::sortDeviceDynamic ( mTrackSort , mTrackSort + mMemory -> nMergedTracks , GPUTPCGMMergerSortTracksQPt_comp ( mOutputTracks )) ;
17961838 }
1797- // TODO: Fix this: Have to duplicate sort comparison: Thrust cannot use the Lambda but OpenCL cannot use the object
1798- auto comp = [cmp = mOutputTracks ](const int32_t aa, const int32_t bb) {
1799- const GPUTPCGMMergedTrack& GPUrestrict () a = cmp[aa];
1800- const GPUTPCGMMergedTrack& GPUrestrict () b = cmp[bb];
1801- GPUCA_DETERMINISTIC_CODE ( // clang-format off
1802- if (CAMath::Abs (a.GetParam ().GetQPt ()) != CAMath::Abs (b.GetParam ().GetQPt ())) {
1803- return CAMath::Abs (a.GetParam ().GetQPt ()) > CAMath::Abs (b.GetParam ().GetQPt ());
1804- } if (a.GetParam ().GetY () != b.GetParam ().GetY ()) {
1805- return a.GetParam ().GetY () > b.GetParam ().GetY ();
1806- }
1807- return a.GetParam ().GetZ () > b.GetParam ().GetZ ();
1808- , // !GPUCA_DETERMINISTIC_CODE
1809- return CAMath::Abs (a.GetParam ().GetQPt ()) > CAMath::Abs (b.GetParam ().GetQPt ());
1810- ) // clang-format on
1811- };
1812-
1813- GPUCommonAlgorithm::sortDeviceDynamic (mTrackSort , mTrackSort + mMemory ->nMergedTracks , comp);
18141839#endif
18151840}
18161841
@@ -1945,11 +1970,9 @@ GPUd() void GPUTPCGMMerger::MergeLoopersInit(int32_t nBlocks, int32_t nThreads,
// MergeLoopersSort: when GPUCA_SPECIALIZE_THRUST_SORTS is not defined, the invocation with iThread == 0
// and iBlock == 0 sorts the range [mLooperCandidates, mLooperCandidates + mMemory->nLooperMatchCandidates)
// via GPUCommonAlgorithm::sortDeviceDynamic, ordered by GPUTPCGMMergerMergeLoopers_comp().
// All other (iBlock, iThread) combinations do nothing; with the macro defined the body is empty.
// nBlocks and nThreads are not read in this function.
// NOTE(review): this span is a diff view - lines marked '-' look like the superseded lambda-based body,
// lines marked '+' the replacement using the named functor; confirm against the checked-in file.
19451970GPUd () void GPUTPCGMMerger::MergeLoopersSort(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
19461971{
19471972#ifndef GPUCA_SPECIALIZE_THRUST_SORTS
1948- if (iThread || iBlock) {
1949- return ;
1973+ if (iThread == 0 && iBlock == 0 ) {
1974+ GPUCommonAlgorithm::sortDeviceDynamic ( mLooperCandidates , mLooperCandidates + mMemory -> nLooperMatchCandidates , GPUTPCGMMergerMergeLoopers_comp ()) ;
19501975 }
1951- auto comp = [](const MergeLooperParam& a, const MergeLooperParam& b) { return GPUCA_DETERMINISTIC_CODE (CAMath::Abs (a.refz ) != CAMath::Abs (b.refz ) ? CAMath::Abs (a.refz ) < CAMath::Abs (b.refz ) : a.id < b.id , CAMath::Abs (a.refz ) < CAMath::Abs (b.refz )); };
1952- GPUCommonAlgorithm::sortDeviceDynamic (mLooperCandidates , mLooperCandidates + mMemory ->nLooperMatchCandidates , comp);
19531976#endif
19541977}
19551978
0 commit comments