@@ -736,46 +736,15 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks<1>(int32_t nBlocks, int32_t nThrea
 #endif
 }
 
-#if defined(GPUCA_SPECIALIZE_THRUST_SORTS) && !defined(GPUCA_GPUCODE_COMPILEKERNELS) // Specialize MergeBorderTracks<3>
-namespace o2::gpu::internal
-{
-namespace // anonymous
-{
-struct MergeBorderTracks_compMax {
-  GPUd() bool operator()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b)
-  {
-    return GPUCA_DETERMINISTIC_CODE((a.fMax != b.fMax) ? (a.fMax < b.fMax) : (a.fId < b.fId), a.fMax < b.fMax);
-  }
-};
-struct MergeBorderTracks_compMin {
-  GPUd() bool operator()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b)
-  {
-    return GPUCA_DETERMINISTIC_CODE((a.fMin != b.fMin) ? (a.fMin < b.fMin) : (a.fId < b.fId), a.fMin < b.fMin);
-  }
-};
-} // anonymous namespace
-} // namespace o2::gpu::internal
-
-template <>
-inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal<GPUTPCGMMergerMergeBorders, 3>(const krnlSetupTime& _xyz, GPUTPCGMBorderRange* const& range, int32_t const& N, int32_t const& cmpMax)
-{
-  if (cmpMax) {
-    GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, range, N, MergeBorderTracks_compMax());
-  } else {
-    GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, range, N, MergeBorderTracks_compMin());
-  }
-}
-#endif // GPUCA_SPECIALIZE_THRUST_SORTS - Specialize MergeBorderTracks<3>
-
 template <>
 GPUd() void GPUTPCGMMerger::MergeBorderTracks<3>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUTPCGMBorderRange* range, int32_t N, int32_t cmpMax)
 {
 #ifndef GPUCA_SPECIALIZE_THRUST_SORTS
   if (iThread == 0) {
     if (cmpMax) {
-      GPUCommonAlgorithm::sortDeviceDynamic(range, range + N, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return a.fMax < b.fMax; });
+      GPUCommonAlgorithm::sortDeviceDynamic(range, range + N, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return GPUCA_DETERMINISTIC_CODE((a.fMax != b.fMax) ? (a.fMax < b.fMax) : (a.fId < b.fId), a.fMax < b.fMax); });
     } else {
-      GPUCommonAlgorithm::sortDeviceDynamic(range, range + N, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return a.fMin < b.fMin; });
+      GPUCommonAlgorithm::sortDeviceDynamic(range, range + N, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return GPUCA_DETERMINISTIC_CODE((a.fMin != b.fMin) ? (a.fMin < b.fMin) : (a.fId < b.fId), a.fMin < b.fMin); });
     }
   }
 #endif
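
The hunk above moves the deterministic tie-break (on fId) from the removed host-side Thrust specialization into the device-side sortDeviceDynamic lambdas. The following is a minimal stand-alone sketch of that comparator pattern in plain C++, assuming GPUCA_DETERMINISTIC_CODE(det, fast) selects the first expression in deterministic builds and the second otherwise; BorderRange is a placeholder mirroring the fMin/fMax/fId fields of GPUTPCGMBorderRange, not the O2 type itself.

// Illustrative sketch only; type names are placeholders, not the O2 build.
#include <algorithm>
#include <cstdint>
#include <vector>

struct BorderRange { // stand-in for GPUTPCGMBorderRange
  int32_t fId;
  float fMin;
  float fMax;
};

// Deterministic flavor: ties on fMax are broken by fId, so the sorted order
// does not depend on the initial element order handed to the sort.
inline bool compMaxDeterministic(const BorderRange& a, const BorderRange& b)
{
  return (a.fMax != b.fMax) ? (a.fMax < b.fMax) : (a.fId < b.fId);
}

// Fast flavor: only fMax is compared, so equal keys may land in any order.
inline bool compMaxFast(const BorderRange& a, const BorderRange& b)
{
  return a.fMax < b.fMax;
}

int main()
{
  std::vector<BorderRange> ranges = {{2, 0.f, 1.5f}, {1, 0.f, 1.5f}, {3, 0.f, 0.5f}};
  std::sort(ranges.begin(), ranges.end(), compMaxDeterministic); // reproducible order: id 3, 1, 2
}
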
@@ -1783,74 +1752,6 @@ GPUd() void GPUTPCGMMerger::PrepareClustersForFit0(int32_t nBlocks, int32_t nThr
   }
 }
 
-#if defined(GPUCA_SPECIALIZE_THRUST_SORTS) && !defined(GPUCA_GPUCODE_COMPILEKERNELS) // Specialize GPUTPCGMMergerSortTracks and GPUTPCGMMergerSortTracksQPt
-namespace o2::gpu::internal
-{
-namespace // anonymous
-{
-struct GPUTPCGMMergerSortTracks_comp {
-  const GPUTPCGMMergedTrack* const mCmp;
-  GPUhd() GPUTPCGMMergerSortTracks_comp(GPUTPCGMMergedTrack* cmp) : mCmp(cmp) {}
-  GPUd() bool operator()(const int32_t aa, const int32_t bb)
-  {
-    const GPUTPCGMMergedTrack& GPUrestrict() a = mCmp[aa];
-    const GPUTPCGMMergedTrack& GPUrestrict() b = mCmp[bb];
-    if (a.CCE() != b.CCE()) {
-      return a.CCE() > b.CCE();
-    }
-    if (a.Legs() != b.Legs()) {
-      return a.Legs() > b.Legs();
-    }
-    GPUCA_DETERMINISTIC_CODE( // clang-format off
-      if (a.NClusters() != b.NClusters()) {
-        return a.NClusters() > b.NClusters();
-      } if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) {
-        return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt());
-      } if (a.GetParam().GetY() != b.GetParam().GetY()) {
-        return a.GetParam().GetY() > b.GetParam().GetY();
-      }
-      return aa > bb;
-    , // !GPUCA_DETERMINISTIC_CODE
-      return a.NClusters() > b.NClusters();
-    ) // clang-format on
-  }
-};
-
-struct GPUTPCGMMergerSortTracksQPt_comp {
-  const GPUTPCGMMergedTrack* const mCmp;
-  GPUhd() GPUTPCGMMergerSortTracksQPt_comp(GPUTPCGMMergedTrack* cmp) : mCmp(cmp) {}
-  GPUd() bool operator()(const int32_t aa, const int32_t bb)
-  {
-    const GPUTPCGMMergedTrack& GPUrestrict() a = mCmp[aa];
-    const GPUTPCGMMergedTrack& GPUrestrict() b = mCmp[bb];
-    GPUCA_DETERMINISTIC_CODE( // clang-format off
-      if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) {
-        return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt());
-      } if (a.GetParam().GetY() != b.GetParam().GetY()) {
-        return a.GetParam().GetY() > b.GetParam().GetY();
-      }
-      return a.GetParam().GetZ() > b.GetParam().GetZ();
-    , // !GPUCA_DETERMINISTIC_CODE
-      return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt());
-    ) // clang-format on
-  }
-};
-} // anonymous namespace
-} // namespace o2::gpu::internal
-
-template <>
-inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal<GPUTPCGMMergerSortTracks, 0>(const krnlSetupTime& _xyz)
-{
-  GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackOrderProcess(), processors()->tpcMerger.NOutputTracks(), GPUTPCGMMergerSortTracks_comp(mProcessorsShadow->tpcMerger.OutputTracks()));
-}
-
-template <>
-inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal<GPUTPCGMMergerSortTracksQPt, 0>(const krnlSetupTime& _xyz)
-{
-  GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackSort(), processors()->tpcMerger.NOutputTracks(), GPUTPCGMMergerSortTracksQPt_comp(mProcessorsShadow->tpcMerger.OutputTracks()));
-}
-#endif // GPUCA_SPECIALIZE_THRUST_SORTS - Specialize GPUTPCGMMergerSortTracks and GPUTPCGMMergerSortTracksQPt
-
 GPUd() void GPUTPCGMMerger::SortTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
 {
 #ifndef GPUCA_SPECIALIZE_THRUST_SORTS
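
The removed comparators in this hunk sort an index array by properties of the tracks it points into, holding the track pointer as a member. Below is a minimal plain-C++ sketch of that index-sort pattern, following the "deterministic" branch of the removed GPUTPCGMMergerSortTracksQPt_comp (descending |qPt| with y and z as tie-breakers); Track and its fields are placeholders, not O2 types.

// Illustrative sketch only; Track, qPt, y, z are placeholders.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <numeric>
#include <vector>

struct Track { // placeholder with just the fields needed for the ordering
  float qPt;
  float y;
  float z;
};

int main()
{
  std::vector<Track> tracks = {{0.2f, 1.f, 3.f}, {-0.9f, 2.f, 1.f}, {0.2f, 0.f, 2.f}};
  std::vector<int32_t> order(tracks.size());
  std::iota(order.begin(), order.end(), 0); // sort indices, not the tracks themselves

  // The comparator captures the track array and orders indices by descending
  // |qPt|, breaking ties on y and then z so the result is reproducible
  // (the fast, non-deterministic branch would compare |qPt| only).
  const Track* cmp = tracks.data();
  std::sort(order.begin(), order.end(), [cmp](int32_t aa, int32_t bb) {
    const Track& a = cmp[aa];
    const Track& b = cmp[bb];
    if (std::fabs(a.qPt) != std::fabs(b.qPt)) {
      return std::fabs(a.qPt) > std::fabs(b.qPt);
    }
    if (a.y != b.y) {
      return a.y > b.y;
    }
    return a.z > b.z;
  });
}
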
@@ -2050,27 +1951,6 @@ GPUd() void GPUTPCGMMerger::MergeLoopersSort(int32_t nBlocks, int32_t nThreads,
 #endif
 }
 
-#if defined(GPUCA_SPECIALIZE_THRUST_SORTS) && !defined(GPUCA_GPUCODE_COMPILEKERNELS) // Specialize GPUTPCGMMergerSortTracks and GPUTPCGMMergerSortTracksQPt
-namespace o2::gpu::internal
-{
-namespace // anonymous
-{
-struct GPUTPCGMMergerMergeLoopers_comp {
-  GPUd() bool operator()(const MergeLooperParam& a, const MergeLooperParam& b)
-  {
-    return CAMath::Abs(a.refz) < CAMath::Abs(b.refz);
-  }
-};
-} // anonymous namespace
-} // namespace o2::gpu::internal
-
-template <>
-inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal<GPUTPCGMMergerMergeLoopers, 1>(const krnlSetupTime& _xyz)
-{
-  GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.LooperCandidates(), processors()->tpcMerger.Memory()->nLooperMatchCandidates, GPUTPCGMMergerMergeLoopers_comp());
-}
-#endif // GPUCA_SPECIALIZE_THRUST_SORTS - Specialize GPUTPCGMMergerSortTracks and GPUTPCGMMergerSortTracksQPt
-
 GPUd() void GPUTPCGMMerger::MergeLoopersMain(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
 {
   const MergeLooperParam* params = mLooperCandidates;