Skip to content

Commit fe6001f

Browse files
committed
GPU: Move kernel specializations to dedicated file
1 parent da0cef9 commit fe6001f

File tree

7 files changed

+164
-150
lines changed

7 files changed

+164
-150
lines changed

GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,16 +22,12 @@ using namespace o2::gpu;
2222

2323
#include "GPUReconstructionIncludesDeviceAll.h"
2424

25+
#include "GPUReconstructionCUDAKernelsSpecialize.inc"
26+
2527
#if defined(__HIPCC__) && defined(GPUCA_HAS_GLOBAL_SYMBOL_CONSTANT_MEM)
2628
__global__ void gGPUConstantMemBuffer_dummy(int32_t* p) { *p = *(int32_t*)&gGPUConstantMemBuffer; }
2729
#endif
2830

29-
template <>
30-
inline void GPUReconstructionCUDA::runKernelBackendInternal<GPUMemClean16, 0>(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size)
31-
{
32-
GPUChkErr(cudaMemsetAsync(ptr, 0, size, mInternals->Streams[_xyz.x.stream]));
33-
}
34-
3531
template <class T, int32_t I, typename... Args>
3632
inline void GPUReconstructionCUDA::runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... args)
3733
{
Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2+
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3+
// All rights not expressly granted are reserved.
4+
//
5+
// This software is distributed under the terms of the GNU General Public
6+
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7+
//
8+
// In applying this license CERN does not waive the privileges and immunities
9+
// granted to it by virtue of its status as an Intergovernmental Organization
10+
// or submit itself to any jurisdiction.
11+
12+
/// \file GPUReconstructionCUDAKernelsSpecialize.inc
13+
/// \author David Rohr
14+
15+
#if defined(GPUCA_SPECIALIZE_THRUST_SORTS) && !defined(GPUCA_GPUCODE_COMPILEKERNELS)
16+
17+
namespace o2::gpu::internal
18+
{
19+
namespace // anonymous
20+
{
21+
struct MergeBorderTracks_compMax {
22+
GPUd() bool operator()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b)
23+
{
24+
return GPUCA_DETERMINISTIC_CODE((a.fMax != b.fMax) ? (a.fMax < b.fMax) : (a.fId < b.fId), a.fMax < b.fMax);
25+
}
26+
};
27+
struct MergeBorderTracks_compMin {
28+
GPUd() bool operator()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b)
29+
{
30+
return GPUCA_DETERMINISTIC_CODE((a.fMin != b.fMin) ? (a.fMin < b.fMin) : (a.fId < b.fId), a.fMin < b.fMin);
31+
}
32+
};
33+
34+
struct GPUTPCGMMergerSortTracks_comp {
35+
const GPUTPCGMMergedTrack* const mCmp;
36+
GPUhd() GPUTPCGMMergerSortTracks_comp(GPUTPCGMMergedTrack* cmp) : mCmp(cmp) {}
37+
GPUd() bool operator()(const int32_t aa, const int32_t bb)
38+
{
39+
const GPUTPCGMMergedTrack& GPUrestrict() a = mCmp[aa];
40+
const GPUTPCGMMergedTrack& GPUrestrict() b = mCmp[bb];
41+
if (a.CCE() != b.CCE()) {
42+
return a.CCE() > b.CCE();
43+
}
44+
if (a.Legs() != b.Legs()) {
45+
return a.Legs() > b.Legs();
46+
}
47+
GPUCA_DETERMINISTIC_CODE( // clang-format off
48+
if (a.NClusters() != b.NClusters()) {
49+
return a.NClusters() > b.NClusters();
50+
} if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) {
51+
return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt());
52+
} if (a.GetParam().GetY() != b.GetParam().GetY()) {
53+
return a.GetParam().GetY() > b.GetParam().GetY();
54+
}
55+
return aa > bb;
56+
, // !GPUCA_DETERMINISTIC_CODE
57+
return a.NClusters() > b.NClusters();
58+
) // clang-format on
59+
}
60+
};
61+
62+
struct GPUTPCGMMergerSortTracksQPt_comp {
63+
const GPUTPCGMMergedTrack* const mCmp;
64+
GPUhd() GPUTPCGMMergerSortTracksQPt_comp(GPUTPCGMMergedTrack* cmp) : mCmp(cmp) {}
65+
GPUd() bool operator()(const int32_t aa, const int32_t bb)
66+
{
67+
const GPUTPCGMMergedTrack& GPUrestrict() a = mCmp[aa];
68+
const GPUTPCGMMergedTrack& GPUrestrict() b = mCmp[bb];
69+
GPUCA_DETERMINISTIC_CODE( // clang-format off
70+
if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) {
71+
return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt());
72+
} if (a.GetParam().GetY() != b.GetParam().GetY()) {
73+
return a.GetParam().GetY() > b.GetParam().GetY();
74+
}
75+
return a.GetParam().GetZ() > b.GetParam().GetZ();
76+
, // !GPUCA_DETERMINISTIC_CODE
77+
return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt());
78+
) // clang-format on
79+
}
80+
};
81+
82+
struct GPUTPCGMMergerMergeLoopers_comp {
83+
GPUd() bool operator()(const MergeLooperParam& a, const MergeLooperParam& b)
84+
{
85+
return CAMath::Abs(a.refz) < CAMath::Abs(b.refz);
86+
}
87+
};
88+
89+
struct GPUTPCGMO2OutputSort_comp {
90+
GPUd() bool operator()(const GPUTPCGMMerger::tmpSort& a, const GPUTPCGMMerger::tmpSort& b)
91+
{
92+
return (a.y > b.y);
93+
}
94+
};
95+
96+
} // anonymous namespace
97+
} // namespace o2::gpu::internal
98+
99+
template <>
100+
inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal<GPUTPCGMMergerMergeBorders, 3>(const krnlSetupTime& _xyz, GPUTPCGMBorderRange* const& range, int32_t const& N, int32_t const& cmpMax)
101+
{
102+
if (cmpMax) {
103+
GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, range, N, MergeBorderTracks_compMax());
104+
} else {
105+
GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, range, N, MergeBorderTracks_compMin());
106+
}
107+
}
108+
109+
template <>
110+
inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal<GPUTPCGMMergerSortTracks, 0>(const krnlSetupTime& _xyz)
111+
{
112+
GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackOrderProcess(), processors()->tpcMerger.NOutputTracks(), GPUTPCGMMergerSortTracks_comp(mProcessorsShadow->tpcMerger.OutputTracks()));
113+
}
114+
115+
template <>
116+
inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal<GPUTPCGMMergerSortTracksQPt, 0>(const krnlSetupTime& _xyz)
117+
{
118+
GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackSort(), processors()->tpcMerger.NOutputTracks(), GPUTPCGMMergerSortTracksQPt_comp(mProcessorsShadow->tpcMerger.OutputTracks()));
119+
}
120+
121+
template <>
122+
inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal<GPUTPCGMMergerMergeLoopers, 1>(const krnlSetupTime& _xyz)
123+
{
124+
GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.LooperCandidates(), processors()->tpcMerger.Memory()->nLooperMatchCandidates, GPUTPCGMMergerMergeLoopers_comp());
125+
}
126+
127+
template <>
128+
inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal<GPUTPCGMO2Output, GPUTPCGMO2Output::sort>(const krnlSetupTime& _xyz)
129+
{
130+
GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackSortO2(), processors()->tpcMerger.NOutputTracksTPCO2(), GPUTPCGMO2OutputSort_comp());
131+
}
132+
#endif // GPUCA_SPECIALIZE_THRUST_SORTS
133+
134+
template <>
135+
inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal<GPUMemClean16, 0>(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size)
136+
{
137+
GPUChkErr(cudaMemsetAsync(ptr, 0, size, mInternals->Streams[_xyz.x.stream]));
138+
}

GPU/GPUTracking/Base/hip/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ message(STATUS "Building GPUTracking with HIP support ${TMP_TARGET}")
2424
if(NOT DEFINED GPUCA_HIP_HIPIFY_FROM_CUDA OR "${GPUCA_HIP_HIPIFY_FROM_CUDA}")
2525
set(GPUCA_HIP_SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/hipify)
2626
file(MAKE_DIRECTORY ${GPUCA_HIP_SOURCE_DIR})
27-
set(GPUCA_HIP_FILE_LIST GPUReconstructionCUDA.cu GPUReconstructionCUDAExternalProvider.cu GPUReconstructionCUDA.h GPUReconstructionCUDAInternals.h GPUReconstructionCUDAHelpers.inc GPUReconstructionCUDAkernel.template.cu GPUReconstructionCUDADef.h GPUReconstructionCUDAGenRTC.cxx GPUReconstructionCUDAKernels.cu GPUReconstructionCUDArtc.cu GPUReconstructionCUDARTCCalls.cu)
27+
set(GPUCA_HIP_FILE_LIST GPUReconstructionCUDA.cu GPUReconstructionCUDAExternalProvider.cu GPUReconstructionCUDA.h GPUReconstructionCUDAInternals.h GPUReconstructionCUDAHelpers.inc GPUReconstructionCUDAkernel.template.cu GPUReconstructionCUDADef.h GPUReconstructionCUDAGenRTC.cxx GPUReconstructionCUDAKernels.cu GPUReconstructionCUDAKernelsSpecialize.inc GPUReconstructionCUDArtc.cu GPUReconstructionCUDARTCCalls.cu)
2828
set(GPUCA_HIP_LOCAL_FILE_LIST GPUReconstructionHIPIncludesSystem.h)
2929
set(HIP_SOURCES "")
3030
foreach(file ${GPUCA_HIP_FILE_LIST})

GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,7 @@
1515
#include "GPUReconstructionOCLIncludesHost.h"
1616
#include "GPUReconstructionKernelIncludes.h"
1717

18-
template <>
19-
inline void GPUReconstructionOCL::runKernelBackendInternal<GPUMemClean16, 0>(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size)
20-
{
21-
cl_int4 val0 = {0, 0, 0, 0};
22-
GPUChkErr(clEnqueueFillBuffer(mInternals->command_queue[_xyz.x.stream], mInternals->mem_gpu, &val0, sizeof(val0), (char*)ptr - (char*)mDeviceMemoryBase, (size + sizeof(val0) - 1) & ~(sizeof(val0) - 1), _xyz.z.evList == nullptr ? 0 : _xyz.z.nEvents, _xyz.z.evList->getEventList<cl_event>(), _xyz.z.ev->getEventList<cl_event>()));
23-
}
18+
#include "GPUReconstructionOCLKernelsSpecialize.inc"
2419

2520
template <class T, int32_t I, typename... Args>
2621
inline void GPUReconstructionOCL::runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... args)
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2+
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3+
// All rights not expressly granted are reserved.
4+
//
5+
// This software is distributed under the terms of the GNU General Public
6+
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7+
//
8+
// In applying this license CERN does not waive the privileges and immunities
9+
// granted to it by virtue of its status as an Intergovernmental Organization
10+
// or submit itself to any jurisdiction.
11+
12+
/// \file GPUReconstructionOCLKernelsSpecialize.inc
13+
/// \author David Rohr
14+
15+
template <>
16+
inline void GPUReconstructionOCL::runKernelBackendInternal<GPUMemClean16, 0>(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size)
17+
{
18+
cl_int4 val0 = {0, 0, 0, 0};
19+
GPUChkErr(clEnqueueFillBuffer(mInternals->command_queue[_xyz.x.stream], mInternals->mem_gpu, &val0, sizeof(val0), (char*)ptr - (char*)mDeviceMemoryBase, (size + sizeof(val0) - 1) & ~(sizeof(val0) - 1), _xyz.z.evList == nullptr ? 0 : _xyz.z.nEvents, _xyz.z.evList->getEventList<cl_event>(), _xyz.z.ev->getEventList<cl_event>()));
20+
}

GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx

Lines changed: 2 additions & 122 deletions
Original file line numberDiff line numberDiff line change
@@ -736,46 +736,15 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks<1>(int32_t nBlocks, int32_t nThrea
736736
#endif
737737
}
738738

739-
#if defined(GPUCA_SPECIALIZE_THRUST_SORTS) && !defined(GPUCA_GPUCODE_COMPILEKERNELS) // Specialize MergeBorderTracks<3>
740-
namespace o2::gpu::internal
741-
{
742-
namespace // anonymous
743-
{
744-
struct MergeBorderTracks_compMax {
745-
GPUd() bool operator()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b)
746-
{
747-
return GPUCA_DETERMINISTIC_CODE((a.fMax != b.fMax) ? (a.fMax < b.fMax) : (a.fId < b.fId), a.fMax < b.fMax);
748-
}
749-
};
750-
struct MergeBorderTracks_compMin {
751-
GPUd() bool operator()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b)
752-
{
753-
return GPUCA_DETERMINISTIC_CODE((a.fMin != b.fMin) ? (a.fMin < b.fMin) : (a.fId < b.fId), a.fMin < b.fMin);
754-
}
755-
};
756-
} // anonymous namespace
757-
} // namespace o2::gpu::internal
758-
759-
template <>
760-
inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal<GPUTPCGMMergerMergeBorders, 3>(const krnlSetupTime& _xyz, GPUTPCGMBorderRange* const& range, int32_t const& N, int32_t const& cmpMax)
761-
{
762-
if (cmpMax) {
763-
GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, range, N, MergeBorderTracks_compMax());
764-
} else {
765-
GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, range, N, MergeBorderTracks_compMin());
766-
}
767-
}
768-
#endif // GPUCA_SPECIALIZE_THRUST_SORTS - Specialize MergeBorderTracks<3>
769-
770739
template <>
771740
GPUd() void GPUTPCGMMerger::MergeBorderTracks<3>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUTPCGMBorderRange* range, int32_t N, int32_t cmpMax)
772741
{
773742
#ifndef GPUCA_SPECIALIZE_THRUST_SORTS
774743
if (iThread == 0) {
775744
if (cmpMax) {
776-
GPUCommonAlgorithm::sortDeviceDynamic(range, range + N, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return a.fMax < b.fMax; });
745+
GPUCommonAlgorithm::sortDeviceDynamic(range, range + N, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return GPUCA_DETERMINISTIC_CODE((a.fMax != b.fMax) ? (a.fMax < b.fMax) : (a.fId < b.fId), a.fMax < b.fMax); });
777746
} else {
778-
GPUCommonAlgorithm::sortDeviceDynamic(range, range + N, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return a.fMin < b.fMin; });
747+
GPUCommonAlgorithm::sortDeviceDynamic(range, range + N, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return GPUCA_DETERMINISTIC_CODE((a.fMin != b.fMin) ? (a.fMin < b.fMin) : (a.fId < b.fId), a.fMin < b.fMin); });
779748
}
780749
}
781750
#endif
@@ -1783,74 +1752,6 @@ GPUd() void GPUTPCGMMerger::PrepareClustersForFit0(int32_t nBlocks, int32_t nThr
17831752
}
17841753
}
17851754

1786-
#if defined(GPUCA_SPECIALIZE_THRUST_SORTS) && !defined(GPUCA_GPUCODE_COMPILEKERNELS) // Specialize GPUTPCGMMergerSortTracks and GPUTPCGMMergerSortTracksQPt
1787-
namespace o2::gpu::internal
1788-
{
1789-
namespace // anonymous
1790-
{
1791-
struct GPUTPCGMMergerSortTracks_comp {
1792-
const GPUTPCGMMergedTrack* const mCmp;
1793-
GPUhd() GPUTPCGMMergerSortTracks_comp(GPUTPCGMMergedTrack* cmp) : mCmp(cmp) {}
1794-
GPUd() bool operator()(const int32_t aa, const int32_t bb)
1795-
{
1796-
const GPUTPCGMMergedTrack& GPUrestrict() a = mCmp[aa];
1797-
const GPUTPCGMMergedTrack& GPUrestrict() b = mCmp[bb];
1798-
if (a.CCE() != b.CCE()) {
1799-
return a.CCE() > b.CCE();
1800-
}
1801-
if (a.Legs() != b.Legs()) {
1802-
return a.Legs() > b.Legs();
1803-
}
1804-
GPUCA_DETERMINISTIC_CODE( // clang-format off
1805-
if (a.NClusters() != b.NClusters()) {
1806-
return a.NClusters() > b.NClusters();
1807-
} if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) {
1808-
return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt());
1809-
} if (a.GetParam().GetY() != b.GetParam().GetY()) {
1810-
return a.GetParam().GetY() > b.GetParam().GetY();
1811-
}
1812-
return aa > bb;
1813-
, // !GPUCA_DETERMINISTIC_CODE
1814-
return a.NClusters() > b.NClusters();
1815-
) // clang-format on
1816-
}
1817-
};
1818-
1819-
struct GPUTPCGMMergerSortTracksQPt_comp {
1820-
const GPUTPCGMMergedTrack* const mCmp;
1821-
GPUhd() GPUTPCGMMergerSortTracksQPt_comp(GPUTPCGMMergedTrack* cmp) : mCmp(cmp) {}
1822-
GPUd() bool operator()(const int32_t aa, const int32_t bb)
1823-
{
1824-
const GPUTPCGMMergedTrack& GPUrestrict() a = mCmp[aa];
1825-
const GPUTPCGMMergedTrack& GPUrestrict() b = mCmp[bb];
1826-
GPUCA_DETERMINISTIC_CODE( // clang-format off
1827-
if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) {
1828-
return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt());
1829-
} if (a.GetParam().GetY() != b.GetParam().GetY()) {
1830-
return a.GetParam().GetY() > b.GetParam().GetY();
1831-
}
1832-
return a.GetParam().GetZ() > b.GetParam().GetZ();
1833-
, // !GPUCA_DETERMINISTIC_CODE
1834-
return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt());
1835-
) // clang-format on
1836-
}
1837-
};
1838-
} // anonymous namespace
1839-
} // namespace o2::gpu::internal
1840-
1841-
template <>
1842-
inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal<GPUTPCGMMergerSortTracks, 0>(const krnlSetupTime& _xyz)
1843-
{
1844-
GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackOrderProcess(), processors()->tpcMerger.NOutputTracks(), GPUTPCGMMergerSortTracks_comp(mProcessorsShadow->tpcMerger.OutputTracks()));
1845-
}
1846-
1847-
template <>
1848-
inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal<GPUTPCGMMergerSortTracksQPt, 0>(const krnlSetupTime& _xyz)
1849-
{
1850-
GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackSort(), processors()->tpcMerger.NOutputTracks(), GPUTPCGMMergerSortTracksQPt_comp(mProcessorsShadow->tpcMerger.OutputTracks()));
1851-
}
1852-
#endif // GPUCA_SPECIALIZE_THRUST_SORTS - Specialize GPUTPCGMMergerSortTracks and GPUTPCGMMergerSortTracksQPt
1853-
18541755
GPUd() void GPUTPCGMMerger::SortTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
18551756
{
18561757
#ifndef GPUCA_SPECIALIZE_THRUST_SORTS
@@ -2050,27 +1951,6 @@ GPUd() void GPUTPCGMMerger::MergeLoopersSort(int32_t nBlocks, int32_t nThreads,
20501951
#endif
20511952
}
20521953

2053-
#if defined(GPUCA_SPECIALIZE_THRUST_SORTS) && !defined(GPUCA_GPUCODE_COMPILEKERNELS) // Specialize GPUTPCGMMergerSortTracks and GPUTPCGMMergerSortTracksQPt
2054-
namespace o2::gpu::internal
2055-
{
2056-
namespace // anonymous
2057-
{
2058-
struct GPUTPCGMMergerMergeLoopers_comp {
2059-
GPUd() bool operator()(const MergeLooperParam& a, const MergeLooperParam& b)
2060-
{
2061-
return CAMath::Abs(a.refz) < CAMath::Abs(b.refz);
2062-
}
2063-
};
2064-
} // anonymous namespace
2065-
} // namespace o2::gpu::internal
2066-
2067-
template <>
2068-
inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal<GPUTPCGMMergerMergeLoopers, 1>(const krnlSetupTime& _xyz)
2069-
{
2070-
GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.LooperCandidates(), processors()->tpcMerger.Memory()->nLooperMatchCandidates, GPUTPCGMMergerMergeLoopers_comp());
2071-
}
2072-
#endif // GPUCA_SPECIALIZE_THRUST_SORTS - Specialize GPUTPCGMMergerSortTracks and GPUTPCGMMergerSortTracksQPt
2073-
20741954
GPUd() void GPUTPCGMMerger::MergeLoopersMain(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
20751955
{
20761956
const MergeLooperParam* params = mLooperCandidates;

GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -93,21 +93,6 @@ GPUdii() void GPUTPCGMO2Output::Thread<GPUTPCGMO2Output::sort>(int32_t nBlocks,
9393
#endif
9494
}
9595

96-
#if defined(GPUCA_SPECIALIZE_THRUST_SORTS) && !defined(GPUCA_GPUCODE_COMPILEKERNELS) // Specialize GPUTPCGMO2Output::Thread<GPUTPCGMO2Output::sort>
97-
struct GPUTPCGMO2OutputSort_comp {
98-
GPUd() bool operator()(const GPUTPCGMMerger::tmpSort& a, const GPUTPCGMMerger::tmpSort& b)
99-
{
100-
return (a.y > b.y);
101-
}
102-
};
103-
104-
template <>
105-
inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal<GPUTPCGMO2Output, GPUTPCGMO2Output::sort>(const krnlSetupTime& _xyz)
106-
{
107-
GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackSortO2(), processors()->tpcMerger.NOutputTracksTPCO2(), GPUTPCGMO2OutputSort_comp());
108-
}
109-
#endif // GPUCA_SPECIALIZE_THRUST_SORTS - Specialize GPUTPCGMO2Output::Thread<GPUTPCGMO2Output::sort>
110-
11196
template <>
11297
GPUdii() void GPUTPCGMO2Output::Thread<GPUTPCGMO2Output::output>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger)
11398
{

0 commit comments

Comments
 (0)