Skip to content

Commit c2aa2f5

Browse files
committed
GPU: Remove hack to disable synchronization in thrust::sort, which was not working any more
1 parent a31999e commit c2aa2f5

File tree

3 files changed

+7
-27
lines changed

3 files changed

+7
-27
lines changed

GPU/GPUTracking/Base/cuda/CUDAThrustHelpers.h

Lines changed: 2 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,12 @@
2222
namespace o2::gpu
2323
{
2424

25-
class ThrustVolatileAsyncAllocator
25+
class ThrustVolatileAllocator
2626
{
2727
public:
2828
typedef char value_type;
2929

30-
ThrustVolatileAsyncAllocator(GPUReconstruction* r) : mRec(r) {}
30+
ThrustVolatileAllocator(GPUReconstruction* r) : mRec(r) {}
3131
char* allocate(std::ptrdiff_t n) { return (char*)mRec->AllocateVolatileDeviceMemory(n); }
3232

3333
void deallocate(char* ptr, size_t) {}
@@ -38,24 +38,4 @@ class ThrustVolatileAsyncAllocator
3838

3939
} // namespace o2::gpu
4040

41-
#ifndef __HIPCC__
42-
// Override synchronize call at end of thrust algorithm running on stream, just don't run cudaStreamSynchronize
43-
namespace thrust::cuda_cub
44-
{
45-
46-
typedef thrust::cuda_cub::execution_policy<typeof(thrust::cuda::par(*(o2::gpu::ThrustVolatileAsyncAllocator*)nullptr).on(*(cudaStream_t*)nullptr))> thrustStreamPolicy;
47-
template <>
48-
__host__ __device__ inline cudaError_t synchronize<thrustStreamPolicy>(thrustStreamPolicy& policy)
49-
{
50-
#ifndef GPUCA_GPUCODE_DEVICE
51-
// Do not synchronize!
52-
return cudaSuccess;
53-
#else
54-
return synchronize_stream(derived_cast(policy));
55-
#endif
56-
}
57-
58-
} // namespace thrust::cuda_cub
59-
#endif // __HIPCC__
60-
6141
#endif // GPU_CUDATHRUSTHELPERS_H

GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -773,7 +773,7 @@ template <>
773773
inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal<GPUTPCGMMergerMergeBorders, 3>(const krnlSetupTime& _xyz, GPUTPCGMBorderRange* const& range, int32_t const& N, int32_t const& cmpMax)
774774
{
775775
thrust::device_ptr<GPUTPCGMBorderRange> p(range);
776-
ThrustVolatileAsyncAllocator alloc(this);
776+
ThrustVolatileAllocator alloc(this);
777777
if (cmpMax) {
778778
thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(mInternals->Streams[_xyz.x.stream]), p, p + N, MergeBorderTracks_compMax());
779779
} else {
@@ -1878,15 +1878,15 @@ template <>
18781878
inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal<GPUTPCGMMergerSortTracks, 0>(const krnlSetupTime& _xyz)
18791879
{
18801880
thrust::device_ptr<uint32_t> trackSort((uint32_t*)mProcessorsShadow->tpcMerger.TrackOrderProcess());
1881-
ThrustVolatileAsyncAllocator alloc(this);
1881+
ThrustVolatileAllocator alloc(this);
18821882
thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(mInternals->Streams[_xyz.x.stream]), trackSort, trackSort + processors()->tpcMerger.NOutputTracks(), GPUTPCGMMergerSortTracks_comp(mProcessorsShadow->tpcMerger.OutputTracks()));
18831883
}
18841884

18851885
template <>
18861886
inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal<GPUTPCGMMergerSortTracksQPt, 0>(const krnlSetupTime& _xyz)
18871887
{
18881888
thrust::device_ptr<uint32_t> trackSort((uint32_t*)mProcessorsShadow->tpcMerger.TrackSort());
1889-
ThrustVolatileAsyncAllocator alloc(this);
1889+
ThrustVolatileAllocator alloc(this);
18901890
thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(mInternals->Streams[_xyz.x.stream]), trackSort, trackSort + processors()->tpcMerger.NOutputTracks(), GPUTPCGMMergerSortTracksQPt_comp(mProcessorsShadow->tpcMerger.OutputTracks()));
18911891
}
18921892
#endif // GPUCA_SPECIALIZE_THRUST_SORTS - Specialize GPUTPCGMMergerSortTracks and GPUTPCGMMergerSortTracksQPt
@@ -2111,7 +2111,7 @@ template <>
21112111
inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal<GPUTPCGMMergerMergeLoopers, 1>(const krnlSetupTime& _xyz)
21122112
{
21132113
thrust::device_ptr<MergeLooperParam> params(mProcessorsShadow->tpcMerger.LooperCandidates());
2114-
ThrustVolatileAsyncAllocator alloc(this);
2114+
ThrustVolatileAllocator alloc(this);
21152115
thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(mInternals->Streams[_xyz.x.stream]), params, params + processors()->tpcMerger.Memory()->nLooperMatchCandidates, GPUTPCGMMergerMergeLoopers_comp());
21162116
}
21172117
#endif // GPUCA_SPECIALIZE_THRUST_SORTS - Specialize GPUTPCGMMergerSortTracks and GPUTPCGMMergerSortTracksQPt

GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ template <>
105105
inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal<GPUTPCGMO2Output, GPUTPCGMO2Output::sort>(const krnlSetupTime& _xyz)
106106
{
107107
thrust::device_ptr<GPUTPCGMMerger::tmpSort> trackSort(mProcessorsShadow->tpcMerger.TrackSortO2());
108-
ThrustVolatileAsyncAllocator alloc(this);
108+
ThrustVolatileAllocator alloc(this);
109109
thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(mInternals->Streams[_xyz.x.stream]), trackSort, trackSort + processors()->tpcMerger.NOutputTracksTPCO2(), GPUTPCGMO2OutputSort_comp());
110110
}
111111
#endif // GPUCA_SPECIALIZE_THRUST_SORTS - Specialize GPUTPCGMO2Output::Thread<GPUTPCGMO2Output::sort>

0 commit comments

Comments
 (0)