Skip to content

Commit 3f5b83e

Browse files
committed
GPU: Fix RecordMarker must operate on reference, since OpenCL will change the event pointer
1 parent 4728f06 commit 3f5b83e

10 files changed

+19
-14
lines changed

GPU/GPUTracking/Base/GPUReconstructionCPU.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ class GPUReconstructionCPU : public GPUReconstructionKernels<GPUReconstructionCP
114114
virtual void SynchronizeEvents(deviceEvent* evList, int32_t nEvents = 1) {}
115115
virtual void StreamWaitForEvents(int32_t stream, deviceEvent* evList, int32_t nEvents = 1) {}
116116
virtual bool IsEventDone(deviceEvent* evList, int32_t nEvents = 1) { return true; }
117-
virtual void RecordMarker(deviceEvent ev, int32_t stream) {}
117+
// Default implementation for GPUReconstructionCPU: does nothing (empty body).
// The event is passed by pointer (not by value) so that overriding backends can
// modify the caller's event object; ev and stream are unused here.
virtual void RecordMarker(deviceEvent* ev, int32_t stream) {}
118118
virtual void SynchronizeGPU() {}
119119
virtual void ReleaseEvent(deviceEvent ev) {}
120120
virtual int32_t StartHelperThreads() { return 0; }

GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -548,7 +548,7 @@ size_t GPUReconstructionCUDA::WriteToConstantMemory(size_t offset, const void* s
548548
}
549549

550550
void GPUReconstructionCUDA::ReleaseEvent(deviceEvent ev) {}
551-
void GPUReconstructionCUDA::RecordMarker(deviceEvent ev, int32_t stream) { GPUFailedMsg(cudaEventRecord(ev.get<cudaEvent_t>(), mInternals->Streams[stream])); }
551+
// Records the CUDA event held in *ev on the stream mInternals->Streams[stream] via cudaEventRecord.
// The return code is passed to GPUFailedMsg (presumably reports a failed call; its definition is not visible here).
// stream is used directly as an index into mInternals->Streams; no range check is done in this function.
void GPUReconstructionCUDA::RecordMarker(deviceEvent* ev, int32_t stream) { GPUFailedMsg(cudaEventRecord(ev->get<cudaEvent_t>(), mInternals->Streams[stream])); }
552552

553553
std::unique_ptr<GPUReconstruction::GPUThreadContext> GPUReconstructionCUDA::GetThreadContext()
554554
{

GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ class GPUReconstructionCUDA : public GPUReconstructionKernels<GPUReconstructionC
8484
size_t WriteToConstantMemory(size_t offset, const void* src, size_t size, int32_t stream = -1, deviceEvent* ev = nullptr) override;
8585
size_t GPUMemCpy(void* dst, const void* src, size_t size, int32_t stream, int32_t toGPU, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1) override;
8686
void ReleaseEvent(deviceEvent ev) override;
87-
void RecordMarker(deviceEvent ev, int32_t stream) override;
87+
void RecordMarker(deviceEvent* ev, int32_t stream) override;
8888

8989
void GetITSTraits(std::unique_ptr<o2::its::TrackerTraits>* trackerTraits, std::unique_ptr<o2::its::VertexerTraits>* vertexerTraits, std::unique_ptr<o2::its::TimeFrame>* timeFrame) override;
9090

GPU/GPUTracking/Base/opencl-common/GPUReconstructionOCL.cxx

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -359,6 +359,11 @@ int32_t GPUReconstructionOCL::InitDevice_Runtime()
359359
mInternals = master->mInternals;
360360
}
361361

362+
for (uint32_t i = 0; i < mEvents.size(); i++) {
363+
cl_event* events = (cl_event*)mEvents[i].data();
364+
new (events) cl_event[mEvents[i].size()];
365+
}
366+
362367
return (0);
363368
}
364369

@@ -432,7 +437,7 @@ size_t GPUReconstructionOCL::WriteToConstantMemory(size_t offset, const void* sr
432437

433438
void GPUReconstructionOCL::ReleaseEvent(deviceEvent ev) { GPUFailedMsg(clReleaseEvent(ev.get<cl_event>())); }
434439

435-
void GPUReconstructionOCL::RecordMarker(deviceEvent ev, int32_t stream) { GPUFailedMsg(clEnqueueMarkerWithWaitList(mInternals->command_queue[stream], 0, nullptr, ev.getEventList<cl_event>())); }
440+
// Enqueues a marker with an empty wait list (0, nullptr) on mInternals->command_queue[stream].
// ev->getEventList<cl_event>() is passed as the output-event argument of clEnqueueMarkerWithWaitList,
// so the call stores a new event handle into the caller's deviceEvent; this is why ev is a pointer
// and not a by-value copy.
// The return code is passed to GPUFailedMsg (presumably reports a failed call; its definition is not visible here).
void GPUReconstructionOCL::RecordMarker(deviceEvent* ev, int32_t stream) { GPUFailedMsg(clEnqueueMarkerWithWaitList(mInternals->command_queue[stream], 0, nullptr, ev->getEventList<cl_event>())); }
436441

437442
int32_t GPUReconstructionOCL::DoStuckProtection(int32_t stream, deviceEvent event)
438443
{

GPU/GPUTracking/Base/opencl-common/GPUReconstructionOCL.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ class GPUReconstructionOCL : public GPUReconstructionDeviceBase
5252
size_t WriteToConstantMemory(size_t offset, const void* src, size_t size, int32_t stream = -1, deviceEvent* ev = nullptr) override;
5353
size_t GPUMemCpy(void* dst, const void* src, size_t size, int32_t stream, int32_t toGPU, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1) override;
5454
void ReleaseEvent(deviceEvent ev) override;
55-
void RecordMarker(deviceEvent ev, int32_t stream) override;
55+
void RecordMarker(deviceEvent* ev, int32_t stream) override;
5656

5757
virtual int32_t GetOCLPrograms() = 0;
5858
virtual bool CheckPlatform(uint32_t i) = 0;

GPU/GPUTracking/Global/GPUChain.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ class GPUChain
101101
}
102102
}
103103
inline bool IsEventDone(deviceEvent* evList, int32_t nEvents = 1) { return mRec->IsEventDone(evList, nEvents); }
104-
inline void RecordMarker(deviceEvent ev, int32_t stream) { mRec->RecordMarker(ev, stream); }
104+
// Forwards to mRec->RecordMarker with the same event pointer and stream index.
// ev is handed through unchanged, so the callee operates on the caller's event object, not on a copy.
inline void RecordMarker(deviceEvent* ev, int32_t stream) { mRec->RecordMarker(ev, stream); }
105105
virtual inline std::unique_ptr<GPUReconstruction::GPUThreadContext> GetThreadContext() { return mRec->GetThreadContext(); }
106106
inline void SynchronizeGPU() { mRec->SynchronizeGPU(); }
107107
inline void ReleaseEvent(deviceEvent ev, bool doGPU = true)

GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -934,7 +934,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
934934
if (transferRunning[lane]) {
935935
ReleaseEvent(mEvents->stream[lane], doGPU);
936936
}
937-
RecordMarker(mEvents->stream[lane], mRec->NStreams() - 1);
937+
RecordMarker(&mEvents->stream[lane], mRec->NStreams() - 1);
938938
transferRunning[lane] = 1;
939939
}
940940

GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ int32_t GPUChainTracking::RunTPCCompression()
3737
GPUTPCCompression& CompressorShadow = doGPU ? processorsShadow()->tpcCompressor : Compressor;
3838
const auto& threadContext = GetThreadContext();
3939
if (mPipelineFinalizationCtx && GetProcessingSettings().doublePipelineClusterizer) {
40-
RecordMarker(mEvents->single, 0);
40+
RecordMarker(&mEvents->single, 0);
4141
}
4242

4343
if (GetProcessingSettings().tpcCompressionGatherMode == 3) {
@@ -124,7 +124,7 @@ int32_t GPUChainTracking::RunTPCCompression()
124124
return 1;
125125
}
126126
if (GetProcessingSettings().tpcCompressionGatherMode == 3) {
127-
RecordMarker(mEvents->stream[outputStream], outputStream);
127+
RecordMarker(&mEvents->stream[outputStream], outputStream);
128128
char* deviceFlatPts = (char*)Compressor.mOutput->qTotU;
129129
if (GetProcessingSettings().doublePipeline) {
130130
const size_t blockSize = CAMath::nextMultipleOf<1024>(copySize / 30);

GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ void GPUChainTracking::RunTPCTrackingMerger_MergeBorderTracks(int8_t withinSlice
3333
uint32_t n = withinSlice == -1 ? NSLICES / 2 : NSLICES;
3434
if (GetProcessingSettings().alternateBorderSort && (!mRec->IsGPU() || doGPUall)) {
3535
TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResMemory(), 0, &mEvents->init);
36-
RecordMarker(mEvents->single, 0);
36+
RecordMarker(&mEvents->single, 0);
3737
for (uint32_t i = 0; i < n; i++) {
3838
int32_t stream = i % mRec->NStreams();
3939
runKernel<GPUTPCGMMergerMergeBorders, 0>({GetGridAuto(stream, deviceType), krnlRunRangeNone, {nullptr, stream && i < (uint32_t)mRec->NStreams() ? &mEvents->single : nullptr}}, i, withinSlice, mergeMode);
@@ -55,7 +55,7 @@ void GPUChainTracking::RunTPCTrackingMerger_MergeBorderTracks(int8_t withinSlice
5555
if (i == n - 1) { // Synchronize all execution on stream 0 with the last kernel
5656
ne = std::min<int32_t>(n, mRec->NStreams());
5757
for (int32_t j = 1; j < ne; j++) {
58-
RecordMarker(mEvents->slice[j], j);
58+
RecordMarker(&mEvents->slice[j], j);
5959
}
6060
e = &mEvents->slice[1];
6161
ne--;
@@ -251,7 +251,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput)
251251
DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPUall, Merger, &GPUTPCGMMerger::DumpFinal, *mDebugFile);
252252

253253
if (doGPUall) {
254-
RecordMarker(mEvents->single, 0);
254+
RecordMarker(&mEvents->single, 0);
255255
auto* waitEvent = &mEvents->single;
256256
if (GetProcessingSettings().keepDisplayMemory || GetProcessingSettings().createO2Output <= 1 || mFractionalQAEnabled) {
257257
if (!(GetProcessingSettings().keepDisplayMemory || GetProcessingSettings().createO2Output <= 1)) {
@@ -317,7 +317,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput)
317317
TransferMemoryResourcesToHost(RecoStep::TPCMerging, &Merger, -1, true);
318318
runKernel<GPUTPCGMO2Output, GPUTPCGMO2Output::mc>(GetGridAuto(0, GPUReconstruction::krnlDeviceType::CPU));
319319
} else if (doGPUall) {
320-
RecordMarker(mEvents->single, 0);
320+
RecordMarker(&mEvents->single, 0);
321321
TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResOutputO2(), outputStream, nullptr, &mEvents->single);
322322
TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResOutputO2Clus(), outputStream);
323323
ReleaseEvent(mEvents->single);

GPU/GPUTracking/Global/GPUChainTrackingSliceTracker.cxx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -305,7 +305,7 @@ int32_t GPUChainTracking::RunTPCTrackingSlices_internal()
305305
SynchronizeGPU();
306306
} else {
307307
for (int32_t i = 0; i < mRec->NStreams(); i++) {
308-
RecordMarker(mEvents->stream[i], i);
308+
RecordMarker(&mEvents->stream[i], i);
309309
}
310310
runKernel<GPUTPCTrackletConstructor, 1>({GetGridAuto(0), krnlRunRangeNone, {&mEvents->single, mEvents->stream, mRec->NStreams()}});
311311
for (int32_t i = 0; i < mRec->NStreams(); i++) {

0 commit comments

Comments
 (0)