Skip to content

Commit e4c0849

Browse files
committed
GPU: Switch checkKernelFailures setting to more general serializeGPU
1 parent f41b7b8 commit e4c0849

File tree

8 files changed

+30
-34
lines changed

8 files changed

+30
-34
lines changed

GPU/GPUTracking/Base/GPUReconstructionCPU.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -234,7 +234,7 @@ inline int32_t GPUReconstructionCPU::runKernel(krnlSetup&& setup, Args&&... args
234234
}
235235
double deviceTimerTime = 0.;
236236
int32_t retVal = runKernelImplWrapper(gpu_reconstruction_kernels::classArgument<S, I>(), cpuFallback, deviceTimerTime, std::forward<krnlSetup&&>(setup), std::forward<Args>(args)...);
237-
if (GPUDebug(GetKernelName<S, I>(), stream, mProcessingSettings.checkKernelFailures)) {
237+
if (GPUDebug(GetKernelName<S, I>(), stream, mProcessingSettings.serializeGPU & 1)) {
238238
throw std::runtime_error("kernel failure");
239239
}
240240
if (mProcessingSettings.debugLevel >= 1) {

GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -325,3 +325,17 @@ void GPUReconstructionDeviceBase::runConstantRegistrators()
325325
mDeviceConstantMemList.emplace_back(list[i]());
326326
}
327327
}
328+
329+
// Transfers one memory resource between host and device via GPUMemCpy().
//
// Resources whose Type() does not carry the MEMORY_GPU flag are not copied.
//
// Parameters:
//   res     - resource to transfer; its Type(), Name() and Size() are read here.
//   stream  - forwarded unchanged to GPUMemCpy().
//   ev      - forwarded unchanged to GPUMemCpy().
//   evList  - forwarded unchanged to GPUMemCpy().
//   nEvents - forwarded unchanged to GPUMemCpy().
//   toGPU   - selects the "GPU" / "Host" label in the debug message and is
//             forwarded to GPUMemCpy().
//   src     - source pointer handed to GPUMemCpy().
//   dst     - destination pointer handed to GPUMemCpy().
//
// Returns 0 when the transfer is skipped, otherwise whatever GPUMemCpy() returns.
size_t GPUReconstructionDeviceBase::TransferMemoryInternal(GPUMemoryResource* res, int32_t stream, deviceEvent* ev, deviceEvent* evList, int32_t nEvents, bool toGPU, const void* src, void* dst)
{
  if (!(res->Type() & GPUMemoryResource::MEMORY_GPU)) {
    if (mProcessingSettings.debugLevel >= 4) {
      GPUInfo("Skipped transfer of non-GPU memory resource: %s", res->Name());
    }
    return 0;
  }
  // Transfers are logged from debug level 3 on; the resource named "ErrorCodes"
  // is only logged from level 4 on (strcmp() returns 0 for that name).
  if (mProcessingSettings.debugLevel >= 3 && (strcmp(res->Name(), "ErrorCodes") || mProcessingSettings.debugLevel >= 4)) {
    // int64_t is `long long` on some platforms, which does not match "%ld".
    // Casting to long long and printing with "%lld" is well-defined everywhere.
    GPUInfo("Copying to %s: %s - %lld bytes", toGPU ? "GPU" : "Host", res->Name(), static_cast<long long>(res->Size()));
  }
  return GPUMemCpy(dst, src, res->Size(), stream, toGPU, ev, evList, nEvents);
}

GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ class GPUReconstructionDeviceBase : public GPUReconstructionCPU
5656
virtual const GPUTPCTracker* CPUTracker(int32_t iSlice) { return &processors()->tpcTrackers[iSlice]; }
5757

5858
int32_t GPUDebug(const char* state = "UNKNOWN", int32_t stream = -1, bool force = false) override = 0;
59-
size_t TransferMemoryInternal(GPUMemoryResource* res, int32_t stream, deviceEvent* ev, deviceEvent* evList, int32_t nEvents, bool toGPU, const void* src, void* dst) override = 0;
59+
size_t TransferMemoryInternal(GPUMemoryResource* res, int32_t stream, deviceEvent* ev, deviceEvent* evList, int32_t nEvents, bool toGPU, const void* src, void* dst) override;
6060
size_t GPUMemCpy(void* dst, const void* src, size_t size, int32_t stream, int32_t toGPU, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1) override = 0;
6161
size_t GPUMemCpyAlways(bool onGpu, void* dst, const void* src, size_t size, int32_t stream, int32_t toGPU, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1) override;
6262
size_t WriteToConstantMemory(size_t offset, const void* src, size_t size, int32_t stream = -1, deviceEvent* ev = nullptr) override = 0;

GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu

Lines changed: 7 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -519,21 +519,10 @@ size_t GPUReconstructionCUDA::GPUMemCpy(void* dst, const void* src, size_t size,
519519
if (ev) {
520520
GPUFailedMsg(cudaEventRecord(ev->get<cudaEvent_t>(), mInternals->Streams[stream == -1 ? 0 : stream]));
521521
}
522-
return size;
523-
}
524-
525-
size_t GPUReconstructionCUDA::TransferMemoryInternal(GPUMemoryResource* res, int32_t stream, deviceEvent* ev, deviceEvent* evList, int32_t nEvents, bool toGPU, const void* src, void* dst)
526-
{
527-
if (!(res->Type() & GPUMemoryResource::MEMORY_GPU)) {
528-
if (mProcessingSettings.debugLevel >= 4) {
529-
GPUInfo("Skipped transfer of non-GPU memory resource: %s", res->Name());
530-
}
531-
return 0;
532-
}
533-
if (mProcessingSettings.debugLevel >= 3 && (strcmp(res->Name(), "ErrorCodes") || mProcessingSettings.debugLevel >= 4)) {
534-
GPUInfo("Copying to %s: %s - %ld bytes", toGPU ? "GPU" : "Host", res->Name(), (int64_t)res->Size());
522+
if (mProcessingSettings.serializeGPU & 2) {
523+
GPUDebug(("GPUMemCpy " + std::to_string(toGPU)).c_str(), stream, true);
535524
}
536-
return GPUMemCpy(dst, src, res->Size(), stream, toGPU, ev, evList, nEvents);
525+
return size;
537526
}
538527

539528
size_t GPUReconstructionCUDA::WriteToConstantMemory(size_t offset, const void* src, size_t size, int32_t stream, deviceEvent* ev)
@@ -552,6 +541,9 @@ size_t GPUReconstructionCUDA::WriteToConstantMemory(size_t offset, const void* s
552541
if (ev && stream != -1) {
553542
GPUFailedMsg(cudaEventRecord(ev->get<cudaEvent_t>(), mInternals->Streams[stream]));
554543
}
544+
if (mProcessingSettings.serializeGPU & 2) {
545+
GPUDebug("WriteToConstantMemory", stream, true);
546+
}
555547
return size;
556548
}
557549

@@ -599,7 +591,7 @@ int32_t GPUReconstructionCUDA::GPUDebug(const char* state, int32_t stream, bool
599591
cudaError cuErr;
600592
cuErr = cudaGetLastError();
601593
if (cuErr != cudaSuccess) {
602-
GPUError("CUDA Error %s while running kernel (%s) (Stream %d)", cudaGetErrorString(cuErr), state, stream);
594+
GPUError("CUDA Error %s while running (%s) (Stream %d)", cudaGetErrorString(cuErr), state, stream);
603595
return (1);
604596
}
605597
if (!force && mProcessingSettings.debugLevel <= 0) {

GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,6 @@ class GPUReconstructionCUDA : public GPUReconstructionKernels<GPUReconstructionC
8282
int32_t unregisterMemoryForGPU_internal(const void* ptr) override;
8383

8484
size_t WriteToConstantMemory(size_t offset, const void* src, size_t size, int32_t stream = -1, deviceEvent* ev = nullptr) override;
85-
size_t TransferMemoryInternal(GPUMemoryResource* res, int32_t stream, deviceEvent* ev, deviceEvent* evList, int32_t nEvents, bool toGPU, const void* src, void* dst) override;
8685
size_t GPUMemCpy(void* dst, const void* src, size_t size, int32_t stream, int32_t toGPU, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1) override;
8786
void ReleaseEvent(deviceEvent ev) override;
8887
void RecordMarker(deviceEvent ev, int32_t stream) override;

GPU/GPUTracking/Base/opencl-common/GPUReconstructionOCL.cxx

Lines changed: 6 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -411,21 +411,10 @@ size_t GPUReconstructionOCL::GPUMemCpy(void* dst, const void* src, size_t size,
411411
} else {
412412
GPUFailedMsg(clEnqueueReadBuffer(mInternals->command_queue[stream == -1 ? 0 : stream], mInternals->mem_gpu, stream == -1, (char*)src - (char*)mDeviceMemoryBase, size, dst, nEvents, evList->getEventList<cl_event>(), ev->getEventList<cl_event>()));
413413
}
414-
return size;
415-
}
416-
417-
size_t GPUReconstructionOCL::TransferMemoryInternal(GPUMemoryResource* res, int32_t stream, deviceEvent* ev, deviceEvent* evList, int32_t nEvents, bool toGPU, const void* src, void* dst)
418-
{
419-
if (!(res->Type() & GPUMemoryResource::MEMORY_GPU)) {
420-
if (mProcessingSettings.debugLevel >= 4) {
421-
GPUInfo("Skipped transfer of non-GPU memory resource: %s", res->Name());
422-
}
423-
return 0;
424-
}
425-
if (mProcessingSettings.debugLevel >= 3 && (strcmp(res->Name(), "ErrorCodes") || mProcessingSettings.debugLevel >= 4)) {
426-
GPUInfo("Copying to %s: %s - %ld bytes", toGPU ? "GPU" : "Host", res->Name(), (int64_t)res->Size());
414+
if (mProcessingSettings.serializeGPU & 2) {
415+
GPUDebug(("GPUMemCpy " + std::to_string(toGPU)).c_str(), stream, true);
427416
}
428-
return GPUMemCpy(dst, src, res->Size(), stream, toGPU, ev, evList, nEvents);
417+
return size;
429418
}
430419

431420
size_t GPUReconstructionOCL::WriteToConstantMemory(size_t offset, const void* src, size_t size, int32_t stream, deviceEvent* ev)
@@ -434,6 +423,9 @@ size_t GPUReconstructionOCL::WriteToConstantMemory(size_t offset, const void* sr
434423
SynchronizeGPU();
435424
}
436425
GPUFailedMsg(clEnqueueWriteBuffer(mInternals->command_queue[stream == -1 ? 0 : stream], mInternals->mem_constant, stream == -1, offset, size, src, 0, nullptr, ev->getEventList<cl_event>()));
426+
if (mProcessingSettings.serializeGPU & 2) {
427+
GPUDebug("WriteToConstantMemory", stream, true);
428+
}
437429
return size;
438430
}
439431

GPU/GPUTracking/Base/opencl-common/GPUReconstructionOCL.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,6 @@ class GPUReconstructionOCL : public GPUReconstructionDeviceBase
5050
bool IsEventDone(deviceEvent* evList, int32_t nEvents = 1) override;
5151

5252
size_t WriteToConstantMemory(size_t offset, const void* src, size_t size, int32_t stream = -1, deviceEvent* ev = nullptr) override;
53-
size_t TransferMemoryInternal(GPUMemoryResource* res, int32_t stream, deviceEvent* ev, deviceEvent* evList, int32_t nEvents, bool toGPU, const void* src, void* dst) override;
5453
size_t GPUMemCpy(void* dst, const void* src, size_t size, int32_t stream, int32_t toGPU, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1) override;
5554
void ReleaseEvent(deviceEvent ev) override;
5655
void RecordMarker(deviceEvent ev, int32_t stream) override;

GPU/GPUTracking/Definitions/GPUSettingsList.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -227,7 +227,7 @@ AddOption(trdTrackModelO2, bool, false, "", 0, "Use O2 track model instead of GP
227227
AddOption(debugLevel, int32_t, -1, "debug", 'd', "Set debug level (-2 = silent, -1 = autoselect (-2 for O2, 0 for standalone))")
228228
AddOption(allocDebugLevel, int32_t, 0, "allocDebug", 0, "Some debug output for memory allocations (without messing with normal debug level)")
229229
AddOption(debugMask, int32_t, 262143, "", 0, "Mask for debug output dumps to file")
230-
AddOption(checkKernelFailures, bool, false, "", 0, "Synchronize after each kernel call and identify failing kernels")
230+
AddOption(serializeGPU, int8_t, 0, "", 0, "Synchronize after each kernel call (bit 1) and DMA transfer (bit 2) and identify failures")
231231
AddOption(deterministicGPUReconstruction, int32_t, -1, "", 0, "Make CPU and GPU debug output comparable (sort / skip concurrent parts), -1 = automatic if debugLevel >= 6")
232232
AddOption(showOutputStat, bool, false, "", 0, "Print some track output statistics")
233233
AddOption(runCompressionStatistics, bool, false, "compressionStat", 0, "Run statistics and verification for cluster compression")

0 commit comments

Comments
 (0)