Skip to content

Commit e5e4e1c

Browse files
committed
GPU: Rename GPUFailedMsg to GPUChkErr
1 parent 8e8965d commit e5e4e1c

12 files changed

+152
-143
lines changed

GPU/Common/GPUCommonChkErr.h

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,19 @@
1212
/// \file GPUCommonChkErr.h
1313
/// \author David Rohr
1414

15+
// GPUChkErr and GPUChkErrI will both check x for an error, using the loaded backend of GPUReconstruction (requiring GPUReconstruction.h to be included by the user).
16+
// In case of an error, it will print out the corresponding CUDA / HIP / OpenCL error code
17+
// GPUChkErr will download GPUReconstruction error values from GPU, print them, and terminate the application with an exception if an error occured.
18+
// GPUChkErrI will return 0 or 1, depending on whether an error has occurred.
19+
// The Macros must be called ona GPUReconstruction instance, e.g.:
20+
// if (mRec->GPUChkErrI(cudaMalloc(...))) { exit(1); }
21+
// gpuRecObj.GPUChkErr(cudaMalloc(...));
22+
1523
#ifndef GPUCOMMONCHKERR_H
1624
#define GPUCOMMONCHKERR_H
1725

18-
#define GPUFailedMsg(x) GPUFailedMsgA(x, __FILE__, __LINE__, true)
19-
#define GPUFailedMsgI(x) GPUFailedMsgA(x, __FILE__, __LINE__, false)
26+
// Please #include "GPUReconstruction.h" in your code, if you use these 2!
27+
#define GPUChkErr(x) GPUChkErrA(x, __FILE__, __LINE__, true)
28+
#define GPUChkErrI(x) GPUChkErrA(x, __FILE__, __LINE__, false)
2029

2130
#endif

GPU/GPUTracking/Base/GPUReconstruction.cxx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1078,9 +1078,9 @@ int32_t GPUReconstruction::CheckErrorCodes(bool cpuOnly, bool forceShowErrors, s
10781078
return retVal;
10791079
}
10801080

1081-
int32_t GPUReconstruction::GPUFailedMsgA(const int64_t error, const char* file, int32_t line, bool failOnError)
1081+
int32_t GPUReconstruction::GPUChkErrA(const int64_t error, const char* file, int32_t line, bool failOnError)
10821082
{
1083-
if (error == 0 || !GPUFailedMsgInternal(error, file, line)) {
1083+
if (error == 0 || !GPUChkErrInternal(error, file, line)) {
10841084
return 0;
10851085
}
10861086
if (failOnError) {

GPU/GPUTracking/Base/GPUReconstruction.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ class GPUReconstruction
143143
virtual void* getGPUPointer(void* ptr) { return ptr; }
144144
virtual void startGPUProfiling() {}
145145
virtual void endGPUProfiling() {}
146-
int32_t GPUFailedMsgA(const int64_t error, const char* file, int32_t line, bool failOnError);
146+
int32_t GPUChkErrA(const int64_t error, const char* file, int32_t line, bool failOnError);
147147
int32_t CheckErrorCodes(bool cpuOnly = false, bool forceShowErrors = false, std::vector<std::array<uint32_t, 4>>* fillErrors = nullptr);
148148
void RunPipelineWorker();
149149
void TerminatePipelineWorker();
@@ -247,7 +247,7 @@ class GPUReconstruction
247247
void UpdateMaxMemoryUsed();
248248
int32_t EnqueuePipeline(bool terminate = false);
249249
GPUChain* GetNextChainInQueue();
250-
virtual int32_t GPUFailedMsgInternal(const int64_t error, const char* file, int32_t line) const { return 0; }
250+
virtual int32_t GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const { return 0; }
251251

252252
virtual int32_t registerMemoryForGPU_internal(const void* ptr, size_t size) = 0;
253253
virtual int32_t unregisterMemoryForGPU_internal(const void* ptr) = 0;

GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ class GPUReconstructionDeviceBase : public GPUReconstructionCPU
4646
virtual int32_t InitDevice_Runtime() = 0;
4747
int32_t ExitDevice() override;
4848
virtual int32_t ExitDevice_Runtime() = 0;
49-
virtual int32_t GPUFailedMsgInternal(const int64_t error, const char* file, int32_t line) const override = 0;
49+
virtual int32_t GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const override = 0;
5050
int32_t registerMemoryForGPU_internal(const void* ptr, size_t size) override;
5151
int32_t unregisterMemoryForGPU_internal(const void* ptr) override;
5252
void unregisterRemainingRegisteredMemory();

GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu

Lines changed: 77 additions & 77 deletions
Large diffs are not rendered by default.

GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,13 +33,13 @@ class GPUReconstructionCUDABackend : public GPUReconstructionDeviceBase
3333
{
3434
public:
3535
~GPUReconstructionCUDABackend() override;
36-
static int32_t GPUFailedMsgStatic(const int64_t error, const char* file, int32_t line);
36+
static int32_t GPUChkErrStatic(const int64_t error, const char* file, int32_t line);
3737

3838
protected:
3939
GPUReconstructionCUDABackend(const GPUSettingsDeviceBackend& cfg);
4040

4141
void PrintKernelOccupancies() override;
42-
virtual int32_t GPUFailedMsgInternal(const int64_t error, const char* file, int32_t line) const override { return GPUFailedMsgStatic(error, file, line); }
42+
virtual int32_t GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const override { return GPUChkErrStatic(error, file, line); }
4343

4444
template <class T, int32_t I = 0, typename... Args>
4545
void runKernelBackend(const krnlSetupArgs<T, I, Args...>& args);

GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ using namespace o2::gpu;
3636
#ifndef GPUCA_NO_CONSTANT_MEMORY
3737
static GPUReconstructionDeviceBase::deviceConstantMemRegistration registerConstSymbol([]() {
3838
void* retVal = nullptr;
39-
if (GPUReconstructionCUDA::GPUFailedMsgStatic(cudaGetSymbolAddress(&retVal, gGPUConstantMemBuffer), __FILE__, __LINE__)) {
39+
if (GPUReconstructionCUDA::GPUChkErrStatic(cudaGetSymbolAddress(&retVal, gGPUConstantMemBuffer), __FILE__, __LINE__)) {
4040
throw std::runtime_error("Could not obtain GPU constant memory symbol");
4141
}
4242
return retVal;

GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ class GPUDebugTiming
4949
{
5050
if (mDo) {
5151
if (mDeviceTimers) {
52-
mRec->GPUFailedMsg(cudaEventRecord(mDeviceTimers[0].get<cudaEvent_t>(), mStreams[mXYZ.x.stream]));
52+
mRec->GPUChkErr(cudaEventRecord(mDeviceTimers[0].get<cudaEvent_t>(), mStreams[mXYZ.x.stream]));
5353
} else {
5454
mTimer.ResetStart();
5555
}
@@ -59,13 +59,13 @@ class GPUDebugTiming
5959
{
6060
if (mDo && mXYZ.t == 0.) {
6161
if (mDeviceTimers) {
62-
mRec->GPUFailedMsg(cudaEventRecord(mDeviceTimers[1].get<cudaEvent_t>(), mStreams[mXYZ.x.stream]));
63-
mRec->GPUFailedMsg(cudaEventSynchronize(mDeviceTimers[1].get<cudaEvent_t>()));
62+
mRec->GPUChkErr(cudaEventRecord(mDeviceTimers[1].get<cudaEvent_t>(), mStreams[mXYZ.x.stream]));
63+
mRec->GPUChkErr(cudaEventSynchronize(mDeviceTimers[1].get<cudaEvent_t>()));
6464
float v;
65-
mRec->GPUFailedMsg(cudaEventElapsedTime(&v, mDeviceTimers[0].get<cudaEvent_t>(), mDeviceTimers[1].get<cudaEvent_t>()));
65+
mRec->GPUChkErr(cudaEventElapsedTime(&v, mDeviceTimers[0].get<cudaEvent_t>(), mDeviceTimers[1].get<cudaEvent_t>()));
6666
mXYZ.t = v * 1.e-3f;
6767
} else {
68-
mRec->GPUFailedMsg(cudaStreamSynchronize(mStreams[mXYZ.x.stream]));
68+
mRec->GPUChkErr(cudaStreamSynchronize(mStreams[mXYZ.x.stream]));
6969
mXYZ.t = mTimer.GetCurrentElapsedTime();
7070
}
7171
}

GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ __global__ void gGPUConstantMemBuffer_dummy(int32_t* p) { *p = *(int32_t*)&gGPUC
3434
template <>
3535
inline void GPUReconstructionCUDABackend::runKernelBackendInternal<GPUMemClean16, 0>(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size)
3636
{
37-
GPUFailedMsg(cudaMemsetAsync(ptr, 0, size, mInternals->Streams[_xyz.x.stream]));
37+
GPUChkErr(cudaMemsetAsync(ptr, 0, size, mInternals->Streams[_xyz.x.stream]));
3838
}
3939

4040
template <class T, int32_t I, typename... Args>
@@ -56,7 +56,7 @@ inline void GPUReconstructionCUDABackend::runKernelBackendInternal(const krnlSet
5656
#endif
5757
pArgs[arg_offset] = &y.index;
5858
GPUReconstructionCUDAInternals::getArgPtrs(&pArgs[arg_offset + 1], args...);
59-
GPUFailedMsg(cuLaunchKernel(*mInternals->kernelFunctions[getRTCkernelNum<false, T, I>()], x.nBlocks, 1, 1, x.nThreads, 1, 1, 0, mInternals->Streams[x.stream], (void**)pArgs, nullptr));
59+
GPUChkErr(cuLaunchKernel(*mInternals->kernelFunctions[getRTCkernelNum<false, T, I>()], x.nBlocks, 1, 1, x.nThreads, 1, 1, 0, mInternals->Streams[x.stream], (void**)pArgs, nullptr));
6060
}
6161
}
6262

@@ -67,16 +67,16 @@ void GPUReconstructionCUDABackend::runKernelBackend(const krnlSetupArgs<T, I, Ar
6767
auto& z = args.s.z;
6868
if (z.evList) {
6969
for (int32_t k = 0; k < z.nEvents; k++) {
70-
GPUFailedMsg(cudaStreamWaitEvent(mInternals->Streams[x.stream], ((cudaEvent_t*)z.evList)[k], 0));
70+
GPUChkErr(cudaStreamWaitEvent(mInternals->Streams[x.stream], ((cudaEvent_t*)z.evList)[k], 0));
7171
}
7272
}
7373
{
7474
GPUDebugTiming timer(mProcessingSettings.deviceTimers && mProcessingSettings.debugLevel > 0, (deviceEvent*)mDebugEvents, mInternals->Streams, args.s, this);
7575
std::apply([this, &args](auto&... vals) { this->runKernelBackendInternal<T, I, Args...>(args.s, vals...); }, args.v);
7676
}
77-
GPUFailedMsg(cudaGetLastError());
77+
GPUChkErr(cudaGetLastError());
7878
if (z.ev) {
79-
GPUFailedMsg(cudaEventRecord(*(cudaEvent_t*)z.ev, mInternals->Streams[x.stream]));
79+
GPUChkErr(cudaEventRecord(*(cudaEvent_t*)z.ev, mInternals->Streams[x.stream]));
8080
}
8181
}
8282

@@ -138,7 +138,7 @@ void GPUReconstructionCUDABackend::getRTCKernelCalls(std::vector<std::string>& k
138138
#ifndef GPUCA_NO_CONSTANT_MEMORY
139139
static GPUReconstructionDeviceBase::deviceConstantMemRegistration registerConstSymbol([]() {
140140
void* retVal = nullptr;
141-
if (GPUReconstructionCUDA::GPUFailedMsgStatic(cudaGetSymbolAddress(&retVal, gGPUConstantMemBuffer), __FILE__, __LINE__)) {
141+
if (GPUReconstructionCUDA::GPUChkErrStatic(cudaGetSymbolAddress(&retVal, gGPUConstantMemBuffer), __FILE__, __LINE__)) {
142142
throw std::runtime_error("Could not obtain GPU constant memory symbol");
143143
}
144144
return retVal;

0 commit comments

Comments
 (0)