Skip to content

Commit 95ae41e

Browse files
committed
GPU: Unify creation of list of kernel names and kernel numbers
1 parent f26e725 commit 95ae41e

10 files changed

+51
-56
lines changed

GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,3 +124,39 @@ std::unique_ptr<gpu_reconstruction_kernels::threadContext> GPUReconstructionProc
124124

125125
gpu_reconstruction_kernels::threadContext::threadContext() = default;
126126
gpu_reconstruction_kernels::threadContext::~threadContext() = default;
127+
128+
template <class T, int32_t I>
129+
uint32_t GPUReconstructionProcessing::GetKernelNum(int32_t k)
130+
{
131+
static int32_t num = k;
132+
if (num < 0) {
133+
throw std::runtime_error("Internal Error - Kernel Number not Set");
134+
}
135+
return num;
136+
}
137+
138+
namespace o2::gpu::internal
{
// Builds the table of kernel names by expanding GPUCA_KRNL for the contents of
// GPUReconstructionKernelList.h. For every expansion the current table size is first
// handed to GetKernelNum of that kernel, and the kernel name is then appended, so the
// number passed to GetKernelNum equals the position of the name in the returned vector.
static std::vector<std::string> initKernelNames()
{
  std::vector<std::string> kernelNames;
#define GPUCA_KRNL(x_class, ...) \
  { \
    const auto kernelIndex = kernelNames.size(); \
    GPUReconstructionProcessing::GetKernelNum<GPUCA_M_KRNL_TEMPLATE(x_class)>(kernelIndex); \
    kernelNames.emplace_back(GPUCA_M_STR(GPUCA_M_KRNL_NAME(x_class))); \
  }
#include "GPUReconstructionKernelList.h"
#undef GPUCA_KRNL
  return kernelNames;
}
} // namespace o2::gpu::internal
151+
152+
// Definition of the static kernel-name table; filled once, during static initialization,
// with the result of o2::gpu::internal::initKernelNames().
const std::vector<std::string> GPUReconstructionProcessing::mKernelNames = o2::gpu::internal::initKernelNames();
153+
154+
// For each GPUCA_KRNL expansion produced by GPUReconstructionKernelList.h this emits
//  - an explicit instantiation of GetKernelNum for that kernel, and
//  - a full specialization of GetKernelName<...>() that returns
//    GPUCA_M_STR(GPUCA_M_KRNL_NAME(x_class)).
// NOTE(review): presumably the list header invokes GPUCA_KRNL once per kernel — confirm there.
#define GPUCA_KRNL(x_class, ...) \
155+
template uint32_t GPUReconstructionProcessing::GetKernelNum<GPUCA_M_KRNL_TEMPLATE(x_class)>(int32_t); \
156+
template <> \
157+
const char* GPUReconstructionProcessing::GetKernelName<GPUCA_M_KRNL_TEMPLATE(x_class)>() \
158+
{ \
159+
return GPUCA_M_STR(GPUCA_M_KRNL_NAME(x_class)); \
160+
}
161+
#include "GPUReconstructionKernelList.h"
162+
#undef GPUCA_KRNL

GPU/GPUTracking/Base/GPUReconstructionProcessing.h

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,10 @@ class GPUReconstructionProcessing : public GPUReconstruction
7474

7575
// Interface to query name of a kernel
7676
template <class T, int32_t I>
77-
constexpr static const char* GetKernelName();
77+
static const char* GetKernelName();
78+
// Returns the name stored at position i of the static kernel-name table.
// Access is bounds-checked: a negative or too-large i throws std::out_of_range instead of
// reading outside the vector (a negative i wraps to a huge unsigned index and is rejected).
const std::string& GetKernelName(int32_t i) const { return mKernelNames.at(static_cast<std::size_t>(i)); }
79+
template <class T, int32_t I = 0>
80+
static uint32_t GetKernelNum(int32_t k = -1);
7881

7982
// Public queries for timers
8083
auto& getRecoStepTimer(RecoStep step) { return mTimersRecoSteps[getRecoStepNum(step)]; }
@@ -100,6 +103,8 @@ class GPUReconstructionProcessing : public GPUReconstruction
100103
GPUReconstructionProcessing(const GPUSettingsDeviceBackend& cfg) : GPUReconstruction(cfg) {}
101104
using deviceEvent = gpu_reconstruction_kernels::deviceEvent;
102105

106+
static const std::vector<std::string> mKernelNames;
107+
103108
int32_t mActiveHostKernelThreads = 0; // Number of currently active threads on the host for kernels
104109
uint32_t mNActiveThreadsOuterLoop = 1; // Number of threads currently running an outer loop
105110

@@ -174,15 +179,6 @@ HighResTimer& GPUReconstructionProcessing::getTimer(const char* name, int32_t nu
174179
return timer->timer[num];
175180
}
176181

177-
#define GPUCA_KRNL(x_class, ...) \
178-
template <> \
179-
constexpr const char* GPUReconstructionProcessing::GetKernelName<GPUCA_M_KRNL_TEMPLATE(x_class)>() \
180-
{ \
181-
return GPUCA_M_STR(GPUCA_M_KRNL_NAME(x_class)); \
182-
}
183-
#include "GPUReconstructionKernelList.h"
184-
#undef GPUCA_KRNL
185-
186182
} // namespace o2::gpu
187183

188184
#endif

GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -610,25 +610,25 @@ void GPUReconstructionCUDABackend::PrintKernelOccupancies()
610610
GPUChkErr(cuOccupancyMaxActiveBlocksPerMultiprocessor(&maxBlocks, *mInternals->kernelFunctions[i], threads, 0));
611611
GPUChkErr(cuFuncGetAttribute(&nRegs, CU_FUNC_ATTRIBUTE_NUM_REGS, *mInternals->kernelFunctions[i]));
612612
GPUChkErr(cuFuncGetAttribute(&sMem, CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, *mInternals->kernelFunctions[i]));
613-
GPUInfo("Kernel: %50s Block size: %4d, Maximum active blocks: %3d, Suggested blocks: %3d, Regs: %3d, smem: %3d", mInternals->kernelNames[i].c_str(), threads, maxBlocks, suggestedBlocks, nRegs, sMem);
613+
GPUInfo("Kernel: %50s Block size: %4d, Maximum active blocks: %3d, Suggested blocks: %3d, Regs: %3d, smem: %3d", GetKernelName(i).c_str(), threads, maxBlocks, suggestedBlocks, nRegs, sMem);
614614
}
615615
}
616616

617617
void GPUReconstructionCUDA::loadKernelModules(bool perKernel)
618618
{
619619
uint32_t j = 0;
620620
#define GPUCA_KRNL(x_class, ...) \
621-
getRTCkernelNum<GPUCA_M_KRNL_TEMPLATE(x_class)>(mInternals->kernelFunctions.size()); \
621+
if (GetKernelNum<GPUCA_M_KRNL_TEMPLATE(x_class)>() != j) { \
622+
GPUFatal("kernel numbers out of sync"); \
623+
} \
622624
mInternals->kernelFunctions.emplace_back(new CUfunction); \
623-
mInternals->kernelNames.emplace_back(GPUCA_M_STR(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class)))); \
624625
if (mProcessingSettings.debugLevel >= 3) { \
625626
GPUInfo("Loading kernel %s (j = %u)", GPUCA_M_STR(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))), j); \
626627
} \
627628
GPUChkErr(cuModuleGetFunction(mInternals->kernelFunctions.back().get(), *mInternals->kernelModules[perKernel ? j : 0], GPUCA_M_STR(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))))); \
628629
j++;
629630
#include "GPUReconstructionKernelList.h"
630631
#undef GPUCA_KRNL
631-
632632
if (j != mInternals->kernelModules.size()) {
633633
GPUFatal("Did not load all kernels (%u < %u)", j, (uint32_t)mInternals->kernelModules.size());
634634
}

GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -46,11 +46,6 @@ class GPUReconstructionCUDABackend : public GPUReconstructionDeviceBase
4646
void runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... args);
4747
template <class T, int32_t I = 0>
4848
gpu_reconstruction_kernels::krnlProperties getKernelPropertiesBackend();
49-
template <class T, int32_t I>
50-
class backendInternal;
51-
52-
template <class T, int32_t I = 0>
53-
static int32_t getRTCkernelNum(int32_t k = -1);
5449

5550
void getRTCKernelCalls(std::vector<std::string>& kernels);
5651

GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@ namespace o2::gpu
3030
struct GPUReconstructionCUDAInternals {
3131
std::vector<std::unique_ptr<CUmodule>> kernelModules; // module for RTC compilation
3232
std::vector<std::unique_ptr<CUfunction>> kernelFunctions; // vector of ptrs to RTC kernels
33-
std::vector<std::string> kernelNames; // names of kernels
3433
cudaStream_t Streams[GPUCA_MAX_STREAMS]; // Pointer to array of CUDA Streams
3534

3635
static void getArgPtrs(const void** pArgs) {}

GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ inline void GPUReconstructionCUDABackend::runKernelBackendInternal(const krnlSet
5555
#endif
5656
pArgs[arg_offset] = &y.index;
5757
GPUReconstructionCUDAInternals::getArgPtrs(&pArgs[arg_offset + 1], args...);
58-
GPUChkErr(cuLaunchKernel(*mInternals->kernelFunctions[getRTCkernelNum<T, I>()], x.nBlocks, 1, 1, x.nThreads, 1, 1, 0, mInternals->Streams[x.stream], (void**)pArgs, nullptr));
58+
GPUChkErr(cuLaunchKernel(*mInternals->kernelFunctions[GetKernelNum<T, I>()], x.nBlocks, 1, 1, x.nThreads, 1, 1, 0, mInternals->Streams[x.stream], (void**)pArgs, nullptr));
5959
}
6060
}
6161

@@ -111,20 +111,6 @@ void GPUReconstructionCUDABackend::runKernelBackend(const krnlSetupArgs<T, I, Ar
111111
#include "GPUReconstructionKernelList.h"
112112
#undef GPUCA_KRNL
113113

114-
template <class T, int32_t I>
115-
int32_t GPUReconstructionCUDABackend::getRTCkernelNum(int32_t k)
116-
{
117-
static int32_t num = k;
118-
if (num < 0) {
119-
throw std::runtime_error("Invalid kernel");
120-
}
121-
return num;
122-
}
123-
124-
#define GPUCA_KRNL(x_class, ...) template int32_t GPUReconstructionCUDABackend::getRTCkernelNum<GPUCA_M_KRNL_TEMPLATE(x_class)>(int32_t k);
125-
#include "GPUReconstructionKernelList.h"
126-
#undef GPUCA_KRNL
127-
128114
void GPUReconstructionCUDABackend::getRTCKernelCalls(std::vector<std::string>& kernels)
129115
{
130116
#define GPUCA_KRNL(...) kernels.emplace_back(GPUCA_M_STR(GPUCA_KRNLGPU(__VA_ARGS__)));

GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -399,7 +399,7 @@ int32_t GPUReconstructionOCLBackend::ExitDevice_Runtime()
399399
clReleaseMemObject(mInternals->mem_gpu);
400400
clReleaseMemObject(mInternals->mem_constant);
401401
for (uint32_t i = 0; i < mInternals->kernels.size(); i++) {
402-
clReleaseKernel(mInternals->kernels[i].first);
402+
clReleaseKernel(mInternals->kernels[i]);
403403
}
404404
mInternals->kernels.clear();
405405
}

GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,6 @@ class GPUReconstructionOCLBackend : public GPUReconstructionDeviceBase
5656

5757
template <class T, int32_t I = 0>
5858
int32_t AddKernel();
59-
template <class T, int32_t I = 0>
60-
uint32_t FindKernel();
6159
template <class T, int32_t I = 0, typename... Args>
6260
void runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... args);
6361
template <class T, int32_t I = 0>

GPU/GPUTracking/Base/opencl/GPUReconstructionOCLIncludesHost.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ struct GPUReconstructionOCLInternals {
4949
cl_mem mem_host;
5050
cl_program program;
5151

52-
std::vector<std::pair<cl_kernel, std::string>> kernels;
52+
std::vector<cl_kernel> kernels;
5353
};
5454
} // namespace o2::gpu
5555

GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx

Lines changed: 2 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -58,20 +58,6 @@ void GPUReconstructionOCLBackend::runKernelBackend(const krnlSetupArgs<T, I, Arg
5858
std::apply([this, &args](auto&... vals) { runKernelBackendInternal<T, I, Args...>(args.s, vals...); }, args.v);
5959
}
6060

61-
template <class T, int32_t I>
62-
inline uint32_t GPUReconstructionOCLBackend::FindKernel()
63-
{
64-
std::string name(GetKernelName<T, I>());
65-
66-
for (uint32_t k = 0; k < mInternals->kernels.size(); k++) {
67-
if (mInternals->kernels[k].second == name) {
68-
return (k);
69-
}
70-
}
71-
GPUError("Could not find OpenCL kernel %s", name.c_str());
72-
throw ::std::runtime_error("Requested unsupported OpenCL kernel");
73-
}
74-
7561
template <class T, int32_t I>
7662
int32_t GPUReconstructionOCLBackend::AddKernel()
7763
{
@@ -84,15 +70,14 @@ int32_t GPUReconstructionOCLBackend::AddKernel()
8470
GPUError("Error creating OPENCL Kernel: %s", name.c_str());
8571
return 1;
8672
}
87-
mInternals->kernels.emplace_back(krnl, name);
73+
mInternals->kernels.emplace_back(krnl);
8874
return 0;
8975
}
9076

9177
template <class S, class T, int32_t I>
9278
S& GPUReconstructionOCLBackend::getKernelObject()
9379
{
94-
static uint32_t krnl = FindKernel<T, I>();
95-
return mInternals->kernels[krnl].first;
80+
return mInternals->kernels[GetKernelNum<T, I>()];
9681
}
9782

9883
int32_t GPUReconstructionOCLBackend::AddKernels()

0 commit comments

Comments
 (0)