Skip to content

Commit 8c6dee2

Browse files
committed
GPU OpenCL: Reorganize OpenCL code and remove obsolete internals header
1 parent 92afdd8 commit 8c6dee2

File tree

4 files changed

+231
-252
lines changed

4 files changed

+231
-252
lines changed

GPU/GPUTracking/Base/opencl/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ set(OCL_DEFINECL "-D$<JOIN:$<TARGET_PROPERTY:O2::GPUTracking,COMPILE_DEFINITIONS
3737
)
3838

3939
set(SRCS GPUReconstructionOCL.cxx)
40-
set(HDRS GPUReconstructionOCL.h GPUReconstructionOCLInternals.h)
40+
set(HDRS GPUReconstructionOCL.h)
4141

4242
if (NOT DEFINED GPUCA_OCL_SPIRV_VERSION)
4343
set(GPUCA_OCL_SPIRV_VERSION 1.2)

GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx

Lines changed: 228 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,15 @@
1515
#define GPUCA_GPUTYPE_OPENCL
1616
#define __OPENCL_HOST__
1717

18+
#define CL_TARGET_OPENCL_VERSION 220
19+
#include <CL/opencl.h>
20+
#include <CL/cl_ext.h>
21+
#include <vector>
22+
#include <string>
23+
#include <memory>
24+
#include "GPULogging.h"
25+
1826
#include "GPUReconstructionOCL.h"
19-
#include "GPUReconstructionOCLInternals.h"
2027
#include "GPUReconstructionIncludes.h"
2128

2229
using namespace o2::gpu;
@@ -26,6 +33,154 @@ using namespace o2::gpu;
2633
#include <typeinfo>
2734
#include <cstdlib>
2835

36+
namespace o2::gpu
37+
{
38+
39+
static const char* opencl_error_string(int32_t errorcode)
40+
{
41+
switch (errorcode) {
42+
case CL_SUCCESS:
43+
return "Success!";
44+
case CL_DEVICE_NOT_FOUND:
45+
return "Device not found.";
46+
case CL_DEVICE_NOT_AVAILABLE:
47+
return "Device not available";
48+
case CL_COMPILER_NOT_AVAILABLE:
49+
return "Compiler not available";
50+
case CL_MEM_OBJECT_ALLOCATION_FAILURE:
51+
return "Memory object allocation failure";
52+
case CL_OUT_OF_RESOURCES:
53+
return "Out of resources";
54+
case CL_OUT_OF_HOST_MEMORY:
55+
return "Out of host memory";
56+
case CL_PROFILING_INFO_NOT_AVAILABLE:
57+
return "Profiling information not available";
58+
case CL_MEM_COPY_OVERLAP:
59+
return "Memory copy overlap";
60+
case CL_IMAGE_FORMAT_MISMATCH:
61+
return "Image format mismatch";
62+
case CL_IMAGE_FORMAT_NOT_SUPPORTED:
63+
return "Image format not supported";
64+
case CL_BUILD_PROGRAM_FAILURE:
65+
return "Program build failure";
66+
case CL_MAP_FAILURE:
67+
return "Map failure";
68+
case CL_INVALID_VALUE:
69+
return "Invalid value";
70+
case CL_INVALID_DEVICE_TYPE:
71+
return "Invalid device type";
72+
case CL_INVALID_PLATFORM:
73+
return "Invalid platform";
74+
case CL_INVALID_DEVICE:
75+
return "Invalid device";
76+
case CL_INVALID_CONTEXT:
77+
return "Invalid context";
78+
case CL_INVALID_QUEUE_PROPERTIES:
79+
return "Invalid queue properties";
80+
case CL_INVALID_COMMAND_QUEUE:
81+
return "Invalid command queue";
82+
case CL_INVALID_HOST_PTR:
83+
return "Invalid host pointer";
84+
case CL_INVALID_MEM_OBJECT:
85+
return "Invalid memory object";
86+
case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR:
87+
return "Invalid image format descriptor";
88+
case CL_INVALID_IMAGE_SIZE:
89+
return "Invalid image size";
90+
case CL_INVALID_SAMPLER:
91+
return "Invalid sampler";
92+
case CL_INVALID_BINARY:
93+
return "Invalid binary";
94+
case CL_INVALID_BUILD_OPTIONS:
95+
return "Invalid build options";
96+
case CL_INVALID_PROGRAM:
97+
return "Invalid program";
98+
case CL_INVALID_PROGRAM_EXECUTABLE:
99+
return "Invalid program executable";
100+
case CL_INVALID_KERNEL_NAME:
101+
return "Invalid kernel name";
102+
case CL_INVALID_KERNEL_DEFINITION:
103+
return "Invalid kernel definition";
104+
case CL_INVALID_KERNEL:
105+
return "Invalid kernel";
106+
case CL_INVALID_ARG_INDEX:
107+
return "Invalid argument index";
108+
case CL_INVALID_ARG_VALUE:
109+
return "Invalid argument value";
110+
case CL_INVALID_ARG_SIZE:
111+
return "Invalid argument size";
112+
case CL_INVALID_KERNEL_ARGS:
113+
return "Invalid kernel arguments";
114+
case CL_INVALID_WORK_DIMENSION:
115+
return "Invalid work dimension";
116+
case CL_INVALID_WORK_GROUP_SIZE:
117+
return "Invalid work group size";
118+
case CL_INVALID_WORK_ITEM_SIZE:
119+
return "Invalid work item size";
120+
case CL_INVALID_GLOBAL_OFFSET:
121+
return "Invalid global offset";
122+
case CL_INVALID_EVENT_WAIT_LIST:
123+
return "Invalid event wait list";
124+
case CL_INVALID_EVENT:
125+
return "Invalid event";
126+
case CL_INVALID_OPERATION:
127+
return "Invalid operation";
128+
case CL_INVALID_GL_OBJECT:
129+
return "Invalid OpenGL object";
130+
case CL_INVALID_BUFFER_SIZE:
131+
return "Invalid buffer size";
132+
case CL_INVALID_MIP_LEVEL:
133+
return "Invalid mip-map level";
134+
default:
135+
return "Unknown Errorcode";
136+
}
137+
}
138+
139+
#define GPUFailedMsg(x) GPUFailedMsgA(x, __FILE__, __LINE__)
140+
#define GPUFailedMsgI(x) GPUFailedMsgAI(x, __FILE__, __LINE__)
141+
142+
static inline int64_t OCLsetKernelParameters_helper(cl_kernel& k, int32_t i)
143+
{
144+
return 0;
145+
}
146+
147+
template <typename T, typename... Args>
148+
static inline int64_t OCLsetKernelParameters_helper(cl_kernel& kernel, int32_t i, const T& firstParameter, const Args&... restOfParameters)
149+
{
150+
int64_t retVal = clSetKernelArg(kernel, i, sizeof(T), &firstParameter);
151+
if (retVal) {
152+
return retVal;
153+
}
154+
return OCLsetKernelParameters_helper(kernel, i + 1, restOfParameters...);
155+
}
156+
157+
template <typename... Args>
158+
static inline int64_t OCLsetKernelParameters(cl_kernel& kernel, const Args&... args)
159+
{
160+
return OCLsetKernelParameters_helper(kernel, 0, args...);
161+
}
162+
163+
static inline int64_t clExecuteKernelA(cl_command_queue queue, cl_kernel krnl, size_t local_size, size_t global_size, cl_event* pEvent, cl_event* wait = nullptr, cl_int nWaitEvents = 1)
164+
{
165+
return clEnqueueNDRangeKernel(queue, krnl, 1, nullptr, &global_size, &local_size, wait == nullptr ? 0 : nWaitEvents, wait, pEvent);
166+
}
167+
168+
struct GPUReconstructionOCLInternals {
169+
cl_platform_id platform;
170+
cl_device_id device;
171+
cl_context context;
172+
cl_command_queue command_queue[GPUCA_MAX_STREAMS];
173+
cl_mem mem_gpu;
174+
cl_mem mem_constant;
175+
cl_mem mem_host;
176+
cl_program program;
177+
178+
std::vector<std::pair<cl_kernel, std::string>> kernels;
179+
};
180+
181+
static_assert(std::is_convertible<cl_event, void*>::value, "OpenCL event type incompatible to deviceEvent");
182+
} // namespace o2::gpu
183+
29184
#define GPUErrorReturn(...) \
30185
{ \
31186
GPUError(__VA_ARGS__); \
@@ -43,6 +198,77 @@ QGET_LD_BINARY_SYMBOLS(GPUReconstructionOCLCode_src);
43198
QGET_LD_BINARY_SYMBOLS(GPUReconstructionOCLCode_spirv);
44199
#endif
45200

201+
template <class T, int32_t I, typename... Args>
202+
inline void GPUReconstructionOCLBackend::runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... args)
203+
{
204+
cl_kernel k = _xyz.y.num > 1 ? getKernelObject<cl_kernel, T, I, true>() : getKernelObject<cl_kernel, T, I, false>();
205+
auto& x = _xyz.x;
206+
auto& y = _xyz.y;
207+
auto& z = _xyz.z;
208+
if (y.num <= 1) {
209+
GPUFailedMsg(OCLsetKernelParameters(k, mInternals->mem_gpu, mInternals->mem_constant, y.start, args...));
210+
} else {
211+
GPUFailedMsg(OCLsetKernelParameters(k, mInternals->mem_gpu, mInternals->mem_constant, y.start, y.num, args...));
212+
}
213+
214+
cl_event ev;
215+
cl_event* evr;
216+
bool tmpEvent = false;
217+
if (z.ev == nullptr && mProcessingSettings.deviceTimers && mProcessingSettings.debugLevel > 0) {
218+
evr = &ev;
219+
tmpEvent = true;
220+
} else {
221+
evr = (cl_event*)z.ev;
222+
}
223+
GPUFailedMsg(clExecuteKernelA(mInternals->command_queue[x.stream], k, x.nThreads, x.nThreads * x.nBlocks, evr, (cl_event*)z.evList, z.nEvents));
224+
if (mProcessingSettings.deviceTimers && mProcessingSettings.debugLevel > 0) {
225+
cl_ulong time_start, time_end;
226+
GPUFailedMsg(clWaitForEvents(1, evr));
227+
GPUFailedMsg(clGetEventProfilingInfo(*evr, CL_PROFILING_COMMAND_START, sizeof(time_start), &time_start, nullptr));
228+
GPUFailedMsg(clGetEventProfilingInfo(*evr, CL_PROFILING_COMMAND_END, sizeof(time_end), &time_end, nullptr));
229+
_xyz.t = (time_end - time_start) * 1.e-9f;
230+
if (tmpEvent) {
231+
GPUFailedMsg(clReleaseEvent(ev));
232+
}
233+
}
234+
}
235+
236+
template <class T, int32_t I>
237+
int32_t GPUReconstructionOCLBackend::AddKernel(bool multi)
238+
{
239+
std::string name(GetKernelName<T, I>());
240+
if (multi) {
241+
name += "_multi";
242+
}
243+
std::string kname("krnl_" + name);
244+
245+
cl_int ocl_error;
246+
cl_kernel krnl = clCreateKernel(mInternals->program, kname.c_str(), &ocl_error);
247+
if (GPUFailedMsgI(ocl_error)) {
248+
GPUError("Error creating OPENCL Kernel: %s", name.c_str());
249+
return 1;
250+
}
251+
mInternals->kernels.emplace_back(krnl, name);
252+
return 0;
253+
}
254+
255+
template <class T, int32_t I>
256+
inline uint32_t GPUReconstructionOCLBackend::FindKernel(int32_t num)
257+
{
258+
std::string name(GetKernelName<T, I>());
259+
if (num > 1) {
260+
name += "_multi";
261+
}
262+
263+
for (uint32_t k = 0; k < mInternals->kernels.size(); k++) {
264+
if (mInternals->kernels[k].second == name) {
265+
return (k);
266+
}
267+
}
268+
GPUError("Could not find OpenCL kernel %s", name.c_str());
269+
throw ::std::runtime_error("Requested unsupported OpenCL kernel");
270+
}
271+
46272
GPUReconstruction* GPUReconstruction_Create_OCL(const GPUSettingsDeviceBackend& cfg) { return new GPUReconstructionOCL(cfg); }
47273

48274
GPUReconstructionOCLBackend::GPUReconstructionOCLBackend(const GPUSettingsDeviceBackend& cfg) : GPUReconstructionDeviceBase(cfg, sizeof(GPUReconstructionDeviceBase))
@@ -561,8 +787,7 @@ int32_t GPUReconstructionOCLBackend::GPUDebug(const char* state, int32_t stream,
561787
template <class T, int32_t I, typename... Args>
562788
void GPUReconstructionOCLBackend::runKernelBackend(const krnlSetupArgs<T, I, Args...>& args)
563789
{
564-
cl_kernel k = args.s.y.num > 1 ? getKernelObject<cl_kernel, T, I, true>() : getKernelObject<cl_kernel, T, I, false>();
565-
std::apply([this, &args, &k](auto&... vals) { runKernelBackendInternal(args.s, k, vals...); }, args.v);
790+
std::apply([this, &args](auto&... vals) { runKernelBackendInternal<T, I, Args...>(args.s, vals...); }, args.v);
566791
}
567792

568793
template <class S, class T, int32_t I, bool MULTI>

GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,8 @@ class GPUReconstructionOCLBackend : public GPUReconstructionDeviceBase
5959
int32_t AddKernel(bool multi = false);
6060
template <class T, int32_t I = 0>
6161
uint32_t FindKernel(int32_t num);
62-
template <typename K, typename... Args>
63-
void runKernelBackendInternal(const krnlSetupTime& _xyz, K& k, const Args&... args);
62+
template <class T, int32_t I = 0, typename... Args>
63+
void runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... args);
6464
template <class T, int32_t I = 0>
6565
gpu_reconstruction_kernels::krnlProperties getKernelPropertiesBackend();
6666

0 commit comments

Comments
 (0)