1515#define GPUCA_GPUTYPE_OPENCL
1616#define __OPENCL_HOST__
1717
18+ #define CL_TARGET_OPENCL_VERSION 220
19+ #include < CL/opencl.h>
20+ #include < CL/cl_ext.h>
21+ #include < vector>
22+ #include < string>
23+ #include < memory>
24+ #include " GPULogging.h"
25+
1826#include " GPUReconstructionOCL.h"
19- #include " GPUReconstructionOCLInternals.h"
2027#include " GPUReconstructionIncludes.h"
2128
2229using namespace o2 ::gpu;
@@ -26,6 +33,154 @@ using namespace o2::gpu;
2633#include < typeinfo>
2734#include < cstdlib>
2835
36+ namespace o2 ::gpu
37+ {
38+
39+ static const char * opencl_error_string (int32_t errorcode)
40+ {
41+ switch (errorcode) {
42+ case CL_SUCCESS:
43+ return " Success!" ;
44+ case CL_DEVICE_NOT_FOUND:
45+ return " Device not found." ;
46+ case CL_DEVICE_NOT_AVAILABLE:
47+ return " Device not available" ;
48+ case CL_COMPILER_NOT_AVAILABLE:
49+ return " Compiler not available" ;
50+ case CL_MEM_OBJECT_ALLOCATION_FAILURE:
51+ return " Memory object allocation failure" ;
52+ case CL_OUT_OF_RESOURCES:
53+ return " Out of resources" ;
54+ case CL_OUT_OF_HOST_MEMORY:
55+ return " Out of host memory" ;
56+ case CL_PROFILING_INFO_NOT_AVAILABLE:
57+ return " Profiling information not available" ;
58+ case CL_MEM_COPY_OVERLAP:
59+ return " Memory copy overlap" ;
60+ case CL_IMAGE_FORMAT_MISMATCH:
61+ return " Image format mismatch" ;
62+ case CL_IMAGE_FORMAT_NOT_SUPPORTED:
63+ return " Image format not supported" ;
64+ case CL_BUILD_PROGRAM_FAILURE:
65+ return " Program build failure" ;
66+ case CL_MAP_FAILURE:
67+ return " Map failure" ;
68+ case CL_INVALID_VALUE:
69+ return " Invalid value" ;
70+ case CL_INVALID_DEVICE_TYPE:
71+ return " Invalid device type" ;
72+ case CL_INVALID_PLATFORM:
73+ return " Invalid platform" ;
74+ case CL_INVALID_DEVICE:
75+ return " Invalid device" ;
76+ case CL_INVALID_CONTEXT:
77+ return " Invalid context" ;
78+ case CL_INVALID_QUEUE_PROPERTIES:
79+ return " Invalid queue properties" ;
80+ case CL_INVALID_COMMAND_QUEUE:
81+ return " Invalid command queue" ;
82+ case CL_INVALID_HOST_PTR:
83+ return " Invalid host pointer" ;
84+ case CL_INVALID_MEM_OBJECT:
85+ return " Invalid memory object" ;
86+ case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR:
87+ return " Invalid image format descriptor" ;
88+ case CL_INVALID_IMAGE_SIZE:
89+ return " Invalid image size" ;
90+ case CL_INVALID_SAMPLER:
91+ return " Invalid sampler" ;
92+ case CL_INVALID_BINARY:
93+ return " Invalid binary" ;
94+ case CL_INVALID_BUILD_OPTIONS:
95+ return " Invalid build options" ;
96+ case CL_INVALID_PROGRAM:
97+ return " Invalid program" ;
98+ case CL_INVALID_PROGRAM_EXECUTABLE:
99+ return " Invalid program executable" ;
100+ case CL_INVALID_KERNEL_NAME:
101+ return " Invalid kernel name" ;
102+ case CL_INVALID_KERNEL_DEFINITION:
103+ return " Invalid kernel definition" ;
104+ case CL_INVALID_KERNEL:
105+ return " Invalid kernel" ;
106+ case CL_INVALID_ARG_INDEX:
107+ return " Invalid argument index" ;
108+ case CL_INVALID_ARG_VALUE:
109+ return " Invalid argument value" ;
110+ case CL_INVALID_ARG_SIZE:
111+ return " Invalid argument size" ;
112+ case CL_INVALID_KERNEL_ARGS:
113+ return " Invalid kernel arguments" ;
114+ case CL_INVALID_WORK_DIMENSION:
115+ return " Invalid work dimension" ;
116+ case CL_INVALID_WORK_GROUP_SIZE:
117+ return " Invalid work group size" ;
118+ case CL_INVALID_WORK_ITEM_SIZE:
119+ return " Invalid work item size" ;
120+ case CL_INVALID_GLOBAL_OFFSET:
121+ return " Invalid global offset" ;
122+ case CL_INVALID_EVENT_WAIT_LIST:
123+ return " Invalid event wait list" ;
124+ case CL_INVALID_EVENT:
125+ return " Invalid event" ;
126+ case CL_INVALID_OPERATION:
127+ return " Invalid operation" ;
128+ case CL_INVALID_GL_OBJECT:
129+ return " Invalid OpenGL object" ;
130+ case CL_INVALID_BUFFER_SIZE:
131+ return " Invalid buffer size" ;
132+ case CL_INVALID_MIP_LEVEL:
133+ return " Invalid mip-map level" ;
134+ default :
135+ return " Unknown Errorcode" ;
136+ }
137+ }
138+
139+ #define GPUFailedMsg (x ) GPUFailedMsgA(x, __FILE__, __LINE__)
140+ #define GPUFailedMsgI (x ) GPUFailedMsgAI(x, __FILE__, __LINE__)
141+
142+ static inline int64_t OCLsetKernelParameters_helper (cl_kernel& k, int32_t i)
143+ {
144+ return 0 ;
145+ }
146+
147+ template <typename T, typename ... Args>
148+ static inline int64_t OCLsetKernelParameters_helper (cl_kernel& kernel, int32_t i, const T& firstParameter, const Args&... restOfParameters)
149+ {
150+ int64_t retVal = clSetKernelArg (kernel, i, sizeof (T), &firstParameter);
151+ if (retVal) {
152+ return retVal;
153+ }
154+ return OCLsetKernelParameters_helper (kernel, i + 1 , restOfParameters...);
155+ }
156+
157+ template <typename ... Args>
158+ static inline int64_t OCLsetKernelParameters (cl_kernel& kernel, const Args&... args)
159+ {
160+ return OCLsetKernelParameters_helper (kernel, 0 , args...);
161+ }
162+
163+ static inline int64_t clExecuteKernelA (cl_command_queue queue, cl_kernel krnl, size_t local_size, size_t global_size, cl_event* pEvent, cl_event* wait = nullptr , cl_int nWaitEvents = 1 )
164+ {
165+ return clEnqueueNDRangeKernel (queue, krnl, 1 , nullptr , &global_size, &local_size, wait == nullptr ? 0 : nWaitEvents, wait, pEvent);
166+ }
167+
168+ struct GPUReconstructionOCLInternals {
169+ cl_platform_id platform;
170+ cl_device_id device;
171+ cl_context context;
172+ cl_command_queue command_queue[GPUCA_MAX_STREAMS];
173+ cl_mem mem_gpu;
174+ cl_mem mem_constant;
175+ cl_mem mem_host;
176+ cl_program program;
177+
178+ std::vector<std::pair<cl_kernel, std::string>> kernels;
179+ };
180+
181+ static_assert (std::is_convertible<cl_event, void *>::value, " OpenCL event type incompatible to deviceEvent" );
182+ } // namespace o2::gpu
183+
29184#define GPUErrorReturn (...) \
30185 { \
31186 GPUError (__VA_ARGS__); \
@@ -43,6 +198,77 @@ QGET_LD_BINARY_SYMBOLS(GPUReconstructionOCLCode_src);
43198QGET_LD_BINARY_SYMBOLS (GPUReconstructionOCLCode_spirv);
44199#endif
45200
201+ template <class T , int32_t I, typename ... Args>
202+ inline void GPUReconstructionOCLBackend::runKernelBackendInternal (const krnlSetupTime& _xyz, const Args&... args)
203+ {
204+ cl_kernel k = _xyz.y .num > 1 ? getKernelObject<cl_kernel, T, I, true >() : getKernelObject<cl_kernel, T, I, false >();
205+ auto & x = _xyz.x ;
206+ auto & y = _xyz.y ;
207+ auto & z = _xyz.z ;
208+ if (y.num <= 1 ) {
209+ GPUFailedMsg (OCLsetKernelParameters (k, mInternals ->mem_gpu , mInternals ->mem_constant , y.start , args...));
210+ } else {
211+ GPUFailedMsg (OCLsetKernelParameters (k, mInternals ->mem_gpu , mInternals ->mem_constant , y.start , y.num , args...));
212+ }
213+
214+ cl_event ev;
215+ cl_event* evr;
216+ bool tmpEvent = false ;
217+ if (z.ev == nullptr && mProcessingSettings .deviceTimers && mProcessingSettings .debugLevel > 0 ) {
218+ evr = &ev;
219+ tmpEvent = true ;
220+ } else {
221+ evr = (cl_event*)z.ev ;
222+ }
223+ GPUFailedMsg (clExecuteKernelA (mInternals ->command_queue [x.stream ], k, x.nThreads , x.nThreads * x.nBlocks , evr, (cl_event*)z.evList , z.nEvents ));
224+ if (mProcessingSettings .deviceTimers && mProcessingSettings .debugLevel > 0 ) {
225+ cl_ulong time_start, time_end;
226+ GPUFailedMsg (clWaitForEvents (1 , evr));
227+ GPUFailedMsg (clGetEventProfilingInfo (*evr, CL_PROFILING_COMMAND_START, sizeof (time_start), &time_start, nullptr ));
228+ GPUFailedMsg (clGetEventProfilingInfo (*evr, CL_PROFILING_COMMAND_END, sizeof (time_end), &time_end, nullptr ));
229+ _xyz.t = (time_end - time_start) * 1 .e -9f ;
230+ if (tmpEvent) {
231+ GPUFailedMsg (clReleaseEvent (ev));
232+ }
233+ }
234+ }
235+
236+ template <class T , int32_t I>
237+ int32_t GPUReconstructionOCLBackend::AddKernel (bool multi)
238+ {
239+ std::string name (GetKernelName<T, I>());
240+ if (multi) {
241+ name += " _multi" ;
242+ }
243+ std::string kname (" krnl_" + name);
244+
245+ cl_int ocl_error;
246+ cl_kernel krnl = clCreateKernel (mInternals ->program , kname.c_str (), &ocl_error);
247+ if (GPUFailedMsgI (ocl_error)) {
248+ GPUError (" Error creating OPENCL Kernel: %s" , name.c_str ());
249+ return 1 ;
250+ }
251+ mInternals ->kernels .emplace_back (krnl, name);
252+ return 0 ;
253+ }
254+
255+ template <class T , int32_t I>
256+ inline uint32_t GPUReconstructionOCLBackend::FindKernel (int32_t num)
257+ {
258+ std::string name (GetKernelName<T, I>());
259+ if (num > 1 ) {
260+ name += " _multi" ;
261+ }
262+
263+ for (uint32_t k = 0 ; k < mInternals ->kernels .size (); k++) {
264+ if (mInternals ->kernels [k].second == name) {
265+ return (k);
266+ }
267+ }
268+ GPUError (" Could not find OpenCL kernel %s" , name.c_str ());
269+ throw ::std::runtime_error (" Requested unsupported OpenCL kernel" );
270+ }
271+
46272GPUReconstruction* GPUReconstruction_Create_OCL (const GPUSettingsDeviceBackend& cfg) { return new GPUReconstructionOCL (cfg); }
47273
48274GPUReconstructionOCLBackend::GPUReconstructionOCLBackend (const GPUSettingsDeviceBackend& cfg) : GPUReconstructionDeviceBase(cfg, sizeof (GPUReconstructionDeviceBase))
@@ -561,8 +787,7 @@ int32_t GPUReconstructionOCLBackend::GPUDebug(const char* state, int32_t stream,
561787template <class T , int32_t I, typename ... Args>
562788void GPUReconstructionOCLBackend::runKernelBackend (const krnlSetupArgs<T, I, Args...>& args)
563789{
564- cl_kernel k = args.s .y .num > 1 ? getKernelObject<cl_kernel, T, I, true >() : getKernelObject<cl_kernel, T, I, false >();
565- std::apply ([this , &args, &k](auto &... vals) { runKernelBackendInternal (args.s , k, vals...); }, args.v );
790+ std::apply ([this , &args](auto &... vals) { runKernelBackendInternal<T, I, Args...>(args.s , vals...); }, args.v );
566791}
567792
568793template <class S , class T , int32_t I, bool MULTI>
0 commit comments