@@ -23,13 +23,15 @@ using namespace o2::gpu;
2323#include " GPUReconstructionIncludesDeviceAll.h"
2424
2525#include " GPUReconstructionCUDAKernelsSpecialize.inc"
26+ #include " GPUReconstructionProcessingKernels.inc"
27+ template void GPUReconstructionProcessing::KernelInterface<GPUReconstructionCUDA, GPUReconstructionDeviceBase>::runKernelVirtual(const int num, const void * args);
2628
2729#if defined(__HIPCC__) && defined(GPUCA_HAS_GLOBAL_SYMBOL_CONSTANT_MEM)
2830__global__ void gGPUConstantMemBuffer_dummy (int32_t * p) { *p = *(int32_t *)&gGPUConstantMemBuffer ; }
2931#endif
3032
3133template <class T , int32_t I, typename ... Args>
32- inline void GPUReconstructionCUDA::runKernelBackendInternal (const krnlSetupTime& _xyz, const Args&... args)
34+ inline void GPUReconstructionCUDA::runKernelBackendTimed (const krnlSetupTime& _xyz, const Args&... args)
3335{
3436#if !defined(GPUCA_KERNEL_COMPILE_MODE) || GPUCA_KERNEL_COMPILE_MODE != 1
3537 if (!GetProcessingSettings ().rtc .enable ) {
@@ -52,18 +54,18 @@ inline void GPUReconstructionCUDA::runKernelBackendInternal(const krnlSetupTime&
5254}
5355
5456template <class T , int32_t I, typename ... Args>
55- void GPUReconstructionCUDA::runKernelBackend (const krnlSetupArgs<T, I, Args...>& args)
57+ inline void GPUReconstructionCUDA::runKernelBackend (const krnlSetupTime& _xyz, const Args& ... args)
5658{
57- auto & x = args. s .x ;
58- auto & z = args. s .z ;
59+ auto & x = _xyz .x ;
60+ auto & z = _xyz .z ;
5961 if (z.evList ) {
6062 for (int32_t k = 0 ; k < z.nEvents ; k++) {
6163 GPUChkErr (cudaStreamWaitEvent (mInternals ->Streams [x.stream ], ((cudaEvent_t*)z.evList )[k], 0 ));
6264 }
6365 }
6466 {
65- GPUDebugTiming timer (GetProcessingSettings ().deviceTimers && GetProcessingSettings ().debugLevel > 0 , (deviceEvent*)mDebugEvents , mInternals ->Streams , args. s , this );
66- std::apply ([ this , &args]( auto &... vals) { this -> runKernelBackendInternal <T, I, Args...>(args. s , vals ...); }, args. v );
67+ GPUDebugTiming timer (GetProcessingSettings ().deviceTimers && GetProcessingSettings ().debugLevel > 0 , (deviceEvent*)mDebugEvents , mInternals ->Streams , _xyz , this );
68+ runKernelBackendTimed <T, I, Args...>(_xyz, args ...);
6769 }
6870 GPUChkErr (cudaGetLastError ());
6971 if (z.ev ) {
@@ -74,31 +76,29 @@ void GPUReconstructionCUDA::runKernelBackend(const krnlSetupArgs<T, I, Args...>&
7476#undef GPUCA_KRNL_REG
7577#define GPUCA_KRNL_REG (args ) __launch_bounds__ (GPUCA_M_MAX2_3(GPUCA_M_STRIP(args)))
7678
77- # if defined(GPUCA_KERNEL_COMPILE_MODE) && GPUCA_KERNEL_COMPILE_MODE == 1 // ---------- COMPILE_MODE = perkernel ----------
78- #define GPUCA_KRNL (x_class, x_attributes, x_arguments, x_forward, x_types, ...) template void GPUReconstructionCUDA::runKernelBackend<GPUCA_M_KRNL_TEMPLATE(x_class)>( const krnlSetupArgs<GPUCA_M_KRNL_TEMPLATE(x_class) GPUCA_M_STRIP(x_types)>& args);
79- # else // ---------- COMPILE_MODE = onefile | rdc ----------
80- # if defined(GPUCA_KERNEL_COMPILE_MODE) && GPUCA_KERNEL_COMPILE_MODE == 2
81- # define GPUCA_KRNL_DEFONLY // COMPILE_MODE = rdc
82- # endif
83-
84- # define GPUCA_KRNL (x_class, x_attributes, x_arguments, x_forward, x_types, ...) \
85- GPUCA_KRNL_HOST (x_class, x_attributes, x_arguments, x_forward, x_types, __VA_ARGS__) \
86- template void GPUReconstructionCUDA::runKernelBackend<GPUCA_M_KRNL_TEMPLATE(x_class)>( const krnlSetupArgs<GPUCA_M_KRNL_TEMPLATE(x_class) GPUCA_M_STRIP(x_types)>& args);
87-
88- # ifndef __HIPCC__ // CUDA version
89- # define GPUCA_KRNL_CALL (x_class, ...) \
90- GPUCA_M_CAT (krnl_, GPUCA_M_KRNL_NAME(x_class))<<<x.nBlocks, x.nThreads, 0, me->mInternals->Streams[x.stream]>>>(GPUCA_CONSMEM_CALL y.index, args...);
91- # else // HIP version
92- # undef GPUCA_KRNL_CUSTOM
93- # define GPUCA_KRNL_CUSTOM ( args ) GPUCA_M_STRIP( args)
94- # define GPUCA_KRNL_CALL (x_class, ...) \
95- hipLaunchKernelGGL (HIP_KERNEL_NAME(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))), dim3(x.nBlocks), dim3(x.nThreads), 0, me->mInternals->Streams[x.stream], GPUCA_CONSMEM_CALL y.index, args...);
96- # endif // __HIPCC__
97-
79+ // clang-format off
80+ #if defined(GPUCA_KERNEL_COMPILE_MODE) && GPUCA_KERNEL_COMPILE_MODE != 1 // ---------- COMPILE_MODE = perkernel ----------
81+ # if defined(GPUCA_KERNEL_COMPILE_MODE) && GPUCA_KERNEL_COMPILE_MODE == 2
82+ # define GPUCA_KRNL_DEFONLY // COMPILE_MODE = rdc
83+ # endif
84+
85+ # define GPUCA_KRNL (x_class, x_attributes, x_arguments, x_forward, x_types, ...) \
86+ GPUCA_KRNL_HOST (x_class, x_attributes, x_arguments, x_forward, x_types, __VA_ARGS__)
87+
88+ #ifndef __HIPCC__ // CUDA version
89+ # define GPUCA_KRNL_CALL (x_class, ...) \
90+ GPUCA_M_CAT (krnl_, GPUCA_M_KRNL_NAME(x_class))<<<x.nBlocks, x.nThreads, 0, me->mInternals->Streams[x.stream]>>>(GPUCA_CONSMEM_CALL y.index, args...);
91+ # else // HIP version
92+ # undef GPUCA_KRNL_CUSTOM
93+ # define GPUCA_KRNL_CUSTOM ( args ) GPUCA_M_STRIP(args)
94+ # define GPUCA_KRNL_CALL (x_class, ...) \
95+ hipLaunchKernelGGL (HIP_KERNEL_NAME(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))), dim3(x.nBlocks), dim3(x.nThreads), 0, me->mInternals->Streams[x.stream], GPUCA_CONSMEM_CALL y.index, args...);
96+ # endif // __HIPCC__
97+
98+ # include " GPUReconstructionKernelList.h "
99+ # undef GPUCA_KRNL
98100#endif // ---------- COMPILE_MODE = onefile | rdc ----------
99-
100- #include " GPUReconstructionKernelList.h"
101- #undef GPUCA_KRNL
101+ // clang-format on
102102
103103#ifndef GPUCA_NO_CONSTANT_MEMORY
104104static GPUReconstructionDeviceBase::deviceConstantMemRegistration registerConstSymbol ([]() {
0 commit comments