GPU: Fix onefile compile mode after all the refactoring

davidrohr · davidrohr · commit 255597cf5251 · 2025-04-21T16:07:10.000+02:00
diff --git a/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h b/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h
@@ -62,20 +62,6 @@
   }
 #endif
 
-// GPU Host wrappers for kernel
-#define GPUCA_KRNL_HOST(x_class, ...) \
-  GPUCA_KRNLGPU(x_class, __VA_ARGS__) \
-  template <> class GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::backendInternal<GPUCA_M_KRNL_TEMPLATE(x_class)> { \
-   public: \
-    template <typename T, typename... Args> \
-    static inline void runKernelBackendMacro(const krnlSetupTime& _xyz, T* me, const Args&... args) \
-    { \
-      auto& x = _xyz.x; \
-      auto& y = _xyz.y; \
-      GPUCA_KRNL_CALL(x_class, __VA_ARGS__) \
-    } \
-  };
-
 #endif // GPUCA_GPUCODE
 
 #define GPUCA_KRNL_LB(x_class, x_attributes, ...) GPUCA_KRNL(x_class, (REG, (GPUCA_M_CAT(GPUCA_LB_, GPUCA_M_KRNL_NAME(x_class))), GPUCA_M_STRIP(x_attributes)), __VA_ARGS__)
diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h
@@ -48,6 +48,8 @@ class GPUReconstructionCUDA : public GPUReconstructionProcessing::KernelInterfac
   void runKernelBackend(const krnlSetupTime& _xyz, const Args&... args);
   template <class T, int32_t I = 0, typename... Args>
   void runKernelBackendTimed(const krnlSetupTime& _xyz, const Args&... args);
+  template <class T, int32_t I>
+  struct kernelBackendMacro; // declaration only: GPUReconstructionCUDAKernels.cu specializes it per kernel via GPUCA_KRNL and calls its static run()
 
   template <class T, class S>
   friend GPUh() void GPUCommonAlgorithm::sortOnDevice(auto* rec, int32_t stream, T* begin, size_t N, const S& comp);
diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu
@@ -35,7 +35,7 @@ inline void GPUReconstructionCUDA::runKernelBackendTimed(const krnlSetupTime& _x
 {
 #if !defined(GPUCA_KERNEL_COMPILE_MODE) || GPUCA_KERNEL_COMPILE_MODE != 1
   if (!GetProcessingSettings().rtc.enable) {
-    backendInternal<T, I>::runKernelBackendMacro(_xyz, this, args...);
+    kernelBackendMacro<T, I>::run(_xyz, this, args...);
   } else
 #endif
   {
@@ -82,9 +82,6 @@ inline void GPUReconstructionCUDA::runKernelBackend(const krnlSetupTime& _xyz, c
     #define GPUCA_KRNL_DEFONLY // COMPILE_MODE = rdc
   #endif
 
-  #define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, ...) \
-    GPUCA_KRNL_HOST(x_class, x_attributes, x_arguments, x_forward, x_types, __VA_ARGS__)
-
   #ifndef __HIPCC__ // CUDA version
     #define GPUCA_KRNL_CALL(x_class, ...) \
       GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))<<<x.nBlocks, x.nThreads, 0, me->mInternals->Streams[x.stream]>>>(GPUCA_CONSMEM_CALL y.index, args...);
@@ -95,6 +92,18 @@ inline void GPUReconstructionCUDA::runKernelBackend(const krnlSetupTime& _xyz, c
       hipLaunchKernelGGL(HIP_KERNEL_NAME(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))), dim3(x.nBlocks), dim3(x.nThreads), 0, me->mInternals->Streams[x.stream], GPUCA_CONSMEM_CALL y.index, args...);
   #endif // __HIPCC__
 
+  #define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, ...) /* expanded once per entry of GPUReconstructionKernelList.h (included below) */ \
+    GPUCA_KRNLGPU(x_class, x_attributes, x_arguments, x_forward, x_types, __VA_ARGS__) /* defined elsewhere; presumably emits the device-side kernel - confirm */ \
+    template <> struct GPUReconstructionCUDA::kernelBackendMacro<GPUCA_M_KRNL_TEMPLATE(x_class)> { /* host-side launcher used by runKernelBackendTimed */ \
+      template <typename... Args> \
+      static inline void run(const GPUReconstructionProcessing::krnlSetupTime& _xyz, auto* me, const Args&... args) \
+      { \
+        auto& x = _xyz.x; /* x, y, me and args are referenced by name inside the GPUCA_KRNL_CALL expansion */ \
+        auto& y = _xyz.y; \
+        GPUCA_KRNL_CALL(x_class, x_attributes, x_arguments, x_forward, x_types, __VA_ARGS__) /* must already be defined above: CUDA <<<>>> or hipLaunchKernelGGL variant */ \
+      } \
+    };
+
   #include "GPUReconstructionKernelList.h"
   #undef GPUCA_KRNL
 #endif // ---------- COMPILE_MODE = onefile | rdc ----------