Fix for kernels.cmake and naming

ChSonnabend · ChSonnabend · commit e70e4d98bcf9 · 2025-06-10T09:40:52.000+02:00
diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx
@@ -51,7 +51,7 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::run
 }
 
 template <>
-GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::fillInputNN>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint32_t batchStart)
+GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::fillInputNN>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t withMC, uint32_t batchStart)
 {
   uint32_t glo_idx = get_global_id(0);
   auto& clusterer = processors.tpcClusterer[sector];
@@ -111,7 +111,7 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::fil
 }
 
 template <>
-GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::fillInputNNSingleElement>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint32_t batchStart)
+GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::fillInputNNSingleElement>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t withMC, uint32_t batchStart)
 {
   uint32_t glo_idx = get_global_id(0);
   auto& clusterer = processors.tpcClusterer[sector];
@@ -182,7 +182,7 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::fil
 }
 
 template <>
-GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::determineClass1Labels>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint32_t batchStart)
+GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::determineClass1Labels>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t withMC, uint32_t batchStart)
 {
   uint32_t glo_idx = get_global_id(0);
   if (dtype == 0) {
@@ -193,7 +193,7 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::det
 }
 
 template <>
-GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::determineClass2Labels>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint32_t batchStart)
+GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::determineClass2Labels>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t withMC, uint32_t batchStart)
 {
   auto& clustererNN = processors.tpcNNClusterer[sector];
   uint32_t glo_idx = get_global_id(0);
@@ -461,7 +461,7 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::pub
 
 // ---------------------------------
 template <>
-GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::publishDeconvolutionFlags>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart)
+GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::publishDeconvolutionFlags>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t withMC, uint batchStart)
 {
   // Implements identical publishing logic as the heuristic clusterizer and deconvolution kernel
   uint32_t idx = get_global_id(0);
diff --git a/GPU/GPUTracking/kernels.cmake b/GPU/GPUTracking/kernels.cmake
@@ -111,7 +111,7 @@ o2_gpu_add_kernel("GPUTPCCFChargeMapFiller, findFragmentStart"            "= TPC
 o2_gpu_add_kernel("GPUTPCCFPeakFinder"                                    "= TPCCLUSTERFINDER"                                    LB)
 o2_gpu_add_kernel("GPUTPCCFNoiseSuppression, noiseSuppression"            "= TPCCLUSTERFINDER"                                    LB)
 o2_gpu_add_kernel("GPUTPCCFNoiseSuppression, updatePeaks"                 "= TPCCLUSTERFINDER"                                    LB)
-o2_gpu_add_kernel("GPUTPCCFDeconvolution"                                 "= TPCCLUSTERFINDER"                                    LB, bool overwriteCharge)
+o2_gpu_add_kernel("GPUTPCCFDeconvolution"                                 "= TPCCLUSTERFINDER"                                    LB bool overwriteCharge)
 o2_gpu_add_kernel("GPUTPCCFClusterizer"                                   "= TPCCLUSTERFINDER"                                    LB int8_t onlyMC)
 o2_gpu_add_kernel("GPUTPCCFMCLabelFlattener, setRowOffsets"               "= TPCCLUSTERFINDER")
 o2_gpu_add_kernel("GPUTPCCFMCLabelFlattener, flatten"                     "= TPCCLUSTERFINDER"                                    NO GPUTPCLinearLabels* out)
@@ -127,14 +127,14 @@ o2_gpu_add_kernel("GPUTPCCFGather"                                        "="
 o2_gpu_add_kernel("GPUTrackingRefitKernel, mode0asGPU"                    "= GLOBALREFIT "                                        LB)
 o2_gpu_add_kernel("GPUTrackingRefitKernel, mode1asTrackParCov"            "= GLOBALREFIT "                                        LB)
 if(onnxruntime_FOUND)
-o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, runCfClusterizer"          "= TPCNNCLUSTERFINDER"                                  LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart)
-o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, fillInputNN"               "= TPCNNCLUSTERFINDER"                                  LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart)
-o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, fillInputNNSingleElement"  "= TPCNNCLUSTERFINDER"                                  LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart)
-o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, determineClass1Labels"     "= TPCNNCLUSTERFINDER"                                  LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart)
-o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, determineClass2Labels"     "= TPCNNCLUSTERFINDER"                                  LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart)
-o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, publishClass1Regression"   "= TPCNNCLUSTERFINDER"                                  LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart)
-o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, publishClass2Regression"   "= TPCNNCLUSTERFINDER"                                  LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart)
-o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, publishDeconvolutionFlags" "= TPCNNCLUSTERFINDER"                                  LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart)
+o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, runCfClusterizer"          "= TPCNNCLUSTERFINDER"                                  LB uint8_t sector int8_t dtype int8_t withMC uint32_t batchStart)
+o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, fillInputNN"               "= TPCNNCLUSTERFINDER"                                  LB uint8_t sector int8_t dtype int8_t withMC uint32_t batchStart)
+o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, fillInputNNSingleElement"  "= TPCNNCLUSTERFINDER"                                  LB uint8_t sector int8_t dtype int8_t withMC uint32_t batchStart)
+o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, determineClass1Labels"     "= TPCNNCLUSTERFINDER"                                  LB uint8_t sector int8_t dtype int8_t withMC uint32_t batchStart)
+o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, determineClass2Labels"     "= TPCNNCLUSTERFINDER"                                  LB uint8_t sector int8_t dtype int8_t withMC uint32_t batchStart)
+o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, publishClass1Regression"   "= TPCNNCLUSTERFINDER"                                  LB uint8_t sector int8_t dtype int8_t withMC uint32_t batchStart)
+o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, publishClass2Regression"   "= TPCNNCLUSTERFINDER"                                  LB uint8_t sector int8_t dtype int8_t withMC uint32_t batchStart)
+o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, publishDeconvolutionFlags" "= TPCNNCLUSTERFINDER"                                  LB uint8_t sector int8_t dtype int8_t withMC uint32_t batchStart)
 endif()
 
 o2_gpu_kernel_add_parameter(NEIGHBOURS_FINDER_MAX_NNEIGHUP

Original file line number	Diff line number	Diff line change
`@@ -51,7 +51,7 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::run`
`51`	`51`	`}`
`52`	`52`
`53`	`53`	`template <>`
`54`		`-GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::fillInputNN>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint32_t batchStart)`
	`54`	`+GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::fillInputNN>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t withMC, uint32_t batchStart)`
`55`	`55`	`{`
`56`	`56`	`uint32_t glo_idx = get_global_id(0);`
`57`	`57`	`auto& clusterer = processors.tpcClusterer[sector];`
`@@ -111,7 +111,7 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::fil`
`111`	`111`	`}`
`112`	`112`
`113`	`113`	`template <>`
`114`		`-GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::fillInputNNSingleElement>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint32_t batchStart)`
	`114`	`+GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::fillInputNNSingleElement>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t withMC, uint32_t batchStart)`
`115`	`115`	`{`
`116`	`116`	`uint32_t glo_idx = get_global_id(0);`
`117`	`117`	`auto& clusterer = processors.tpcClusterer[sector];`
`@@ -182,7 +182,7 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::fil`
`182`	`182`	`}`
`183`	`183`
`184`	`184`	`template <>`
`185`		`-GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::determineClass1Labels>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint32_t batchStart)`
	`185`	`+GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::determineClass1Labels>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t withMC, uint32_t batchStart)`
`186`	`186`	`{`
`187`	`187`	`uint32_t glo_idx = get_global_id(0);`
`188`	`188`	`if (dtype == 0) {`
`@@ -193,7 +193,7 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::det`
`193`	`193`	`}`
`194`	`194`
`195`	`195`	`template <>`
`196`		`-GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::determineClass2Labels>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint32_t batchStart)`
	`196`	`+GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::determineClass2Labels>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t withMC, uint32_t batchStart)`
`197`	`197`	`{`
`198`	`198`	`auto& clustererNN = processors.tpcNNClusterer[sector];`
`199`	`199`	`uint32_t glo_idx = get_global_id(0);`
`@@ -461,7 +461,7 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::pub`
`461`	`461`
`462`	`462`	`// ---------------------------------`
`463`	`463`	`template <>`
`464`		`-GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::publishDeconvolutionFlags>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart)`
	`464`	`+GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::publishDeconvolutionFlags>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t withMC, uint batchStart)`
`465`	`465`	`{`
`466`	`466`	`// Implements identical publishing logic as the heuristic clusterizer and deconvolution kernel`
`467`	`467`	`uint32_t idx = get_global_id(0);`