 #include "MCLabelAccumulator.h"
 #endif

-using namespace GPUCA_NAMESPACE::gpu;
-using namespace GPUCA_NAMESPACE::gpu::tpccf;
+using namespace o2::gpu;
+using namespace o2::gpu::tpccf;

 template <>
-GPUdii() void GPUTPCNNClusterizer::Thread<0>(int nBlocks, int nThreads, int iBlock, int iThread, GPUSharedMemory& smem, processorType& clusterer, char onlyMC)
+GPUdii() void GPUTPCNNClusterizer::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int8_t onlyMC)
 {
   Array2D<PackedCharge> chargeMap(reinterpret_cast<PackedCharge*>(clusterer.mPchargeMap));
   CPU_ONLY(
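
The recurring change in this diff is the move from platform-dependent `int`/`uint`/`ushort`/`char` to the fixed-width `<cstdint>` aliases (`int32_t`, `uint32_t`, `uint16_t`, `int8_t`), so host and device builds agree on integer width and signedness. A minimal standalone sketch of the guarantee being bought here (not O2 code):

```cpp
#include <cstdint>

// uint/ushort are POSIX typedefs of unspecified width, and plain char has
// implementation-defined signedness; the <cstdint> aliases pin both down
// identically for every host and device compiler.
static_assert(sizeof(std::int32_t) == 4, "exactly 32 bits");
static_assert(sizeof(std::uint16_t) == 2, "exactly 16 bits");
static_assert(std::int8_t{-1} < 0, "int8_t is signed, unlike plain char on some ABIs");
```
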
@@ -91,19 +91,19 @@ bool GPUTPCNNClusterizer::isBoundary(int row, int pad, int global_shift, const G
 }

 template <class T>
-GPUd() void GPUTPCNNClusterizer::nn_clusterizer(int nBlocks, int nThreads, int iBlock, int iThread,
+GPUd() void GPUTPCNNClusterizer::nn_clusterizer(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread,
                                                 processorType& clusterer,
                                                 const CfFragment& fragment,
                                                 GPUSharedMemory& smem,
                                                 const Array2D<PackedCharge>& chargeMap,
                                                 const ChargePos* filteredPeakPositions,
                                                 const GPUSettingsRec& calib,
                                                 MCLabelAccumulator* labelAcc,
-                                                uint clusternum,
-                                                uint maxClusterPerRow,
-                                                uint* clusterInRow,
+                                                uint32_t clusternum,
+                                                uint32_t maxClusterPerRow,
+                                                uint32_t* clusterInRow,
                                                 tpc::ClusterNative* clusterByRow,
-                                                uint* clusterPosInRow)
+                                                uint32_t* clusterPosInRow)
 {

   uint glo_idx = get_global_id(0) * clusterer.nnClusterizerBatchedMode;
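
`nn_clusterizer` maps each work item to a block of `nnClusterizerBatchedMode` consecutive peaks through `glo_idx`. A toy model of that indexing, with `batch` and `nPeaks` as illustrative stand-ins for `nnClusterizerBatchedMode` and the filtered-peak count (the tail guard is an assumption about how the real loop bounds-checks):

```cpp
#include <cstdint>
#include <cstdio>

int main()
{
  // Work item `tid` starts at tid * batch and handles `batch` consecutive
  // peaks, skipping anything past the end of the peak list.
  const std::uint32_t nPeaks = 10, batch = 4, nWorkItems = 3;
  for (std::uint32_t tid = 0; tid < nWorkItems; tid++) {
    std::uint32_t gloIdx = tid * batch;
    for (std::uint32_t i = 0; i < batch && gloIdx + i < nPeaks; i++) {
      std::printf("work item %u -> peak %u\n", tid, gloIdx + i);
    }
  }
  return 0;
}
```
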
@@ -422,21 +422,21 @@ GPUd() void GPUTPCNNClusterizer::nn_clusterizer(int nBlocks, int nThreads, int i
   }
 }

-GPUdii() void GPUTPCNNClusterizer::computeClustersImpl(int nBlocks, int nThreads, int iBlock, int iThread,
+GPUdii() void GPUTPCNNClusterizer::computeClustersImpl(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread,
                                                        processorType& clusterer,
                                                        const CfFragment& fragment,
                                                        GPUSharedMemory& smem,
                                                        const Array2D<PackedCharge>& chargeMap,
                                                        const ChargePos* filteredPeakPositions,
                                                        const GPUSettingsRec& calib,
                                                        MCLabelAccumulator* labelAcc,
-                                                       uint clusternum,
-                                                       uint maxClusterPerRow,
-                                                       uint* clusterInRow,
+                                                       uint32_t clusternum,
+                                                       uint32_t maxClusterPerRow,
+                                                       uint32_t* clusterInRow,
                                                        tpc::ClusterNative* clusterByRow,
-                                                       uint* clusterPosInRow)
+                                                       uint32_t* clusterPosInRow)
 {
-  uint idx = get_global_id(0);
+  uint32_t idx = get_global_id(0);

   // For certain configurations dummy work items are added, so the total
   // number of work items is divisible by 64.
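
The dummy-work-item comment is about padding the launch size so every work group is full; the kernel then has to bounds-check `idx` itself. A small sketch of the round-up arithmetic, assuming 64 is the scheduling granularity the comment names:

```cpp
#include <cstdint>

// Round a work size up to the next multiple of the given granularity.
// Work items in the padded tail must return early, as the guard on `idx`
// in the kernel does.
constexpr std::uint32_t roundUpToMultiple(std::uint32_t n, std::uint32_t granularity)
{
  return ((n + granularity - 1) / granularity) * granularity;
}

static_assert(roundUpToMultiple(130, 64) == 192, "130 peaks -> 3 full groups of 64");
```
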
@@ -478,7 +478,7 @@ GPUdii() void GPUTPCNNClusterizer::computeClustersImpl(int nBlocks, int nThreads
     return;
   }

-  uint rowIndex = 0;
+  uint32_t rowIndex = 0;
   if (clusterByRow != nullptr) {
     rowIndex = sortIntoBuckets(
       clusterer,
@@ -499,8 +499,8 @@ GPUdii() void GPUTPCNNClusterizer::computeClustersImpl(int nBlocks, int nThreads

 GPUdii() void GPUTPCNNClusterizer::updateClusterInner(
   const GPUSettingsRec& calib,
-  ushort lid,
-  ushort N,
+  uint16_t lid,
+  uint16_t N,
   const PackedCharge* buf,
   const ChargePos& pos,
   ClusterAccumulator* cluster,
@@ -510,15 +510,14 @@ GPUdii() void GPUTPCNNClusterizer::updateClusterInner(
   uint8_t aboveThreshold = 0;

   GPUCA_UNROLL(U(), U())
-  for (ushort i = 0; i < N; i++) {
+  for (uint16_t i = 0; i < N; i++) {
     Delta2 d = cfconsts::InnerNeighbors[i];

     PackedCharge p = buf[N * lid + i];

     Charge q = cluster->updateInner(p, d);

-    CPU_ONLY(
-      labelAcc->collect(pos.delta(d), q));
+    CPU_ONLY(labelAcc->collect(pos.delta(d), q));

     aboveThreshold |= (uint8_t(q > calib.tpc.cfInnerThreshold) << i);
   }
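
The `aboveThreshold` accumulation packs one flag per inner neighbour into a byte: bit `i` is set when neighbour `i`'s charge clears `cfInnerThreshold`. A self-contained toy version of the same bit-packing:

```cpp
#include <cstdint>
#include <cstdio>

int main()
{
  // One bit per neighbour, set when that neighbour's charge exceeds the
  // threshold; mirrors the OR-of-shifted-bools in updateClusterInner.
  const float charges[8] = {1.f, 5.f, 0.f, 7.f, 2.f, 9.f, 0.f, 3.f};
  const float threshold = 4.f;
  std::uint8_t aboveThreshold = 0;
  for (std::uint16_t i = 0; i < 8; i++) {
    aboveThreshold |= static_cast<std::uint8_t>(std::uint8_t(charges[i] > threshold) << i);
  }
  // Neighbours 1, 3 and 5 are above threshold -> mask 0x2a.
  std::printf("mask = 0x%02x\n", static_cast<unsigned>(aboveThreshold));
  return 0;
}
```
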
@@ -529,26 +528,25 @@ GPUdii() void GPUTPCNNClusterizer::updateClusterInner(
 }

 GPUdii() void GPUTPCNNClusterizer::updateClusterOuter(
-  ushort lid,
-  ushort N,
-  ushort M,
-  ushort offset,
+  uint16_t lid,
+  uint16_t N,
+  uint16_t M,
+  uint16_t offset,
   const PackedCharge* buf,
   const ChargePos& pos,
   ClusterAccumulator* cluster,
   MCLabelAccumulator* labelAcc)
 {
   GPUCA_UNROLL(U(), U())
-  for (ushort i = offset; i < M + offset; i++) {
+  for (uint16_t i = offset; i < M + offset; i++) {
     PackedCharge p = buf[N * lid + i];

     Delta2 d = cfconsts::OuterNeighbors[i];

     Charge q = cluster->updateOuter(p, d);
     static_cast<void>(q); // Avoid unused variable warning on GPU.

-    CPU_ONLY(
-      labelAcc->collect(pos.delta(d), q));
+    CPU_ONLY(labelAcc->collect(pos.delta(d), q));
   }
 }

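
`CPU_ONLY` wraps the MC-label bookkeeping so it only compiles in the host build; the device build drops the call entirely. The real definition lives in the O2 GPU framework headers; a minimal sketch of the idea, assuming a CUDA/HIP device-compilation macro is the discriminator:

```cpp
// Sketch only: the framework's actual CPU_ONLY may key on different
// macros. The shape is a conditional expansion that erases the argument
// in device code and passes it through on the host.
#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)
#define CPU_ONLY_SKETCH(x)
#else
#define CPU_ONLY_SKETCH(x) x
#endif
```
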
@@ -562,7 +560,7 @@ GPUdii() void GPUTPCNNClusterizer::buildCluster(
   ClusterAccumulator* myCluster,
   MCLabelAccumulator* labelAcc)
 {
-  ushort ll = get_local_id(0);
+  uint16_t ll = get_local_id(0);

   posBcast[ll] = pos;
   GPUbarrier();
@@ -587,11 +585,11 @@ GPUdii() void GPUTPCNNClusterizer::buildCluster(
     labelAcc,
     innerAboveThreshold);

-  ushort wgSizeHalf = (SCRATCH_PAD_WORK_GROUP_SIZE + 1) / 2;
+  uint16_t wgSizeHalf = (SCRATCH_PAD_WORK_GROUP_SIZE + 1) / 2;

   bool inGroup1 = ll < wgSizeHalf;

-  ushort llhalf = (inGroup1) ? ll : (ll - wgSizeHalf);
+  uint16_t llhalf = (inGroup1) ? ll : (ll - wgSizeHalf);

   CfUtils::condBlockLoad(
     chargeMap,
@@ -643,14 +641,14 @@ GPUdii() void GPUTPCNNClusterizer::buildCluster(
 #endif
 }

-GPUd() uint GPUTPCNNClusterizer::sortIntoBuckets(processorType& clusterer, const tpc::ClusterNative& cluster, uint row, uint maxElemsPerBucket, uint* elemsInBucket, tpc::ClusterNative* buckets)
+GPUd() uint32_t GPUTPCNNClusterizer::sortIntoBuckets(processorType& clusterer, const tpc::ClusterNative& cluster, uint32_t row, uint32_t maxElemsPerBucket, uint32_t* elemsInBucket, tpc::ClusterNative* buckets)
 {
-  uint index = CAMath::AtomicAdd(&elemsInBucket[row], 1u);
+  uint32_t index = CAMath::AtomicAdd(&elemsInBucket[row], 1u);
   if (index < maxElemsPerBucket) {
     buckets[maxElemsPerBucket * row + index] = cluster;
   } else {
     clusterer.raiseError(GPUErrors::ERROR_CF_ROW_CLUSTER_OVERFLOW, clusterer.mISlice * 1000 + row, index, maxElemsPerBucket);
     CAMath::AtomicExch(&elemsInBucket[row], maxElemsPerBucket);
   }
   return index;
-}
+}
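
`sortIntoBuckets` is a lock-free slot reservation: `AtomicAdd` claims a unique index per cluster, the write happens only if the index is in range, and on overflow the counter is clamped back to capacity after raising the error. A host-side sketch of the same pattern with `std::atomic` (names are illustrative, not O2 API):

```cpp
#include <atomic>
#include <cstdint>
#include <cstdio>
#include <vector>

// Atomically claim an index; callers write only if it is in range, and
// on overflow the counter is clamped, mirroring the AtomicExch above.
std::uint32_t reserveSlot(std::atomic<std::uint32_t>& counter, std::uint32_t capacity)
{
  std::uint32_t index = counter.fetch_add(1u);
  if (index >= capacity) {
    counter.store(capacity);
  }
  return index;
}

int main()
{
  std::atomic<std::uint32_t> used{0};
  std::vector<int> bucket(4, -1);
  for (int value = 0; value < 6; value++) {
    std::uint32_t slot = reserveSlot(used, static_cast<std::uint32_t>(bucket.size()));
    if (slot < bucket.size()) {
      bucket[slot] = value; // safe: this slot belongs to this iteration alone
    }
  }
  std::printf("stored %u of 6 values\n", used.load()); // prints "stored 4 of 6 values"
  return 0;
}
```
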