@@ -36,237 +36,4 @@ GPUdii() void GPUTPCCFClusterizer::Thread<0>(int32_t nBlocks, int32_t nThreads,
   tpc::ClusterNative* clusterOut = (onlyMC) ? nullptr : clusterer.mPclusterByRow;
 
   GPUTPCCFClusterizer::computeClustersImpl(get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), clusterer, clusterer.mPmemory->fragment, smem, chargeMap, clusterer.mPfilteredPeakPositions, clusterer.Param().rec, CPU_PTR(&labelAcc), clusterer.mPmemory->counters.nClusters, clusterer.mNMaxClusterPerRow, clusterer.mPclusterInRow, clusterOut, clusterer.mPclusterPosInRow);
-<<<<<<< HEAD
 }
-=======
-}
-
-GPUdii() void GPUTPCCFClusterizer::computeClustersImpl(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread,
-                                                       processorType& clusterer,
-                                                       const CfFragment& fragment,
-                                                       GPUSharedMemory& smem,
-                                                       const Array2D<PackedCharge>& chargeMap,
-                                                       const ChargePos* filteredPeakPositions,
-                                                       const GPUSettingsRec& calib,
-                                                       MCLabelAccumulator* labelAcc,
-                                                       uint32_t clusternum,
-                                                       uint32_t maxClusterPerRow,
-                                                       uint32_t* clusterInRow,
-                                                       tpc::ClusterNative* clusterByRow,
-                                                       uint32_t* clusterPosInRow)
-{
-  uint32_t idx = get_global_id(0);
-
-  // For certain configurations dummy work items are added, so the total
-  // number of work items is divisible by 64.
-  // These dummy items also compute the last cluster but discard the result.
-  ChargePos pos = filteredPeakPositions[CAMath::Min(idx, clusternum - 1)];
-  Charge charge = chargeMap[pos].unpack();
-
-  ClusterAccumulator pc;
-  CPU_ONLY(labelAcc->collect(pos, charge));
-
-  buildCluster(
-    calib,
-    chargeMap,
-    pos,
-    smem.posBcast,
-    smem.buf,
-    smem.innerAboveThreshold,
-    &pc,
-    labelAcc);
-
-  if (idx >= clusternum) {
-    return;
-  }
-  if (fragment.isOverlap(pos.time())) {
-    if (clusterPosInRow) {
-      clusterPosInRow[idx] = maxClusterPerRow;
-    }
-    return;
-  }
-  tpc::ClusterNative myCluster;
-  bool rejectCluster = !pc.toNative(pos, charge, myCluster, clusterer.Param(), fragment.start, chargeMap);
-
-  if (rejectCluster) {
-    if (clusterPosInRow) {
-      clusterPosInRow[idx] = maxClusterPerRow;
-    }
-    return;
-  }
-
-  uint32_t rowIndex = 0;
-  if (clusterByRow != nullptr) {
-    rowIndex = sortIntoBuckets(
-      clusterer,
-      myCluster,
-      pos.row(),
-      maxClusterPerRow,
-      clusterInRow,
-      clusterByRow);
-    if (clusterPosInRow != nullptr) {
-      clusterPosInRow[idx] = rowIndex;
-    }
-  } else if (clusterPosInRow) {
-    rowIndex = clusterPosInRow[idx];
-  }
-
-  CPU_ONLY(labelAcc->commit(pos.row(), rowIndex, maxClusterPerRow));
-}
-
-GPUdii() void GPUTPCCFClusterizer::updateClusterInner(
-  const GPUSettingsRec& calib,
-  uint16_t lid,
-  uint16_t N,
-  const PackedCharge* buf,
-  const ChargePos& pos,
-  ClusterAccumulator* cluster,
-  MCLabelAccumulator* labelAcc,
-  uint8_t* innerAboveThreshold)
-{
-  uint8_t aboveThreshold = 0;
-
-  GPUCA_UNROLL(U(), U())
-  for (uint16_t i = 0; i < N; i++) {
-    Delta2 d = cfconsts::InnerNeighbors[i];
-
-    PackedCharge p = buf[N * lid + i];
-
-    Charge q = cluster->updateInner(p, d);
-
-    CPU_ONLY(labelAcc->collect(pos.delta(d), q));
-
-    aboveThreshold |= (uint8_t(q > calib.tpc.cfInnerThreshold) << i);
-  }
-
-  innerAboveThreshold[lid] = aboveThreshold;
-
-  GPUbarrier();
-}
-
-GPUdii() void GPUTPCCFClusterizer::updateClusterOuter(
-  uint16_t lid,
-  uint16_t N,
-  uint16_t M,
-  uint16_t offset,
-  const PackedCharge* buf,
-  const ChargePos& pos,
-  ClusterAccumulator* cluster,
-  MCLabelAccumulator* labelAcc)
-{
-  GPUCA_UNROLL(U(), U())
-  for (uint16_t i = offset; i < M + offset; i++) {
-    PackedCharge p = buf[N * lid + i];
-
-    Delta2 d = cfconsts::OuterNeighbors[i];
-
-    Charge q = cluster->updateOuter(p, d);
-    static_cast<void>(q); // Avoid unused variable warning on GPU.
-
-    CPU_ONLY(labelAcc->collect(pos.delta(d), q));
-  }
-}
-
-GPUdii() void GPUTPCCFClusterizer::buildCluster(
-  const GPUSettingsRec& calib,
-  const Array2D<PackedCharge>& chargeMap,
-  ChargePos pos,
-  ChargePos* posBcast,
-  PackedCharge* buf,
-  uint8_t* innerAboveThreshold,
-  ClusterAccumulator* myCluster,
-  MCLabelAccumulator* labelAcc)
-{
-  uint16_t ll = get_local_id(0);
-
-  posBcast[ll] = pos;
-  GPUbarrier();
-
-  CfUtils::blockLoad<PackedCharge>(
-    chargeMap,
-    SCRATCH_PAD_WORK_GROUP_SIZE,
-    SCRATCH_PAD_WORK_GROUP_SIZE,
-    ll,
-    0,
-    8,
-    cfconsts::InnerNeighbors,
-    posBcast,
-    buf);
-  updateClusterInner(
-    calib,
-    ll,
-    8,
-    buf,
-    pos,
-    myCluster,
-    labelAcc,
-    innerAboveThreshold);
-
-  uint16_t wgSizeHalf = (SCRATCH_PAD_WORK_GROUP_SIZE + 1) / 2;
-
-  bool inGroup1 = ll < wgSizeHalf;
-
-  uint16_t llhalf = (inGroup1) ? ll : (ll - wgSizeHalf);
-
-  CfUtils::condBlockLoad(
-    chargeMap,
-    wgSizeHalf,
-    SCRATCH_PAD_WORK_GROUP_SIZE,
-    ll,
-    0,
-    16,
-    cfconsts::OuterNeighbors,
-    posBcast,
-    innerAboveThreshold,
-    buf);
-
-  if (inGroup1) {
-    updateClusterOuter(
-      llhalf,
-      16,
-      16,
-      0,
-      buf,
-      pos,
-      myCluster,
-      labelAcc);
-  }
-
-#if defined(GPUCA_GPUCODE)
-  CfUtils::condBlockLoad(
-    chargeMap,
-    wgSizeHalf,
-    SCRATCH_PAD_WORK_GROUP_SIZE,
-    ll,
-    0,
-    16,
-    cfconsts::OuterNeighbors,
-    posBcast + wgSizeHalf,
-    innerAboveThreshold + wgSizeHalf,
-    buf);
-  if (!inGroup1) {
-    updateClusterOuter(
-      llhalf,
-      16,
-      16,
-      0,
-      buf,
-      pos,
-      myCluster,
-      labelAcc);
-  }
-#endif
-}
-
-GPUd() uint32_t GPUTPCCFClusterizer::sortIntoBuckets(processorType& clusterer, const tpc::ClusterNative& cluster, uint32_t row, uint32_t maxElemsPerBucket, uint32_t* elemsInBucket, tpc::ClusterNative* buckets)
-{
-  uint32_t index = CAMath::AtomicAdd(&elemsInBucket[row], 1u);
-  if (index < maxElemsPerBucket) {
-    buckets[maxElemsPerBucket * row + index] = cluster;
-  } else {
-    clusterer.raiseError(GPUErrors::ERROR_CF_ROW_CLUSTER_OVERFLOW, clusterer.mISector * 1000 + row, index, maxElemsPerBucket);
-    CAMath::AtomicExch(&elemsInBucket[row], maxElemsPerBucket);
-  }
-  return index;
-}
->>>>>>> dev
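
Note: the block removed above is the stale dev-side copy of a leftover merge conflict; the surviving implementations of these functions are unchanged elsewhere in the file. The least obvious piece is the bucketing in sortIntoBuckets: each TPC row owns a fixed-capacity bucket, an atomic counter reserves a slot, and on overflow the cluster is dropped, an error is raised, and the counter is clamped to the capacity. A minimal host-side sketch of that pattern (standalone C++ with hypothetical names such as sortIntoBucket and Cluster, not the O2 API; error reporting omitted) could look like this:

```cpp
#include <atomic>
#include <cstdint>
#include <cstdio>
#include <vector>

// Hypothetical standalone types/names, for illustration only.
struct Cluster {
  float qTot = 0.f;
};

// Reserve a slot in the row's fixed-capacity bucket with an atomic counter.
// On overflow the cluster is dropped and the counter is clamped so it never
// grows past the capacity.
uint32_t sortIntoBucket(const Cluster& c, uint32_t row, uint32_t maxPerRow,
                        std::atomic<uint32_t>* countPerRow, std::vector<Cluster>& buckets)
{
  uint32_t index = countPerRow[row].fetch_add(1u); // slot reservation
  if (index < maxPerRow) {
    buckets[maxPerRow * row + index] = c; // flat row-major bucket storage
  } else {
    countPerRow[row].store(maxPerRow); // clamp; cluster is discarded
  }
  return index;
}

int main()
{
  constexpr uint32_t nRows = 4, maxPerRow = 8;
  std::atomic<uint32_t> countPerRow[nRows] = {};
  std::vector<Cluster> buckets(nRows * maxPerRow);
  uint32_t slot = sortIntoBucket(Cluster{42.f}, 2, maxPerRow, countPerRow, buckets);
  std::printf("cluster stored at slot %u of row 2\n", slot);
  return 0;
}
```

On the GPU the same reservation is done with CAMath::AtomicAdd and the clamp with CAMath::AtomicExch, as in the removed lines above.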