Please consider the following formatting changes

alibuild · alibuild · commit db0c83650d21 · 2025-03-07T09:37:48.000Z
diff --git a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx
@@ -893,7 +893,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
           (clusterer.nnInternals)->nnClusterizerElementSize = ((2 * (clusterer.nnInternals)->nnClusterizerSizeInputRow + 1) * (2 * (clusterer.nnInternals)->nnClusterizerSizeInputPad + 1) * (2 * (clusterer.nnInternals)->nnClusterizerSizeInputTime + 1)) + ((clusterer.nnInternals)->nnClusterizerAddIndexData ? 3 : 0);
           (clusterer.nnInternals)->nnClusterizerBatchedMode = nn_settings.nnClusterizerBatchedMode;
           (clusterer.nnInternals)->nnClusterizerBoundaryFillValue = nn_settings.nnClusterizerBoundaryFillValue;
-          if (nn_settings.nnClusterizerVerbosity < 0){
+          if (nn_settings.nnClusterizerVerbosity < 0) {
             (clusterer.nnInternals)->nnClusterizerVerbosity = nn_settings.nnInferenceVerbosity;
           } else {
             (clusterer.nnInternals)->nnClusterizerVerbosity = nn_settings.nnClusterizerVerbosity;
@@ -929,7 +929,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
               (clusterer.nnInternals)->model_reg_2.init((clusterer.nnInternals)->OrtOptions);
             }
           }
-          
+
           if ((clusterer.nnInternals)->nnClusterizerUseCfRegression || (int)(nn_settings.nnClusterizerApplyCfDeconvolution)) {
             runKernel<GPUTPCCFDeconvolution>({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSlice}});
             DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 4, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges");
@@ -944,15 +944,15 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
           int evalDtype = (clusterer.nnInternals)->OrtOptions["dtype"].find("32") != std::string::npos;
           (clusterer.nnInternals)->outputDataClass.resize(clusterer.mPmemory->counters.nClusters, -1);
 
-          for(int batch = 0; batch < std::ceil((float)clusterer.mPmemory->counters.nClusters / (clusterer.nnInternals)->nnClusterizerBatchedMode); batch++) {
+          for (int batch = 0; batch < std::ceil((float)clusterer.mPmemory->counters.nClusters / (clusterer.nnInternals)->nnClusterizerBatchedMode); batch++) {
             uint batchStart = batch * (clusterer.nnInternals)->nnClusterizerBatchedMode;
             uint iSize = CAMath::Min((uint)(clusterer.nnInternals)->nnClusterizerBatchedMode, (uint)(clusterer.mPmemory->counters.nClusters - batchStart));
 
             (clusterer.nnInternals)->clusterFlags.clear();
             (clusterer.nnInternals)->peakPositions.clear();
             (clusterer.nnInternals)->centralCharges.clear();
 
-            (clusterer.nnInternals)->clusterFlags.resize(iSize, {0,0});
+            (clusterer.nnInternals)->clusterFlags.resize(iSize, {0, 0});
             (clusterer.nnInternals)->peakPositions.resize(iSize);
             (clusterer.nnInternals)->centralCharges.resize(iSize);
 
@@ -968,7 +968,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
 
             auto start1 = std::chrono::high_resolution_clock::now();
             GPUTPCNNClusterizer::inferenceNetworkClass(clusterer, evalDtype);
-            if ((clusterer.nnInternals)->model_class.getNumOutputNodes()[0][1] == 1){
+            if ((clusterer.nnInternals)->model_class.getNumOutputNodes()[0][1] == 1) {
               runKernel<GPUTPCNNClusterizer, GPUTPCNNClusterizer::determineClass1Labels>({GetGrid(iSize, lane, GPUReconstruction::krnlDeviceType::CPU), {iSlice}}, evalDtype, 0, batchStart); // Assigning class labels
             } else {
               runKernel<GPUTPCNNClusterizer, GPUTPCNNClusterizer::determineClass2Labels>({GetGrid(iSize, lane, GPUReconstruction::krnlDeviceType::CPU), {iSlice}}, evalDtype, 0, batchStart); // Assigning class labels
@@ -989,7 +989,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
           }
 
           auto start1 = std::chrono::high_resolution_clock::now();
-          if((clusterer.nnInternals)->nnClusterizerUseCfRegression) {
+          if ((clusterer.nnInternals)->nnClusterizerUseCfRegression) {
             runKernel<GPUTPCNNClusterizer, GPUTPCNNClusterizer::runCfClusterizer>({GetGrid(clusterer.mPmemory->counters.nClusters, lane, GPUReconstruction::krnlDeviceType::CPU), {iSlice}}, evalDtype, 0, 0); // Running the CF regression kernel - no batching needed: batchStart = 0
           }
           auto stop1 = std::chrono::high_resolution_clock::now();
diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.cxx
@@ -61,8 +61,8 @@ GPUdii() void GPUTPCNNClusterizer::Thread<GPUTPCNNClusterizer::determineClass2La
   uint elem_iterator = glo_idx * (clusterer.nnInternals)->model_class.getNumOutputNodes()[0][1];
   float current_max_prob = 0.f; // If the neural network doesn't contain the softmax as a last layer, the outputs can range in [-infty, infty]
   uint class_label = 0;
-  for(float pIdx = elem_iterator; pIdx < elem_iterator + (clusterer.nnInternals)->model_class.getNumOutputNodes()[0][1]; pIdx++) {
-    if(pIdx == elem_iterator) {
+  for (float pIdx = elem_iterator; pIdx < elem_iterator + (clusterer.nnInternals)->model_class.getNumOutputNodes()[0][1]; pIdx++) {
+    if (pIdx == elem_iterator) {
       current_max_prob = (clusterer.nnInternals)->modelProbabilities[pIdx];
     } else {
       class_label = ((clusterer.nnInternals)->modelProbabilities[pIdx] > current_max_prob ? pIdx : class_label);
@@ -93,24 +93,27 @@ GPUdii() void GPUTPCNNClusterizer::Thread<GPUTPCNNClusterizer::publishClass2Regr
 }
 
 // Apply the neural network to the input data. Note: These are not GPU kernels. We let ONNX take care of that
-void GPUTPCNNClusterizer::inferenceNetworkClass(processorType& clusterer, int8_t dtype, uint batch_idx) {
-  if(dtype == 0){
+void GPUTPCNNClusterizer::inferenceNetworkClass(processorType& clusterer, int8_t dtype, uint batch_idx)
+{
+  if (dtype == 0) {
     (clusterer.nnInternals)->modelProbabilities = (clusterer.nnInternals)->model_class.inference<OrtDataType::Float16_t, float>((clusterer.nnInternals)->inputData16);
   } else {
     (clusterer.nnInternals)->modelProbabilities = (clusterer.nnInternals)->model_class.inference<float, float>((clusterer.nnInternals)->inputData32);
   }
 }
 
-void GPUTPCNNClusterizer::inferenceNetworkReg1(processorType& clusterer, int8_t dtype) {
-  if(dtype == 0){
+void GPUTPCNNClusterizer::inferenceNetworkReg1(processorType& clusterer, int8_t dtype)
+{
+  if (dtype == 0) {
     (clusterer.nnInternals)->outputDataReg1 = (clusterer.nnInternals)->model_reg_1.inference<OrtDataType::Float16_t, float>((clusterer.nnInternals)->inputData16);
   } else {
     (clusterer.nnInternals)->outputDataReg1 = (clusterer.nnInternals)->model_reg_1.inference<float, float>((clusterer.nnInternals)->inputData32);
   }
 }
 
-void GPUTPCNNClusterizer::inferenceNetworkReg2(processorType& clusterer, int8_t dtype) {
-  if(dtype == 0){
+void GPUTPCNNClusterizer::inferenceNetworkReg2(processorType& clusterer, int8_t dtype)
+{
+  if (dtype == 0) {
     (clusterer.nnInternals)->outputDataReg2 = (clusterer.nnInternals)->model_reg_2.inference<OrtDataType::Float16_t, float>((clusterer.nnInternals)->inputData16);
   } else {
     (clusterer.nnInternals)->outputDataReg2 = (clusterer.nnInternals)->model_reg_2.inference<float, float>((clusterer.nnInternals)->inputData32);
@@ -171,18 +174,18 @@ GPUd() void GPUTPCNNClusterizer::fillInputData(int32_t nBlocks, int32_t nThreads
       for (int t = -(clusterer.nnInternals)->nnClusterizerSizeInputTime; t <= (clusterer.nnInternals)->nnClusterizerSizeInputTime; t++) {
         if (!is_boundary) {
           ChargePos tmp_pos(row + r, pad + p, time + t);
-          if (r == 0 && !(clusterer.nnInternals)->clusterFlags[glo_idx][0] && std::abs(p) < 3 && std::abs(t) < 3 && p!=0 && t!=0) { // ordering is done for short circuit optimization
+          if (r == 0 && !(clusterer.nnInternals)->clusterFlags[glo_idx][0] && std::abs(p) < 3 && std::abs(t) < 3 && p != 0 && t != 0) { // ordering is done for short circuit optimization
             (clusterer.nnInternals)->clusterFlags[glo_idx][0] = CfUtils::isPeak(isPeakMap[tmp_pos]);
             (clusterer.nnInternals)->clusterFlags[glo_idx][1] = (clusterer.nnInternals)->clusterFlags[glo_idx][0];
           }
-          if(dtype == 0){
+          if (dtype == 0) {
             (clusterer.nnInternals)->inputData16[write_idx] = (OrtDataType::Float16_t)(static_cast<float>(chargeMap[tmp_pos].unpack()) / central_charge);
           } else {
             (clusterer.nnInternals)->inputData32[write_idx] = static_cast<float>(chargeMap[tmp_pos].unpack()) / central_charge;
           }
         } else {
           // Filling boundary just to make sure that no values are left unintentionally
-          if(dtype == 0){
+          if (dtype == 0) {
             (clusterer.nnInternals)->inputData16[write_idx] = (OrtDataType::Float16_t)(static_cast<float>((clusterer.nnInternals)->nnClusterizerBoundaryFillValue));
           } else {
             (clusterer.nnInternals)->inputData32[write_idx] = static_cast<float>((clusterer.nnInternals)->nnClusterizerBoundaryFillValue);
@@ -193,7 +196,7 @@ GPUd() void GPUTPCNNClusterizer::fillInputData(int32_t nBlocks, int32_t nThreads
     }
   }
   if ((clusterer.nnInternals)->nnClusterizerAddIndexData) {
-    if(dtype == 0){
+    if (dtype == 0) {
       (clusterer.nnInternals)->inputData16[write_idx] = (OrtDataType::Float16_t)(clusterer.mISlice / 36.f);
       (clusterer.nnInternals)->inputData16[write_idx + 1] = (OrtDataType::Float16_t)(row / 152.f);
       (clusterer.nnInternals)->inputData16[write_idx + 2] = (OrtDataType::Float16_t)(static_cast<float>(pad) / clusterer.Param().tpcGeometry.NPads(row));
diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.h
@@ -74,12 +74,11 @@ class GPUTPCNNClusterizer : public GPUKernelTemplate
   static void inferenceNetworkClass(processorType&, int8_t = 0, uint = 0);
   static void inferenceNetworkReg1(processorType&, int8_t = 0);
   static void inferenceNetworkReg2(processorType&, int8_t = 0);
-  
-  private:
 
-    static int padOffset(int, int, const GPUTPCGeometry&);
-    static int rowOffset(int, int);
-    static bool isBoundary(int, int, int, const GPUTPCGeometry&);
+ private:
+  static int padOffset(int, int, const GPUTPCGeometry&);
+  static int rowOffset(int, int);
+  static bool isBoundary(int, int, int, const GPUTPCGeometry&);
 };
 
 } // namespace o2::gpu
diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerInternals.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerInternals.h
@@ -22,34 +22,35 @@
 namespace o2::gpu
 {
 
-class GPUTPCNNClusterizerInternals {
-    public:
-        int nnClusterizerSizeInputRow = 3;
-        int nnClusterizerSizeInputPad = 3;
-        int nnClusterizerSizeInputTime = 3;
-        int nnClusterizerElementSize = -1;
-        bool nnClusterizerAddIndexData = true;
-        float nnClassThreshold = 0.16;
-        bool nnSigmoidTrafoClassThreshold = 1;
-        int nnClusterizerUseCfRegression = 0;
-        int nnClusterizerBatchedMode = 1;
-        int nnClusterizerVerbosity = 0;
-        int nnClusterizerBoundaryFillValue = -1;
-        int nnClusterizerDumpDigits = 0;
-        int nnClusterizerApplyCfDeconvolution = 0;
-    
-        // Memory allocation for neural network
-        uint class2_elements = 0;
-        std::vector<float> inputData32;
-        std::vector<OrtDataType::Float16_t> inputData16;
-        std::vector<float> outputDataClass, modelProbabilities, outputDataReg1, outputDataReg2;
-    
-        std::vector<ChargePos> peakPositions;
-        std::vector<std::vector<bool>> clusterFlags; // mSplitInTime, mSplitInPad. Techincally both flags are set in the same way -> ClusterAccumulator.cxx
-        std::vector<float> centralCharges;
-    
-        std::unordered_map<std::string, std::string> OrtOptions;
-        o2::ml::OrtModel model_class, model_reg_1, model_reg_2; // For splitting clusters
+class GPUTPCNNClusterizerInternals
+{
+ public:
+  int nnClusterizerSizeInputRow = 3;
+  int nnClusterizerSizeInputPad = 3;
+  int nnClusterizerSizeInputTime = 3;
+  int nnClusterizerElementSize = -1;
+  bool nnClusterizerAddIndexData = true;
+  float nnClassThreshold = 0.16;
+  bool nnSigmoidTrafoClassThreshold = 1;
+  int nnClusterizerUseCfRegression = 0;
+  int nnClusterizerBatchedMode = 1;
+  int nnClusterizerVerbosity = 0;
+  int nnClusterizerBoundaryFillValue = -1;
+  int nnClusterizerDumpDigits = 0;
+  int nnClusterizerApplyCfDeconvolution = 0;
+
+  // Memory allocation for neural network
+  uint class2_elements = 0;
+  std::vector<float> inputData32;
+  std::vector<OrtDataType::Float16_t> inputData16;
+  std::vector<float> outputDataClass, modelProbabilities, outputDataReg1, outputDataReg2;
+
+  std::vector<ChargePos> peakPositions;
+  std::vector<std::vector<bool>> clusterFlags; // mSplitInTime, mSplitInPad. Technically both flags are set in the same way -> ClusterAccumulator.cxx
+  std::vector<float> centralCharges;
+
+  std::unordered_map<std::string, std::string> OrtOptions;
+  o2::ml::OrtModel model_class, model_reg_1, model_reg_2; // For splitting clusters
 }; // class GPUTPCNNClusterizerInternals
 
 } // namespace o2::gpu