Merge pull request #14 from alibuild/alibot-cleanup-13981

ChSonnabend · web-flow · commit 24bf10426eb2 · 2025-03-12T00:10:57.000+01:00
Please consider the following formatting changes to #13981
diff --git a/Common/ML/src/OrtInterface.cxx b/Common/ML/src/OrtInterface.cxx
@@ -163,7 +163,7 @@ void OrtModel::reset(std::unordered_map<std::string, std::string> optionsMap)
                  [&](const std::string& str) { return str.c_str(); });
   }
   if (loggingLevel < 2) {
-    LOG(info) << "(ORT) Model loaded successfully! (input: " <<  printShape(mInputShapes[0]) << ", output: " << printShape(mOutputShapes[0]) << ")";
+    LOG(info) << "(ORT) Model loaded successfully! (input: " << printShape(mInputShapes[0]) << ", output: " << printShape(mOutputShapes[0]) << ")";
   }
 }
 
@@ -197,9 +197,9 @@ std::string OrtModel::printShape(const std::vector<int64_t>& v)
   return ss.str();
 }
 
-
-template <class I, class O> 
-std::vector<O> OrtModel::inference(std::vector<I>& input) {
+template <class I, class O>
+std::vector<O> OrtModel::inference(std::vector<I>& input)
+{
   std::vector<int64_t> inputShape{(int64_t)(input.size() / mInputShapes[0][1]), (int64_t)mInputShapes[0][1]};
   std::vector<Ort::Value> inputTensor;
   if constexpr (std::is_same_v<I, OrtDataType::Float16_t>) {
@@ -221,7 +221,6 @@ template std::vector<float> OrtModel::inference<OrtDataType::Float16_t, float>(s
 
 template std::vector<OrtDataType::Float16_t> OrtModel::inference<OrtDataType::Float16_t, OrtDataType::Float16_t>(std::vector<OrtDataType::Float16_t>&);
 
-
 template <class I, class O>
 void OrtModel::inference(I* input, size_t input_size, O* output)
 {
@@ -232,22 +231,19 @@ void OrtModel::inference(I* input, size_t input_size, O* output)
   } else {
     inputTensor = Ort::Value::CreateTensor<I>(pImplOrt->memoryInfo, input, input_size, inputShape.data(), inputShape.size());
   }
-  
+
   std::vector<int64_t> outputShape{inputShape[0], mOutputShapes[0][1]};
   size_t outputSize = (int64_t)(inputShape[0] * mOutputShapes[0][1]);
   Ort::Value outputTensor = Ort::Value::CreateTensor<O>(pImplOrt->memoryInfo, output, outputSize, outputShape.data(), outputShape.size());
-  
-  (pImplOrt->session)->Run(pImplOrt->runOptions, 
-                           inputNamesChar.data(), &inputTensor, 1,
-                           outputNamesChar.data(), &outputTensor, 1);
+
+  (pImplOrt->session)->Run(pImplOrt->runOptions, inputNamesChar.data(), &inputTensor, 1, outputNamesChar.data(), &outputTensor, 1);
 }
 
 template void OrtModel::inference<OrtDataType::Float16_t, float>(OrtDataType::Float16_t*, size_t, float*);
 
 template void OrtModel::inference<float, float>(float*, size_t, float*);
 
-
-template <class I, class O> 
+template <class I, class O>
 std::vector<O> OrtModel::inference(std::vector<std::vector<I>>& input)
 {
   std::vector<Ort::Value> inputTensor;
diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.cxx
@@ -18,28 +18,29 @@
 
 using namespace o2::gpu;
 
-void GPUTPCNNClusterizer::InitializeProcessor(){}
+void GPUTPCNNClusterizer::InitializeProcessor() {}
 
-void GPUTPCNNClusterizer::SetMaxData(const GPUTrackingInOutPointers& io){}
+void GPUTPCNNClusterizer::SetMaxData(const GPUTrackingInOutPointers& io) {}
 
-void* GPUTPCNNClusterizer::setIOPointers(void* mem) {
-  if (nnClusterizerDtype == 0 && nnClusterizerElementSize > 0){
+void* GPUTPCNNClusterizer::setIOPointers(void* mem)
+{
+  if (nnClusterizerDtype == 0 && nnClusterizerElementSize > 0) {
     computePointerWithAlignment(mem, inputData16, nnClusterizerBatchedMode * nnClusterizerElementSize);
-  } else if (nnClusterizerDtype == 1 && nnClusterizerElementSize > 0){
+  } else if (nnClusterizerDtype == 1 && nnClusterizerElementSize > 0) {
     computePointerWithAlignment(mem, inputData32, nnClusterizerBatchedMode * nnClusterizerElementSize);
   }
   computePointerWithAlignment(mem, peakPositions, nnClusterizerBatchedMode);
-  computePointerWithAlignment(mem, clusterFlags, 2*nnClusterizerBatchedMode);
+  computePointerWithAlignment(mem, clusterFlags, 2 * nnClusterizerBatchedMode);
   computePointerWithAlignment(mem, centralCharges, nnClusterizerBatchedMode);
   computePointerWithAlignment(mem, outputDataClass, nnClusterizerBatchedMode);
-  if(nnClusterizerModelClassNumOutputNodes > 0) {
+  if (nnClusterizerModelClassNumOutputNodes > 0) {
     computePointerWithAlignment(mem, modelProbabilities, nnClusterizerBatchedMode * nnClusterizerModelClassNumOutputNodes);
   }
   if (!nnClusterizerUseCfRegression) {
-    if(nnClusterizerModelReg1NumOutputNodes > 0) {
+    if (nnClusterizerModelReg1NumOutputNodes > 0) {
       computePointerWithAlignment(mem, outputDataReg1, nnClusterizerBatchedMode * nnClusterizerModelReg1NumOutputNodes);
     }
-    if(nnClusterizerModelReg2NumOutputNodes > 0) {
+    if (nnClusterizerModelReg2NumOutputNodes > 0) {
       computePointerWithAlignment(mem, outputDataReg2, nnClusterizerBatchedMode * nnClusterizerModelReg2NumOutputNodes);
     }
   }
@@ -49,7 +50,8 @@ void* GPUTPCNNClusterizer::setIOPointers(void* mem) {
   return mem;
 }
 
-void GPUTPCNNClusterizer::RegisterMemoryAllocation() {
+void GPUTPCNNClusterizer::RegisterMemoryAllocation()
+{
   AllocateAndInitializeLate();
   int32_t memType = GPUMemoryResource::MEMORY_SCRATCH | GPUMemoryResource::MEMORY_STACK;
   mMemoryId = mRec->RegisterMemoryAllocation(this, &GPUTPCNNClusterizer::setIOPointers, memType, "TPCNNClusterer", GPUMemoryReuse{GPUMemoryReuse::REUSE_1TO1, GPUMemoryReuse::NNClusterer, (uint16_t)(mISector % mRec->GetProcessingSettings().nTPCClustererLanes)});
diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.h
@@ -20,7 +20,7 @@
 
 namespace o2::OrtDataType
 {
-  struct Float16_t;
+struct Float16_t;
 }
 
 namespace o2::gpu
@@ -58,16 +58,16 @@ class GPUTPCNNClusterizer : public GPUProcessor
 
   // Memory allocation for neural network
   uint class2_elements = 0;
-  float* inputData32=nullptr;
-  OrtDataType::Float16_t* inputData16=nullptr;
-  float* outputDataClass=nullptr;
-  float* modelProbabilities=nullptr;
-  float* outputDataReg1=nullptr;
-  float* outputDataReg2=nullptr;
+  float* inputData32 = nullptr;
+  OrtDataType::Float16_t* inputData16 = nullptr;
+  float* outputDataClass = nullptr;
+  float* modelProbabilities = nullptr;
+  float* outputDataReg1 = nullptr;
+  float* outputDataReg2 = nullptr;
 
-  ChargePos* peakPositions=nullptr;
-  bool* clusterFlags=nullptr; // mSplitInTime, mSplitInPad. Techincally both flags are set in the same way -> ClusterAccumulator.cx=nullptrx
-  float* centralCharges=nullptr;
+  ChargePos* peakPositions = nullptr;
+  bool* clusterFlags = nullptr; // mSplitInTime, mSplitInPad. Technically both flags are set in the same way -> ClusterAccumulator.cxx
+  float* centralCharges = nullptr;
   int16_t mMemoryId = -1;
 }; // class GPUTPCNNClusterizer
 
diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx
@@ -19,7 +19,8 @@
 
 using namespace o2::gpu;
 
-GPUTPCNNClusterizerHost::GPUTPCNNClusterizerHost(const GPUSettingsProcessingNNclusterizer& settings, GPUTPCNNClusterizer& clusterer) {
+GPUTPCNNClusterizerHost::GPUTPCNNClusterizerHost(const GPUSettingsProcessingNNclusterizer& settings, GPUTPCNNClusterizer& clusterer)
+{
   OrtOptions = {
     {"model-path", settings.nnClassificationPath},
     {"device", settings.nnInferenceDevice},
@@ -30,8 +31,7 @@ GPUTPCNNClusterizerHost::GPUTPCNNClusterizerHost(const GPUSettingsProcessingNNcl
     {"enable-optimizations", std::to_string(settings.nnInferenceEnableOrtOptimization)},
     {"enable-profiling", std::to_string(settings.nnInferenceOrtProfiling)},
     {"profiling-output-path", settings.nnInferenceOrtProfilingPath},
-    {"logging-level", std::to_string(settings.nnInferenceVerbosity)}
-  };
+    {"logging-level", std::to_string(settings.nnInferenceVerbosity)}};
 
   model_class.init(OrtOptions);
   clusterer.nnClusterizerModelClassNumOutputNodes = model_class.getNumOutputNodes()[0][1];
diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.h
@@ -24,7 +24,7 @@ using namespace o2::ml;
 
 namespace o2::OrtDataType
 {
-  struct Float16_t;
+struct Float16_t;
 }
 
 namespace o2::gpu
@@ -45,17 +45,18 @@ class GPUTPCNNClusterizerHost
   std::unordered_map<std::string, std::string> OrtOptions;
   o2::ml::OrtModel model_class, model_reg_1, model_reg_2; // For splitting clusters
   std::vector<std::string> reg_model_paths;
- private:
 
+ private:
   // Avoid including CommonUtils/StringUtils.h
-  std::vector<std::string> splitString(const std::string& input, const std::string& delimiter) {
+  std::vector<std::string> splitString(const std::string& input, const std::string& delimiter)
+  {
     std::vector<std::string> tokens;
     std::size_t pos = 0;
     std::size_t found;
 
     while ((found = input.find(delimiter, pos)) != std::string::npos) {
-        tokens.push_back(input.substr(pos, found - pos));
-        pos = found + delimiter.length();
+      tokens.push_back(input.substr(pos, found - pos));
+      pos = found + delimiter.length();
     }
     tokens.push_back(input.substr(pos));
 
diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx
@@ -148,9 +148,9 @@ GPUd() void GPUTPCNNClusterizerKernels::fillInputData(int32_t nBlocks, int32_t n
       for (int t = -clustererNN.nnClusterizerSizeInputTime; t <= clustererNN.nnClusterizerSizeInputTime; t++) {
         if (!is_boundary) {
           ChargePos tmp_pos(row + r, pad + p, time + t);
-          if (r == 0 && !clustererNN.clusterFlags[2*glo_idx] && std::abs(p) < 3 && std::abs(t) < 3 && p != 0 && t != 0) { // ordering is done for short circuit optimization
-            clustererNN.clusterFlags[2*glo_idx] = CfUtils::isPeak(isPeakMap[tmp_pos]);
-            clustererNN.clusterFlags[2*glo_idx + 1] = clustererNN.clusterFlags[2*glo_idx];
+          if (r == 0 && !clustererNN.clusterFlags[2 * glo_idx] && std::abs(p) < 3 && std::abs(t) < 3 && p != 0 && t != 0) { // ordering is done for short circuit optimization
+            clustererNN.clusterFlags[2 * glo_idx] = CfUtils::isPeak(isPeakMap[tmp_pos]);
+            clustererNN.clusterFlags[2 * glo_idx + 1] = clustererNN.clusterFlags[2 * glo_idx];
           }
           if (dtype == 0) {
             clustererNN.inputData16[write_idx] = (OrtDataType::Float16_t)(static_cast<float>(chargeMap[tmp_pos].unpack()) / central_charge);
@@ -222,12 +222,12 @@ GPUd() void GPUTPCNNClusterizerKernels::publishClustersReg1(uint glo_idx, GPUSha
     }
 
     pc.setFull(clustererNN.centralCharges[glo_idx] * clustererNN.outputDataReg1[model_output_index + 4],
-      static_cast<float>(clustererNN.peakPositions[glo_idx].pad()) + clustererNN.outputDataReg1[model_output_index],
-      clustererNN.outputDataReg1[model_output_index + 2],
-      static_cast<float>((clusterer.mPmemory->fragment).start) + static_cast<float>(clustererNN.peakPositions[glo_idx].time()) + clustererNN.outputDataReg1[model_output_index + 1],
-      clustererNN.outputDataReg1[model_output_index + 3],
-      clustererNN.clusterFlags[2*glo_idx],
-      clustererNN.clusterFlags[2*glo_idx + 1]);
+               static_cast<float>(clustererNN.peakPositions[glo_idx].pad()) + clustererNN.outputDataReg1[model_output_index],
+               clustererNN.outputDataReg1[model_output_index + 2],
+               static_cast<float>((clusterer.mPmemory->fragment).start) + static_cast<float>(clustererNN.peakPositions[glo_idx].time()) + clustererNN.outputDataReg1[model_output_index + 1],
+               clustererNN.outputDataReg1[model_output_index + 3],
+               clustererNN.clusterFlags[2 * glo_idx],
+               clustererNN.clusterFlags[2 * glo_idx + 1]);
 
     tpc::ClusterNative myCluster;
     bool rejectCluster = !pc.toNative(clustererNN.peakPositions[glo_idx], clustererNN.centralCharges[glo_idx], myCluster, clusterer.Param());
@@ -302,12 +302,12 @@ GPUd() void GPUTPCNNClusterizerKernels::publishClustersReg2(uint glo_idx, GPUSha
 
     // Cluster 1
     pc.setFull(clustererNN.centralCharges[glo_idx] * clustererNN.outputDataReg2[model_output_index + 8],
-      static_cast<float>(clustererNN.peakPositions[glo_idx].pad()) + clustererNN.outputDataReg2[model_output_index],
-      clustererNN.outputDataReg2[model_output_index + 4],
-      static_cast<float>((clusterer.mPmemory->fragment).start) + static_cast<float>(clustererNN.peakPositions[glo_idx].time()) + clustererNN.outputDataReg2[model_output_index + 2],
-      clustererNN.outputDataReg2[model_output_index + 6],
-      clustererNN.clusterFlags[2*glo_idx],
-      clustererNN.clusterFlags[2*glo_idx + 1]);
+               static_cast<float>(clustererNN.peakPositions[glo_idx].pad()) + clustererNN.outputDataReg2[model_output_index],
+               clustererNN.outputDataReg2[model_output_index + 4],
+               static_cast<float>((clusterer.mPmemory->fragment).start) + static_cast<float>(clustererNN.peakPositions[glo_idx].time()) + clustererNN.outputDataReg2[model_output_index + 2],
+               clustererNN.outputDataReg2[model_output_index + 6],
+               clustererNN.clusterFlags[2 * glo_idx],
+               clustererNN.clusterFlags[2 * glo_idx + 1]);
 
     tpc::ClusterNative myCluster;
     bool rejectCluster = !pc.toNative(clustererNN.peakPositions[glo_idx], clustererNN.centralCharges[glo_idx], myCluster, clusterer.Param());
@@ -337,12 +337,12 @@ GPUd() void GPUTPCNNClusterizerKernels::publishClustersReg2(uint glo_idx, GPUSha
 
     // Cluster 2
     pc.setFull(clustererNN.centralCharges[glo_idx] * clustererNN.outputDataReg2[model_output_index + 9],
-      static_cast<float>(clustererNN.peakPositions[glo_idx].pad()) + clustererNN.outputDataReg2[model_output_index + 1],
-      clustererNN.outputDataReg2[model_output_index + 5],
-      static_cast<float>((clusterer.mPmemory->fragment).start) + static_cast<float>(clustererNN.peakPositions[glo_idx].time()) + clustererNN.outputDataReg2[model_output_index + 3],
-      clustererNN.outputDataReg2[model_output_index + 7],
-      clustererNN.clusterFlags[2*glo_idx],
-      clustererNN.clusterFlags[2*glo_idx + 1]);
+               static_cast<float>(clustererNN.peakPositions[glo_idx].pad()) + clustererNN.outputDataReg2[model_output_index + 1],
+               clustererNN.outputDataReg2[model_output_index + 5],
+               static_cast<float>((clusterer.mPmemory->fragment).start) + static_cast<float>(clustererNN.peakPositions[glo_idx].time()) + clustererNN.outputDataReg2[model_output_index + 3],
+               clustererNN.outputDataReg2[model_output_index + 7],
+               clustererNN.clusterFlags[2 * glo_idx],
+               clustererNN.clusterFlags[2 * glo_idx + 1]);
 
     rejectCluster = !pc.toNative(clustererNN.peakPositions[glo_idx], clustererNN.centralCharges[glo_idx], myCluster, clusterer.Param());
     if (rejectCluster) {