Commit 1ca9fa0

Please consider the following formatting changes
1 parent e830697 commit 1ca9fa0

File tree

4 files changed (+92 / -94 lines)

Common/ML/src/OrtInterface.cxx

Lines changed: 69 additions & 70 deletions
```diff
@@ -44,7 +44,7 @@ void OrtModel::reset(std::unordered_map<std::string, std::string> optionsMap)
   if (!optionsMap.contains("model-path")) {
     LOG(fatal) << "(ORT) Model path cannot be empty!";
   }
-
+
   if (!optionsMap["model-path"].empty()) {
     modelPath = optionsMap["model-path"];
     device = (optionsMap.contains("device") ? optionsMap["device"] : "CPU");
@@ -83,85 +83,84 @@ void OrtModel::reset(std::unordered_map<std::string, std::string> optionsMap)
 #endif
 #endif
 
-    [same statements as below with the previous formatting; the changes in this hunk are whitespace-only]
+    if (allocateDeviceMemory) {
+      pImplOrt->memoryInfo = Ort::MemoryInfo(dev_mem_str.c_str(), OrtAllocatorType::OrtDeviceAllocator, deviceId, OrtMemType::OrtMemTypeDefault);
+      LOG(info) << "(ORT) Memory info set to on-device memory";
+    }
+
+    if (device == "CPU") {
+      (pImplOrt->sessionOptions).SetIntraOpNumThreads(intraOpNumThreads);
+      if (intraOpNumThreads > 1) {
+        (pImplOrt->sessionOptions).SetExecutionMode(ExecutionMode::ORT_PARALLEL);
+      } else if (intraOpNumThreads == 1) {
+        (pImplOrt->sessionOptions).SetExecutionMode(ExecutionMode::ORT_SEQUENTIAL);
+      }
+      if (loggingLevel < 2) {
+        LOG(info) << "(ORT) CPU execution provider set with " << intraOpNumThreads << " threads";
+      }
+    }
+
+    (pImplOrt->sessionOptions).DisableMemPattern();
+    (pImplOrt->sessionOptions).DisableCpuMemArena();
+
+    if (enableProfiling) {
+      if (optionsMap.contains("profiling-output-path")) {
+        (pImplOrt->sessionOptions).EnableProfiling((optionsMap["profiling-output-path"] + "/ORT_LOG_").c_str());
+      } else {
+        LOG(warning) << "(ORT) If profiling is enabled, optionsMap[\"profiling-output-path\"] should be set. Disabling profiling for now.";
+        (pImplOrt->sessionOptions).DisableProfiling();
+      }
+    } else {
+      (pImplOrt->sessionOptions).DisableProfiling();
+    }
+
+    mInitialized = true;
+
+    (pImplOrt->sessionOptions).SetGraphOptimizationLevel(GraphOptimizationLevel(enableOptimizations));
+    (pImplOrt->sessionOptions).SetLogSeverityLevel(OrtLoggingLevel(loggingLevel));
+
+    pImplOrt->env = std::make_shared<Ort::Env>(
+      OrtLoggingLevel(loggingLevel),
+      (optionsMap["onnx-environment-name"].empty() ? "onnx_model_inference" : optionsMap["onnx-environment-name"].c_str()),
+      // Integrate ORT logging into Fairlogger
+      [](void* param, OrtLoggingLevel severity, const char* category, const char* logid, const char* code_location, const char* message) {
+        if (severity == ORT_LOGGING_LEVEL_VERBOSE) {
+          LOG(debug) << "(ORT) [" << logid << "|" << category << "|" << code_location << "]: " << message;
+        } else if (severity == ORT_LOGGING_LEVEL_INFO) {
+          LOG(info) << "(ORT) [" << logid << "|" << category << "|" << code_location << "]: " << message;
+        } else if (severity == ORT_LOGGING_LEVEL_WARNING) {
+          LOG(warning) << "(ORT) [" << logid << "|" << category << "|" << code_location << "]: " << message;
+        } else if (severity == ORT_LOGGING_LEVEL_ERROR) {
+          LOG(error) << "(ORT) [" << logid << "|" << category << "|" << code_location << "]: " << message;
+        } else if (severity == ORT_LOGGING_LEVEL_FATAL) {
+          LOG(fatal) << "(ORT) [" << logid << "|" << category << "|" << code_location << "]: " << message;
+        } else {
+          LOG(info) << "(ORT) [" << logid << "|" << category << "|" << code_location << "]: " << message;
+        }
+      },
+      (void*)3);
+    (pImplOrt->env)->DisableTelemetryEvents(); // Disable telemetry events
+    pImplOrt->session = std::make_shared<Ort::Session>(*(pImplOrt->env), modelPath.c_str(), pImplOrt->sessionOptions);
+
+    for (size_t i = 0; i < (pImplOrt->session)->GetInputCount(); ++i) {
+      mInputNames.push_back((pImplOrt->session)->GetInputNameAllocated(i, pImplOrt->allocator).get());
+    }
+    for (size_t i = 0; i < (pImplOrt->session)->GetInputCount(); ++i) {
+      mInputShapes.emplace_back((pImplOrt->session)->GetInputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape());
+    }
+    for (size_t i = 0; i < (pImplOrt->session)->GetOutputCount(); ++i) {
+      mOutputNames.push_back((pImplOrt->session)->GetOutputNameAllocated(i, pImplOrt->allocator).get());
+    }
+    for (size_t i = 0; i < (pImplOrt->session)->GetOutputCount(); ++i) {
+      mOutputShapes.emplace_back((pImplOrt->session)->GetOutputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape());
+    }
+
+    inputNamesChar.resize(mInputNames.size(), nullptr);
+    std::transform(std::begin(mInputNames), std::end(mInputNames), std::begin(inputNamesChar),
+                   [&](const std::string& str) { return str.c_str(); });
+    outputNamesChar.resize(mOutputNames.size(), nullptr);
+    std::transform(std::begin(mOutputNames), std::end(mOutputNames), std::begin(outputNamesChar),
+                   [&](const std::string& str) { return str.c_str(); });
   }
 }
```
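For orientation only, a minimal sketch of how the options map consumed by `OrtModel::reset()` above might be filled in. The keys ("model-path", "device", "onnx-environment-name", "profiling-output-path") and their fallbacks are taken from the diff; the helper function, paths, and values are illustrative assumptions and not part of this commit.

```cpp
#include <string>
#include <unordered_map>

// Hypothetical configuration helper; OrtModel is the class whose reset() is shown above.
void configureModel(OrtModel& model)
{
  std::unordered_map<std::string, std::string> options{
    {"model-path", "/path/to/model.onnx"},           // required: reset() emits LOG(fatal) if missing
    {"device", "CPU"},                               // optional: defaults to "CPU" when absent
    {"onnx-environment-name", "tpc_nn_clusterizer"}, // optional: falls back to "onnx_model_inference"
    {"profiling-output-path", "/tmp"}                // only read when profiling is enabled
  };
  model.reset(options); // builds the Ort::Env, session options and Ort::Session as shown in the diff
}
```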

GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx

Lines changed: 6 additions & 7 deletions
```diff
@@ -897,7 +897,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
       clusterer.nnClusterizerElementSize = ((2 * clusterer.nnClusterizerSizeInputRow + 1) * (2 * clusterer.nnClusterizerSizeInputPad + 1) * (2 * clusterer.nnClusterizerSizeInputTime + 1)) + (clusterer.nnClusterizerAddIndexData ? 3 : 0);
       clusterer.nnClusterizerBatchedMode = GetProcessingSettings().nnClusterizerBatchedMode;
       clusterer.nnClusterizerBoundaryFillValue = GetProcessingSettings().nnClusterizerBoundaryFillValue;
-      if (GetProcessingSettings().nnClusterizerVerbosity < 0){
+      if (GetProcessingSettings().nnClusterizerVerbosity < 0) {
         clusterer.nnClusterizerVerbosity = GetProcessingSettings().nnInferenceVerbosity;
       } else {
         clusterer.nnClusterizerVerbosity = GetProcessingSettings().nnClusterizerVerbosity;
@@ -933,7 +933,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
           clusterer.model_reg_2.init(clusterer.OrtOptions);
         }
       }
-
+
       if (clusterer.nnClusterizerUseCFregression || (int)(GetProcessingSettings().nnClusterizerApplyCfDeconvolution)) {
         runKernel<GPUTPCCFDeconvolution>({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSlice}});
         DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 4, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges");
@@ -943,12 +943,12 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
         // Inverse sigmoid transformation
         clusterer.nnClassThreshold = (float)std::log(clusterer.nnClassThreshold / (1.f - clusterer.nnClassThreshold));
       }
-
+
       float time_clusterizer = 0, time_fill = 0;
       int evalDtype = clusterer.OrtOptions["dtype"].find("32") != std::string::npos;
       clusterer.outputDataClass.resize(clusterer.mPmemory->counters.nClusters, -1);
 
-      for(int batch = 0; batch < std::ceil((float)clusterer.mPmemory->counters.nClusters / clusterer.nnClusterizerBatchedMode); batch++) {
+      for (int batch = 0; batch < std::ceil((float)clusterer.mPmemory->counters.nClusters / clusterer.nnClusterizerBatchedMode); batch++) {
         uint batchStart = batch * clusterer.nnClusterizerBatchedMode;
         uint iSize = CAMath::Min((uint)clusterer.nnClusterizerBatchedMode, (uint)(clusterer.mPmemory->counters.nClusters - batchStart));
 
@@ -967,7 +967,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
 
         auto start1 = std::chrono::high_resolution_clock::now();
         GPUTPCNNClusterizer::applyNetworkClass(clusterer, evalDtype);
-        if (clusterer.model_class.getNumOutputNodes()[0][1] == 1){
+        if (clusterer.model_class.getNumOutputNodes()[0][1] == 1) {
          runKernel<GPUTPCNNClusterizer, GPUTPCNNClusterizer::determineClass1Labels>({GetGrid(iSize, lane, GPUReconstruction::krnlDeviceType::CPU), {iSlice}}, evalDtype, 0, batchStart); // Assigning class labels
         } else {
          runKernel<GPUTPCNNClusterizer, GPUTPCNNClusterizer::determineClass2Labels>({GetGrid(iSize, lane, GPUReconstruction::krnlDeviceType::CPU), {iSlice}}, evalDtype, 0, batchStart); // Assigning class labels
@@ -985,11 +985,10 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
 
         time_clusterizer += std::chrono::duration_cast<std::chrono::nanoseconds>(stop1 - start1).count() / 1e9;
         time_fill += std::chrono::duration_cast<std::chrono::nanoseconds>(stop0 - start0).count() / 1e9;
-
       }
 
       auto start1 = std::chrono::high_resolution_clock::now();
-      if(clusterer.nnClusterizerUseCFregression) {
+      if (clusterer.nnClusterizerUseCFregression) {
        runKernel<GPUTPCNNClusterizer, GPUTPCNNClusterizer::runCfClusterizer>({GetGrid(clusterer.mPmemory->counters.nClusters, lane, GPUReconstruction::krnlDeviceType::CPU), {iSlice}}, evalDtype, 0, 0); // Running the CF regression kernel - no batching needed: batchStart = 0
       }
       auto stop1 = std::chrono::high_resolution_clock::now();
```
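As a side note, a standalone sketch of the two pieces of index arithmetic visible in this file: the `nnClusterizerElementSize` formula and the batch splitting in the reformatted `for` loop. The concrete numbers are illustrative assumptions, not values from the commit.

```cpp
#include <algorithm>
#include <cmath>
#include <cstdio>

int main()
{
  // Input-window size per cluster, mirroring the nnClusterizerElementSize formula above
  // (illustrative window half-sizes; the real values come from GetProcessingSettings()).
  const int sizeRow = 3, sizePad = 3, sizeTime = 3;
  const bool addIndexData = true;
  const int elementSize = (2 * sizeRow + 1) * (2 * sizePad + 1) * (2 * sizeTime + 1) + (addIndexData ? 3 : 0); // 7*7*7 + 3 = 346

  // Batch splitting as in the for loop above: nClusters inputs in chunks of at most batchedMode.
  const unsigned nClusters = 1000, batchedMode = 128; // illustrative
  const int nBatches = static_cast<int>(std::ceil(static_cast<float>(nClusters) / batchedMode));
  for (int batch = 0; batch < nBatches; ++batch) {
    const unsigned batchStart = batch * batchedMode;
    const unsigned iSize = std::min(batchedMode, nClusters - batchStart); // last batch may be shorter
    std::printf("batch %d covers clusters [%u, %u), %u inputs of %d values each\n",
                batch, batchStart, batchStart + iSize, iSize, elementSize);
  }
  return 0;
}
```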

GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.cxx

Lines changed: 12 additions & 10 deletions
```diff
@@ -83,25 +83,27 @@ GPUdii() void GPUTPCNNClusterizer::Thread<GPUTPCNNClusterizer::publishClass2Regr
   GPUTPCNNClusterizer::publishClustersReg2(glo_idx, smem, clusterer, dtype, onlyMC, batchStart);
 }
 
-
-void GPUTPCNNClusterizer::applyNetworkClass(processorType& clusterer, int8_t dtype, uint batch_idx) {
-  if(dtype == 0){
+void GPUTPCNNClusterizer::applyNetworkClass(processorType& clusterer, int8_t dtype, uint batch_idx)
+{
+  if (dtype == 0) {
     clusterer.modelProbabilities = clusterer.model_class.inference<OrtDataType::Float16_t, float>(clusterer.inputData16);
   } else {
     clusterer.modelProbabilities = clusterer.model_class.inference<float, float>(clusterer.inputData32);
   }
 }
 
-void GPUTPCNNClusterizer::applyNetworkReg1(processorType& clusterer, int8_t dtype) {
-  if(dtype == 0){
+void GPUTPCNNClusterizer::applyNetworkReg1(processorType& clusterer, int8_t dtype)
+{
+  if (dtype == 0) {
     clusterer.outputDataReg1 = clusterer.model_reg_1.inference<OrtDataType::Float16_t, float>(clusterer.inputData16);
   } else {
     clusterer.outputDataReg1 = clusterer.model_reg_1.inference<float, float>(clusterer.inputData32);
   }
 }
 
-void GPUTPCNNClusterizer::applyNetworkReg2(processorType& clusterer, int8_t dtype) {
-  if(dtype == 0){
+void GPUTPCNNClusterizer::applyNetworkReg2(processorType& clusterer, int8_t dtype)
+{
+  if (dtype == 0) {
     clusterer.outputDataReg2 = clusterer.model_reg_2.inference<OrtDataType::Float16_t, float>(clusterer.inputData16);
   } else {
     clusterer.outputDataReg2 = clusterer.model_reg_2.inference<float, float>(clusterer.inputData32);
@@ -161,14 +163,14 @@ GPUd() void GPUTPCNNClusterizer::fillInputData(int32_t nBlocks, int32_t nThreads
       for (int t = -clusterer.nnClusterizerSizeInputTime; t <= clusterer.nnClusterizerSizeInputTime; t++) {
         if (!is_boundary) {
           ChargePos tmp_pos(row + r, pad + p, time + t);
-          if(dtype == 0){
+          if (dtype == 0) {
            clusterer.inputData16[write_idx] = (OrtDataType::Float16_t)(static_cast<float>(chargeMap[tmp_pos].unpack()) / central_charge);
          } else {
            clusterer.inputData32[write_idx] = static_cast<float>(chargeMap[tmp_pos].unpack()) / central_charge;
          }
        } else {
          // Filling boundary just to make sure that no values are left unintentionally
-          if(dtype == 0){
+          if (dtype == 0) {
            clusterer.inputData16[write_idx] = (OrtDataType::Float16_t)(static_cast<float>(clusterer.nnClusterizerBoundaryFillValue));
          } else {
            clusterer.inputData32[write_idx] = static_cast<float>(clusterer.nnClusterizerBoundaryFillValue);
@@ -179,7 +181,7 @@ GPUd() void GPUTPCNNClusterizer::fillInputData(int32_t nBlocks, int32_t nThreads
       }
     }
     if (clusterer.nnClusterizerAddIndexData) {
-      if(dtype == 0){
+      if (dtype == 0) {
        clusterer.inputData16[write_idx] = (OrtDataType::Float16_t)(clusterer.mISlice / 36.f);
        clusterer.inputData16[write_idx + 1] = (OrtDataType::Float16_t)(row / 152.f);
        clusterer.inputData16[write_idx + 2] = (OrtDataType::Float16_t)(static_cast<float>(pad) / clusterer.Param().tpcGeometry.NPads(row));
```
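The `applyNetwork*` helpers reformatted above all follow the same pattern: `dtype == 0` routes the half-precision input buffer into the model, anything else the single-precision buffer. Below is a self-contained sketch of that dispatch, with placeholder types standing in for `OrtDataType::Float16_t` and the ORT-backed inference call; none of it is part of the commit.

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

// Placeholder standing in for OrtDataType::Float16_t.
struct Float16 {
  uint16_t bits{};
};

// Placeholder standing in for the templated OrtModel::inference<T, float>(...) call.
template <typename T>
std::vector<float> runInference(const std::vector<T>& input)
{
  return std::vector<float>(input.size(), 0.f); // dummy output, one value per input element
}

// Same shape as applyNetworkClass/Reg1/Reg2: pick the input buffer by dtype.
std::vector<float> applyNetwork(int8_t dtype, const std::vector<Float16>& input16, const std::vector<float>& input32)
{
  if (dtype == 0) {
    return runInference(input16); // half-precision input path
  }
  return runInference(input32); // single-precision input path
}

int main()
{
  std::vector<Float16> in16(8);
  std::vector<float> in32(8, 1.f);
  std::printf("fp16 path: %zu outputs, fp32 path: %zu outputs\n",
              applyNetwork(0, in16, in32).size(), applyNetwork(1, in16, in32).size());
  return 0;
}
```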

GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.h

Lines changed: 5 additions & 7 deletions
```diff
@@ -75,14 +75,12 @@ class GPUTPCNNClusterizer : public GPUKernelTemplate
   static void applyNetworkReg1(processorType&, int8_t = 0);
   static void applyNetworkReg2(processorType&, int8_t = 0);
 
-
- private:
-
-  static int padOffset(int, int, const GPUTPCGeometry&);
-  static int rowOffset(int, int);
-  static bool isBoundary(int, int, int, const GPUTPCGeometry&);
+ private:
+  static int padOffset(int, int, const GPUTPCGeometry&);
+  static int rowOffset(int, int);
+  static bool isBoundary(int, int, int, const GPUTPCGeometry&);
 };
 
-} // namespace GPUCA_NAMESPACE::gpu
+} // namespace o2::gpu
 
 #endif
```
