Using const ref and moving CCDB calls to host initialization

ChSonnabend · ChSonnabend · commit 81c646be0d2e · 2025-03-24T11:04:57.000+01:00
diff --git a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx
@@ -40,7 +40,6 @@
 #endif
 
 #ifdef GPUCA_HAS_ONNX
-#include <CommonUtils/StringUtils.h>
 #include "GPUTPCNNClusterizerKernels.h"
 #include "GPUTPCNNClusterizerHost.h"
 #endif
@@ -613,51 +612,9 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
   }
 
 #ifdef GPUCA_HAS_ONNX
-  GPUSettingsProcessingNNclusterizer nn_settings = GetProcessingSettings().nn;
+  const GPUSettingsProcessingNNclusterizer& nn_settings = GetProcessingSettings().nn;
   GPUTPCNNClusterizerHost nnApplication; // potentially this needs to be GPUTPCNNClusterizerHost nnApplication[NSECTORS]; Technically ONNX ->Run() is threadsafe at inference time since its read-only
   if (GetProcessingSettings().nn.applyNNclusterizer) {
-    if(nn_settings.nnLoadFromCCDB) {
-      std::map<std::string, std::string> ccdbSettings = {
-        {"nnCCDBURL", nn_settings.nnCCDBURL},
-        {"nnCCDBPath", nn_settings.nnCCDBPath},
-        {"inputDType", nn_settings.nnInferenceInputDType},
-        {"outputDType", nn_settings.nnInferenceOutputDType},
-        {"nnCCDBWithMomentum", std::to_string(nn_settings.nnCCDBWithMomentum)},
-        {"nnCCDBBeamType", nn_settings.nnCCDBBeamType},
-        {"nnCCDBInteractionRate", std::to_string(nn_settings.nnCCDBInteractionRate)}
-      };
-
-      std::string nnFetchFolder = "";
-      std::vector<std::string> fetchMode = o2::utils::Str::tokenize(nn_settings.nnCCDBFetchMode, ':');
-      std::map<std::string, std::string> networkRetrieval = ccdbSettings;
-
-      if (fetchMode[0] == "c1") {
-        networkRetrieval["nnCCDBLayerType"] = nn_settings.nnCCDBClassificationLayerType;
-        networkRetrieval["nnCCDBEvalType"] = "classification_c1";
-        networkRetrieval["outputFile"] = nnFetchFolder + "net_classification_c1.onnx";
-        nnApplication.loadFromCCDB(networkRetrieval);
-      } else if (fetchMode[0] == "c2") {
-        networkRetrieval["nnCCDBLayerType"] = nn_settings.nnCCDBClassificationLayerType;
-        networkRetrieval["nnCCDBEvalType"] = "classification_c2";
-        networkRetrieval["outputFile"] = nnFetchFolder + "net_classification_c2.onnx";
-        nnApplication.loadFromCCDB(networkRetrieval);
-      }
-      nn_settings.nnClassificationPath = networkRetrieval["outputFile"]; // Setting the proper path from the where the models will be initialized locally
-
-      networkRetrieval["nnCCDBLayerType"] = nn_settings.nnCCDBRegressionLayerType;
-      networkRetrieval["nnCCDBEvalType"] = "regression_c1";
-      networkRetrieval["outputFile"] = nnFetchFolder + "net_regression_c1.onnx";
-      nnApplication.loadFromCCDB(networkRetrieval);
-      nn_settings.nnRegressionPath = networkRetrieval["outputFile"];
-      if (fetchMode[1] == "r2") {
-        networkRetrieval["nnCCDBLayerType"] = nn_settings.nnCCDBRegressionLayerType;
-        networkRetrieval["nnCCDBEvalType"] = "regression_c2";
-        networkRetrieval["outputFile"] = nnFetchFolder + "net_regression_c2.onnx";
-        nnApplication.loadFromCCDB(networkRetrieval);
-        nn_settings.nnRegressionPath += ":", networkRetrieval["outputFile"];
-      }
-    }
-
     uint32_t maxClusters = 0;
     nnApplication.init(nn_settings);
     for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) {
@@ -988,7 +945,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
             if (!clustererNN.nnClusterizerUseCfRegression) {
               nnApplication.networkInference(nnApplication.model_reg_1, clustererNN, iSize, clustererNN.outputDataReg1, clustererNN.nnClusterizerDtype);
               runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::publishClass1Regression>({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNN.nnClusterizerDtype, withMC, batchStart); // Running the NN for regression class 1
-              if (nnApplication.model_class.getNumOutputNodes()[0][1] > 1 && nnApplication.reg_model_paths.size() > 1) {
+              if (nnApplication.model_class.getNumOutputNodes()[0][1] > 1 && nnApplication.model_reg_2.isInitialized()) {
                 nnApplication.networkInference(nnApplication.model_reg_2, clustererNN, iSize, clustererNN.outputDataReg2, clustererNN.nnClusterizerDtype);
                 runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::publishClass2Regression>({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNN.nnClusterizerDtype, withMC, batchStart); // Running the NN for regression class 2
               }
diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx
@@ -22,7 +22,7 @@
 
 using namespace o2::gpu;
 
-GPUTPCNNClusterizerHost::GPUTPCNNClusterizerHost(GPUSettingsProcessingNNclusterizer settings)
+GPUTPCNNClusterizerHost::GPUTPCNNClusterizerHost(const GPUSettingsProcessingNNclusterizer& settings)
 {
   init(settings);
 }
@@ -53,10 +53,55 @@ void GPUTPCNNClusterizerHost::loadFromCCDB(std::map<std::string, std::string> se
   }
 }
 
-void GPUTPCNNClusterizerHost::init(GPUSettingsProcessingNNclusterizer settings)
+void GPUTPCNNClusterizerHost::init(const GPUSettingsProcessingNNclusterizer& settings)
 {
+  std::string class_model_path = settings.nnClassificationPath, reg_model_path = settings.nnRegressionPath;
+  std::vector<std::string> reg_model_paths;
+
+  if(settings.nnLoadFromCCDB) {
+    std::map<std::string, std::string> ccdbSettings = {
+      {"nnCCDBURL", settings.nnCCDBURL},
+      {"nnCCDBPath", settings.nnCCDBPath},
+      {"inputDType", settings.nnInferenceInputDType},
+      {"outputDType", settings.nnInferenceOutputDType},
+      {"nnCCDBWithMomentum", std::to_string(settings.nnCCDBWithMomentum)},
+      {"nnCCDBBeamType", settings.nnCCDBBeamType},
+      {"nnCCDBInteractionRate", std::to_string(settings.nnCCDBInteractionRate)}
+    };
+
+    std::string nnFetchFolder = "";
+    std::vector<std::string> fetchMode = o2::utils::Str::tokenize(settings.nnCCDBFetchMode, ':');
+    std::map<std::string, std::string> networkRetrieval = ccdbSettings;
+
+    if (fetchMode[0] == "c1") {
+      networkRetrieval["nnCCDBLayerType"] = settings.nnCCDBClassificationLayerType;
+      networkRetrieval["nnCCDBEvalType"] = "classification_c1";
+      networkRetrieval["outputFile"] = nnFetchFolder + "net_classification_c1.onnx";
+      loadFromCCDB(networkRetrieval);
+    } else if (fetchMode[0] == "c2") {
+      networkRetrieval["nnCCDBLayerType"] = settings.nnCCDBClassificationLayerType;
+      networkRetrieval["nnCCDBEvalType"] = "classification_c2";
+      networkRetrieval["outputFile"] = nnFetchFolder + "net_classification_c2.onnx";
+      loadFromCCDB(networkRetrieval);
+    }
+    class_model_path = networkRetrieval["outputFile"]; // Setting the proper path from the where the models will be initialized locally
+
+    networkRetrieval["nnCCDBLayerType"] = settings.nnCCDBRegressionLayerType;
+    networkRetrieval["nnCCDBEvalType"] = "regression_c1";
+    networkRetrieval["outputFile"] = nnFetchFolder + "net_regression_c1.onnx";
+    loadFromCCDB(networkRetrieval);
+    reg_model_path = networkRetrieval["outputFile"];
+    if (fetchMode[1] == "r2") {
+      networkRetrieval["nnCCDBLayerType"] = settings.nnCCDBRegressionLayerType;
+      networkRetrieval["nnCCDBEvalType"] = "regression_c2";
+      networkRetrieval["outputFile"] = nnFetchFolder + "net_regression_c2.onnx";
+      loadFromCCDB(networkRetrieval);
+      reg_model_path += ":", networkRetrieval["outputFile"];
+    }
+  }
+
   OrtOptions = {
-    {"model-path", settings.nnClassificationPath},
+    {"model-path", class_model_path},
     {"device", settings.nnInferenceDevice},
     {"device-id", std::to_string(settings.nnInferenceDeviceId)},
     {"allocate-device-memory", std::to_string(settings.nnInferenceAllocateDevMem)},
@@ -70,7 +115,7 @@ void GPUTPCNNClusterizerHost::init(GPUSettingsProcessingNNclusterizer settings)
 
   model_class.init(OrtOptions);
 
-  reg_model_paths = o2::utils::Str::tokenize(settings.nnRegressionPath, ':');
+  reg_model_paths = o2::utils::Str::tokenize(reg_model_path, ':');
 
   if (!settings.nnClusterizerUseCfRegression) {
     if (model_class.getNumOutputNodes()[0][1] == 1 || reg_model_paths.size() == 1) {
diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.h
@@ -37,9 +37,9 @@ class GPUTPCNNClusterizerHost
 {
  public:
   GPUTPCNNClusterizerHost() = default;
-  GPUTPCNNClusterizerHost(GPUSettingsProcessingNNclusterizer);
+  GPUTPCNNClusterizerHost(const GPUSettingsProcessingNNclusterizer&);
 
-  void init(GPUSettingsProcessingNNclusterizer);
+  void init(const GPUSettingsProcessingNNclusterizer&);
   void initClusterizer(const GPUSettingsProcessingNNclusterizer&, GPUTPCNNClusterizer&);
   void loadFromCCDB(std::map<std::string, std::string>);