
Commit 3174e39

Merge branch 'gpu_clusterizer_bug_fixes' into onnx_gpu_implementation
2 parents 70320c3 + 0ed7d25 commit 3174e39

File tree: 9 files changed, +302 -194 lines changed

Common/ML/include/ML/OrtInterface.h

Lines changed: 4 additions & 4 deletions
@@ -58,13 +58,13 @@ class OrtModel
 
   // Inferencing
   template <class I, class O> // class I is the input data type, e.g. float, class O is the output data type, e.g. OrtDataType::Float16_t from O2/Common/ML/include/ML/GPUORTFloat16.h
-  std::vector<O> inference(std::vector<I>&);
+  std::vector<O> inference(std::vector<I>&, int32_t = -1);
 
   template <class I, class O> // class I is the input data type, e.g. float, class O is the output data type, e.g. O2::gpu::OrtDataType::Float16_t from O2/GPU/GPUTracking/ML/convert_float16.h
-  std::vector<O> inference(std::vector<std::vector<I>>&);
+  std::vector<O> inference(std::vector<std::vector<I>>&, int32_t = -1);
 
   template <class I, class O> // class I is the input data type, e.g. float, class O is the output data type, e.g. OrtDataType::Float16_t from O2/Common/ML/include/ML/GPUORTFloat16.h
-  void inference(I*, size_t, O*);
+  void inference(I*, size_t, O*, int32_t = -1);
 
   // template<class I, class T, class O> // class I is the input data type, e.g. float, class T the throughput data type and class O is the output data type
   // std::vector<O> inference(std::vector<I>&);
@@ -92,7 +92,7 @@ class OrtModel
   // Environment settings
   bool mInitialized = false;
   std::string modelPath, device = "cpu", thread_affinity = ""; // device options should be cpu, rocm, migraphx, cuda
-  int intraOpNumThreads = 1, interOpNumThreads = 1, streamId = 0, enableProfiling = 0, loggingLevel = 0, allocateDeviceMemory = 0, enableOptimizations = 0;
+  int intraOpNumThreads = 1, interOpNumThreads = 1, deviceId = 0, enableProfiling = 0, loggingLevel = 0, allocateDeviceMemory = 0, enableOptimizations = 0;
 
   std::string printShape(const std::vector<int64_t>&);
 };
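The new trailing int32_t argument defaults to -1, so existing call sites compile unchanged; a non-negative value overrides the configured device id for that call (see the OrtInterface.cxx changes below). A minimal caller-side sketch, not part of this commit — the option keys match those parsed in OrtInterface.cxx, while the model path, option values, and sizes are placeholder examples:

  // Sketch only: values and sizes are examples, not from this commit.
  OrtModel model;
  model.reset({{"model-path", "network_class.onnx"},
               {"device", "ROCM"},
               {"device-id", "0"},
               {"allocate-device-memory", "1"}});

  std::vector<float> input(1024 * 7, 0.f);                    // 1024 rows, assuming an input width of 7
  auto outDefault = model.inference<float, float>(input);     // uses the configured device id (argument defaults to -1)
  auto outOnDev1 = model.inference<float, float>(input, 1);   // per-call override: device id 1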

Common/ML/src/OrtInterface.cxx

Lines changed: 68 additions & 35 deletions
@@ -58,7 +58,7 @@ void OrtModel::reset(std::unordered_map<std::string, std::string> optionsMap)
   if (!optionsMap["model-path"].empty()) {
     modelPath = optionsMap["model-path"];
     device = (optionsMap.contains("device") ? optionsMap["device"] : "CPU");
-    streamId = (optionsMap.contains("stream-id") ? std::stoi(optionsMap["stream-id"]) : 0);
+    deviceId = (optionsMap.contains("device-id") ? std::stoi(optionsMap["device-id"]) : 0);
     allocateDeviceMemory = (optionsMap.contains("allocate-device-memory") ? std::stoi(optionsMap["allocate-device-memory"]) : 0);
     intraOpNumThreads = (optionsMap.contains("intra-op-num-threads") ? std::stoi(optionsMap["intra-op-num-threads"]) : 0);
     interOpNumThreads = (optionsMap.contains("inter-op-num-threads") ? std::stoi(optionsMap["inter-op-num-threads"]) : 0);
@@ -68,40 +68,26 @@ void OrtModel::reset(std::unordered_map<std::string, std::string> optionsMap)
 
   // #if defined(ORT_ROCM_BUILD) && ORT_ROCM_BUILD == 1
   //   if (device == "ROCM") {
-  //     // Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_ROCM(pImplOrt->sessionOptions, streamId));
-  //     SetONNXGPUStream(pImplOrt->sessionOptions, streamId);
+  //     // Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_ROCM(pImplOrt->sessionOptions, deviceId));
+  //     SetONNXGPUStream(pImplOrt->sessionOptions, deviceId);
   //     LOG(info) << "(ORT) ROCM execution provider set";
   //   }
   // #endif
   // #if defined(ORT_MIGRAPHX_BUILD) && ORT_MIGRAPHX_BUILD == 1
   //   if (device == "MIGRAPHX") {
-  //     Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_MIGraphX(pImplOrt->sessionOptions, streamId));
+  //     Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_MIGraphX(pImplOrt->sessionOptions, deviceId));
   //     LOG(info) << "(ORT) MIGraphX execution provider set";
   //   }
   // #endif
   // #if defined(ORT_CUDA_BUILD) && ORT_CUDA_BUILD == 1
   //   if (device == "CUDA") {
-  //     // Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CUDA(pImplOrt->sessionOptions, streamId));
-  //     SetONNXGPUStream(pImplOrt->sessionOptions, streamId);
+  //     // Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CUDA(pImplOrt->sessionOptions, deviceId));
+  //     SetONNXGPUStream(pImplOrt->sessionOptions, deviceId);
   //     LOG(info) << "(ORT) CUDA execution provider set";
   //     dev_mem_str = "Cuda";
   //   }
   // #endif
 
-#if (defined(ORT_ROCM_BUILD) && ORT_ROCM_BUILD == 1) || (defined(ORT_MIGRAPHX_BUILD) && ORT_MIGRAPHX_BUILD == 1) || (defined(ORT_CUDA_BUILD) && ORT_CUDA_BUILD == 1)
-  if (allocateDeviceMemory) {
-    std::string dev_mem_str = "";
-    if (device == "ROCM") {
-      dev_mem_str = "Hip";
-    }
-    if (device == "CUDA") {
-      dev_mem_str = "Cuda";
-    }
-    pImplOrt->memoryInfo = Ort::MemoryInfo(dev_mem_str.c_str(), OrtAllocatorType::OrtDeviceAllocator, streamId, OrtMemType::OrtMemTypeDefault);
-    LOG(info) << "(ORT) Memory info set to on-device memory";
-  }
-#endif
-
   if (device == "CPU") {
     (pImplOrt->sessionOptions).SetIntraOpNumThreads(intraOpNumThreads);
     (pImplOrt->sessionOptions).SetInterOpNumThreads(interOpNumThreads);
@@ -213,8 +199,24 @@ std::string OrtModel::printShape(const std::vector<int64_t>& v)
 }
 
 template <class I, class O>
-std::vector<O> OrtModel::inference(std::vector<I>& input)
+std::vector<O> OrtModel::inference(std::vector<I>& input, int32_t deviceIndex)
 {
+#if (defined(ORT_ROCM_BUILD) && ORT_ROCM_BUILD == 1) || (defined(ORT_MIGRAPHX_BUILD) && ORT_MIGRAPHX_BUILD == 1) || (defined(ORT_CUDA_BUILD) && ORT_CUDA_BUILD == 1)
+  if (allocateDeviceMemory) {
+    if (deviceIndex >= 0) {
+      deviceId = deviceIndex;
+    }
+    std::string dev_mem_str = "";
+    if (device == "ROCM") {
+      dev_mem_str = "Hip";
+    }
+    if (device == "CUDA") {
+      dev_mem_str = "Cuda";
+    }
+    pImplOrt->memoryInfo = Ort::MemoryInfo(dev_mem_str.c_str(), OrtAllocatorType::OrtDeviceAllocator, deviceId, OrtMemType::OrtMemTypeDefault);
+    LOG(info) << "(ORT) Memory info set to on-device memory";
+  }
+#endif
   std::vector<int64_t> inputShape{(int64_t)(input.size() / mInputShapes[0][1]), (int64_t)mInputShapes[0][1]};
   std::vector<Ort::Value> inputTensor;
   if constexpr (std::is_same_v<I, OrtDataType::Float16_t>) {
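The block above that rebuilds pImplOrt->memoryInfo (and applies the deviceIndex override) is repeated verbatim in the two other inference overloads further down. A possible follow-up, not part of this commit, would be a private helper; the name setDeviceMemoryInfo below is hypothetical, the body is the same logic as the duplicated block:

  // Hypothetical helper, not part of this commit: centralizes the duplicated device-memory setup.
  void OrtModel::setDeviceMemoryInfo(int32_t deviceIndex)
  {
  #if (defined(ORT_ROCM_BUILD) && ORT_ROCM_BUILD == 1) || (defined(ORT_MIGRAPHX_BUILD) && ORT_MIGRAPHX_BUILD == 1) || (defined(ORT_CUDA_BUILD) && ORT_CUDA_BUILD == 1)
    if (allocateDeviceMemory) {
      if (deviceIndex >= 0) {
        deviceId = deviceIndex; // per-call override of the configured device id
      }
      std::string dev_mem_str = "";
      if (device == "ROCM") {
        dev_mem_str = "Hip";
      }
      if (device == "CUDA") {
        dev_mem_str = "Cuda";
      }
      pImplOrt->memoryInfo = Ort::MemoryInfo(dev_mem_str.c_str(), OrtAllocatorType::OrtDeviceAllocator, deviceId, OrtMemType::OrtMemTypeDefault);
      LOG(info) << "(ORT) Memory info set to on-device memory";
    }
  #endif
  }

Each overload would then start with setDeviceMemoryInfo(deviceIndex); instead of carrying its own copy.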
@@ -230,37 +232,68 @@ std::vector<O> OrtModel::inference(std::vector<I>& input)
   return outputValuesVec;
 }
 
-template std::vector<float> OrtModel::inference<float, float>(std::vector<float>&);
+template std::vector<float> OrtModel::inference<float, float>(std::vector<float>&, int32_t);
 
-template std::vector<float> OrtModel::inference<OrtDataType::Float16_t, float>(std::vector<OrtDataType::Float16_t>&);
+template std::vector<float> OrtModel::inference<OrtDataType::Float16_t, float>(std::vector<OrtDataType::Float16_t>&, int32_t);
 
-template std::vector<OrtDataType::Float16_t> OrtModel::inference<OrtDataType::Float16_t, OrtDataType::Float16_t>(std::vector<OrtDataType::Float16_t>&);
+template std::vector<OrtDataType::Float16_t> OrtModel::inference<OrtDataType::Float16_t, OrtDataType::Float16_t>(std::vector<OrtDataType::Float16_t>&, int32_t);
 
 template <class I, class O>
-void OrtModel::inference(I* input, size_t input_size, O* output)
+void OrtModel::inference(I* input, size_t input_size, O* output, int32_t deviceIndex)
 {
-  std::vector<int64_t> inputShape{(int64_t)(input_size / mInputShapes[0][1]), (int64_t)mInputShapes[0][1]};
+#if (defined(ORT_ROCM_BUILD) && ORT_ROCM_BUILD == 1) || (defined(ORT_MIGRAPHX_BUILD) && ORT_MIGRAPHX_BUILD == 1) || (defined(ORT_CUDA_BUILD) && ORT_CUDA_BUILD == 1)
+  if (allocateDeviceMemory) {
+    if (deviceIndex >= 0) {
+      deviceId = deviceIndex;
+    }
+    std::string dev_mem_str = "";
+    if (device == "ROCM") {
+      dev_mem_str = "Hip";
+    }
+    if (device == "CUDA") {
+      dev_mem_str = "Cuda";
+    }
+    pImplOrt->memoryInfo = Ort::MemoryInfo(dev_mem_str.c_str(), OrtAllocatorType::OrtDeviceAllocator, deviceId, OrtMemType::OrtMemTypeDefault);
+    LOG(info) << "(ORT) Memory info set to on-device memory";
+  }
+#endif
+  std::vector<int64_t> inputShape{input_size, (int64_t)mInputShapes[0][1]};
   Ort::Value inputTensor = Ort::Value(nullptr);
   if constexpr (std::is_same_v<I, OrtDataType::Float16_t>) {
-    inputTensor = Ort::Value::CreateTensor<Ort::Float16_t>(pImplOrt->memoryInfo, reinterpret_cast<Ort::Float16_t*>(input), input_size, inputShape.data(), inputShape.size());
+    inputTensor = Ort::Value::CreateTensor<Ort::Float16_t>(pImplOrt->memoryInfo, reinterpret_cast<Ort::Float16_t*>(input), input_size * mInputShapes[0][1], inputShape.data(), inputShape.size());
   } else {
-    inputTensor = Ort::Value::CreateTensor<I>(pImplOrt->memoryInfo, input, input_size, inputShape.data(), inputShape.size());
+    inputTensor = Ort::Value::CreateTensor<I>(pImplOrt->memoryInfo, input, input_size * mInputShapes[0][1], inputShape.data(), inputShape.size());
   }
 
-  std::vector<int64_t> outputShape{inputShape[0], mOutputShapes[0][1]};
-  size_t outputSize = (int64_t)(input_size * mOutputShapes[0][1] / mInputShapes[0][1]);
-  Ort::Value outputTensor = Ort::Value::CreateTensor<O>(pImplOrt->memoryInfo, output, outputSize, outputShape.data(), outputShape.size());
+  std::vector<int64_t> outputShape{input_size, mOutputShapes[0][1]};
+  Ort::Value outputTensor = Ort::Value::CreateTensor<O>(pImplOrt->memoryInfo, output, input_size * mOutputShapes[0][1], outputShape.data(), outputShape.size());
 
-  (pImplOrt->session)->Run(pImplOrt->runOptions, inputNamesChar.data(), &inputTensor, 1, outputNamesChar.data(), &outputTensor, outputNamesChar.size()); // TODO: Not sure if 1 is correct here
+  (pImplOrt->session)->Run(pImplOrt->runOptions, inputNamesChar.data(), &inputTensor, 1, outputNamesChar.data(), &outputTensor, outputNamesChar.size()); // TODO: Not sure if 1 is always correct here
 }
 
-template void OrtModel::inference<OrtDataType::Float16_t, float>(OrtDataType::Float16_t*, size_t, float*);
+template void OrtModel::inference<OrtDataType::Float16_t, float>(OrtDataType::Float16_t*, size_t, float*, int32_t);
 
-template void OrtModel::inference<float, float>(float*, size_t, float*);
+template void OrtModel::inference<float, float>(float*, size_t, float*, int32_t);
 
 template <class I, class O>
-std::vector<O> OrtModel::inference(std::vector<std::vector<I>>& input)
+std::vector<O> OrtModel::inference(std::vector<std::vector<I>>& input, int32_t deviceIndex)
 {
+#if (defined(ORT_ROCM_BUILD) && ORT_ROCM_BUILD == 1) || (defined(ORT_MIGRAPHX_BUILD) && ORT_MIGRAPHX_BUILD == 1) || (defined(ORT_CUDA_BUILD) && ORT_CUDA_BUILD == 1)
+  if (allocateDeviceMemory) {
+    if (deviceIndex >= 0) {
+      deviceId = deviceIndex;
+    }
+    std::string dev_mem_str = "";
+    if (device == "ROCM") {
+      dev_mem_str = "Hip";
+    }
+    if (device == "CUDA") {
+      dev_mem_str = "Cuda";
+    }
+    pImplOrt->memoryInfo = Ort::MemoryInfo(dev_mem_str.c_str(), OrtAllocatorType::OrtDeviceAllocator, deviceId, OrtMemType::OrtMemTypeDefault);
+    LOG(info) << "(ORT) Memory info set to on-device memory";
+  }
+#endif
   std::vector<Ort::Value> inputTensor;
   for (auto i : input) {
     std::vector<int64_t> inputShape{(int64_t)(i.size() / mInputShapes[0][1]), (int64_t)mInputShapes[0][1]};
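Note that the pointer-based overload changes the meaning of input_size: the tensors are now created with input_size * mInputShapes[0][1] (respectively mOutputShapes[0][1]) elements and input_size itself becomes the first shape dimension, so callers pass the number of rows rather than the total number of elements. A short sketch of the new convention — the widths and row count are placeholder examples, not from this commit:

  // Sketch only: input_size now counts rows, not elements.
  const size_t nRows = 512;     // e.g. number of cluster candidates
  const size_t inWidth = 7;     // assumed value of mInputShapes[0][1]
  const size_t outWidth = 1;    // assumed value of mOutputShapes[0][1]
  std::vector<float> in(nRows * inWidth), out(nRows * outWidth);
  model.inference<float, float>(in.data(), nRows, out.data());     // configured device
  model.inference<float, float>(in.data(), nRows, out.data(), 1);  // per-call override: device id 1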

GPU/GPUTracking/Definitions/GPUSettingsList.h

Lines changed: 12 additions & 1 deletion
@@ -229,7 +229,8 @@ AddOption(applyNNclusterizer, int, 0, "", 0, "(bool, default = 0), if the neural
 AddOption(nnInferenceDevice, std::string, "CPU", "", 0, "(std::string) Specify inference device (cpu (default), rocm, cuda)")
 AddOption(nnInferenceDeviceId, unsigned int, 0, "", 0, "(unsigned int) Specify inference device id")
 AddOption(nnInferenceAllocateDevMem, int, 0, "", 0, "(bool, default = 0), if the device memory should be allocated for inference")
-AddOption(nnInferenceDtype, std::string, "fp32", "", 0, "(std::string) Specify the datatype for which inference is performed (fp32: default, fp16)") // fp32 or fp16
+AddOption(nnInferenceInputDType, std::string, "FP32", "", 0, "(std::string) Specify the datatype for which inference is performed (FP32: default, fp16)") // fp32 or fp16
+AddOption(nnInferenceOutputDType, std::string, "FP32", "", 0, "(std::string) Specify the datatype for which inference is performed (fp32: default, fp16)") // fp32 or fp16
 AddOption(nnInferenceIntraOpNumThreads, int, 1, "", 0, "Number of threads used to evaluate one neural network (ONNX: SetIntraOpNumThreads). 0 = auto-detect, can lead to problems on SLURM systems.")
 AddOption(nnInferenceInterOpNumThreads, int, 1, "", 0, "Number of threads used to evaluate one neural network (ONNX: SetInterOpNumThreads). 0 = auto-detect, can lead to problems on SLURM systems.")
 AddOption(nnInferenceEnableOrtOptimization, unsigned int, 99, "", 0, "Enables graph optimizations in ONNX Runtime. Can be [0, 1, 2, 99] -> see https://github.com/microsoft/onnxruntime/blob/3f71d637a83dc3540753a8bb06740f67e926dc13/include/onnxruntime/core/session/onnxruntime_c_api.h#L347")
@@ -250,6 +251,16 @@ AddOption(nnClassificationPath, std::string, "network_class.onnx", "", 0, "The c
 AddOption(nnClassThreshold, float, 0.5, "", 0, "The cutoff at which clusters will be accepted / rejected.")
 AddOption(nnRegressionPath, std::string, "network_reg.onnx", "", 0, "The regression network path")
 AddOption(nnSigmoidTrafoClassThreshold, int, 1, "", 0, "If true (default), then the classification threshold is transformed by an inverse sigmoid function. This depends on how the network was trained (with a sigmoid as acitvation function in the last layer or not).")
+// CCDB
+AddOption(nnLoadFromCCDB, int, 1, "", 0, "If 1 networks are fetched from ccdb, else locally")
+AddOption(nnCCDBURL, std::string, "http://ccdb-test.cern.ch:8080", "", 0, "The CCDB URL from where the network files are fetched")
+AddOption(nnCCDBPath, std::string, "Users/c/csonnabe/TPC/Clusterization", "", 0, "Folder path containing the networks")
+AddOption(nnCCDBFetchMode, std::string, "c1:r1", "", 0, "Concatention of modes, e.g. c1:r1 (classification class 1, regression class 1)")
+AddOption(nnCCDBWithMomentum, int, 1, "", 0, "Distinguishes between the network with and without momentum output for the regression")
+AddOption(nnCCDBClassificationLayerType, std::string, "FC", "", 0, "Distinguishes between network with different layer types. Options: FC, CNN")
+AddOption(nnCCDBRegressionLayerType, std::string, "CNN", "", 0, "Distinguishes between network with different layer types. Options: FC, CNN")
+AddOption(nnCCDBBeamType, std::string, "PbPb", "", 0, "Distinguishes between networks trained for different beam types. Options: PbPb, pp")
+AddOption(nnCCDBInteractionRate, int, 50, "", 0, "Distinguishes between networks for different interaction rates [kHz].")
 AddHelp("help", 'h')
 EndConfig()
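For context, a hypothetical sketch (not part of this commit) of how these clusterizer settings could be forwarded to OrtModel::reset(), using the option keys parsed in OrtInterface.cxx above; settings stands for whatever instance of this options struct the clusterizer holds, and model is a placeholder OrtModel:

  // Hypothetical glue code: field names follow the AddOption entries above,
  // option keys follow OrtInterface.cxx; "settings" and "model" are placeholders.
  std::unordered_map<std::string, std::string> options{
    {"model-path", settings.nnClassificationPath},
    {"device", settings.nnInferenceDevice},
    {"device-id", std::to_string(settings.nnInferenceDeviceId)},
    {"allocate-device-memory", std::to_string(settings.nnInferenceAllocateDevMem)},
    {"intra-op-num-threads", std::to_string(settings.nnInferenceIntraOpNumThreads)},
    {"inter-op-num-threads", std::to_string(settings.nnInferenceInterOpNumThreads)}};
  model.reset(options);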