AliceO2Group
diff --git a/‎Common/ML/include/ML/OrtInterface.h‎
Lines changed: 27 additions & 30 deletions b/‎Common/ML/include/ML/OrtInterface.h‎
Lines changed: 27 additions & 30 deletions
diff --git a/‎Common/ML/src/OrtInterface.cxx‎
Lines changed: 79 additions & 89 deletions b/‎Common/ML/src/OrtInterface.cxx‎
Lines changed: 79 additions & 89 deletions
diff --git a/‎GPU/GPUTracking/Base/GPUReconstructionProcessing.h‎
Lines changed: 1 addition & 1 deletion b/‎GPU/GPUTracking/Base/GPUReconstructionProcessing.h‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu‎
Lines changed: 2 additions & 2 deletions b/‎GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu‎
Lines changed: 2 additions & 2 deletions
@@ -41,54 +41,51 @@ class OrtModel
 {
 
  public:
-  // Constructor
+  // Constructors & destructors
   OrtModel() = default;
-  OrtModel(std::unordered_map<std::string, std::string> optionsMap) {
-    initOptions(optionsMap);
-    initEnvironment();
-  }
+  OrtModel(std::unordered_map<std::string, std::string> optionsMap) { init(optionsMap); } // Convenience constructor: delegates to init(), i.e. initOptions() followed by initEnvironment()
   void init(std::unordered_map<std::string, std::string> optionsMap) {
     initOptions(optionsMap);
     initEnvironment();
   }
+  virtual ~OrtModel() = default;
+
+  // General purpose
   void initOptions(std::unordered_map<std::string, std::string> optionsMap);
   void initEnvironment();
+  void memoryOnDevice(int32_t = 0);
   bool isInitialized() { return mInitialized; }
-  Ort::SessionOptions& updateSessionOptions();
-  Ort::MemoryInfo& updateMemoryInfo();
-  void setIO();
+  void resetSession();
 
-  virtual ~OrtModel() = default;
+  // Getters
+  std::vector<std::vector<int64_t>> getNumInputNodes() const { return mInputShapes; } // Returns a copy of the input tensor shapes (one dimension list per model input) cached by setIO(); despite the name this is not a node count
+  std::vector<std::vector<int64_t>> getNumOutputNodes() const { return mOutputShapes; } // Returns a copy of the output tensor shapes (one dimension list per model output) cached by setIO(); despite the name this is not a node count
+  std::vector<std::string> getInputNames() const { return mInputNames; } // Returns a copy of the model input names cached by setIO()
+  std::vector<std::string> getOutputNames() const { return mOutputNames; } // Returns a copy of the model output names cached by setIO()
+  Ort::SessionOptions& getSessionOptions();
+  Ort::MemoryInfo& getMemoryInfo();
+
+  // Setters
+  void setDeviceId(int32_t id) { deviceId = id; } // Only stores the index: initOptions() overwrites it from the "device-id" option (default -1) and initEnvironment() forwards it to memoryOnDevice()
+  void setIO();
+  void setActiveThreads(int threads) { intraOpNumThreads = threads; } // Only stores the intra-op thread count; NOTE(review): initOptions() re-reads "intra-op-num-threads" and applies it to the session options itself — confirm where a value set here is consumed
 
   // Conversion
   template <class I, class O>
   std::vector<O> v2v(std::vector<I>&, bool = true);
 
   // Inferencing
   template <class I, class O> // class I is the input data type, e.g. float, class O is the output data type, e.g. OrtDataType::Float16_t from O2/Common/ML/include/ML/GPUORTFloat16.h
-  std::vector<O> inference(std::vector<I>&, int32_t = -1);
-
-  template <class I, class O> // class I is the input data type, e.g. float, class O is the output data type, e.g. O2::gpu::OrtDataType::Float16_t from O2/GPU/GPUTracking/ML/convert_float16.h
-  std::vector<O> inference(std::vector<std::vector<I>>&, int32_t = -1);
-
-  template <class I, class O> // class I is the input data type, e.g. float, class O is the output data type, e.g. OrtDataType::Float16_t from O2/Common/ML/include/ML/GPUORTFloat16.h
-  void inference(I*, size_t, O*, int32_t = -1);
-
-  // template<class I, class T, class O> // class I is the input data type, e.g. float, class T the throughput data type and class O is the output data type
-  // std::vector<O> inference(std::vector<I>&);
-
-  // Reset session
-  void resetSession();
+  std::vector<O> inference(std::vector<I>&);
 
-  std::vector<std::vector<int64_t>> getNumInputNodes() const { return mInputShapes; }
-  std::vector<std::vector<int64_t>> getNumOutputNodes() const { return mOutputShapes; }
-  std::vector<std::string> getInputNames() const { return mInputNames; }
-  std::vector<std::string> getOutputNames() const { return mOutputNames; }
+  template <class I, class O>
+  std::vector<O> inference(std::vector<std::vector<I>>&);
 
-  void setActiveThreads(int threads) { intraOpNumThreads = threads; }
+  template <class I, class O>
+  void inference(I*, size_t, O*);
 
  private:
-  // ORT variables -> need to be hidden as Pimpl
+  // ORT variables -> need to be hidden as pImpl
   struct OrtVariables;
   OrtVariables* pImplOrt;
 
@@ -99,8 +96,8 @@ class OrtModel
 
   // Environment settings
   bool mInitialized = false;
-  std::string modelPath, envName = "", device = "cpu", thread_affinity = ""; // device options should be cpu, rocm, migraphx, cuda
-  int intraOpNumThreads = 1, interOpNumThreads = 1, deviceId = 0, enableProfiling = 0, loggingLevel = 0, allocateDeviceMemory = 0, enableOptimizations = 0;
+  std::string modelPath, envName = "", deviceType = "CPU", thread_affinity = ""; // device-type options: CPU, ROCM, MIGRAPHX, CUDA (upper case; the visible comparisons against "CPU"/"ROCM"/"CUDA" are case-sensitive)
+  int32_t intraOpNumThreads = 1, interOpNumThreads = 1, deviceId = -1, enableProfiling = 0, loggingLevel = 0, allocateDeviceMemory = 0, enableOptimizations = 0;
 
   std::string printShape(const std::vector<int64_t>&);
 };
 
@@ -35,16 +35,7 @@ struct OrtModel::OrtVariables { // The actual implementation is hidden in the .c
   Ort::MemoryInfo memoryInfo = Ort::MemoryInfo("Cpu", OrtAllocatorType::OrtDeviceAllocator, 0, OrtMemType::OrtMemTypeDefault);
 };
 
-Ort::SessionOptions& OrtModel::updateSessionOptions()
-{
-  return pImplOrt->sessionOptions;
-}
-
-Ort::MemoryInfo& OrtModel::updateMemoryInfo()
-{
-  return pImplOrt->memoryInfo;
-}
-
+// General purpose
 void OrtModel::initOptions(std::unordered_map<std::string, std::string> optionsMap)
 {
   pImplOrt = new OrtVariables();
@@ -56,7 +47,8 @@ void OrtModel::initOptions(std::unordered_map<std::string, std::string> optionsM
 
   if (!optionsMap["model-path"].empty()) {
     modelPath = optionsMap["model-path"];
-    device = (optionsMap.contains("device") ? optionsMap["device"] : "CPU");
+    deviceType = (optionsMap.contains("device-type") ? optionsMap["device-type"] : "CPU");
+    deviceId = (optionsMap.contains("device-id") ? std::stoi(optionsMap["device-id"]) : -1);
     allocateDeviceMemory = (optionsMap.contains("allocate-device-memory") ? std::stoi(optionsMap["allocate-device-memory"]) : 0);
     intraOpNumThreads = (optionsMap.contains("intra-op-num-threads") ? std::stoi(optionsMap["intra-op-num-threads"]) : 0);
     interOpNumThreads = (optionsMap.contains("inter-op-num-threads") ? std::stoi(optionsMap["inter-op-num-threads"]) : 0);
@@ -65,7 +57,7 @@ void OrtModel::initOptions(std::unordered_map<std::string, std::string> optionsM
     enableOptimizations = (optionsMap.contains("enable-optimizations") ? std::stoi(optionsMap["enable-optimizations"]) : 0);
     envName = (optionsMap.contains("onnx-environment-name") ? optionsMap["onnx-environment-name"] : "onnx_model_inference");
 
-    if (device == "CPU") {
+    if (deviceType == "CPU") {
       (pImplOrt->sessionOptions).SetIntraOpNumThreads(intraOpNumThreads);
       (pImplOrt->sessionOptions).SetInterOpNumThreads(interOpNumThreads);
       if (intraOpNumThreads > 1 || interOpNumThreads > 1) {
@@ -97,14 +89,18 @@ void OrtModel::initOptions(std::unordered_map<std::string, std::string> optionsM
 
     (pImplOrt->sessionOptions).SetGraphOptimizationLevel(GraphOptimizationLevel(enableOptimizations));
     (pImplOrt->sessionOptions).SetLogSeverityLevel(OrtLoggingLevel(loggingLevel));
+
+    mInitialized = true;
   } else {
     LOG(fatal) << "(ORT) Model path cannot be empty!";
   }
 }
 
 void OrtModel::initEnvironment()
 {
-  mInitialized = true;
+  // Creates the ORT environment and the session for modelPath from the session options
+  // prepared in initOptions(), then caches the model's I/O names and shapes via setIO().
+  // When on-device allocation was requested, the memory info is switched to the device first.
+  if (allocateDeviceMemory) {
+    memoryOnDevice(deviceId);
+  }
   pImplOrt->env = std::make_shared<Ort::Env>(
     OrtLoggingLevel(loggingLevel),
     (envName.empty() ? "ORT" : envName.c_str()),
@@ -128,39 +124,48 @@ void OrtModel::initEnvironment()
   (pImplOrt->env)->DisableTelemetryEvents(); // Disable telemetry events
   pImplOrt->session = std::make_shared<Ort::Session>(*(pImplOrt->env), modelPath.c_str(), pImplOrt->sessionOptions);
 
   setIO();
+
+  // Log only after setIO(): mInputShapes/mOutputShapes are filled there, so on a fresh
+  // object indexing element 0 any earlier reads past the end of an empty vector.
+  if (loggingLevel < 2 && !mInputShapes.empty() && !mOutputShapes.empty()) {
+    LOG(info) << "(ORT) Model loaded successfully! (input: " << printShape(mInputShapes[0]) << ", output: " << printShape(mOutputShapes[0]) << ")";
+  }
 }
 
-void OrtModel::setIO() {
-  for (size_t i = 0; i < (pImplOrt->session)->GetInputCount(); ++i) {
-    mInputNames.push_back((pImplOrt->session)->GetInputNameAllocated(i, pImplOrt->allocator).get());
-  }
-  for (size_t i = 0; i < (pImplOrt->session)->GetInputCount(); ++i) {
-    mInputShapes.emplace_back((pImplOrt->session)->GetInputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape());
-  }
-  for (size_t i = 0; i < (pImplOrt->session)->GetOutputCount(); ++i) {
-    mOutputNames.push_back((pImplOrt->session)->GetOutputNameAllocated(i, pImplOrt->allocator).get());
-  }
-  for (size_t i = 0; i < (pImplOrt->session)->GetOutputCount(); ++i) {
-    mOutputShapes.emplace_back((pImplOrt->session)->GetOutputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape());
-  }
-
-  inputNamesChar.resize(mInputNames.size(), nullptr);
-  std::transform(std::begin(mInputNames), std::end(mInputNames), std::begin(inputNamesChar),
-                 [&](const std::string& str) { return str.c_str(); });
-  outputNamesChar.resize(mOutputNames.size(), nullptr);
-  std::transform(std::begin(mOutputNames), std::end(mOutputNames), std::begin(outputNamesChar),
-                 [&](const std::string& str) { return str.c_str(); });
-  if (loggingLevel < 2) {
-    LOG(info) << "(ORT) Model loaded successfully! (input: " << printShape(mInputShapes[0]) << ", output: " << printShape(mOutputShapes[0]) << ")";
+void OrtModel::memoryOnDevice(int32_t deviceIndex) // Points pImplOrt->memoryInfo at device memory with the given index; does nothing for a negative index or in builds without ROCM/MIGraphX/CUDA support
+{
+#if (defined(ORT_ROCM_BUILD) && ORT_ROCM_BUILD == 1) || (defined(ORT_MIGRAPHX_BUILD) && ORT_MIGRAPHX_BUILD == 1) || (defined(ORT_CUDA_BUILD) && ORT_CUDA_BUILD == 1)
+  if (deviceIndex >= 0) { // initOptions() defaults deviceId to -1, so without a configured device ID the memory info is left as it was
+    std::string dev_mem_str = ""; // NOTE(review): stays empty for any deviceType other than "ROCM"/"CUDA" (e.g. MIGraphX) — confirm Ort::MemoryInfo accepts an empty allocator name
+    if (deviceType == "ROCM") {
+      dev_mem_str = "Hip";
+    }
+    if (deviceType == "CUDA") {
+      dev_mem_str = "Cuda";
+    }
+    pImplOrt->memoryInfo = Ort::MemoryInfo(dev_mem_str.c_str(), OrtAllocatorType::OrtDeviceAllocator, deviceIndex, OrtMemType::OrtMemTypeDefault);
+    if (loggingLevel < 2) { // same verbosity threshold as the "Model loaded" message in initEnvironment()
+      LOG(info) << "(ORT) Memory info set to on-device memory for device type " << deviceType << " with ID " << deviceIndex;
+    }
   }
+#endif
 }
 
 void OrtModel::resetSession()
 {
   pImplOrt->session = std::make_shared<Ort::Session>(*(pImplOrt->env), modelPath.c_str(), pImplOrt->sessionOptions);
 }
 
+// Getters
+Ort::SessionOptions& OrtModel::getSessionOptions() // Mutable reference to the session options held in the pImpl; pImplOrt is only allocated in initOptions(), so call this after init()/initOptions()
+{
+  return pImplOrt->sessionOptions;
+}
+
+Ort::MemoryInfo& OrtModel::getMemoryInfo() // Mutable reference to the memory info held in the pImpl (replaced by memoryOnDevice()); pImplOrt is only allocated in initOptions(), so call this after init()/initOptions()
+{
+  return pImplOrt->memoryInfo;
+}
+
 template <class I, class O>
 std::vector<O> OrtModel::v2v(std::vector<I>& input, bool clearInput)
 {
@@ -176,32 +181,32 @@ std::vector<O> OrtModel::v2v(std::vector<I>& input, bool clearInput)
   }
 }
 
-std::string OrtModel::printShape(const std::vector<int64_t>& v)
-{
-  std::stringstream ss("");
-  for (size_t i = 0; i < v.size() - 1; i++) {
-    ss << v[i] << "x";
+void OrtModel::setIO() {
+  // Caches the session's input/output names and tensor shapes and rebuilds the
+  // C-string name arrays that are handed to Session::Run().
+  // The caches are cleared first: this method is public and is also reached through
+  // init(), so a repeated call must not append duplicate entries (which would inflate
+  // the input/output counts derived from these vectors).
+  mInputNames.clear();
+  mInputShapes.clear();
+  mOutputNames.clear();
+  mOutputShapes.clear();
+
+  for (size_t i = 0; i < (pImplOrt->session)->GetInputCount(); ++i) {
+    mInputNames.push_back((pImplOrt->session)->GetInputNameAllocated(i, pImplOrt->allocator).get());
   }
-  ss << v[v.size() - 1];
-  return ss.str();
+  for (size_t i = 0; i < (pImplOrt->session)->GetInputCount(); ++i) {
+    mInputShapes.emplace_back((pImplOrt->session)->GetInputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape());
+  }
+  for (size_t i = 0; i < (pImplOrt->session)->GetOutputCount(); ++i) {
+    mOutputNames.push_back((pImplOrt->session)->GetOutputNameAllocated(i, pImplOrt->allocator).get());
+  }
+  for (size_t i = 0; i < (pImplOrt->session)->GetOutputCount(); ++i) {
+    mOutputShapes.emplace_back((pImplOrt->session)->GetOutputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape());
+  }
+
+  // The char arrays point into the std::string storage of mInputNames/mOutputNames, so they
+  // are built only after both name vectors are complete and must be rebuilt whenever those change.
+  inputNamesChar.resize(mInputNames.size(), nullptr);
+  std::transform(std::begin(mInputNames), std::end(mInputNames), std::begin(inputNamesChar),
+                 [&](const std::string& str) { return str.c_str(); });
+  outputNamesChar.resize(mOutputNames.size(), nullptr);
+  std::transform(std::begin(mOutputNames), std::end(mOutputNames), std::begin(outputNamesChar),
+                 [&](const std::string& str) { return str.c_str(); });
 }
 
+// Inference
 template <class I, class O>
-std::vector<O> OrtModel::inference(std::vector<I>& input, int32_t deviceIndex)
+std::vector<O> OrtModel::inference(std::vector<I>& input)
 {
-#if (defined(ORT_ROCM_BUILD) && ORT_ROCM_BUILD == 1) || (defined(ORT_MIGRAPHX_BUILD) && ORT_MIGRAPHX_BUILD == 1) || (defined(ORT_CUDA_BUILD) && ORT_CUDA_BUILD == 1)
-  if (allocateDeviceMemory) {
-    std::string dev_mem_str = "";
-    if (device == "ROCM") {
-      dev_mem_str = "Hip";
-    }
-    if (device == "CUDA") {
-      dev_mem_str = "Cuda";
-    }
-    pImplOrt->memoryInfo = Ort::MemoryInfo(dev_mem_str.c_str(), OrtAllocatorType::OrtDeviceAllocator, deviceIndex, OrtMemType::OrtMemTypeDefault);
-    LOG(info) << "(ORT) Memory info set to on-device memory for device " << device << " with ID "<< deviceIndex;
-  }
-#endif
   std::vector<int64_t> inputShape{(int64_t)(input.size() / mInputShapes[0][1]), (int64_t)mInputShapes[0][1]};
   std::vector<Ort::Value> inputTensor;
   if constexpr (std::is_same_v<I, OrtDataType::Float16_t>) {
@@ -217,32 +222,19 @@ std::vector<O> OrtModel::inference(std::vector<I>& input, int32_t deviceIndex)
   return outputValuesVec;
 }
 
-template std::vector<float> OrtModel::inference<float, float>(std::vector<float>&, int32_t);
+template std::vector<float> OrtModel::inference<float, float>(std::vector<float>&);
 
-template std::vector<float> OrtModel::inference<OrtDataType::Float16_t, float>(std::vector<OrtDataType::Float16_t>&, int32_t);
+template std::vector<float> OrtModel::inference<OrtDataType::Float16_t, float>(std::vector<OrtDataType::Float16_t>&);
 
-template std::vector<OrtDataType::Float16_t> OrtModel::inference<OrtDataType::Float16_t, OrtDataType::Float16_t>(std::vector<OrtDataType::Float16_t>&, int32_t);
+template std::vector<OrtDataType::Float16_t> OrtModel::inference<OrtDataType::Float16_t, OrtDataType::Float16_t>(std::vector<OrtDataType::Float16_t>&);
 
 template <class I, class O>
-void OrtModel::inference(I* input, size_t input_size, O* output, int32_t deviceIndex)
+void OrtModel::inference(I* input, size_t input_size, O* output)
 {
   // std::vector<std::string> providers = Ort::GetAvailableProviders();
   // for (const auto& provider : providers) {
   //     LOG(info) << "Available Execution Provider: " << provider;
   // }
-#if (defined(ORT_ROCM_BUILD) && ORT_ROCM_BUILD == 1) || (defined(ORT_MIGRAPHX_BUILD) && ORT_MIGRAPHX_BUILD == 1) || (defined(ORT_CUDA_BUILD) && ORT_CUDA_BUILD == 1)
-  if (allocateDeviceMemory) {
-    std::string dev_mem_str = "";
-    if (device == "ROCM") {
-      dev_mem_str = "Hip";
-    }
-    if (device == "CUDA") {
-      dev_mem_str = "Cuda";
-    }
-    pImplOrt->memoryInfo = Ort::MemoryInfo(dev_mem_str.c_str(), OrtAllocatorType::OrtDeviceAllocator, deviceIndex, OrtMemType::OrtMemTypeDefault);
-    LOG(info) << "(ORT) Memory info set to on-device memory for device " << device << " with ID "<< deviceIndex;
-  }
-#endif
   std::vector<int64_t> inputShape{input_size, (int64_t)mInputShapes[0][1]};
   Ort::Value inputTensor = Ort::Value(nullptr);
   if constexpr (std::is_same_v<I, OrtDataType::Float16_t>) {
@@ -257,26 +249,13 @@ void OrtModel::inference(I* input, size_t input_size, O* output, int32_t deviceI
   (pImplOrt->session)->Run(pImplOrt->runOptions, inputNamesChar.data(), &inputTensor, 1, outputNamesChar.data(), &outputTensor, outputNamesChar.size());
 }
 
-template void OrtModel::inference<OrtDataType::Float16_t, float>(OrtDataType::Float16_t*, size_t, float*, int32_t);
+template void OrtModel::inference<OrtDataType::Float16_t, float>(OrtDataType::Float16_t*, size_t, float*);
 
-template void OrtModel::inference<float, float>(float*, size_t, float*, int32_t);
+template void OrtModel::inference<float, float>(float*, size_t, float*);
 
 template <class I, class O>
-std::vector<O> OrtModel::inference(std::vector<std::vector<I>>& input, int32_t deviceIndex)
+std::vector<O> OrtModel::inference(std::vector<std::vector<I>>& input)
 {
-#if (defined(ORT_ROCM_BUILD) && ORT_ROCM_BUILD == 1) || (defined(ORT_MIGRAPHX_BUILD) && ORT_MIGRAPHX_BUILD == 1) || (defined(ORT_CUDA_BUILD) && ORT_CUDA_BUILD == 1)
-  if (allocateDeviceMemory) {
-    std::string dev_mem_str = "";
-    if (device == "ROCM") {
-      dev_mem_str = "Hip";
-    }
-    if (device == "CUDA") {
-      dev_mem_str = "Cuda";
-    }
-    pImplOrt->memoryInfo = Ort::MemoryInfo(dev_mem_str.c_str(), OrtAllocatorType::OrtDeviceAllocator, deviceIndex, OrtMemType::OrtMemTypeDefault);
-    LOG(info) << "(ORT) Memory info set to on-device memory for device " << device << " with ID " << deviceIndex;
-  }
-#endif
   std::vector<Ort::Value> inputTensor;
   for (auto i : input) {
     std::vector<int64_t> inputShape{(int64_t)(i.size() / mInputShapes[0][1]), (int64_t)mInputShapes[0][1]};
@@ -294,6 +273,17 @@ std::vector<O> OrtModel::inference(std::vector<std::vector<I>>& input, int32_t d
   return outputValuesVec;
 }
 
+// private
+std::string OrtModel::printShape(const std::vector<int64_t>& v)
+{
+  // Formats a tensor shape as "d0xd1x...xdN" (e.g. {1, 3, 224} -> "1x3x224").
+  // An empty shape yields an empty string; the separator-first loop avoids computing
+  // v.size() - 1, which wraps around for an empty vector and reads out of bounds.
+  std::stringstream ss("");
+  for (size_t i = 0; i < v.size(); ++i) {
+    if (i > 0) {
+      ss << "x";
+    }
+    ss << v[i];
+  }
+  return ss.str();
+}
+
 } // namespace ml
 
 } // namespace o2
@@ -92,7 +92,7 @@ class GPUReconstructionProcessing : public GPUReconstruction
   void AddGPUEvents(T*& events);
 
   virtual std::unique_ptr<gpu_reconstruction_kernels::threadContext> GetThreadContext() override;
-  virtual void SetONNXGPUStream(Ort::SessionOptions&, int32_t, int32_t*) {}
+  // virtual void SetONNXGPUStream(Ort::SessionOptions&, int32_t, int32_t*) {}
 
   struct RecoStepTimerMeta {
     HighResTimer timerToGPU;
 
@@ -673,7 +673,7 @@ void GPUReconstructionCUDA::SetONNXGPUStream(Ort::SessionOptions& session_option
   // UpdateCUDAProviderOptions(cuda_options, keys.data(), values.data(), keys.size());
 
   // this implicitly sets "has_user_compute_stream"
-  UpdateCUDAProviderOptionsWithValue(cuda_options, "user_compute_stream", &mInternals->Streams[stream]);
+  UpdateCUDAProviderOptionsWithValue(cuda_options, "user_compute_stream", mInternals->Streams[stream]);
   session_options.AppendExecutionProvider_CUDA_V2(cuda_options);
 
   // Finally, don't forget to release the provider options
@@ -694,7 +694,7 @@ void GPUReconstructionHIP::SetONNXGPUStream(Ort::SessionOptions& session_options
 {
   // Create ROCm provider options
   cudaGetDevice(deviceId);
-  const auto& api = Ort::GetApi();
+  // const auto& api = Ort::GetApi();
   // api.GetCurrentGpuDeviceId(deviceId);
   OrtROCMProviderOptions rocm_options;
   rocm_options.has_user_compute_stream = 1; // Indicate that we are passing a user stream