30 changes: 21 additions & 9 deletions Common/ML/include/ML/OrtInterface.h
@@ -26,10 +26,11 @@
// O2 includes
#include "Framework/Logger.h"

-namespace Ort {
-struct SessionOptions;
-struct MemoryInfo;
-}
+namespace Ort
+{
+struct SessionOptions;
+struct MemoryInfo;
+} // namespace Ort

namespace o2
{
@@ -44,7 +45,8 @@ class OrtModel
// Constructors & destructors
OrtModel() = default;
OrtModel(std::unordered_map<std::string, std::string> optionsMap) { init(optionsMap); }
-  void init(std::unordered_map<std::string, std::string> optionsMap) {
+  void init(std::unordered_map<std::string, std::string> optionsMap)
+  {
initOptions(optionsMap);
initEnvironment();
}
@@ -71,8 +73,18 @@ class OrtModel
void setDeviceId(int32_t id) { deviceId = id; }
void setIO();
void setActiveThreads(int threads) { intraOpNumThreads = threads; }
-  void setIntraOpNumThreads(int threads) { if(deviceType == "CPU") { intraOpNumThreads = threads; } }
-  void setInterOpNumThreads(int threads) { if(deviceType == "CPU") { interOpNumThreads = threads; } }
+  void setIntraOpNumThreads(int threads)
+  {
+    if (deviceType == "CPU") {
+      intraOpNumThreads = threads;
+    }
+  }
+  void setInterOpNumThreads(int threads)
+  {
+    if (deviceType == "CPU") {
+      interOpNumThreads = threads;
+    }
+  }

// Conversion
template <class I, class O>
@@ -102,8 +114,8 @@
std::vector<const char*> inputNamesChar, outputNamesChar;
std::vector<std::string> mInputNames, mOutputNames;
std::vector<std::vector<int64_t>> mInputShapes, mOutputShapes, inputShapesCopy, outputShapesCopy; // Input shapes
-  std::vector<int64_t> inputSizePerNode, outputSizePerNode; // Output shapes
-  int32_t mInputsTotal = 0, mOutputsTotal = 0; // Total number of inputs and outputs
+  std::vector<int64_t> inputSizePerNode, outputSizePerNode;       // Output shapes
+  int32_t mInputsTotal = 0, mOutputsTotal = 0;                    // Total number of inputs and outputs

// Environment settings
bool mInitialized = false;
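Reviewer note: for context, a minimal usage sketch of the reworked `OrtModel` API, assuming the O2 ML headers are on the include path. The option keys shown are illustrative assumptions; the real keys are parsed in OrtInterface.cxx, which is not fully shown in this diff.

```cpp
// Usage sketch for the reformatted OrtModel interface (hypothetical option keys).
#include "ML/OrtInterface.h"

#include <string>
#include <unordered_map>

int main()
{
  // The map is forwarded to init(); the keys below are assumptions for
  // illustration, not necessarily the keys OrtInterface.cxx expects.
  std::unordered_map<std::string, std::string> options{
    {"model-path", "net_classification_c1.onnx"},
    {"device-type", "CPU"}};

  o2::ml::OrtModel model(options);

  // After the reformat, the thread setters still only take effect when
  // deviceType == "CPU", as the expanded bodies above make explicit.
  model.setIntraOpNumThreads(4);
  model.setInterOpNumThreads(2);
  return 0;
}
```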
116 changes: 58 additions & 58 deletions Common/ML/src/OrtInterface.cxx
@@ -99,7 +99,7 @@ void OrtModel::initOptions(std::unordered_map<std::string, std::string> optionsM

void OrtModel::initEnvironment()
{
-  if(allocateDeviceMemory) {
+  if (allocateDeviceMemory) {
memoryOnDevice(deviceId);
}
pImplOrt->env = std::make_shared<Ort::Env>(
@@ -184,7 +184,8 @@ std::vector<O> OrtModel::v2v(std::vector<I>& input, bool clearInput)
}
}

-void OrtModel::setIO() {
+void OrtModel::setIO()
+{
for (size_t i = 0; i < (pImplOrt->session)->GetInputCount(); ++i) {
mInputNames.push_back((pImplOrt->session)->GetInputNameAllocated(i, pImplOrt->allocator).get());
}
@@ -211,7 +212,7 @@ void OrtModel::setIO() {
outputSizePerNode.resize(mOutputShapes.size(), 1);
mInputsTotal = 1;
for (size_t i = 0; i < mInputShapes.size(); ++i) {
-    if(mInputShapes[i].size() > 0) {
+    if (mInputShapes[i].size() > 0) {
for (size_t j = 1; j < mInputShapes[i].size(); ++j) {
if (mInputShapes[i][j] > 0) {
mInputsTotal *= mInputShapes[i][j];
@@ -222,7 +223,7 @@ void OrtModel::setIO() {
}
mOutputsTotal = 1;
for (size_t i = 0; i < mOutputShapes.size(); ++i) {
-    if(mOutputShapes[i].size() > 0) {
+    if (mOutputShapes[i].size() > 0) {
for (size_t j = 1; j < mOutputShapes[i].size(); ++j) {
if (mOutputShapes[i][j] > 0) {
mOutputsTotal *= mOutputShapes[i][j];
@@ -239,8 +240,7 @@ std::vector<O> OrtModel::inference(std::vector<I>& input)
{
std::vector<int64_t> inputShape = mInputShapes[0];
inputShape[0] = input.size();
-  for (size_t i = 1; i < mInputShapes[0].size(); ++i)
-  {
+  for (size_t i = 1; i < mInputShapes[0].size(); ++i) {
inputShape[0] /= mInputShapes[0][i];
}
std::vector<Ort::Value> inputTensor;
@@ -295,28 +295,29 @@ template void OrtModel::inference<float, OrtDataType::Float16_t>(float*, size_t,
template void OrtModel::inference<float, float>(float*, size_t, float*);

template <class I, class O>
-void OrtModel::inference(I** input, size_t input_size, O* output) {
+void OrtModel::inference(I** input, size_t input_size, O* output)
+{
std::vector<Ort::Value> inputTensors(inputShapesCopy.size());

for (size_t i = 0; i < inputShapesCopy.size(); ++i) {

-    inputShapesCopy[i][0] = input_size; // batch-size
+    inputShapesCopy[i][0] = input_size;  // batch-size
    outputShapesCopy[i][0] = input_size; // batch-size

if constexpr (std::is_same_v<I, OrtDataType::Float16_t>) {
inputTensors[i] = Ort::Value::CreateTensor<Ort::Float16_t>(
-          pImplOrt->memoryInfo,
-          reinterpret_cast<Ort::Float16_t*>(input[i]),
-          inputSizePerNode[i]*input_size,
-          inputShapesCopy[i].data(),
-          inputShapesCopy[i].size());
+        pImplOrt->memoryInfo,
+        reinterpret_cast<Ort::Float16_t*>(input[i]),
+        inputSizePerNode[i] * input_size,
+        inputShapesCopy[i].data(),
+        inputShapesCopy[i].size());
} else {
inputTensors[i] = Ort::Value::CreateTensor<I>(
-          pImplOrt->memoryInfo,
-          input[i],
-          inputSizePerNode[i]*input_size,
-          inputShapesCopy[i].data(),
-          inputShapesCopy[i].size());
+        pImplOrt->memoryInfo,
+        input[i],
+        inputSizePerNode[i] * input_size,
+        inputShapesCopy[i].data(),
+        inputShapesCopy[i].size());
}
}

@@ -325,14 +326,14 @@ void OrtModel::inference(I** input, size_t input_size, O* output) {
outputTensor = Ort::Value::CreateTensor<Ort::Float16_t>(
pImplOrt->memoryInfo,
reinterpret_cast<Ort::Float16_t*>(output),
-      outputSizePerNode[0]*input_size, // assumes that there is only one output node
+      outputSizePerNode[0] * input_size, // assumes that there is only one output node
outputShapesCopy[0].data(),
outputShapesCopy[0].size());
} else {
outputTensor = Ort::Value::CreateTensor<O>(
pImplOrt->memoryInfo,
output,
-      outputSizePerNode[0]*input_size, // assumes that there is only one output node
+      outputSizePerNode[0] * input_size, // assumes that there is only one output node
outputShapesCopy[0].data(),
outputShapesCopy[0].size());
}
@@ -345,8 +346,7 @@ void OrtModel::inference(I** input, size_t input_size, O* output) {
inputNamesChar.size(),
outputNamesChar.data(),
&outputTensor,
-    outputNamesChar.size()
-  );
+    outputNamesChar.size());
}

template void OrtModel::inference<OrtDataType::Float16_t, OrtDataType::Float16_t>(OrtDataType::Float16_t**, size_t, OrtDataType::Float16_t*);
@@ -357,47 +357,47 @@ template void OrtModel::inference<float, float>(float**, size_t, float*);
template <class I, class O>
std::vector<O> OrtModel::inference(std::vector<std::vector<I>>& inputs)
{
-    std::vector<Ort::Value> input_tensors;
+  std::vector<Ort::Value> input_tensors;

-    for (size_t i = 0; i < inputs.size(); ++i) {
+  for (size_t i = 0; i < inputs.size(); ++i) {

-      inputShapesCopy[i][0] = inputs[i].size() / inputSizePerNode[i]; // batch-size
+    inputShapesCopy[i][0] = inputs[i].size() / inputSizePerNode[i]; // batch-size

-      if constexpr (std::is_same_v<I, OrtDataType::Float16_t>) {
-        input_tensors.emplace_back(
-          Ort::Value::CreateTensor<Ort::Float16_t>(
-            pImplOrt->memoryInfo,
-            reinterpret_cast<Ort::Float16_t*>(inputs[i].data()),
-            inputSizePerNode[i]*inputShapesCopy[i][0],
-            inputShapesCopy[i].data(),
-            inputShapesCopy[i].size()));
-      } else {
-        input_tensors.emplace_back(
-          Ort::Value::CreateTensor<I>(
-            pImplOrt->memoryInfo,
-            inputs[i].data(),
-            inputSizePerNode[i]*inputShapesCopy[i][0],
-            inputShapesCopy[i].data(),
-            inputShapesCopy[i].size()));
-      }
+    if constexpr (std::is_same_v<I, OrtDataType::Float16_t>) {
+      input_tensors.emplace_back(
+        Ort::Value::CreateTensor<Ort::Float16_t>(
+          pImplOrt->memoryInfo,
+          reinterpret_cast<Ort::Float16_t*>(inputs[i].data()),
+          inputSizePerNode[i] * inputShapesCopy[i][0],
+          inputShapesCopy[i].data(),
+          inputShapesCopy[i].size()));
+    } else {
+      input_tensors.emplace_back(
+        Ort::Value::CreateTensor<I>(
+          pImplOrt->memoryInfo,
+          inputs[i].data(),
+          inputSizePerNode[i] * inputShapesCopy[i][0],
+          inputShapesCopy[i].data(),
+          inputShapesCopy[i].size()));
+    }
  }

-    int32_t totalOutputSize = mOutputsTotal*inputShapesCopy[0][0];
-
-    // === Run inference ===
-    auto output_tensors = pImplOrt->session->Run(
-      pImplOrt->runOptions,
-      inputNamesChar.data(),
-      input_tensors.data(),
-      input_tensors.size(),
-      outputNamesChar.data(),
-      outputNamesChar.size());
-
-    // === Extract output values ===
-    O* output_data = output_tensors[0].template GetTensorMutableData<O>();
-    std::vector<O> output_vec(output_data, output_data + totalOutputSize);
-    output_tensors.clear();
-    return output_vec;
+  int32_t totalOutputSize = mOutputsTotal * inputShapesCopy[0][0];
+
+  // === Run inference ===
+  auto output_tensors = pImplOrt->session->Run(
+    pImplOrt->runOptions,
+    inputNamesChar.data(),
+    input_tensors.data(),
+    input_tensors.size(),
+    outputNamesChar.data(),
+    outputNamesChar.size());
+
+  // === Extract output values ===
+  O* output_data = output_tensors[0].template GetTensorMutableData<O>();
+  std::vector<O> output_vec(output_data, output_data + totalOutputSize);
+  output_tensors.clear();
+  return output_vec;
}

template std::vector<float> OrtModel::inference<float, float>(std::vector<std::vector<float>>&);
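Reviewer note: a sketch of how the pointer-based overload reformatted above might be driven. Buffer sizes follow the inputSizePerNode/outputSizePerNode convention established in setIO(); the concrete node sizes here are assumptions, and the overload itself assumes a single output node, as its inline comment notes.

```cpp
// Sketch: batched inference through the multi-node pointer overload.
// Assumes one input node of 25 values per entry and one output node of
// one value per entry (both hypothetical). The model must already be
// initialized (init() + setIO()) so the shape bookkeeping is populated.
#include "ML/OrtInterface.h"

#include <vector>

void runBatch(o2::ml::OrtModel& model, size_t batchSize)
{
  std::vector<float> node0(batchSize * 25); // hypothetical input node size
  std::vector<float> out(batchSize);        // hypothetical output node size 1
  float* inputs[1] = {node0.data()};
  // float/float is one of the explicit instantiations listed above.
  model.inference<float, float>(inputs, batchSize, out.data());
}
```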
@@ -23,16 +23,15 @@ namespace o2::tpc

class NeuralNetworkClusterizer
{
- public:
-  NeuralNetworkClusterizer() = default;
-  void initCcdbApi(std::string url);
-  void loadIndividualFromCCDB(std::map<std::string, std::string> settings);
-
- private:
-  o2::ccdb::CcdbApi ccdbApi;
-  std::map<std::string, std::string> metadata;
-  std::map<std::string, std::string> headers;
-
+ public:
+  NeuralNetworkClusterizer() = default;
+  void initCcdbApi(std::string url);
+  void loadIndividualFromCCDB(std::map<std::string, std::string> settings);
+
+ private:
+  o2::ccdb::CcdbApi ccdbApi;
+  std::map<std::string, std::string> metadata;
+  std::map<std::string, std::string> headers;
};

} // namespace o2::tpc
3 changes: 2 additions & 1 deletion Detectors/TPC/calibration/src/NeuralNetworkClusterizer.cxx
@@ -18,7 +18,8 @@

using namespace o2::tpc;

-void NeuralNetworkClusterizer::initCcdbApi(std::string url) {
+void NeuralNetworkClusterizer::initCcdbApi(std::string url)
+{
ccdbApi.init(url);
}

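Reviewer note: a minimal sketch of the call sequence for this class. The include path and the URL are assumptions for illustration, and the settings keys accepted by loadIndividualFromCCDB are defined in the implementation, which this diff only touches for brace style.

```cpp
// Sketch: bringing up the CCDB connection before fetching the networks.
#include "TPCCalibration/NeuralNetworkClusterizer.h" // assumed include path

#include <map>
#include <string>

void setup()
{
  o2::tpc::NeuralNetworkClusterizer clusterizer;
  clusterizer.initCcdbApi("http://localhost:8080"); // placeholder URL
  // Keys for the settings map are not shown in this diff; an empty map
  // stands in here purely for illustration.
  std::map<std::string, std::string> settings;
  clusterizer.loadIndividualFromCCDB(settings);
}
```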
5 changes: 3 additions & 2 deletions GPU/GPUTracking/Base/GPUReconstructionCPU.h
@@ -24,8 +24,9 @@
#include "GPUReconstructionKernelIncludes.h"
#include "GPUReconstructionKernels.h"

-namespace Ort {
-struct SessionOptions;
+namespace Ort
+{
+struct SessionOptions;
}

namespace o2::gpu
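Reviewer note: the hunks in this and the following headers only restyle the Ort forward declaration. For context, the pattern they rely on is sketched below; the full ONNX Runtime header is needed only in translation units that actually dereference the pointer (ExampleHolder is a hypothetical class for illustration).

```cpp
// Sketch of the forward-declaration pattern these headers use: declaring
// Ort::SessionOptions as an incomplete type keeps the heavy onnxruntime
// headers out of the GPU reconstruction headers entirely.
namespace Ort
{
struct SessionOptions;
} // namespace Ort

class ExampleHolder // hypothetical, for illustration only
{
 public:
  void setOrtOptions(Ort::SessionOptions* opts) { mOpts = opts; }

 private:
  Ort::SessionOptions* mOpts = nullptr; // pointer to incomplete type is fine
};
```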
3 changes: 2 additions & 1 deletion GPU/GPUTracking/Base/GPUReconstructionProcessing.h
@@ -22,7 +22,8 @@
#include <functional>
#include <atomic>

-namespace Ort {
+namespace Ort
+{
struct SessionOptions;
}

5 changes: 3 additions & 2 deletions GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h
@@ -25,8 +25,9 @@ extern "C" __declspec(dllexport) o2::gpu::GPUReconstruction* GPUReconstruction_C
extern "C" o2::gpu::GPUReconstruction* GPUReconstruction_Create_CUDA(const o2::gpu::GPUSettingsDeviceBackend& cfg);
#endif

-namespace Ort {
-struct SessionOptions;
+namespace Ort
+{
+struct SessionOptions;
}

namespace o2::gpu
6 changes: 3 additions & 3 deletions GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx
@@ -665,16 +665,16 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
clustererNN.mISector = sector;
clustererNN.nnClusterizerTotalClusters = processors()->tpcClusterer[lane].mNMaxClusters;
nnApplications[lane].initClusterizer(nn_settings, clustererNN);
-      if (doGPU){
+      if (doGPU) {
clustererNNShadow.deviceId = deviceId;
clustererNNShadow.mISector = sector;
clustererNNShadow.nnClusterizerTotalClusters = processors()->tpcClusterer[lane].mNMaxClusters;
nnApplications[lane].initClusterizer(nn_settings, clustererNNShadow);
}
AllocateRegisteredMemory(clustererNN.mMemoryId);
});
-    if (doGPU){
-      WriteToConstantMemory(RecoStep::TPCClusterFinding, (char*)&processors()->tpcNNClusterer - (char*)processors(), &processorsShadow()->tpcNNClusterer, sizeof(GPUTPCNNClusterizer)*NSECTORS, mRec->NStreams() - 1, &mEvents->init);
+    if (doGPU) {
+      WriteToConstantMemory(RecoStep::TPCClusterFinding, (char*)&processors()->tpcNNClusterer - (char*)processors(), &processorsShadow()->tpcNNClusterer, sizeof(GPUTPCNNClusterizer) * NSECTORS, mRec->NStreams() - 1, &mEvents->init);
}
}
#endif
2 changes: 1 addition & 1 deletion GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.h
@@ -52,7 +52,7 @@ class GPUTPCNNClusterizer : public GPUProcessor
int nnClusterizerModelClassNumOutputNodes = -1;
int nnClusterizerModelReg1NumOutputNodes = -1;
int nnClusterizerModelReg2NumOutputNodes = -1;
-  int nnInferenceInputDType = 0; // 0: float16, 1: float32
+  int nnInferenceInputDType = 0;  // 0: float16, 1: float32
int nnInferenceOutputDType = 0; // 0: float16, 1: float32
int mISector = -1;
int deviceId = -1;
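Reviewer note: the realigned comments above encode the I/O precision flags. A sketch of how a caller might dispatch on them follows; only the flag encoding (0: float16, 1: float32) comes from the header, the dispatch itself is an assumption for illustration.

```cpp
// Sketch: dispatching the templated inference on the dtype flag
// (0: float16, 1: float32, per GPUTPCNNClusterizer.h).
#include "ML/OrtInterface.h"

#include <vector>

std::vector<float> runClassNet(o2::ml::OrtModel& model, int inputDType,
                               std::vector<std::vector<float>>& inputs)
{
  if (inputDType == 1) { // 1: float32
    return model.inference<float, float>(inputs);
  }
  // inputDType == 0 (float16) would route through the
  // OrtDataType::Float16_t instantiations listed in OrtInterface.cxx;
  // the float path below is a placeholder fallback for the sketch.
  return model.inference<float, float>(inputs);
}
```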
@@ -27,7 +27,7 @@ void GPUTPCNNClusterizerHost::init(const GPUSettingsProcessingNNclusterizer& set
std::vector<std::string> reg_model_paths;
std::vector<std::string> evalMode = o2::utils::Str::tokenize(settings.nnEvalMode, ':');

-  if(settings.nnLoadFromCCDB) {
+  if (settings.nnLoadFromCCDB) {
reg_model_path = settings.nnLocalFolder + "/net_regression_c1.onnx"; // Needs to be set identical to NeuralNetworkClusterizer.cxx, otherwise the networks might be loaded from the wrong place
if (evalMode[0] == "c1") {
class_model_path = settings.nnLocalFolder + "/net_classification_c1.onnx";
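Reviewer note: nnEvalMode is split on ':' with o2::utils::Str::tokenize as shown above, and evalMode[0] then selects the classification network variant. A sketch of the decomposition; the "c1:r1" value and the StringUtils include path are assumptions.

```cpp
// Sketch: how an nnEvalMode string decomposes; evalMode[0] == "c1" picks
// net_classification_c1.onnx in the branch shown above.
#include "CommonUtils/StringUtils.h" // assumed location of o2::utils::Str

#include <cassert>
#include <string>
#include <vector>

void example()
{
  std::vector<std::string> evalMode = o2::utils::Str::tokenize("c1:r1", ':');
  assert(evalMode.size() == 2);
  assert(evalMode[0] == "c1"); // classification variant selector
}
```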
2 changes: 1 addition & 1 deletion GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.h
@@ -45,7 +45,7 @@ class GPUTPCNNClusterizerHost

std::unordered_map<std::string, std::string> OrtOptions;
o2::ml::OrtModel model_class, model_reg_1, model_reg_2; // For splitting clusters
-  std::vector<bool> modelsUsed = {false, false, false}; // 0: class, 1: reg_1, 2: reg_2
+  std::vector<bool> modelsUsed = {false, false, false};   // 0: class, 1: reg_1, 2: reg_2
int32_t deviceId = -1;
std::vector<std::string> reg_model_paths;
}; // class GPUTPCNNClusterizerHost