76 changes: 37 additions & 39 deletions Common/ML/include/ML/ort_interface.h
@@ -35,60 +35,58 @@ namespace ml
class OrtModel
{

public:
// Constructor
OrtModel() = default;
OrtModel(std::unordered_map<std::string, std::string> optionsMap){ reset(optionsMap); }
void init(std::unordered_map<std::string, std::string> optionsMap){ reset(optionsMap); }
void reset(std::unordered_map<std::string, std::string>);
public:
// Constructor
OrtModel() = default;
OrtModel(std::unordered_map<std::string, std::string> optionsMap) { reset(optionsMap); }
void init(std::unordered_map<std::string, std::string> optionsMap) { reset(optionsMap); }
void reset(std::unordered_map<std::string, std::string>);

virtual ~OrtModel() = default;
virtual ~OrtModel() = default;

// Conversion
template<class I, class O>
std::vector<O> v2v(std::vector<I>&, bool = true);
// Conversion
template <class I, class O>
std::vector<O> v2v(std::vector<I>&, bool = true);

// Inferencing
template<class I, class O> // class I is the input data type, e.g. float, class O is the output data type, e.g. OrtDataType::Float16_t from O2/Common/ML/include/ML/GPUORTFloat16.h
std::vector<O> inference(std::vector<I>&);
// Inferencing
template <class I, class O> // class I is the input data type, e.g. float, class O is the output data type, e.g. OrtDataType::Float16_t from O2/Common/ML/include/ML/GPUORTFloat16.h
std::vector<O> inference(std::vector<I>&);

template<class I, class O> // class I is the input data type, e.g. float, class O is the output data type, e.g. O2::gpu::OrtDataType::Float16_t from O2/GPU/GPUTracking/ML/convert_float16.h
std::vector<O> inference(std::vector<std::vector<I>>&);
template <class I, class O> // class I is the input data type, e.g. float, class O is the output data type, e.g. O2::gpu::OrtDataType::Float16_t from O2/GPU/GPUTracking/ML/convert_float16.h
std::vector<O> inference(std::vector<std::vector<I>>&);

// template<class I, class T, class O> // class I is the input data type, e.g. float, class T the throughput data type and class O is the output data type
// std::vector<O> inference(std::vector<I>&);
// template<class I, class T, class O> // class I is the input data type, e.g. float, class T the throughput data type and class O is the output data type
// std::vector<O> inference(std::vector<I>&);

// Reset session
void resetSession();
// Reset session
void resetSession();

std::vector<std::vector<int64_t>> getNumInputNodes() const { return mInputShapes; }
std::vector<std::vector<int64_t>> getNumOutputNodes() const { return mOutputShapes; }
std::vector<std::string> getInputNames() const { return mInputNames; }
std::vector<std::string> getOutputNames() const { return mOutputNames; }
std::vector<std::vector<int64_t>> getNumInputNodes() const { return mInputShapes; }
std::vector<std::vector<int64_t>> getNumOutputNodes() const { return mOutputShapes; }
std::vector<std::string> getInputNames() const { return mInputNames; }
std::vector<std::string> getOutputNames() const { return mOutputNames; }

void setActiveThreads(int threads) { intraOpNumThreads = threads; }
void setActiveThreads(int threads) { intraOpNumThreads = threads; }

private:
private:
// ORT variables -> need to be hidden as Pimpl
struct OrtVariables;
OrtVariables* pImplOrt;

// ORT variables -> need to be hidden as Pimpl
struct OrtVariables;
OrtVariables* pImplOrt;
// Input & Output specifications of the loaded network
std::vector<const char*> inputNamesChar, outputNamesChar;
std::vector<std::string> mInputNames, mOutputNames;
std::vector<std::vector<int64_t>> mInputShapes, mOutputShapes;

// Input & Output specifications of the loaded network
std::vector<const char*> inputNamesChar, outputNamesChar;
std::vector<std::string> mInputNames, mOutputNames;
std::vector<std::vector<int64_t>> mInputShapes, mOutputShapes;

// Environment settings
std::string modelPath, device = "cpu", dtype = "float"; // device options should be cpu, rocm, migraphx, cuda
int intraOpNumThreads = 0, deviceId = 0, enableProfiling = 0, loggingLevel = 0, allocateDeviceMemory = 0, enableOptimizations = 0;

std::string printShape(const std::vector<int64_t>&);
// Environment settings
std::string modelPath, device = "cpu", dtype = "float"; // device options should be cpu, rocm, migraphx, cuda
int intraOpNumThreads = 0, deviceId = 0, enableProfiling = 0, loggingLevel = 0, allocateDeviceMemory = 0, enableOptimizations = 0;

std::string printShape(const std::vector<int64_t>&);
};

} // namespace ml

} // namespace ml
} // namespace o2

#endif // O2_ML_ORT_INTERFACE_H
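For context, a minimal usage sketch of the OrtModel interface declared above (illustrative only, not part of this diff). The option keys mirror those read in OrtModel::reset() in the source file below; the include path and the model file name "net.onnx" are assumptions.

// Minimal usage sketch of OrtModel -- illustrative only, not part of this PR.
// Assumptions: the include path matches Common/ML/include, and "net.onnx" is a placeholder model file.
#include "ML/ort_interface.h"

#include <string>
#include <unordered_map>
#include <vector>

void runOrtExample()
{
  std::unordered_map<std::string, std::string> options{
    {"model-path", "net.onnx"},    // placeholder path to an ONNX model
    {"device", "CPU"},             // matches the CPU execution-provider branch in reset()
    {"intra-op-num-threads", "1"}, // sequential execution mode
    {"logging-level", "2"}};       // > 1 prints the input/output node shapes

  o2::ml::OrtModel model(options);

  // Flat input vector; its length must be a multiple of the second dimension
  // of the first input shape reported by the session.
  std::vector<float> input(model.getNumInputNodes()[0][1], 0.f);
  std::vector<float> output = model.inference<float, float>(input);
}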
88 changes: 52 additions & 36 deletions Common/ML/src/ort_interface.cxx
@@ -25,7 +25,7 @@ namespace o2
namespace ml
{

struct OrtModel::OrtVariables { // The actual implementation is hidden in the .cxx file
struct OrtModel::OrtVariables { // The actual implementation is hidden in the .cxx file
// ORT runtime objects
Ort::RunOptions runOptions;
std::shared_ptr<Ort::Env> env = nullptr;
@@ -35,55 +35,56 @@ struct OrtModel::OrtVariables { // The actual implementation is hidden in the .
Ort::MemoryInfo memoryInfo = Ort::MemoryInfo("Cpu", OrtAllocatorType::OrtDeviceAllocator, 0, OrtMemType::OrtMemTypeDefault);
};

void OrtModel::reset(std::unordered_map<std::string, std::string> optionsMap){
void OrtModel::reset(std::unordered_map<std::string, std::string> optionsMap)
{

pImplOrt = new OrtVariables();

// Load from options map
if(!optionsMap.contains("model-path")){
if (!optionsMap.contains("model-path")) {
LOG(fatal) << "(ORT) Model path cannot be empty!";
}
modelPath = optionsMap["model-path"];
device = (optionsMap.contains("device") ? optionsMap["device"] : "CPU");
dtype = (optionsMap.contains("dtype") ? optionsMap["dtype"] : "float");
deviceId = (optionsMap.contains("device-id") ? std::stoi(optionsMap["device-id"]) : 0);
allocateDeviceMemory = (optionsMap.contains("allocate-device-memory") ? std::stoi(optionsMap["allocate-device-memory"]) : 0);
intraOpNumThreads = (optionsMap.contains("intra-op-num-threads") ? std::stoi(optionsMap["intra-op-num-threads"]) : 0);
intraOpNumThreads = (optionsMap.contains("intra-op-num-threads") ? std::stoi(optionsMap["intra-op-num-threads"]) : 0);
loggingLevel = (optionsMap.contains("logging-level") ? std::stoi(optionsMap["logging-level"]) : 0);
enableProfiling = (optionsMap.contains("enable-profiling") ? std::stoi(optionsMap["enable-profiling"]) : 0);
enableOptimizations = (optionsMap.contains("enable-optimizations") ? std::stoi(optionsMap["enable-optimizations"]) : 0);

std::string dev_mem_str = "Hip";
#ifdef ORT_ROCM_BUILD
if(device == "ROCM") {
if (device == "ROCM") {
Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_ROCM(pImplOrt->sessionOptions, deviceId));
LOG(info) << "(ORT) ROCM execution provider set";
}
#endif
#ifdef ORT_MIGRAPHX_BUILD
if(device == "MIGRAPHX") {
if (device == "MIGRAPHX") {
Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_MIGraphX(pImplOrt->sessionOptions, deviceId));
LOG(info) << "(ORT) MIGraphX execution provider set";
}
#endif
#ifdef ORT_CUDA_BUILD
if(device == "CUDA") {
if (device == "CUDA") {
Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CUDA(pImplOrt->sessionOptions, deviceId));
LOG(info) << "(ORT) CUDA execution provider set";
dev_mem_str = "Cuda";
}
#endif

if(allocateDeviceMemory){
if (allocateDeviceMemory) {
pImplOrt->memoryInfo = Ort::MemoryInfo(dev_mem_str.c_str(), OrtAllocatorType::OrtDeviceAllocator, deviceId, OrtMemType::OrtMemTypeDefault);
LOG(info) << "(ORT) Memory info set to on-device memory";
}

if(device == "CPU") {
if (device == "CPU") {
(pImplOrt->sessionOptions).SetIntraOpNumThreads(intraOpNumThreads);
if(intraOpNumThreads > 1){
if (intraOpNumThreads > 1) {
(pImplOrt->sessionOptions).SetExecutionMode(ExecutionMode::ORT_PARALLEL);
} else if(intraOpNumThreads == 1){
} else if (intraOpNumThreads == 1) {
(pImplOrt->sessionOptions).SetExecutionMode(ExecutionMode::ORT_SEQUENTIAL);
}
LOG(info) << "(ORT) CPU execution provider set with " << intraOpNumThreads << " threads";
@@ -92,8 +93,8 @@ void OrtModel::reset(std::unordered_map<std::string, std::string> optionsMap){
(pImplOrt->sessionOptions).DisableMemPattern();
(pImplOrt->sessionOptions).DisableCpuMemArena();

if(enableProfiling){
if(optionsMap.contains("profiling-output-path")){
if (enableProfiling) {
if (optionsMap.contains("profiling-output-path")) {
(pImplOrt->sessionOptions).EnableProfiling((optionsMap["profiling-output-path"] + "/ORT_LOG_").c_str());
} else {
LOG(warning) << "(ORT) If profiling is enabled, optionsMap[\"profiling-output-path\"] should be set. Disabling profiling for now.";
@@ -109,27 +110,27 @@ void OrtModel::reset(std::unordered_map<std::string, std::string> optionsMap){
(pImplOrt->session).reset(new Ort::Session{*(pImplOrt->env), modelPath.c_str(), pImplOrt->sessionOptions});

for (size_t i = 0; i < (pImplOrt->session)->GetInputCount(); ++i) {
mInputNames.push_back((pImplOrt->session)->GetInputNameAllocated(i, pImplOrt->allocator).get());
mInputNames.push_back((pImplOrt->session)->GetInputNameAllocated(i, pImplOrt->allocator).get());
}
for (size_t i = 0; i < (pImplOrt->session)->GetInputCount(); ++i) {
mInputShapes.emplace_back((pImplOrt->session)->GetInputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape());
mInputShapes.emplace_back((pImplOrt->session)->GetInputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape());
}
for (size_t i = 0; i < (pImplOrt->session)->GetOutputCount(); ++i) {
mOutputNames.push_back((pImplOrt->session)->GetOutputNameAllocated(i, pImplOrt->allocator).get());
mOutputNames.push_back((pImplOrt->session)->GetOutputNameAllocated(i, pImplOrt->allocator).get());
}
for (size_t i = 0; i < (pImplOrt->session)->GetOutputCount(); ++i) {
mOutputShapes.emplace_back((pImplOrt->session)->GetOutputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape());
mOutputShapes.emplace_back((pImplOrt->session)->GetOutputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape());
}

inputNamesChar.resize(mInputNames.size(), nullptr);
std::transform(std::begin(mInputNames), std::end(mInputNames), std::begin(inputNamesChar),
[&](const std::string& str) { return str.c_str(); });
[&](const std::string& str) { return str.c_str(); });
outputNamesChar.resize(mOutputNames.size(), nullptr);
std::transform(std::begin(mOutputNames), std::end(mOutputNames), std::begin(outputNamesChar),
[&](const std::string& str) { return str.c_str(); });
[&](const std::string& str) { return str.c_str(); });

// Print names
if(loggingLevel > 1) {
if (loggingLevel > 1) {
LOG(info) << "Input Nodes:";
for (size_t i = 0; i < mInputNames.size(); i++) {
LOG(info) << "\t" << mInputNames[i] << " : " << printShape(mInputShapes[i]);
@@ -142,24 +143,28 @@ void OrtModel::reset(std::unordered_map<std::string, std::string> optionsMap){
}
}

void OrtModel::resetSession() {
void OrtModel::resetSession()
{
(pImplOrt->session).reset(new Ort::Session{*(pImplOrt->env), modelPath.c_str(), pImplOrt->sessionOptions});
}

template<class I, class O>
std::vector<O> OrtModel::v2v(std::vector<I>& input, bool clearInput) {
if constexpr (std::is_same_v<I,O>){
template <class I, class O>
std::vector<O> OrtModel::v2v(std::vector<I>& input, bool clearInput)
{
if constexpr (std::is_same_v<I, O>) {
return input;
} else {
std::vector<O> output(input.size());
std::transform(std::begin(input), std::end(input), std::begin(output), [](I f) { return O(f); });
if(clearInput) input.clear();
if (clearInput)
input.clear();
return output;
}
}

template<class I, class O> // class I is the input data type, e.g. float, class O is the output data type, e.g. O2::gpu::OrtDataType::Float16_t from O2/GPU/GPUTracking/ML/convert_float16.h
std::vector<O> OrtModel::inference(std::vector<I>& input){
template <class I, class O> // class I is the input data type, e.g. float, class O is the output data type, e.g. O2::gpu::OrtDataType::Float16_t from O2/GPU/GPUTracking/ML/convert_float16.h
std::vector<O> OrtModel::inference(std::vector<I>& input)
{
std::vector<int64_t> inputShape{(int64_t)(input.size() / mInputShapes[0][1]), (int64_t)mInputShapes[0][1]};
std::vector<Ort::Value> inputTensor;
inputTensor.emplace_back(Ort::Value::CreateTensor<O>(pImplOrt->memoryInfo, (reinterpret_cast<O*>(input)).data(), input.size(), inputShape.data(), inputShape.size()));
@@ -171,10 +176,11 @@ std::vector<O> OrtModel::inference(std::vector<I>& input){
return outputValuesVec;
}

template<class I, class O> // class I is the input data type, e.g. float, class O is the output data type, e.g. O2::gpu::OrtDataType::Float16_t from O2/GPU/GPUTracking/ML/convert_float16.h
std::vector<O> OrtModel::inference(std::vector<std::vector<I>>& input){
template <class I, class O> // class I is the input data type, e.g. float, class O is the output data type, e.g. O2::gpu::OrtDataType::Float16_t from O2/GPU/GPUTracking/ML/convert_float16.h
std::vector<O> OrtModel::inference(std::vector<std::vector<I>>& input)
{
std::vector<Ort::Value> inputTensor;
for(auto i : input){
for (auto i : input) {
std::vector<int64_t> inputShape{(int64_t)(i.size() / mInputShapes[0][1]), (int64_t)mInputShapes[0][1]};
inputTensor.emplace_back(Ort::Value::CreateTensor<O>(pImplOrt->memoryInfo, (reinterpret_cast<O*>(i)).data(), i.size(), inputShape.data(), inputShape.size()));
}
@@ -195,7 +201,9 @@ std::string OrtModel::printShape(const std::vector<int64_t>& v)
return ss.str();
}

template <> std::vector<float> OrtModel::inference<float, float>(std::vector<float>& input) {
template <>
std::vector<float> OrtModel::inference<float, float>(std::vector<float>& input)
{
std::vector<int64_t> inputShape{(int64_t)(input.size() / mInputShapes[0][1]), (int64_t)mInputShapes[0][1]};
std::vector<Ort::Value> inputTensor;
inputTensor.emplace_back(Ort::Value::CreateTensor<float>(pImplOrt->memoryInfo, input.data(), input.size(), inputShape.data(), inputShape.size()));
@@ -207,7 +215,9 @@ template <> std::vector<float> OrtModel::inference<float, float>(std::vector<flo
return outputValuesVec;
}

template <> std::vector<float> OrtModel::inference<OrtDataType::Float16_t, float>(std::vector<OrtDataType::Float16_t>& input) {
template <>
std::vector<float> OrtModel::inference<OrtDataType::Float16_t, float>(std::vector<OrtDataType::Float16_t>& input)
{
std::vector<int64_t> inputShape{(int64_t)(input.size() / mInputShapes[0][1]), (int64_t)mInputShapes[0][1]};
std::vector<Ort::Value> inputTensor;
inputTensor.emplace_back(Ort::Value::CreateTensor<Ort::Float16_t>(pImplOrt->memoryInfo, reinterpret_cast<Ort::Float16_t*>(input.data()), input.size(), inputShape.data(), inputShape.size()));
@@ -219,7 +229,9 @@ template <> std::vector<float> OrtModel::inference<OrtDataType::Float16_t, float
return outputValuesVec;
}

template <> std::vector<OrtDataType::Float16_t> OrtModel::inference<OrtDataType::Float16_t, OrtDataType::Float16_t>(std::vector<OrtDataType::Float16_t>& input) {
template <>
std::vector<OrtDataType::Float16_t> OrtModel::inference<OrtDataType::Float16_t, OrtDataType::Float16_t>(std::vector<OrtDataType::Float16_t>& input)
{
std::vector<int64_t> inputShape{(int64_t)(input.size() / mInputShapes[0][1]), (int64_t)mInputShapes[0][1]};
std::vector<Ort::Value> inputTensor;
inputTensor.emplace_back(Ort::Value::CreateTensor<Ort::Float16_t>(pImplOrt->memoryInfo, reinterpret_cast<Ort::Float16_t*>(input.data()), input.size(), inputShape.data(), inputShape.size()));
@@ -231,7 +243,9 @@ template <> std::vector<OrtDataType::Float16_t> OrtModel::inference<OrtDataType:
return outputValuesVec;
}

template <> std::vector<OrtDataType::Float16_t> OrtModel::inference<float, OrtDataType::Float16_t>(std::vector<float>& input) {
template <>
std::vector<OrtDataType::Float16_t> OrtModel::inference<float, OrtDataType::Float16_t>(std::vector<float>& input)
{
std::vector<int64_t> inputShape{(int64_t)(input.size() / mInputShapes[0][1]), (int64_t)mInputShapes[0][1]};
std::vector<Ort::Value> inputTensor;
inputTensor.emplace_back(Ort::Value::CreateTensor<Ort::Float16_t>(pImplOrt->memoryInfo, reinterpret_cast<Ort::Float16_t*>(input.data()), input.size(), inputShape.data(), inputShape.size()));
@@ -243,9 +257,11 @@ template <> std::vector<OrtDataType::Float16_t> OrtModel::inference<float, OrtDa
return outputValuesVec;
}

template <> std::vector<OrtDataType::Float16_t> OrtModel::inference<OrtDataType::Float16_t, OrtDataType::Float16_t>(std::vector<std::vector<OrtDataType::Float16_t>>& input) {
template <>
std::vector<OrtDataType::Float16_t> OrtModel::inference<OrtDataType::Float16_t, OrtDataType::Float16_t>(std::vector<std::vector<OrtDataType::Float16_t>>& input)
{
std::vector<Ort::Value> inputTensor;
for(auto i : input){
for (auto i : input) {
std::vector<int64_t> inputShape{(int64_t)(i.size() / mInputShapes[0][1]), (int64_t)mInputShapes[0][1]};
inputTensor.emplace_back(Ort::Value::CreateTensor<Ort::Float16_t>(pImplOrt->memoryInfo, reinterpret_cast<Ort::Float16_t*>(i.data()), i.size(), inputShape.data(), inputShape.size()));
}
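The float16 specializations above can be combined with v2v when the caller holds float data but the network runs in float16. A minimal sketch under stated assumptions (not part of this PR): it assumes OrtDataType::Float16_t is constructible from float, that the v2v member template is instantiated for the float to Float16_t pair, that the GPUORTFloat16.h include path matches the comment in the header above, and it is placed in namespace o2::ml so the unqualified OrtDataType name resolves as in ort_interface.cxx.

// Sketch: feeding float data to a float16 network -- illustrative only, not part of this PR.
// Assumptions: OrtDataType::Float16_t(float) exists and v2v<float, Float16_t> is instantiated.
#include "ML/ort_interface.h"
#include "ML/GPUORTFloat16.h"

#include <vector>

namespace o2
{
namespace ml
{

std::vector<float> inferFromFloat(OrtModel& model, std::vector<float>& data)
{
  // Convert float -> Float16_t; the second argument clears the float input afterwards.
  std::vector<OrtDataType::Float16_t> half = model.v2v<float, OrtDataType::Float16_t>(data, true);

  // Explicit specialization defined above: Float16_t input, float output.
  return model.inference<OrtDataType::Float16_t, float>(half);
}

} // namespace ml
} // namespace o2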