Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 15 additions & 15 deletions Common/ML/include/ML/OrtInterface.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,23 +70,23 @@ class OrtModel
Ort::SessionOptions* getSessionOptions();
Ort::MemoryInfo* getMemoryInfo();
Ort::Env* getEnv();
int32_t getIntraOpNumThreads() const { return intraOpNumThreads; }
int32_t getInterOpNumThreads() const { return interOpNumThreads; }
int32_t getIntraOpNumThreads() const { return mIntraOpNumThreads; }
int32_t getInterOpNumThreads() const { return mInterOpNumThreads; }

// Setters
void setDeviceId(int32_t id) { deviceId = id; }
void setDeviceId(int32_t id) { mDeviceId = id; }
void setIO();
void setActiveThreads(int threads) { intraOpNumThreads = threads; }
void setActiveThreads(int threads) { mIntraOpNumThreads = threads; }
void setIntraOpNumThreads(int threads)
{
if (deviceType == "CPU") {
intraOpNumThreads = threads;
if (mDeviceType == "CPU") {
mIntraOpNumThreads = threads;
}
}
void setInterOpNumThreads(int threads)
{
if (deviceType == "CPU") {
interOpNumThreads = threads;
if (mDeviceType == "CPU") {
mInterOpNumThreads = threads;
}
}
void setEnv(Ort::Env*);
Expand All @@ -113,19 +113,19 @@ class OrtModel
private:
// ORT variables -> need to be hidden as pImpl
struct OrtVariables;
OrtVariables* pImplOrt;
OrtVariables* mPImplOrt;

// Input & Output specifications of the loaded network
std::vector<const char*> inputNamesChar, outputNamesChar;
std::vector<const char*> mInputNamesChar, mOutputNamesChar;
std::vector<std::string> mInputNames, mOutputNames;
std::vector<std::vector<int64_t>> mInputShapes, mOutputShapes, inputShapesCopy, outputShapesCopy; // Input and output shapes, plus copies of both
std::vector<int64_t> inputSizePerNode, outputSizePerNode; // Input and output sizes per node
int32_t mInputsTotal = 0, mOutputsTotal = 0; // Total number of inputs and outputs
std::vector<std::vector<int64_t>> mInputShapes, mOutputShapes, mInputShapesCopy, mOutputShapesCopy; // Input and output shapes, plus copies of both
std::vector<int64_t> mInputSizePerNode, mOutputSizePerNode; // Input and output sizes per node
int32_t mInputsTotal = 0, mOutputsTotal = 0; // Total number of inputs and outputs

// Environment settings
bool mInitialized = false;
std::string modelPath, envName = "", deviceType = "CPU", thread_affinity = ""; // device options should be cpu, rocm, migraphx, cuda
int32_t intraOpNumThreads = 1, interOpNumThreads = 1, deviceId = -1, enableProfiling = 0, loggingLevel = 0, allocateDeviceMemory = 0, enableOptimizations = 0;
std::string mModelPath, mEnvName = "", mDeviceType = "CPU", mThreadAffinity = ""; // device options should be cpu, rocm, migraphx, cuda
int32_t mIntraOpNumThreads = 1, mInterOpNumThreads = 1, mDeviceId = -1, mEnableProfiling = 0, mLoggingLevel = 0, mAllocateDeviceMemory = 0, mEnableOptimizations = 0;

std::string printShape(const std::vector<int64_t>&);
std::string printShape(const std::vector<std::vector<int64_t>>&, std::vector<std::string>&);
Expand Down
250 changes: 125 additions & 125 deletions Common/ML/src/OrtInterface.cxx

Large diffs are not rendered by default.

172 changes: 86 additions & 86 deletions GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx

Large diffs are not rendered by default.

78 changes: 39 additions & 39 deletions GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -25,69 +25,69 @@ void GPUTPCNNClusterizer::SetMaxData(const GPUTrackingInOutPointers& io) {}

void* GPUTPCNNClusterizer::setIOPointers(void* mem)
{
if (nnClusterizerBatchedMode > 0) {
if (nnInferenceInputDType == 0 && nnClusterizerElementSize > 0) {
computePointerWithAlignment(mem, inputData_16, nnClusterizerBatchedMode * nnClusterizerElementSize);
} else if (nnInferenceInputDType == 1 && nnClusterizerElementSize > 0) {
computePointerWithAlignment(mem, inputData_32, nnClusterizerBatchedMode * nnClusterizerElementSize);
if (mNnClusterizerBatchedMode > 0) {
if (mNnInferenceInputDType == 0 && mNnClusterizerElementSize > 0) {
computePointerWithAlignment(mem, mInputData_16, mNnClusterizerBatchedMode * mNnClusterizerElementSize);
} else if (mNnInferenceInputDType == 1 && mNnClusterizerElementSize > 0) {
computePointerWithAlignment(mem, mInputData_32, mNnClusterizerBatchedMode * mNnClusterizerElementSize);
}
computePointerWithAlignment(mem, clusterFlags, 2 * nnClusterizerBatchedMode);
computePointerWithAlignment(mem, mClusterFlags, 2 * mNnClusterizerBatchedMode);

if (nnInferenceOutputDType == 0 && nnClusterizerElementSize > 0) {
if (nnClusterizerModelClassNumOutputNodes > 0) {
computePointerWithAlignment(mem, modelProbabilities_16, nnClusterizerBatchedMode * nnClusterizerModelClassNumOutputNodes);
if (mNnInferenceOutputDType == 0 && mNnClusterizerElementSize > 0) {
if (mNnClusterizerModelClassNumOutputNodes > 0) {
computePointerWithAlignment(mem, mModelProbabilities_16, mNnClusterizerBatchedMode * mNnClusterizerModelClassNumOutputNodes);
}
if (!nnClusterizerUseCfRegression) {
if (nnClusterizerModelReg1NumOutputNodes > 0) {
computePointerWithAlignment(mem, outputDataReg1_16, nnClusterizerBatchedMode * nnClusterizerModelReg1NumOutputNodes);
if (!mNnClusterizerUseCfRegression) {
if (mNnClusterizerModelReg1NumOutputNodes > 0) {
computePointerWithAlignment(mem, mOutputDataReg1_16, mNnClusterizerBatchedMode * mNnClusterizerModelReg1NumOutputNodes);
}
if (nnClusterizerModelReg2NumOutputNodes > 0) {
computePointerWithAlignment(mem, outputDataReg2_16, nnClusterizerBatchedMode * nnClusterizerModelReg2NumOutputNodes);
if (mNnClusterizerModelReg2NumOutputNodes > 0) {
computePointerWithAlignment(mem, mOutputDataReg2_16, mNnClusterizerBatchedMode * mNnClusterizerModelReg2NumOutputNodes);
}
}
} else if (nnInferenceOutputDType == 1 && nnClusterizerElementSize > 0) {
if (nnClusterizerModelClassNumOutputNodes > 0) {
computePointerWithAlignment(mem, modelProbabilities_32, nnClusterizerBatchedMode * nnClusterizerModelClassNumOutputNodes);
} else if (mNnInferenceOutputDType == 1 && mNnClusterizerElementSize > 0) {
if (mNnClusterizerModelClassNumOutputNodes > 0) {
computePointerWithAlignment(mem, mModelProbabilities_32, mNnClusterizerBatchedMode * mNnClusterizerModelClassNumOutputNodes);
}
if (!nnClusterizerUseCfRegression) {
if (nnClusterizerModelReg1NumOutputNodes > 0) {
computePointerWithAlignment(mem, outputDataReg1_32, nnClusterizerBatchedMode * nnClusterizerModelReg1NumOutputNodes);
if (!mNnClusterizerUseCfRegression) {
if (mNnClusterizerModelReg1NumOutputNodes > 0) {
computePointerWithAlignment(mem, mOutputDataReg1_32, mNnClusterizerBatchedMode * mNnClusterizerModelReg1NumOutputNodes);
}
if (nnClusterizerModelReg2NumOutputNodes > 0) {
computePointerWithAlignment(mem, outputDataReg2_32, nnClusterizerBatchedMode * nnClusterizerModelReg2NumOutputNodes);
if (mNnClusterizerModelReg2NumOutputNodes > 0) {
computePointerWithAlignment(mem, mOutputDataReg2_32, mNnClusterizerBatchedMode * mNnClusterizerModelReg2NumOutputNodes);
}
}
}
}
if (nnClusterizerTotalClusters > 0) {
computePointerWithAlignment(mem, outputDataClass, nnClusterizerTotalClusters);
if (mNnClusterizerTotalClusters > 0) {
computePointerWithAlignment(mem, mOutputDataClass, mNnClusterizerTotalClusters);
}
return mem;
}

// std::vector<int32_t> GPUTPCNNClusterizer::pointerSizes() {
// std::vector<int32_t> sizes(7, -1);
// if (nnClusterizerBatchedMode > 0) {
// if (nnInferenceInputDType == 0 && nnClusterizerElementSize > 0) {
// sizes[0] = nnClusterizerBatchedMode * nnClusterizerElementSize; // inputData16
// } else if (nnInferenceInputDType == 1 && nnClusterizerElementSize > 0) {
// sizes[1] = nnClusterizerBatchedMode * nnClusterizerElementSize; // inputData32
// if (mNnClusterizerBatchedMode > 0) {
// if (mNnInferenceInputDType == 0 && mNnClusterizerElementSize > 0) {
// sizes[0] = mNnClusterizerBatchedMode * mNnClusterizerElementSize; // inputData16
// } else if (mNnInferenceInputDType == 1 && mNnClusterizerElementSize > 0) {
// sizes[1] = mNnClusterizerBatchedMode * mNnClusterizerElementSize; // inputData32
// }
// sizes[2] = 2 * nnClusterizerBatchedMode; // clusterFlags
// if (nnClusterizerModelClassNumOutputNodes > 0) {
// sizes[3] = nnClusterizerBatchedMode * nnClusterizerModelClassNumOutputNodes; // modelProbabilities
// sizes[2] = 2 * mNnClusterizerBatchedMode; // mClusterFlags
// if (mNnClusterizerModelClassNumOutputNodes > 0) {
// sizes[3] = mNnClusterizerBatchedMode * mNnClusterizerModelClassNumOutputNodes; // modelProbabilities
// }
// if (!nnClusterizerUseCfRegression) {
// if (nnClusterizerModelReg1NumOutputNodes > 0) {
// sizes[4] = nnClusterizerBatchedMode * nnClusterizerModelReg1NumOutputNodes; // outputDataReg1
// if (!mNnClusterizerUseCfRegression) {
// if (mNnClusterizerModelReg1NumOutputNodes > 0) {
// sizes[4] = mNnClusterizerBatchedMode * mNnClusterizerModelReg1NumOutputNodes; // outputDataReg1
// }
// if (nnClusterizerModelReg2NumOutputNodes > 0) {
// sizes[5] = nnClusterizerBatchedMode * nnClusterizerModelReg2NumOutputNodes; // outputDataReg2
// if (mNnClusterizerModelReg2NumOutputNodes > 0) {
// sizes[5] = mNnClusterizerBatchedMode * mNnClusterizerModelReg2NumOutputNodes; // outputDataReg2
// }
// }
// }
// if (nnClusterizerTotalClusters > 0) {
// sizes[6] = nnClusterizerTotalClusters; // outputDataClass
// if (mNnClusterizerTotalClusters > 0) {
// sizes[6] = mNnClusterizerTotalClusters; // mOutputDataClass
// }
// return sizes;
// }
Expand Down
56 changes: 28 additions & 28 deletions GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,42 +37,42 @@ class GPUTPCNNClusterizer : public GPUProcessor

// Neural network clusterization

int nnClusterizerSizeInputRow = 3;
int nnClusterizerSizeInputPad = 3;
int nnClusterizerSizeInputTime = 3;
int nnClusterizerElementSize = -1;
bool nnClusterizerAddIndexData = true;
float nnClassThreshold = 0.01;
bool nnSigmoidTrafoClassThreshold = 1;
int nnClusterizerUseCfRegression = 0;
int nnClusterizerBatchedMode = 1;
int nnClusterizerTotalClusters = 1;
int nnClusterizerVerbosity = 0;
int nnClusterizerBoundaryFillValue = -1;
int nnClusterizerModelClassNumOutputNodes = -1;
int nnClusterizerModelReg1NumOutputNodes = -1;
int nnClusterizerModelReg2NumOutputNodes = -1;
int nnInferenceInputDType = 0; // 0: float16, 1: float32
int nnInferenceOutputDType = 0; // 0: float16, 1: float32
int mNnClusterizerSizeInputRow = 3;
int mNnClusterizerSizeInputPad = 3;
int mNnClusterizerSizeInputTime = 3;
int mNnClusterizerElementSize = -1;
bool mNnClusterizerAddIndexData = true;
float mNnClassThreshold = 0.01;
bool mNnSigmoidTrafoClassThreshold = 1;
int mNnClusterizerUseCfRegression = 0;
int mNnClusterizerBatchedMode = 1;
int mNnClusterizerTotalClusters = 1;
int mNnClusterizerVerbosity = 0;
int mNnClusterizerBoundaryFillValue = -1;
int mNnClusterizerModelClassNumOutputNodes = -1;
int mNnClusterizerModelReg1NumOutputNodes = -1;
int mNnClusterizerModelReg2NumOutputNodes = -1;
int mNnInferenceInputDType = 0; // 0: float16, 1: float32
int mNnInferenceOutputDType = 0; // 0: float16, 1: float32
int mISector = -1;
int deviceId = -1;
int mDeviceId = -1;

// Memory allocation for neural network

bool* clusterFlags = nullptr; // mSplitInTime, mSplitInPad. Technically both flags are set in the same way -> ClusterAccumulator.cxx
int* outputDataClass = nullptr;
bool* mClusterFlags = nullptr; // mSplitInTime, mSplitInPad. Technically both flags are set in the same way -> ClusterAccumulator.cxx
int* mOutputDataClass = nullptr;

// FP32
float* inputData_32 = nullptr;
float* modelProbabilities_32 = nullptr;
float* outputDataReg1_32 = nullptr;
float* outputDataReg2_32 = nullptr;
float* mInputData_32 = nullptr;
float* mModelProbabilities_32 = nullptr;
float* mOutputDataReg1_32 = nullptr;
float* mOutputDataReg2_32 = nullptr;

// FP16
OrtDataType::Float16_t* inputData_16 = nullptr;
OrtDataType::Float16_t* modelProbabilities_16 = nullptr;
OrtDataType::Float16_t* outputDataReg1_16 = nullptr;
OrtDataType::Float16_t* outputDataReg2_16 = nullptr;
OrtDataType::Float16_t* mInputData_16 = nullptr;
OrtDataType::Float16_t* mModelProbabilities_16 = nullptr;
OrtDataType::Float16_t* mOutputDataReg1_16 = nullptr;
OrtDataType::Float16_t* mOutputDataReg2_16 = nullptr;

int16_t mMemoryId = -1;
}; // class GPUTPCNNClusterizer
Expand Down
82 changes: 41 additions & 41 deletions GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ void GPUTPCNNClusterizerHost::init(const GPUSettingsProcessingNNclusterizer& set
}
}

OrtOptions = {
mOrtOptions = {
{"model-path", class_model_path},
{"device-type", settings.nnInferenceDevice},
{"allocate-device-memory", std::to_string(settings.nnInferenceAllocateDevMem)},
Expand All @@ -57,60 +57,60 @@ void GPUTPCNNClusterizerHost::init(const GPUSettingsProcessingNNclusterizer& set
{"logging-level", std::to_string(settings.nnInferenceVerbosity)},
{"onnx-environment-name", "c1"}};

model_class.initOptions(OrtOptions);
modelsUsed[0] = true;
mModelClass.initOptions(mOrtOptions);
mModelsUsed[0] = true;

reg_model_paths_local = o2::utils::Str::tokenize(reg_model_path, ':');

if (!settings.nnClusterizerUseCfRegression) {
if (reg_model_paths_local.size() == 1) {
OrtOptions["model-path"] = reg_model_paths_local[0];
OrtOptions["onnx-environment-name"] = "r1";
model_reg_1.initOptions(OrtOptions);
modelsUsed[1] = true;
mOrtOptions["model-path"] = reg_model_paths_local[0];
mOrtOptions["onnx-environment-name"] = "r1";
mModelReg1.initOptions(mOrtOptions);
mModelsUsed[1] = true;
} else {
OrtOptions["model-path"] = reg_model_paths_local[0];
OrtOptions["onnx-environment-name"] = "r1";
model_reg_1.initOptions(OrtOptions);
modelsUsed[1] = true;
OrtOptions["model-path"] = reg_model_paths_local[1];
OrtOptions["onnx-environment-name"] = "r2";
model_reg_2.initOptions(OrtOptions);
modelsUsed[2] = true;
mOrtOptions["model-path"] = reg_model_paths_local[0];
mOrtOptions["onnx-environment-name"] = "r1";
mModelReg1.initOptions(mOrtOptions);
mModelsUsed[1] = true;
mOrtOptions["model-path"] = reg_model_paths_local[1];
mOrtOptions["onnx-environment-name"] = "r2";
mModelReg2.initOptions(mOrtOptions);
mModelsUsed[2] = true;
}
}
}

void GPUTPCNNClusterizerHost::initClusterizer(const GPUSettingsProcessingNNclusterizer& settings, GPUTPCNNClusterizer& clustererNN)
{
clustererNN.nnClusterizerUseCfRegression = settings.nnClusterizerUseCfRegression;
clustererNN.nnClusterizerSizeInputRow = settings.nnClusterizerSizeInputRow;
clustererNN.nnClusterizerSizeInputPad = settings.nnClusterizerSizeInputPad;
clustererNN.nnClusterizerSizeInputTime = settings.nnClusterizerSizeInputTime;
clustererNN.nnClusterizerAddIndexData = settings.nnClusterizerAddIndexData;
clustererNN.nnClusterizerElementSize = ((2 * settings.nnClusterizerSizeInputRow + 1) * (2 * settings.nnClusterizerSizeInputPad + 1) * (2 * settings.nnClusterizerSizeInputTime + 1)) + (settings.nnClusterizerAddIndexData ? 3 : 0);
clustererNN.nnClusterizerBatchedMode = settings.nnClusterizerBatchedMode;
clustererNN.nnClusterizerBoundaryFillValue = settings.nnClusterizerBoundaryFillValue;
clustererNN.nnSigmoidTrafoClassThreshold = settings.nnSigmoidTrafoClassThreshold;
if (clustererNN.nnSigmoidTrafoClassThreshold) {
clustererNN.nnClassThreshold = (float)std::log(settings.nnClassThreshold / (1.f - settings.nnClassThreshold));
clustererNN.mNnClusterizerUseCfRegression = settings.nnClusterizerUseCfRegression;
clustererNN.mNnClusterizerSizeInputRow = settings.nnClusterizerSizeInputRow;
clustererNN.mNnClusterizerSizeInputPad = settings.nnClusterizerSizeInputPad;
clustererNN.mNnClusterizerSizeInputTime = settings.nnClusterizerSizeInputTime;
clustererNN.mNnClusterizerAddIndexData = settings.nnClusterizerAddIndexData;
clustererNN.mNnClusterizerElementSize = ((2 * settings.nnClusterizerSizeInputRow + 1) * (2 * settings.nnClusterizerSizeInputPad + 1) * (2 * settings.nnClusterizerSizeInputTime + 1)) + (settings.nnClusterizerAddIndexData ? 3 : 0);
clustererNN.mNnClusterizerBatchedMode = settings.nnClusterizerBatchedMode;
clustererNN.mNnClusterizerBoundaryFillValue = settings.nnClusterizerBoundaryFillValue;
clustererNN.mNnSigmoidTrafoClassThreshold = settings.nnSigmoidTrafoClassThreshold;
if (clustererNN.mNnSigmoidTrafoClassThreshold) {
clustererNN.mNnClassThreshold = (float)std::log(settings.nnClassThreshold / (1.f - settings.nnClassThreshold));
} else {
clustererNN.nnClassThreshold = settings.nnClassThreshold;
clustererNN.mNnClassThreshold = settings.nnClassThreshold;
}
if (settings.nnClusterizerVerbosity < 0) {
clustererNN.nnClusterizerVerbosity = settings.nnInferenceVerbosity;
clustererNN.mNnClusterizerVerbosity = settings.nnInferenceVerbosity;
} else {
clustererNN.nnClusterizerVerbosity = settings.nnClusterizerVerbosity;
clustererNN.mNnClusterizerVerbosity = settings.nnClusterizerVerbosity;
}
clustererNN.nnInferenceInputDType = settings.nnInferenceInputDType.find("32") != std::string::npos;
clustererNN.nnInferenceOutputDType = settings.nnInferenceOutputDType.find("32") != std::string::npos;
clustererNN.nnClusterizerModelClassNumOutputNodes = model_class.getNumOutputNodes()[0][1];
clustererNN.mNnInferenceInputDType = settings.nnInferenceInputDType.find("32") != std::string::npos;
clustererNN.mNnInferenceOutputDType = settings.nnInferenceOutputDType.find("32") != std::string::npos;
clustererNN.mNnClusterizerModelClassNumOutputNodes = mModelClass.getNumOutputNodes()[0][1];
if (!settings.nnClusterizerUseCfRegression) {
if (model_class.getNumOutputNodes()[0][1] == 1 || !model_reg_2.isInitialized()) {
clustererNN.nnClusterizerModelReg1NumOutputNodes = model_reg_1.getNumOutputNodes()[0][1];
if (mModelClass.getNumOutputNodes()[0][1] == 1 || !mModelReg2.isInitialized()) {
clustererNN.mNnClusterizerModelReg1NumOutputNodes = mModelReg1.getNumOutputNodes()[0][1];
} else {
clustererNN.nnClusterizerModelReg1NumOutputNodes = model_reg_1.getNumOutputNodes()[0][1];
clustererNN.nnClusterizerModelReg2NumOutputNodes = model_reg_2.getNumOutputNodes()[0][1];
clustererNN.mNnClusterizerModelReg1NumOutputNodes = mModelReg1.getNumOutputNodes()[0][1];
clustererNN.mNnClusterizerModelReg2NumOutputNodes = mModelReg2.getNumOutputNodes()[0][1];
}
}
}
Expand Down Expand Up @@ -199,20 +199,20 @@ void MockedOrtAllocator::LeakCheck()

void GPUTPCNNClusterizerHost::volatileOrtAllocator(Ort::Env* env, Ort::MemoryInfo* memInfo, GPUReconstruction* rec, bool recreate)
{
mockedAlloc = std::make_shared<MockedOrtAllocator>(rec, (OrtMemoryInfo*)(*memInfo));
mMockedAlloc = std::make_shared<MockedOrtAllocator>(rec, (OrtMemoryInfo*)(*memInfo));
if (recreate) {
Ort::ThrowOnError(Ort::GetApi().UnregisterAllocator((OrtEnv*)(*env), (OrtMemoryInfo*)(*memInfo)));
}
Ort::ThrowOnError(Ort::GetApi().RegisterAllocator((OrtEnv*)(*env), mockedAlloc.get()));
memInfo = (Ort::MemoryInfo*)mockedAlloc->Info();
Ort::ThrowOnError(Ort::GetApi().RegisterAllocator((OrtEnv*)(*env), mMockedAlloc.get()));
memInfo = (Ort::MemoryInfo*)mMockedAlloc->Info();
}

const OrtMemoryInfo* GPUTPCNNClusterizerHost::getMockedMemoryInfo()
{
return mockedAlloc->Info();
return mMockedAlloc->Info();
}

MockedOrtAllocator* GPUTPCNNClusterizerHost::getMockedAllocator()
{
return mockedAlloc.get();
return mMockedAlloc.get();
}
Loading