Skip to content

Commit 938a1ed

Browse files
committed
Adjust for comments
1 parent a67b634 commit 938a1ed

File tree

10 files changed

+76
-78
lines changed

10 files changed

+76
-78
lines changed

Common/ML/CMakeLists.txt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ o2_add_library(ML
1616

1717
# Pass ORT variables as a preprocessor definition
1818
target_compile_definitions(${targetName} PRIVATE
19-
ORT_ROCM_BUILD=$<BOOL:${ORT_ROCM_BUILD}>
20-
ORT_CUDA_BUILD=$<BOOL:${ORT_CUDA_BUILD}>
21-
ORT_MIGRAPHX_BUILD=$<BOOL:${ORT_MIGRAPHX_BUILD}>
22-
ORT_TENSORRT_BUILD=$<BOOL:${ORT_TENSORRT_BUILD}>)
19+
$<$<BOOL:${ORT_ROCM_BUILD}>:ORT_ROCM_BUILD>
20+
$<$<BOOL:${ORT_CUDA_BUILD}>:ORT_CUDA_BUILD>
21+
$<$<BOOL:${ORT_MIGRAPHX_BUILD}>:ORT_MIGRAPHX_BUILD>
22+
$<$<BOOL:${ORT_TENSORRT_BUILD}>:ORT_TENSORRT_BUILD>)

Common/ML/src/OrtInterface.cxx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ void OrtModel::memoryOnDevice(int32_t deviceIndex)
144144
(pImplOrt->runOptions).AddConfigEntry("disable_synchronize_execution_providers", "1");
145145
(pImplOrt->sessionOptions).AddConfigEntry("session.use_device_allocator_for_initializers", "1"); // See kOrtSessionOptionsUseDeviceAllocatorForInitializers, https://github.com/microsoft/onnxruntime/blob/main/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h
146146
(pImplOrt->sessionOptions).AddConfigEntry("session.use_env_allocators", "1"); // This should enable to use the volatile memory allocation defined in O2/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx; not working yet: ONNX still assigns new memory at init time
147-
147+
(pImplOrt->sessionOptions).AddConfigEntry("session_options.enable_cpu_mem_arena", "0"); // This should enable to use the volatile memory allocation defined in O2/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx; not working yet: ONNX still assigns new memory at init time
148148
// Arena memory shrinkage comes at performance cost
149149
/// For now prefer to use single allocation, enabled by O2/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu -> SetONNXGPUStream -> rocm_options.arena_extend_strategy = 0;
150150
// (pImplOrt->runOptions).AddConfigEntry("memory.enable_memory_arena_shrinkage", ("gpu:" + std::to_string(deviceIndex)).c_str()); // See kOrtRunOptionsConfigEnableMemoryArenaShrinkage, https://github.com/microsoft/onnxruntime/blob/90c263f471bbce724e77d8e62831d3a9fa838b2f/include/onnxruntime/core/session/onnxruntime_run_options_config_keys.h#L27
@@ -158,7 +158,7 @@ void OrtModel::memoryOnDevice(int32_t deviceIndex)
158158
}
159159
pImplOrt->memoryInfo = Ort::MemoryInfo(dev_mem_str.c_str(), OrtAllocatorType::OrtDeviceAllocator, deviceIndex, OrtMemType::OrtMemTypeDefault);
160160
if (loggingLevel < 2) {
161-
LOG(info) << "(ORT) Memory info set to on-device memory for device type " << deviceType << " with ID " << deviceIndex;
161+
LOG(info) << "(ORT) Memory info set to on-device memory for device type " << deviceType << " with ID " << deviceIndex << " and pImplOrt pointer " << pImplOrt;
162162
}
163163
}
164164
#endif

GPU/GPUTracking/Base/cuda/CMakeLists.txt

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -122,17 +122,9 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2")
122122
${CMAKE_CURRENT_SOURCE_DIR}
123123
TARGETVARNAME targetName)
124124

125-
message("Compile definitions for ONNX runtime (CUDA):")
126-
message(STATUS "ORT_ROCM_BUILD: ${ORT_ROCM_BUILD}")
127-
message(STATUS "ORT_CUDA_BUILD: ${ORT_CUDA_BUILD}")
128-
message(STATUS "ORT_MIGRAPHX_BUILD: ${ORT_MIGRAPHX_BUILD}")
129-
message(STATUS "ORT_TENSORRT_BUILD: ${ORT_TENSORRT_BUILD}")
130-
131125
target_compile_definitions(${targetName} PRIVATE
132126
GPUCA_HAS_ONNX=1
133-
ORT_ROCM_BUILD=$<BOOL:${ORT_ROCM_BUILD}>
134127
ORT_CUDA_BUILD=$<BOOL:${ORT_CUDA_BUILD}>
135-
ORT_MIGRAPHX_BUILD=$<BOOL:${ORT_MIGRAPHX_BUILD}>
136128
ORT_TENSORRT_BUILD=$<BOOL:${ORT_TENSORRT_BUILD}>)
137129

138130
install(FILES ${HDRS} DESTINATION include/GPU)

GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -655,7 +655,7 @@ void GPUReconstructionCUDA::endGPUProfiling()
655655

656656
void GPUReconstructionCUDA::SetONNXGPUStream(Ort::SessionOptions& session_options, int32_t stream, int32_t* deviceId)
657657
{
658-
#if defined(ORT_CUDA_BUILD) && ORT_CUDA_BUILD == 1
658+
#ifdef ORT_CUDA_BUILD
659659
cudaGetDevice(deviceId);
660660
OrtCUDAProviderOptionsV2* cuda_options = nullptr;
661661
CreateCUDAProviderOptions(&cuda_options);
@@ -684,14 +684,15 @@ void* GPUReconstructionHIP::getGPUPointer(void* ptr)
684684

685685
void GPUReconstructionHIP::SetONNXGPUStream(Ort::SessionOptions& session_options, int32_t stream, int32_t* deviceId)
686686
{
687-
#if defined(ORT_ROCM_BUILD) && ORT_ROCM_BUILD == 1
687+
#ifdef ORT_ROCM_BUILD
688688
// Create ROCm provider options
689689
cudaGetDevice(deviceId);
690690
// const auto& api = Ort::GetApi();
691691
// api.GetCurrentGpuDeviceId(deviceId);
692692
OrtROCMProviderOptions rocm_options;
693693
rocm_options.has_user_compute_stream = 1; // Indicate that we are passing a user stream
694694
rocm_options.arena_extend_strategy = 0; // kNextPowerOfTwo = 0, kSameAsRequested = 1 -> https://github.com/search?q=repo%3Amicrosoft%2Fonnxruntime%20kSameAsRequested&type=code
695+
// rocm_options.gpu_mem_limit = 1073741824; // 0 means no limit
695696
rocm_options.user_compute_stream = mInternals->Streams[stream];
696697
session_options.AppendExecutionProvider_ROCM(rocm_options);
697698
#endif // ORT_ROCM_BUILD

GPU/GPUTracking/Base/hip/CMakeLists.txt

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -170,18 +170,10 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2")
170170
${GPUCA_HIP_SOURCE_DIR}
171171
TARGETVARNAME targetName)
172172

173-
message("Compile definitions for ONNX runtime (HIP / ROCM):")
174-
message(STATUS "ORT_ROCM_BUILD: ${ORT_ROCM_BUILD}")
175-
message(STATUS "ORT_CUDA_BUILD: ${ORT_CUDA_BUILD}")
176-
message(STATUS "ORT_MIGRAPHX_BUILD: ${ORT_MIGRAPHX_BUILD}")
177-
message(STATUS "ORT_TENSORRT_BUILD: ${ORT_TENSORRT_BUILD}")
178-
179173
target_compile_definitions(${targetName} PRIVATE
180174
GPUCA_HAS_ONNX=1
181175
ORT_ROCM_BUILD=$<BOOL:${ORT_ROCM_BUILD}>
182-
ORT_CUDA_BUILD=$<BOOL:${ORT_CUDA_BUILD}>
183-
ORT_MIGRAPHX_BUILD=$<BOOL:${ORT_MIGRAPHX_BUILD}>
184-
ORT_TENSORRT_BUILD=$<BOOL:${ORT_TENSORRT_BUILD}>)
176+
ORT_MIGRAPHX_BUILD=$<BOOL:${ORT_MIGRAPHX_BUILD}>)
185177

186178
install(FILES ${HDRS} DESTINATION include/GPU)
187179

GPU/GPUTracking/CMakeLists.txt

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -349,8 +349,7 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2")
349349
${targetName}
350350
PRIVATE $<TARGET_PROPERTY:O2::Framework,INTERFACE_INCLUDE_DIRECTORIES>)
351351

352-
target_compile_definitions(${targetName} PRIVATE
353-
GPUCA_O2_LIB GPUCA_TPC_GEOMETRY_O2 GPUCA_HAS_ONNX=1)
352+
target_compile_definitions(${targetName} PRIVATE GPUCA_O2_LIB GPUCA_TPC_GEOMETRY_O2 GPUCA_HAS_ONNX=1)
354353

355354
o2_target_root_dictionary(${MODULE}
356355
HEADERS ${HDRS_CINT_O2} ${HDRS_CINT_O2_ADDITIONAL}

GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,6 @@
4242
#ifdef GPUCA_HAS_ONNX
4343
#include "GPUTPCNNClusterizerKernels.h"
4444
#include "GPUTPCNNClusterizerHost.h"
45-
// #include "ML/3rdparty/GPUORTFloat16.h"
4645
#endif
4746

4847
using namespace o2::gpu;
@@ -628,6 +627,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
628627
int32_t deviceId = -1;
629628
int32_t numLanes = GetProcessingSettings().nTPCClustererLanes;
630629
int32_t maxThreads = mRec->getNKernelHostThreads(true);
630+
// bool recreateMemoryAllocator = false;
631631
mRec->runParallelOuterLoop(doGPU, numLanes, [&](uint32_t lane) {
632632
nnApplications[lane].init(nn_settings);
633633
if (nnApplications[lane].modelsUsed[0]) {
@@ -637,7 +637,12 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
637637
nnApplications[lane].model_class.setIntraOpNumThreads(maxThreads);
638638
}
639639
(nnApplications[lane].model_class).initEnvironment();
640-
// nnApplications[lane].volatileOrtAllocator((nnApplications[lane].model_class).getEnv(), (nnApplications[lane].model_class).getMemoryInfo(), mRec, 0);
640+
// Registering this once seems to be enough, even with different environments / models. ONNX apparently uses this per device and stores the OrtAllocator internally. All models will then use the volatile allocation.
641+
// But environment must be valid, so we init the model environment first and use it here afterwards.
642+
// Either this is done in one environment with lane == 0 or by recreating the allocator using recreateMemoryAllocator.
643+
// TODO: Volatile allocation works for reserving, but not yet for allocations when binding the input tensor
644+
// nnApplications[lane].volatileOrtAllocator((nnApplications[lane].model_class).getEnv(), (nnApplications[lane].model_class).getMemoryInfo(), mRec, recreateMemoryAllocator);
645+
// recreateMemoryAllocator = true;
641646
(nnApplications[lane].model_class).initSession();
642647
}
643648
if (nnApplications[lane].modelsUsed[1]) {
@@ -648,7 +653,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
648653
}
649654
// (nnApplications[lane].model_reg_1).setEnv((nnApplications[lane].model_class).getEnv());
650655
(nnApplications[lane].model_reg_1).initEnvironment();
651-
// nnApplications[lane].volatileOrtAllocator((nnApplications[lane].model_reg_1).getEnv(), (nnApplications[lane].model_reg_1).getMemoryInfo(), mRec, 1);
656+
// nnApplications[lane].volatileOrtAllocator((nnApplications[lane].model_reg_1).getEnv(), (nnApplications[lane].model_reg_1).getMemoryInfo(), mRec, recreateMemoryAllocator);
652657
(nnApplications[lane].model_reg_1).initSession();
653658
}
654659
if (nnApplications[lane].modelsUsed[2]) {
@@ -657,9 +662,8 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
657662
if (nnApplications[lane].model_reg_2.getIntraOpNumThreads() > maxThreads) {
658663
nnApplications[lane].model_reg_2.setIntraOpNumThreads(maxThreads);
659664
}
660-
// (nnApplications[lane].model_reg_2).setEnv((nnApplications[lane].model_class).getEnv());
661665
(nnApplications[lane].model_reg_2).initEnvironment();
662-
// nnApplications[lane].volatileOrtAllocator((nnApplications[lane].model_reg_2).getEnv(), (nnApplications[lane].model_reg_2).getMemoryInfo(), mRec, 2);
666+
// nnApplications[lane].volatileOrtAllocator((nnApplications[lane].model_class).getEnv(), (nnApplications[lane].model_class).getMemoryInfo(), mRec, recreateMemoryAllocator);
663667
(nnApplications[lane].model_reg_2).initSession();
664668
}
665669
if (nn_settings.nnClusterizerVerbosity < 3) {
@@ -685,6 +689,8 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
685689
if (doGPU) {
686690
WriteToConstantMemory(RecoStep::TPCClusterFinding, (char*)&processors()->tpcNNClusterer - (char*)processors(), &processorsShadow()->tpcNNClusterer, sizeof(GPUTPCNNClusterizer) * NSECTORS, mRec->NStreams() - 1, &mEvents->init);
687691
}
692+
LOG(info) << "Size of nnApplications[lane]: " << sizeof(nnApplications[0]) << " bytes";
693+
LOG(info) << "Size of nnApplications: " << sizeof(GPUTPCNNClusterizerHost) * GetProcessingSettings().nTPCClustererLanes << " bytes";
688694
}
689695
#endif
690696

@@ -966,8 +972,8 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
966972

967973
auto start0 = std::chrono::high_resolution_clock::now();
968974
runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::fillInputNNSingleElement>({GetGrid(iSize * clustererNNShadow.nnClusterizerElementSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.nnInferenceInputDType, withMC, batchStart); // Filling the data
969-
// auto stop0 = std::chrono::high_resolution_clock::now();
970975

976+
// auto stop0 = std::chrono::high_resolution_clock::now();
971977
// auto start1 = std::chrono::high_resolution_clock::now();
972978

973979
// NN evaluations
@@ -1048,12 +1054,12 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
10481054
// time_clusterizer += std::chrono::duration_cast<std::chrono::nanoseconds>(stop1 - start1).count() / 1e9;
10491055
// time_fill += std::chrono::duration_cast<std::chrono::nanoseconds>(stop0 - start0).count() / 1e9;
10501056
}
1051-
// if (clustererNNShadow.nnClusterizerUseCfRegression) {
1052-
// auto start1 = std::chrono::high_resolution_clock::now();
1053-
// runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::runCfClusterizer>({GetGrid(clusterer.mPmemory->counters.nClusters, lane), krnlRunRangeNone}, iSector, clustererNNShadow.nnInferenceInputDType, withMC, 0); // Running the CF regression kernel - no batching needed: batchStart = 0
1054-
// auto stop1 = std::chrono::high_resolution_clock::now();
1055-
// time_clusterizer += std::chrono::duration_cast<std::chrono::nanoseconds>(stop1 - start1).count() / 1e9;
1056-
// }
1057+
if (clustererNNShadow.nnClusterizerUseCfRegression) {
1058+
// auto start1 = std::chrono::high_resolution_clock::now();
1059+
runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::runCfClusterizer>({GetGrid(clusterer.mPmemory->counters.nClusters, lane), krnlRunRangeNone}, iSector, clustererNNShadow.nnInferenceInputDType, withMC, 0); // Running the CF regression kernel - no batching needed: batchStart = 0
1060+
// auto stop1 = std::chrono::high_resolution_clock::now();
1061+
// time_clusterizer += std::chrono::duration_cast<std::chrono::nanoseconds>(stop1 - start1).count() / 1e9;
1062+
}
10571063
// if (clustererNNShadow.nnClusterizerVerbosity < 3) {
10581064
// int acceptedClusters = 0;
10591065
// for (size_t i = 0; i < clusterer.mPmemory->counters.nClusters; ++i) {

GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx

Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,8 @@ void GPUTPCNNClusterizerHost::init(const GPUSettingsProcessingNNclusterizer& set
5454
{"enable-optimizations", std::to_string(settings.nnInferenceEnableOrtOptimization)},
5555
{"enable-profiling", std::to_string(settings.nnInferenceOrtProfiling)},
5656
{"profiling-output-path", settings.nnInferenceOrtProfilingPath},
57-
{"logging-level", std::to_string(settings.nnInferenceVerbosity)}};
57+
{"logging-level", std::to_string(settings.nnInferenceVerbosity)},
58+
{"onnx-environment-name", "c1"}};
5859

5960
model_class.initOptions(OrtOptions);
6061
modelsUsed[0] = true;
@@ -64,13 +65,16 @@ void GPUTPCNNClusterizerHost::init(const GPUSettingsProcessingNNclusterizer& set
6465
if (!settings.nnClusterizerUseCfRegression) {
6566
if (reg_model_paths.size() == 1) {
6667
OrtOptions["model-path"] = reg_model_paths[0];
68+
OrtOptions["onnx-environment-name"] = "r1";
6769
model_reg_1.initOptions(OrtOptions);
6870
modelsUsed[1] = true;
6971
} else {
7072
OrtOptions["model-path"] = reg_model_paths[0];
73+
OrtOptions["onnx-environment-name"] = "r1";
7174
model_reg_1.initOptions(OrtOptions);
7275
modelsUsed[1] = true;
7376
OrtOptions["model-path"] = reg_model_paths[1];
77+
OrtOptions["onnx-environment-name"] = "r2";
7478
model_reg_2.initOptions(OrtOptions);
7579
modelsUsed[2] = true;
7680
}
@@ -154,16 +158,19 @@ MockedOrtAllocator::~MockedOrtAllocator()
154158

155159
void* MockedOrtAllocator::Alloc(size_t size)
156160
{
161+
// LOG(info) << "(ORT) Allocating volatile memory of size " << size << " bytes";
157162
return rec->AllocateVolatileDeviceMemory(size);
158163
}
159164

160165
void* MockedOrtAllocator::Reserve(size_t size)
161166
{
167+
// LOG(info) << "(ORT) Reserving volatile memory of size " << size << " bytes";
162168
return rec->AllocateVolatileDeviceMemory(size);
163169
}
164170

165171
void MockedOrtAllocator::Free(void* p)
166172
{
173+
// LOG(info) << "(ORT) Freeing volatile memory " << p;
167174
rec->ReturnVolatileDeviceMemory();
168175
}
169176

@@ -188,21 +195,20 @@ void MockedOrtAllocator::LeakCheck()
188195
LOG(warning) << "memory leak!!!";
189196
}
190197

191-
void GPUTPCNNClusterizerHost::volatileOrtAllocator(Ort::Env* env, Ort::MemoryInfo* memInfo, GPUReconstruction* rec, int32_t chooseMockedAlloc)
198+
void GPUTPCNNClusterizerHost::volatileOrtAllocator(Ort::Env* env, Ort::MemoryInfo* memInfo, GPUReconstruction* rec, bool recreate)
192199
{
193-
if (chooseMockedAlloc == 0) {
194-
mockedAlloc_class = std::make_shared<MockedOrtAllocator>(rec, (OrtMemoryInfo*)memInfo);
195-
Ort::ThrowOnError(Ort::GetApi().RegisterAllocator((OrtEnv*)(*env), mockedAlloc_class.get()));
196-
LOG(info) << "(ORT) Mocked ORT allocator for classification network registered";
197-
} else if (chooseMockedAlloc == 1) {
198-
mockedAlloc_reg_1 = std::make_shared<MockedOrtAllocator>(rec, (OrtMemoryInfo*)memInfo);
199-
Ort::ThrowOnError(Ort::GetApi().RegisterAllocator((OrtEnv*)(*env), mockedAlloc_reg_1.get()));
200-
LOG(info) << "(ORT) Mocked ORT allocator for regression network (class 1) registered";
201-
} else if (chooseMockedAlloc == 2) {
202-
mockedAlloc_reg_2 = std::make_shared<MockedOrtAllocator>(rec, (OrtMemoryInfo*)memInfo);
203-
Ort::ThrowOnError(Ort::GetApi().RegisterAllocator((OrtEnv*)(*env), mockedAlloc_reg_2.get()));
204-
LOG(info) << "(ORT) Mocked ORT allocator for regression network (class 2) registered";
205-
} else {
206-
LOG(fatal) << "Invalid choice for mocked allocator";
200+
mockedAlloc = std::make_shared<MockedOrtAllocator>(rec, (OrtMemoryInfo*)(*memInfo));
201+
if (recreate) {
202+
Ort::ThrowOnError(Ort::GetApi().UnregisterAllocator((OrtEnv*)(*env), (OrtMemoryInfo*)(*memInfo)));
207203
}
204+
Ort::ThrowOnError(Ort::GetApi().RegisterAllocator((OrtEnv*)(*env), mockedAlloc.get()));
205+
memInfo = (Ort::MemoryInfo*)mockedAlloc->Info();
206+
}
207+
208+
const OrtMemoryInfo* GPUTPCNNClusterizerHost::getMockedMemoryInfo() {
209+
return mockedAlloc->Info();
210+
}
211+
212+
MockedOrtAllocator* GPUTPCNNClusterizerHost::getMockedAllocator() {
213+
return mockedAlloc.get();
208214
}

GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.h

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@
2222

2323
using namespace o2::ml;
2424

25-
struct OrtAllocator;
26-
struct OrtMemoryInfo;
25+
class OrtMemoryInfo;
26+
class OrtAllocator;
2727
struct MockedOrtAllocator;
2828
namespace Ort
2929
{
@@ -53,15 +53,17 @@ class GPUTPCNNClusterizerHost
5353
void initClusterizer(const GPUSettingsProcessingNNclusterizer&, GPUTPCNNClusterizer&);
5454

5555
// ONNX
56-
void volatileOrtAllocator(Ort::Env*, Ort::MemoryInfo*, GPUReconstruction*, int32_t = 0);
56+
void volatileOrtAllocator(Ort::Env*, Ort::MemoryInfo*, GPUReconstruction*, bool = false);
57+
MockedOrtAllocator* getMockedAllocator();
58+
const OrtMemoryInfo* getMockedMemoryInfo();
5759

5860
std::unordered_map<std::string, std::string> OrtOptions;
5961
o2::ml::OrtModel model_class, model_reg_1, model_reg_2; // For splitting clusters
6062
std::vector<bool> modelsUsed = {false, false, false}; // 0: class, 1: reg_1, 2: reg_2
6163
int32_t deviceId = -1;
6264
std::vector<std::string> reg_model_paths;
6365

64-
std::shared_ptr<MockedOrtAllocator> mockedAlloc_class = nullptr, mockedAlloc_reg_1 = nullptr, mockedAlloc_reg_2 = nullptr;
66+
std::shared_ptr<MockedOrtAllocator> mockedAlloc = nullptr;
6567
}; // class GPUTPCNNClusterizerHost
6668

6769
} // namespace o2::gpu

0 commit comments

Comments
 (0)