ChSonnabend · ChSonnabend · Apr 16, 2025 · Apr 16, 2025
diff --git a/Common/ML/src/OrtInterface.cxx b/Common/ML/src/OrtInterface.cxx
@@ -143,7 +143,7 @@ void OrtModel::memoryOnDevice(int32_t deviceIndex)
   if (deviceIndex >= 0) {
     (pImplOrt->runOptions).AddConfigEntry("disable_synchronize_execution_providers", "1");
     (pImplOrt->sessionOptions).AddConfigEntry("session.use_device_allocator_for_initializers", "1"); // See kOrtSessionOptionsUseDeviceAllocatorForInitializers, https://github.com/microsoft/onnxruntime/blob/main/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h
-    (pImplOrt->sessionOptions).AddConfigEntry("session.use_env_allocators", "1"); // This should enable to use the volatile memory allocation defined in O2/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx; not working yet: ONNX still assigns new memory at init time
+    (pImplOrt->sessionOptions).AddConfigEntry("session.use_env_allocators", "1");                    // This should enable use of the volatile memory allocator defined in O2/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx; not working yet: ONNX still allocates new memory at init time
 
     // Arena memory shrinkage comes at performance cost
     /// For now prefer to use single allocation, enabled by O2/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu -> SetONNXGPUStream -> rocm_options.arena_extend_strategy = 0;

diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu
@@ -699,7 +699,7 @@ void GPUReconstructionHIP::SetONNXGPUStream(Ort::SessionOptions& session_options
   // api.GetCurrentGpuDeviceId(deviceId);
   OrtROCMProviderOptions rocm_options;
   rocm_options.has_user_compute_stream = 1; // Indicate that we are passing a user stream
-  rocm_options.arena_extend_strategy = 0; // kNextPowerOfTwo = 0, kSameAsRequested = 1 -> https://github.com/search?q=repo%3Amicrosoft%2Fonnxruntime%20kSameAsRequested&type=code
+  rocm_options.arena_extend_strategy = 0;   // kNextPowerOfTwo = 0, kSameAsRequested = 1 -> https://github.com/search?q=repo%3Amicrosoft%2Fonnxruntime%20kSameAsRequested&type=code
   rocm_options.user_compute_stream = mInternals->Streams[stream];
   session_options.AppendExecutionProvider_ROCM(rocm_options);
 #endif // ORT_ROCM_BUILD

diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx
@@ -125,7 +125,7 @@ struct MockedOrtAllocator : OrtAllocator {
 
   void LeakCheck();
 
-private:
+ private:
   MockedOrtAllocator(const MockedOrtAllocator&) = delete;
   MockedOrtAllocator& operator=(const MockedOrtAllocator&) = delete;
 
@@ -136,7 +136,8 @@ struct MockedOrtAllocator : OrtAllocator {
   GPUReconstruction* rec;
 };
 
-MockedOrtAllocator::MockedOrtAllocator(GPUReconstruction* r, OrtMemoryInfo* info) {
+MockedOrtAllocator::MockedOrtAllocator(GPUReconstruction* r, OrtMemoryInfo* info)
+{
   OrtAllocator::version = ORT_API_VERSION;
   OrtAllocator::Alloc = [](OrtAllocator* this_, size_t size) { return static_cast<MockedOrtAllocator*>(this_)->Alloc(size); };
   OrtAllocator::Free = [](OrtAllocator* this_, void* p) { static_cast<MockedOrtAllocator*>(this_)->Free(p); };
@@ -146,42 +147,50 @@ MockedOrtAllocator::MockedOrtAllocator(GPUReconstruction* r, OrtMemoryInfo* info
   memory_info = info;
 }
 
-MockedOrtAllocator::~MockedOrtAllocator() {
+MockedOrtAllocator::~MockedOrtAllocator()
+{
   // Ort::GetApi().ReleaseMemoryInfo(memory_info);
 }
 
-void* MockedOrtAllocator::Alloc(size_t size) {
+void* MockedOrtAllocator::Alloc(size_t size)
+{
   return rec->AllocateVolatileDeviceMemory(size);
 }
 
-void* MockedOrtAllocator::Reserve(size_t size) {
+void* MockedOrtAllocator::Reserve(size_t size)
+{
   return rec->AllocateVolatileDeviceMemory(size);
 }
 
-void MockedOrtAllocator::Free(void* p) {
+void MockedOrtAllocator::Free(void* p)
+{
   rec->ReturnVolatileDeviceMemory();
 }
 
-const OrtMemoryInfo* MockedOrtAllocator::Info() const {
+const OrtMemoryInfo* MockedOrtAllocator::Info() const
+{
   return memory_info;
 }
 
-size_t MockedOrtAllocator::NumAllocations() const {
+size_t MockedOrtAllocator::NumAllocations() const
+{
   return num_allocations.load();
 }
 
-size_t MockedOrtAllocator::NumReserveAllocations() const {
+size_t MockedOrtAllocator::NumReserveAllocations() const
+{
   return num_reserve_allocations.load();
 }
 
-void MockedOrtAllocator::LeakCheck() {
+void MockedOrtAllocator::LeakCheck()
+{
   if (memory_inuse.load())
     LOG(warning) << "memory leak!!!";
 }
 
 void GPUTPCNNClusterizerHost::volatileOrtAllocator(Ort::Env* env, Ort::MemoryInfo* memInfo, GPUReconstruction* rec, int32_t chooseMockedAlloc)
 {
-  if(chooseMockedAlloc == 0) {
+  if (chooseMockedAlloc == 0) {
     mockedAlloc_class = std::make_shared<MockedOrtAllocator>(rec, (OrtMemoryInfo*)memInfo);
     Ort::GetApi().RegisterAllocator((OrtEnv*)(*env), mockedAlloc_class.get());
     LOG(info) << "(ORT) Mocked ORT allocator for classification network registered";