ML: Fix compiler warnings

davidrohr · davidrohr · commit 83c6a8a500f4 · 2025-04-22T22:20:14.000+02:00
diff --git a/Common/ML/include/ML/3rdparty/GPUORTFloat16.h b/Common/ML/include/ML/3rdparty/GPUORTFloat16.h
@@ -535,22 +535,22 @@ GPUdi() uint16_t BFloat16Impl<Derived>::ToUint16Impl(float v) noexcept
     result = kPositiveQNaNBits;
   } else {
     auto get_msb_half = [](float fl) {
-      uint16_t result;
+      uint16_t res;
 #ifdef GPUCA_GPUCODE
-      o2::gpu::CAMath::memcpy(&result, reinterpret_cast<char*>(&fl) + sizeof(uint16_t), sizeof(uint16_t));
+      o2::gpu::CAMath::memcpy(&res, reinterpret_cast<char*>(&fl) + sizeof(uint16_t), sizeof(uint16_t));
 #else
 #ifdef __cpp_if_constexpr
       if constexpr (detail::endian::native == detail::endian::little)
 #else
       if (detail::endian::native == detail::endian::little)
 #endif
       {
-        std::memcpy(&result, reinterpret_cast<char*>(&fl) + sizeof(uint16_t), sizeof(uint16_t));
+        std::memcpy(&res, reinterpret_cast<char*>(&fl) + sizeof(uint16_t), sizeof(uint16_t));
       } else {
-        std::memcpy(&result, &fl, sizeof(uint16_t));
+        std::memcpy(&res, &fl, sizeof(uint16_t));
       }
 #endif
-      return result;
+      return res;
     };
 
     uint16_t upper_bits = get_msb_half(v);
diff --git a/Common/ML/include/ML/OrtInterface.h b/Common/ML/include/ML/OrtInterface.h
@@ -22,6 +22,7 @@
 #include <memory>
 #include <map>
 #include <thread>
+#include <unordered_map>
 
 // O2 includes
 #include "Framework/Logger.h"
diff --git a/Common/ML/src/OrtInterface.cxx b/Common/ML/src/OrtInterface.cxx
@@ -19,6 +19,8 @@
 // ONNX includes
 #include <onnxruntime_cxx_api.h>
 
+#include <sstream>
+
 namespace o2
 {
 
@@ -139,7 +141,6 @@ void OrtModel::initSession()
 
 void OrtModel::memoryOnDevice(int32_t deviceIndex)
 {
-#if (defined(ORT_ROCM_BUILD) || defined(ORT_MIGRAPHX_BUILD) || defined(ORT_CUDA_BUILD) || defined(ORT_TENSORRT_BUILD))
   if (deviceIndex >= 0) {
     (pImplOrt->runOptions).AddConfigEntry("disable_synchronize_execution_providers", "1");
     (pImplOrt->sessionOptions).AddConfigEntry("session.use_device_allocator_for_initializers", "1"); // See kOrtSessionOptionsUseDeviceAllocatorForInitializers, https://github.com/microsoft/onnxruntime/blob/main/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h
@@ -161,7 +162,6 @@ void OrtModel::memoryOnDevice(int32_t deviceIndex)
       LOG(info) << "(ORT) Memory info set to on-device memory for device type " << deviceType << " with ID " << deviceIndex << " and pImplOrt pointer " << pImplOrt;
     }
   }
-#endif
 }
 
 void OrtModel::resetSession()
diff --git a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx
@@ -980,12 +980,12 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
             DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 4, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges");
           }
 
-          float time_clusterizer = 0, time_fill = 0, time_networks = 0;
+          // float time_clusterizer = 0, time_fill = 0, time_networks = 0;
           for (int batch = 0; batch < std::ceil((float)clusterer.mPmemory->counters.nClusters / clustererNNShadow.nnClusterizerBatchedMode); batch++) {
             uint batchStart = batch * clustererNNShadow.nnClusterizerBatchedMode;
             size_t iSize = CAMath::Min((uint)clustererNNShadow.nnClusterizerBatchedMode, (uint)(clusterer.mPmemory->counters.nClusters - batchStart));
 
-            auto start0 = std::chrono::high_resolution_clock::now();
+            // auto start0 = std::chrono::high_resolution_clock::now();
             runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::fillInputNNSingleElement>({GetGrid(iSize * clustererNNShadow.nnClusterizerElementSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.nnInferenceInputDType, withMC, batchStart); // Filling the data
 
             // auto stop0 = std::chrono::high_resolution_clock::now();
diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx
@@ -29,7 +29,7 @@ using namespace o2::gpu;
 void GPUTPCNNClusterizerHost::init(const GPUSettingsProcessingNNclusterizer& settings)
 {
   std::string class_model_path = settings.nnClassificationPath, reg_model_path = settings.nnRegressionPath;
-  std::vector<std::string> reg_model_paths;
+  std::vector<std::string> reg_model_paths_local;
   std::vector<std::string> evalMode = o2::utils::Str::tokenize(settings.nnEvalMode, ':');
 
   if (settings.nnLoadFromCCDB) {
@@ -60,20 +60,20 @@ void GPUTPCNNClusterizerHost::init(const GPUSettingsProcessingNNclusterizer& set
   model_class.initOptions(OrtOptions);
   modelsUsed[0] = true;
 
-  reg_model_paths = o2::utils::Str::tokenize(reg_model_path, ':');
+  reg_model_paths_local = o2::utils::Str::tokenize(reg_model_path, ':');
 
   if (!settings.nnClusterizerUseCfRegression) {
-    if (reg_model_paths.size() == 1) {
-      OrtOptions["model-path"] = reg_model_paths[0];
+    if (reg_model_paths_local.size() == 1) {
+      OrtOptions["model-path"] = reg_model_paths_local[0];
       OrtOptions["onnx-environment-name"] = "r1";
       model_reg_1.initOptions(OrtOptions);
       modelsUsed[1] = true;
     } else {
-      OrtOptions["model-path"] = reg_model_paths[0];
+      OrtOptions["model-path"] = reg_model_paths_local[0];
       OrtOptions["onnx-environment-name"] = "r1";
       model_reg_1.initOptions(OrtOptions);
       modelsUsed[1] = true;
-      OrtOptions["model-path"] = reg_model_paths[1];
+      OrtOptions["model-path"] = reg_model_paths_local[1];
       OrtOptions["onnx-environment-name"] = "r2";
       model_reg_2.initOptions(OrtOptions);
       modelsUsed[2] = true;
@@ -154,6 +154,7 @@ MockedOrtAllocator::MockedOrtAllocator(GPUReconstruction* r, OrtMemoryInfo* info
 MockedOrtAllocator::~MockedOrtAllocator()
 {
   // Ort::GetApi().ReleaseMemoryInfo(memory_info);
+  (void)0; // Suppress warning for empty destructor
 }
 
 void* MockedOrtAllocator::Alloc(size_t size)
@@ -191,8 +192,9 @@ size_t MockedOrtAllocator::NumReserveAllocations() const
 
 void MockedOrtAllocator::LeakCheck()
 {
-  if (memory_inuse.load())
+  if (memory_inuse.load()) {
     LOG(warning) << "memory leak!!!";
+  }
 }
 
 void GPUTPCNNClusterizerHost::volatileOrtAllocator(Ort::Env* env, Ort::MemoryInfo* memInfo, GPUReconstruction* rec, bool recreate)
diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx
@@ -124,7 +124,7 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::fil
   CfChargePos peak = clusterer.mPfilteredPeakPositions[base_idx + batchStart];
   int row = static_cast<int>(peak.row()), pad = static_cast<int>(peak.pad());
 
-  if (clustererNN.nnClusterizerAddIndexData && transient_index == (clustererNN.nnClusterizerElementSize - 1)) {
+  if (clustererNN.nnClusterizerAddIndexData && (int32_t)transient_index == (clustererNN.nnClusterizerElementSize - 1)) {
     uint top_idx = (base_idx + 1) * clustererNN.nnClusterizerElementSize;
     for (uint16_t i = 0; i < 8; i++) {
       Delta2 d = cfconsts::InnerNeighbors[i];
@@ -141,7 +141,7 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::fil
       clustererNN.inputData_32[top_idx - 2] = row / 152.f;
       clustererNN.inputData_32[top_idx - 1] = static_cast<float>(pad) / GPUTPCGeometry::NPads(row);
     }
-  } else if (transient_index < (clustererNN.nnClusterizerElementSize - 3)) {
+  } else if ((int32_t)transient_index < (clustererNN.nnClusterizerElementSize - 3)) {
     int time = static_cast<int>(peak.time());
     int r = CAMath::Floor(transient_index / ((2 * clustererNN.nnClusterizerSizeInputPad + 1) * (2 * clustererNN.nnClusterizerSizeInputTime + 1))) - clustererNN.nnClusterizerSizeInputRow;
     bool is_row_boundary = ((row + r) > (o2::tpc::constants::MAXGLOBALPADROW - 1)) || ((row + r) < 0);
@@ -197,7 +197,7 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::det
   uint elem_iterator = glo_idx * clustererNN.nnClusterizerModelClassNumOutputNodes;
   float current_max_prob = 0.f; // If the neural network doesn't contain the softmax as a last layer, the outputs can range in [-infty, infty]
   uint class_label = 0;
-  for (int pIdx = elem_iterator; pIdx < elem_iterator + clustererNN.nnClusterizerModelClassNumOutputNodes; pIdx++) {
+  for (uint pIdx = elem_iterator; pIdx < elem_iterator + clustererNN.nnClusterizerModelClassNumOutputNodes; pIdx++) {
     if (pIdx == elem_iterator) {
       if (dtype == 0) {
         current_max_prob = static_cast<float>(clustererNN.modelProbabilities_16[pIdx]);

Original file line number	Diff line number	Diff line change
`@@ -19,6 +19,8 @@`
`19`	`19`	`// ONNX includes`
`20`	`20`	`#include <onnxruntime_cxx_api.h>`
`21`	`21`
	`22`	`+#include <sstream>`
	`23`	`+`
`22`	`24`	`namespace o2`
`23`	`25`	`{`
`24`	`26`
`@@ -139,7 +141,6 @@ void OrtModel::initSession()`
`139`	`141`
`140`	`142`	`void OrtModel::memoryOnDevice(int32_t deviceIndex)`
`141`	`143`	`{`
`142`		`-#if (defined(ORT_ROCM_BUILD) \|\| defined(ORT_MIGRAPHX_BUILD) \|\| defined(ORT_CUDA_BUILD) \|\| defined(ORT_TENSORRT_BUILD))`
`143`	`144`	`if (deviceIndex >= 0) {`
`144`	`145`	`(pImplOrt->runOptions).AddConfigEntry("disable_synchronize_execution_providers", "1");`
`145`	`146`	`(pImplOrt->sessionOptions).AddConfigEntry("session.use_device_allocator_for_initializers", "1"); // See kOrtSessionOptionsUseDeviceAllocatorForInitializers, https://github.com/microsoft/onnxruntime/blob/main/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h`
`@@ -161,7 +162,6 @@ void OrtModel::memoryOnDevice(int32_t deviceIndex)`
`161`	`162`	`LOG(info) << "(ORT) Memory info set to on-device memory for device type " << deviceType << " with ID " << deviceIndex << " and pImplOrt pointer " << pImplOrt;`
`162`	`163`	`}`
`163`	`164`	`}`
`164`		`-#endif`
`165`	`165`	`}`
`166`	`166`
`167`	`167`	`void OrtModel::resetSession()`