ChSonnabend · ChSonnabend · Sep 6, 2025 · Sep 6, 2025
diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.cxx
@@ -69,10 +69,10 @@ void* GPUTPCNNClusterizer::setIOPointers(void* mem)
   if (mNnClusterizerVerbosity > 2) {
     if (mNnClusterizerVerbosity > 3) {
       auto fmt = [](size_t bytes) {
-      std::ostringstream os;
-      double mb = bytes / (1024.0 * 1024.0);
-      os << bytes << " bytes (" << std::fixed << std::setprecision(3) << mb << " MB)";
-      return os.str();
+        std::ostringstream os;
+        double mb = bytes / (1024.0 * 1024.0);
+        os << bytes << " bytes (" << std::fixed << std::setprecision(3) << mb << " MB)";
+        return os.str();
       };
 
       // Element counts (number of array entries, not bytes)
@@ -101,35 +101,35 @@ void* GPUTPCNNClusterizer::setIOPointers(void* mem)
 
       LOG(info) << "(NNCLUS, GPUTPCNNClusterizer, this=" << this << ") Pointers set for clusterizer with memoryID " << mMemoryId << " deviceID " << mDeviceId << " and sector " << mISector;
       LOG(info) << "(NNCLUS, GPUTPCNNClusterizer, this=" << this << ") mOutputDataClass pointer: " << mOutputDataClass
-          << " | elements=" << elemsOutputDataClass << " (= mNnClusterizerTotalClusters)"
-          << " | " << fmt(szOutputDataClass);
+                << " | elements=" << elemsOutputDataClass << " (= mNnClusterizerTotalClusters)"
+                << " | " << fmt(szOutputDataClass);
       LOG(info) << "(NNCLUS, GPUTPCNNClusterizer, this=" << this << ") mClusterFlags pointer: " << static_cast<const void*>(mClusterFlags)
-          << " | elements=" << elemsClusterFlags << " (= 2 * mNnClusterizerBatchedMode)"
-          << " | " << fmt(szClusterFlags);
+                << " | elements=" << elemsClusterFlags << " (= 2 * mNnClusterizerBatchedMode)"
+                << " | " << fmt(szClusterFlags);
       LOG(info) << "(NNCLUS, GPUTPCNNClusterizer, this=" << this << ") mInputData_16 pointer: " << mInputData_16
-          << " | elements=" << elemsInput16 << " (= mNnClusterizerBatchedMode * mNnClusterizerElementSize)"
-          << " | " << fmt(szInput16);
+                << " | elements=" << elemsInput16 << " (= mNnClusterizerBatchedMode * mNnClusterizerElementSize)"
+                << " | " << fmt(szInput16);
       LOG(info) << "(NNCLUS, GPUTPCNNClusterizer, this=" << this << ") mModelProbabilities_16 pointer: " << mModelProbabilities_16
-          << " | elements=" << elemsProb16 << " (= mNnClusterizerBatchedMode * mNnClusterizerModelClassNumOutputNodes)"
-          << " | " << fmt(szProb16);
+                << " | elements=" << elemsProb16 << " (= mNnClusterizerBatchedMode * mNnClusterizerModelClassNumOutputNodes)"
+                << " | " << fmt(szProb16);
       LOG(info) << "(NNCLUS, GPUTPCNNClusterizer, this=" << this << ") mOutputDataReg1_16 pointer: " << mOutputDataReg1_16
-          << " | elements=" << elemsReg1_16 << " (= mNnClusterizerBatchedMode * mNnClusterizerModelReg1NumOutputNodes)"
-          << " | " << fmt(szReg1_16);
+                << " | elements=" << elemsReg1_16 << " (= mNnClusterizerBatchedMode * mNnClusterizerModelReg1NumOutputNodes)"
+                << " | " << fmt(szReg1_16);
       LOG(info) << "(NNCLUS, GPUTPCNNClusterizer, this=" << this << ") mOutputDataReg2_16 pointer: " << mOutputDataReg2_16
-          << " | elements=" << elemsReg2_16 << " (= mNnClusterizerBatchedMode * mNnClusterizerModelReg2NumOutputNodes)"
-          << " | " << fmt(szReg2_16);
+                << " | elements=" << elemsReg2_16 << " (= mNnClusterizerBatchedMode * mNnClusterizerModelReg2NumOutputNodes)"
+                << " | " << fmt(szReg2_16);
       LOG(info) << "(NNCLUS, GPUTPCNNClusterizer, this=" << this << ") mInputData_32 pointer: " << mInputData_32
-          << " | elements=" << elemsInput32 << " (= mNnClusterizerBatchedMode * mNnClusterizerElementSize)"
-          << " | " << fmt(szInput32);
+                << " | elements=" << elemsInput32 << " (= mNnClusterizerBatchedMode * mNnClusterizerElementSize)"
+                << " | " << fmt(szInput32);
       LOG(info) << "(NNCLUS, GPUTPCNNClusterizer, this=" << this << ") mModelProbabilities_32 pointer: " << mModelProbabilities_32
-          << " | elements=" << elemsProb32 << " (= mNnClusterizerBatchedMode * mNnClusterizerModelClassNumOutputNodes)"
-          << " | " << fmt(szProb32);
+                << " | elements=" << elemsProb32 << " (= mNnClusterizerBatchedMode * mNnClusterizerModelClassNumOutputNodes)"
+                << " | " << fmt(szProb32);
       LOG(info) << "(NNCLUS, GPUTPCNNClusterizer, this=" << this << ") mOutputDataReg1_32 pointer: " << mOutputDataReg1_32
-          << " | elements=" << elemsReg1_32 << " (= mNnClusterizerBatchedMode * mNnClusterizerModelReg1NumOutputNodes)"
-          << " | " << fmt(szReg1_32);
+                << " | elements=" << elemsReg1_32 << " (= mNnClusterizerBatchedMode * mNnClusterizerModelReg1NumOutputNodes)"
+                << " | " << fmt(szReg1_32);
       LOG(info) << "(NNCLUS, GPUTPCNNClusterizer, this=" << this << ") mOutputDataReg2_32 pointer: " << mOutputDataReg2_32
-          << " | elements=" << elemsReg2_32 << " (= mNnClusterizerBatchedMode * mNnClusterizerModelReg2NumOutputNodes)"
-          << " | " << fmt(szReg2_32);
+                << " | elements=" << elemsReg2_32 << " (= mNnClusterizerBatchedMode * mNnClusterizerModelReg2NumOutputNodes)"
+                << " | " << fmt(szReg2_32);
     }
     // Compute allocated bytes (difference between advanced pointer and start pointer)
     size_t allocatedBytes = static_cast<size_t>(reinterpret_cast<uintptr_t>(mem) - reinterpret_cast<uintptr_t>(startMem));

diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx
@@ -275,7 +275,7 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::det
   if (glo_idx + batchStart >= clusterer.mPmemory->counters.nClusters || glo_idx >= clustererNN.mNnClusterizerBatchedMode) {
     return;
   }
-  if(clustererNN.mNnClusterizerUseClassification) {
+  if (clustererNN.mNnClusterizerUseClassification) {
     if (dtype == 0) {
       clustererNN.mOutputDataClass[glo_idx + batchStart] = (int32_t)((clustererNN.mModelProbabilities_16[glo_idx]).ToFloat() > clustererNN.mNnClassThreshold);
     } else if (dtype == 1) {
@@ -295,7 +295,7 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::det
   if (glo_idx + batchStart >= clusterer.mPmemory->counters.nClusters || glo_idx >= clustererNN.mNnClusterizerBatchedMode) {
     return;
   }
-  if(clustererNN.mNnClusterizerUseClassification) {
+  if (clustererNN.mNnClusterizerUseClassification) {
     uint32_t elem_iterator = glo_idx * clustererNN.mNnClusterizerModelClassNumOutputNodes;
     float current_max_prob = 0.f; // If the neural network doesn't contain the softmax as a last layer, the outputs can range in [-infty, infty]
     uint32_t class_label = 0;
@@ -401,20 +401,20 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::pub
 
   if (dtype == 0) {
     pc.setFull(central_charge * clustererNN.mOutputDataReg1_16[model_output_index + 4].ToFloat(),
-                static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg1_16[model_output_index].ToFloat(),
-                notSinglePad ? clustererNN.mOutputDataReg1_16[model_output_index + 2].ToFloat() : 0.f,
-                (clusterer.mPmemory->fragment).start + static_cast<float>(peak.time()) + clustererNN.mOutputDataReg1_16[model_output_index + 1].ToFloat(),
-                notSingleTime ? clustererNN.mOutputDataReg1_16[model_output_index + 3].ToFloat() : 0.f,
-                clustererNN.mClusterFlags[2 * glo_idx],
-                clustererNN.mClusterFlags[2 * glo_idx + 1]);
+               static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg1_16[model_output_index].ToFloat(),
+               notSinglePad ? clustererNN.mOutputDataReg1_16[model_output_index + 2].ToFloat() : 0.f,
+               (clusterer.mPmemory->fragment).start + static_cast<float>(peak.time()) + clustererNN.mOutputDataReg1_16[model_output_index + 1].ToFloat(),
+               notSingleTime ? clustererNN.mOutputDataReg1_16[model_output_index + 3].ToFloat() : 0.f,
+               clustererNN.mClusterFlags[2 * glo_idx],
+               clustererNN.mClusterFlags[2 * glo_idx + 1]);
   } else if (dtype == 1) {
     pc.setFull(central_charge * clustererNN.mOutputDataReg1_32[model_output_index + 4],
-                static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg1_32[model_output_index],
-                notSinglePad ? clustererNN.mOutputDataReg1_32[model_output_index + 2] : 0.f,
-                (clusterer.mPmemory->fragment).start + static_cast<float>(peak.time()) + clustererNN.mOutputDataReg1_32[model_output_index + 1],
-                notSingleTime ? clustererNN.mOutputDataReg1_32[model_output_index + 3] : 0.f,
-                clustererNN.mClusterFlags[2 * glo_idx],
-                clustererNN.mClusterFlags[2 * glo_idx + 1]);
+               static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg1_32[model_output_index],
+               notSinglePad ? clustererNN.mOutputDataReg1_32[model_output_index + 2] : 0.f,
+               (clusterer.mPmemory->fragment).start + static_cast<float>(peak.time()) + clustererNN.mOutputDataReg1_32[model_output_index + 1],
+               notSingleTime ? clustererNN.mOutputDataReg1_32[model_output_index + 3] : 0.f,
+               clustererNN.mClusterFlags[2 * glo_idx],
+               clustererNN.mClusterFlags[2 * glo_idx + 1]);
   }
 
   tpc::ClusterNative myCluster;
@@ -511,20 +511,20 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::pub
   // Cluster 1
   if (dtype == 0) {
     pc.setFull(central_charge * clustererNN.mOutputDataReg2_16[model_output_index + 8].ToFloat(),
-                static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg2_16[model_output_index].ToFloat(),
-                clustererNN.mOutputDataReg2_16[model_output_index + 4].ToFloat(),
-                (clusterer.mPmemory->fragment).start + static_cast<float>(peak.time()) + clustererNN.mOutputDataReg2_16[model_output_index + 2].ToFloat(),
-                clustererNN.mOutputDataReg2_16[model_output_index + 6].ToFloat(),
-                clustererNN.mClusterFlags[2 * glo_idx],
-                clustererNN.mClusterFlags[2 * glo_idx + 1]);
+               static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg2_16[model_output_index].ToFloat(),
+               clustererNN.mOutputDataReg2_16[model_output_index + 4].ToFloat(),
+               (clusterer.mPmemory->fragment).start + static_cast<float>(peak.time()) + clustererNN.mOutputDataReg2_16[model_output_index + 2].ToFloat(),
+               clustererNN.mOutputDataReg2_16[model_output_index + 6].ToFloat(),
+               clustererNN.mClusterFlags[2 * glo_idx],
+               clustererNN.mClusterFlags[2 * glo_idx + 1]);
   } else if (dtype == 1) {
     pc.setFull(central_charge * clustererNN.mOutputDataReg2_32[model_output_index + 8],
-                static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg2_32[model_output_index],
-                clustererNN.mOutputDataReg2_32[model_output_index + 4],
-                (clusterer.mPmemory->fragment).start + static_cast<float>(peak.time()) + clustererNN.mOutputDataReg2_32[model_output_index + 2],
-                clustererNN.mOutputDataReg2_32[model_output_index + 6],
-                clustererNN.mClusterFlags[2 * glo_idx],
-                clustererNN.mClusterFlags[2 * glo_idx + 1]);
+               static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg2_32[model_output_index],
+               clustererNN.mOutputDataReg2_32[model_output_index + 4],
+               (clusterer.mPmemory->fragment).start + static_cast<float>(peak.time()) + clustererNN.mOutputDataReg2_32[model_output_index + 2],
+               clustererNN.mOutputDataReg2_32[model_output_index + 6],
+               clustererNN.mClusterFlags[2 * glo_idx],
+               clustererNN.mClusterFlags[2 * glo_idx + 1]);
   }
 
   tpc::ClusterNative myCluster;
@@ -559,20 +559,20 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::pub
   // Cluster 2
   if (dtype == 0) {
     pc.setFull(central_charge * clustererNN.mOutputDataReg2_16[model_output_index + 9].ToFloat(),
-                static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg2_16[model_output_index + 1].ToFloat(),
-                clustererNN.mOutputDataReg2_16[model_output_index + 5].ToFloat(),
-                (clusterer.mPmemory->fragment).start + static_cast<float>(peak.time()) + clustererNN.mOutputDataReg2_16[model_output_index + 3].ToFloat(),
-                clustererNN.mOutputDataReg2_16[model_output_index + 7].ToFloat(),
-                clustererNN.mClusterFlags[2 * glo_idx],
-                clustererNN.mClusterFlags[2 * glo_idx + 1]);
+               static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg2_16[model_output_index + 1].ToFloat(),
+               clustererNN.mOutputDataReg2_16[model_output_index + 5].ToFloat(),
+               (clusterer.mPmemory->fragment).start + static_cast<float>(peak.time()) + clustererNN.mOutputDataReg2_16[model_output_index + 3].ToFloat(),
+               clustererNN.mOutputDataReg2_16[model_output_index + 7].ToFloat(),
+               clustererNN.mClusterFlags[2 * glo_idx],
+               clustererNN.mClusterFlags[2 * glo_idx + 1]);
   } else if (dtype == 1) {
     pc.setFull(central_charge * clustererNN.mOutputDataReg2_32[model_output_index + 9],
-                static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg2_32[model_output_index + 1],
-                clustererNN.mOutputDataReg2_32[model_output_index + 5],
-                (clusterer.mPmemory->fragment).start + static_cast<float>(peak.time()) + clustererNN.mOutputDataReg2_32[model_output_index + 3],
-                clustererNN.mOutputDataReg2_32[model_output_index + 7],
-                clustererNN.mClusterFlags[2 * glo_idx],
-                clustererNN.mClusterFlags[2 * glo_idx + 1]);
+               static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg2_32[model_output_index + 1],
+               clustererNN.mOutputDataReg2_32[model_output_index + 5],
+               (clusterer.mPmemory->fragment).start + static_cast<float>(peak.time()) + clustererNN.mOutputDataReg2_32[model_output_index + 3],
+               clustererNN.mOutputDataReg2_32[model_output_index + 7],
+               clustererNN.mClusterFlags[2 * glo_idx],
+               clustererNN.mClusterFlags[2 * glo_idx + 1]);
   }
 
   rejectCluster = !pc.toNative(peak, central_charge, myCluster, clusterer.Param(), chargeMap);