@@ -611,6 +611,14 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
     RunTPCClusterizer_prepare(true); // Restore some pointers, allocated by the other pipeline, and set to 0 by SetupGPUProcessor (since not allocated in this pipeline)
   }
 
+  if (doGPU && mIOPtrs.tpcZS) {
+    processorsShadow()->ioPtrs.tpcZS = mInputsShadow->mPzsMeta;
+    WriteToConstantMemory(RecoStep::TPCClusterFinding, (char*)&processors()->ioPtrs - (char*)processors(), &processorsShadow()->ioPtrs, sizeof(processorsShadow()->ioPtrs), mRec->NStreams() - 1);
+  }
+  if (doGPU) {
+    WriteToConstantMemory(RecoStep::TPCClusterFinding, (char*)processors()->tpcClusterer - (char*)processors(), processorsShadow()->tpcClusterer, sizeof(GPUTPCClusterFinder) * NSECTORS, mRec->NStreams() - 1, &mEvents->init);
+  }
+
 #ifdef GPUCA_HAS_ONNX
   const GPUSettingsProcessingNNclusterizer& nn_settings = GetProcessingSettings().nn;
   GPUTPCNNClusterizerHost nnApplications[GetProcessingSettings().nTPCClustererLanes];
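Context for the two `WriteToConstantMemory` calls in the hunk above: the second argument is the byte offset of the target member inside the `processors()` structure, so a single member (or array of processors) can be refreshed in the device constant block without rewriting the whole structure. A minimal CUDA sketch of that offset-based update pattern; the structs and the `writeToConstantMemory` helper are hypothetical stand-ins, only `cudaMemcpyToSymbolAsync` is the real runtime call:

```cpp
#include <cuda_runtime.h>
#include <cstddef>
#include <cstdio>

// Hypothetical stand-ins for the processors()/processorsShadow() structures.
struct IOPtrs { const void* tpcZS; };
struct Processors { IOPtrs ioPtrs; /* ...more processors... */ };

__constant__ Processors gProcessorsConst; // device-side constant mirror

// Copy `size` bytes of `src` into the constant mirror at byte `offset`,
// asynchronously on `stream` (the core of a WriteToConstantMemory-style call).
static void writeToConstantMemory(size_t offset, const void* src, size_t size, cudaStream_t stream)
{
  cudaMemcpyToSymbolAsync(gProcessorsConst, src, size, offset, cudaMemcpyHostToDevice, stream);
}

int main()
{
  Processors hostShadow{};
  cudaStream_t stream;
  cudaStreamCreate(&stream);
  // Refresh only the ioPtrs member, addressed by its offset within the struct:
  writeToConstantMemory(offsetof(Processors, ioPtrs), &hostShadow.ioPtrs, sizeof(hostShadow.ioPtrs), stream);
  cudaStreamSynchronize(stream);
  cudaStreamDestroy(stream);
  printf("ioPtrs refreshed in constant memory\n");
  return 0;
}
```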
@@ -624,9 +632,6 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
     }
     mRec->runParallelOuterLoop(doGPU, numLanes, [&](uint32_t lane) {
       nnApplications[lane].init(nn_settings);
-      GPUTPCNNClusterizer& clustererNN = processors()->tpcNNClusterer[lane];
-      GPUTPCNNClusterizer& clustererNNShadow = doGPU ? processorsShadow()->tpcNNClusterer[lane] : clustererNN;
-
       if (nnApplications[lane].modelsUsed[0]) {
         SetONNXGPUStream((nnApplications[lane].model_class).getSessionOptions(), lane, &deviceId);
         (nnApplications[lane].model_class).setDeviceId(deviceId);
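The lane setup runs through `mRec->runParallelOuterLoop`, which applies the lambda once per lane, concurrently on GPU builds and serially otherwise. A self-contained stand-in for that contract (the real method belongs to the reconstruction class; this `std::thread` version is only a sketch):

```cpp
#include <cstdint>
#include <cstdio>
#include <thread>
#include <vector>

// Illustrative stand-in for mRec->runParallelOuterLoop(doGPU, n, lambda):
// invoke f(0..n-1), concurrently when `parallel` is set, serially otherwise.
template <typename F>
void runParallelOuterLoop(bool parallel, uint32_t n, F&& f)
{
  if (!parallel) {
    for (uint32_t i = 0; i < n; i++) {
      f(i);
    }
    return;
  }
  std::vector<std::thread> pool;
  pool.reserve(n);
  for (uint32_t i = 0; i < n; i++) {
    pool.emplace_back([&f, i] { f(i); });
  }
  for (auto& t : pool) {
    t.join();
  }
}

int main()
{
  runParallelOuterLoop(true, 4, [](uint32_t lane) { printf("init lane %u\n", lane); });
  return 0;
}
```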
@@ -642,43 +647,32 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
         (nnApplications[lane].model_reg_2).setDeviceId(deviceId);
         (nnApplications[lane].model_reg_2).initEnvironment();
       }
-      if (clustererNNShadow.nnClusterizerVerbosity < 3) {
+      if (nn_settings.nnClusterizerVerbosity < 3) {
         LOG(info) << "Allocated ONNX stream for lane " << lane << " and device " << deviceId;
       }
     });
     mRec->runParallelOuterLoop(doGPU, NSECTORS, [&](uint32_t sector) {
       GPUTPCNNClusterizer& clustererNN = processors()->tpcNNClusterer[sector];
       GPUTPCNNClusterizer& clustererNNShadow = doGPU ? processorsShadow()->tpcNNClusterer[sector] : clustererNN;
       int32_t lane = sector % numLanes;
+      clustererNN.deviceId = deviceId;
+      clustererNN.mISector = sector;
+      clustererNN.nnClusterizerTotalClusters = maxClusters;
+      nnApplications[lane].initClusterizer(nn_settings, clustererNN);
       if (doGPU) {
         clustererNNShadow.deviceId = deviceId;
         clustererNNShadow.mISector = sector;
         clustererNNShadow.nnClusterizerTotalClusters = maxClusters;
         nnApplications[lane].initClusterizer(nn_settings, clustererNNShadow);
-      } else {
-        // TODO: not sure if this part is needed at all
-        clustererNN.deviceId = deviceId;
-        clustererNN.mISector = sector;
-        clustererNN.nnClusterizerTotalClusters = maxClusters;
-        nnApplications[lane].initClusterizer(nn_settings, clustererNN);
       }
       AllocateRegisteredMemory(clustererNN.mMemoryId);
-      if (doGPU) {
-        WriteToConstantMemory(RecoStep::TPCClusterFinding, (char*)&clustererNN - (char*)processors(), &clustererNNShadow, sizeof(clustererNN), lane);
-        TransferMemoryResourcesToGPU(RecoStep::TPCClusterFinding, &clustererNNShadow, lane);
-      }
     });
+    if (doGPU) {
+      WriteToConstantMemory(RecoStep::TPCClusterFinding, (char*)&processors()->tpcNNClusterer - (char*)processors(), &processorsShadow()->tpcNNClusterer, sizeof(GPUTPCNNClusterizer) * NSECTORS, mRec->NStreams() - 1, &mEvents->init);
+    }
   }
 #endif
 
-  if (doGPU && mIOPtrs.tpcZS) {
-    processorsShadow()->ioPtrs.tpcZS = mInputsShadow->mPzsMeta;
-    WriteToConstantMemory(RecoStep::TPCClusterFinding, (char*)&processors()->ioPtrs - (char*)processors(), &processorsShadow()->ioPtrs, sizeof(processorsShadow()->ioPtrs), mRec->NStreams() - 1);
-  }
-  if (doGPU) {
-    WriteToConstantMemory(RecoStep::TPCClusterFinding, (char*)processors()->tpcClusterer - (char*)processors(), processorsShadow()->tpcClusterer, sizeof(GPUTPCClusterFinder) * NSECTORS, mRec->NStreams() - 1, &mEvents->init);
-  }
-
   size_t nClsTotal = 0;
   ClusterNativeAccess* tmpNativeAccess = mClusterNativeAccess.get();
   ClusterNative* tmpNativeClusters = nullptr;
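On the GPU path, the per-sector loop above only fills the host and shadow objects; the upload then happens once for all `NSECTORS` clusterers via a single bulk `WriteToConstantMemory` tied to the `mEvents->init` event. The equivalence relies on the per-sector objects being contiguous in an array. A small host-only sketch of that idea, with all names hypothetical:

```cpp
#include <cstddef>
#include <cstdio>
#include <cstring>

constexpr int NSECTORS = 36;
// Hypothetical per-sector clusterer state.
struct Clusterer { int deviceId; int sector; int totalClusters; };

// Emulated constant-memory block holding one Clusterer per sector.
static unsigned char gConstBlock[sizeof(Clusterer) * NSECTORS];

int main()
{
  Clusterer shadow[NSECTORS];
  for (int s = 0; s < NSECTORS; s++) {
    shadow[s] = {0, s, 1000}; // fill the host-side shadow, as in the per-sector loop
  }
  // One bulk write covering all sectors yields the same bytes as NSECTORS
  // per-sector writes at offset s * sizeof(Clusterer), because the array is contiguous.
  std::memcpy(gConstBlock, shadow, sizeof(Clusterer) * NSECTORS);
  printf("uploaded %zu bytes in a single transfer\n", sizeof(Clusterer) * NSECTORS);
  return 0;
}
```

Batching the upload this way also leaves one synchronization point (the init event) rather than one transfer per sector and lane.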
@@ -961,7 +955,6 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
         auto stop0 = std::chrono::high_resolution_clock::now();
         auto start1 = std::chrono::high_resolution_clock::now();
 
-        // nnApplication.networkInference(nnApplication.model_class, clustererNNShadow, iSize, clustererNNShadow.modelProbabilities, clustererNNShadow.nnInferenceInputDType);
         if (clustererNNShadow.nnInferenceInputDType == 0) {
           if (clustererNNShadow.nnInferenceOutputDType == 0) {
             (nnApplication.model_class).inference(clustererNNShadow.inputData_16, iSize, clustererNNShadow.modelProbabilities_16);
@@ -975,6 +968,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
             (nnApplication.model_class).inference(clustererNNShadow.inputData_32, iSize, clustererNNShadow.modelProbabilities_32);
           }
         }
+
         if (nnApplication.model_class.getNumOutputNodes()[0][1] == 1) {
           runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::determineClass1Labels>({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.nnInferenceOutputDType, withMC, batchStart); // Assigning class labels
         } else {
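The nested branches above select the model entry point from two integer dtype codes, one for the input buffer and one for the output buffer (code 0 appears to pick the 16-bit `_16` buffers). A compact, self-contained sketch of such a two-axis dispatch; the `Model` type, the `OrtFloat16` alias, and the buffer names are hypothetical:

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

// Stand-in for the half-precision element type used by the _16 buffers.
using OrtFloat16 = uint16_t;

// Hypothetical model exposing one inference overload per input/output dtype pair.
struct Model {
  void inference(const OrtFloat16*, size_t n, OrtFloat16*) { printf("fp16 -> fp16 on %zu rows\n", n); }
  void inference(const OrtFloat16*, size_t n, float*) { printf("fp16 -> fp32 on %zu rows\n", n); }
  void inference(const float*, size_t n, OrtFloat16*) { printf("fp32 -> fp16 on %zu rows\n", n); }
  void inference(const float*, size_t n, float*) { printf("fp32 -> fp32 on %zu rows\n", n); }
};

int main()
{
  Model model;
  std::vector<OrtFloat16> in16(8), out16(8);
  std::vector<float> in32(8), out32(8);
  int inDType = 0, outDType = 1; // 0 appears to select the 16-bit buffers in the diff
  if (inDType == 0) {
    if (outDType == 0) {
      model.inference(in16.data(), in16.size(), out16.data());
    } else {
      model.inference(in16.data(), in16.size(), out32.data());
    }
  } else {
    if (outDType == 0) {
      model.inference(in32.data(), in32.size(), out16.data());
    } else {
      model.inference(in32.data(), in32.size(), out32.data());
    }
  }
  return 0;
}
```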