GPU: Add debug option to create temporary MC labels for collected merged tracks

davidrohr · davidrohr · commit 2b03a37d94fc · 2025-12-25T01:48:15.000+01:00
diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx
@@ -215,10 +215,10 @@ int32_t GPUReconstructionCPU::ExitDevice()
 int32_t GPUReconstructionCPU::RunChains()
 {
   mMemoryScalers->temporaryFactor = 1.;
-  if (GetProcessingSettings().memoryScalingFuzz) {
+  if (GetProcessingSettings().debug.memoryScalingFuzz) {
     static std::mt19937 rng;
     static std::uniform_int_distribution<uint64_t> dist(0, 1000000);
-    uint64_t fuzzFactor = GetProcessingSettings().memoryScalingFuzz == 1 ? dist(rng) : GetProcessingSettings().memoryScalingFuzz;
+    uint64_t fuzzFactor = GetProcessingSettings().debug.memoryScalingFuzz == 1 ? dist(rng) : GetProcessingSettings().debug.memoryScalingFuzz;
     GPUInfo("Fuzzing memory scaling factor with %lu", fuzzFactor);
     mMemoryScalers->fuzzScalingFactor(fuzzFactor);
   }
diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h
@@ -39,7 +39,7 @@ BeginNamespace(gpu)
 
 // Reconstruction parameters for TPC, no bool in here !!!
 BeginSubConfig(GPUSettingsRecTPC, tpc, configStandalone.rec, "RECTPC", 0, "Reconstruction settings", rec_tpc)
-AddOptionRTC(rejectQPtB5, float, 1.f / 0.050f, "", 0, "QPt threshold to reject clusters of TPC tracks (Inverse Pt, scaled to B=0.5T!!!)")
+AddOptionRTC(rejectQPtB5, float, 1.f / 0.050f, "", 0, "QPt threshold to reject clusters of TPC tracks (Inverse Pt, scaled to B=0.5T!!!)") // TODO: Sort these options automatically for parameter size
 AddOptionRTC(hitPickUpFactor, float, 1.f, "", 0, "multiplier for the combined cluster+track error during track following")
 AddOptionRTC(hitSearchArea2, float, 2.f, "", 0, "square of maximum search road of hits during seeding")
 AddOptionRTC(neighboursSearchArea, float, 3.f, "", 0, "area in cm for the search of neighbours, for z only used if searchWindowDZDR = 0")
@@ -304,6 +304,14 @@ AddOption(nnCCDBInteractionRate, std::string, "500", "", 0, "Distinguishes betwe
 AddHelp("help", 'h')
 EndConfig()
 
+// Debug settings, attached to GPUSettingsProcessing as sub-config "debug" (read via GetProcessingSettings().debug.*)
+BeginSubConfig(GPUSettingsProcessingDebug, debug, configStandalone.proc, "DEBUG", 0, "Debugging Settings", proc_debug)
+AddOption(memoryScalingFuzz, uint64_t, 0, "", 0, "Fuzz the memoryScalingFactor (0 disable, 1 enable, >1 set seed)", def(1))
+AddOption(mergerMCLabels, bool, false, "", 0, "Create MC labels for merged tracks before refit for debugging")
+AddHelp("help", 'h')
+EndConfig()
+
+
 // Settings steering the processing once the device was selected, only available on the host
 BeginSubConfig(GPUSettingsProcessing, proc, configStandalone, "PROC", 0, "Processing settings", proc)
 AddOption(deviceNum, int32_t, -1, "gpuDevice", 0, "Set GPU device to use (-1: automatic, -2: for round-robin usage in timeslice-pipeline)")
@@ -329,7 +337,6 @@ AddOption(memoryAllocationStrategy, int8_t, 0, "", 0, "Memory Allocation Strageg
 AddOption(forceMemoryPoolSize, uint64_t, 1, "memSize", 0, "Force size of allocated GPU / page locked host memory", min(0ul))
 AddOption(forceHostMemoryPoolSize, uint64_t, 0, "hostMemSize", 0, "Force size of allocated host page locked host memory (overriding memSize)", min(0ul))
 AddOption(memoryScalingFactor, float, 1.f, "", 0, "Factor to apply to all memory scalers")
-AddOption(memoryScalingFuzz, uint64_t, 0, "", 0, "Fuzz the memoryScalingFactor (0 disable, 1 enable, >1 set seed", def(1))
 AddOption(conservativeMemoryEstimate, bool, false, "", 0, "Use some more conservative defaults for larger buffers during TPC processing")
 AddOption(tpcInputWithClusterRejection, uint8_t, 0, "", 0, "Indicate whether the TPC input is CTF data with cluster rejection, to tune buffer estimations")
 AddOption(forceMaxMemScalers, uint64_t, 0, "", 0, "Force using the maximum values for all buffers, Set a value n > 1 to rescale all maximums to a memory size of n")
@@ -377,7 +384,7 @@ AddOption(tpcUseOldCPUDecoding, bool, false, "", 0, "Enable old CPU-based TPC de
 AddOption(tpcApplyCFCutsAtDecoding, bool, false, "", 0, "Apply cluster cuts from clusterization during decoding of compressed clusters")
 AddOption(tpcApplyClusterFilterOnCPU, uint8_t, 0, "", 0, "Apply custom cluster filter of GPUTPCClusterFilter class, 0: off, 1: debug, 2: PbPb23")
 AddOption(tpcWriteClustersAfterRejection, bool, false, "", 0, "Apply TPC rejection strategy before writing clusters")
-AddOption(oclPlatformNum, int32_t, -1, "", 0, "Platform to use, in case the backend provides multiple platforms (OpenCL only, -1 = auto-select, -2 query all platforms (also incompatible))")
+AddOption(oclPlatformNum, int32_t, -1, "", 0, "Platform to use, in case the backend provides multiple platforms (OpenCL only, -1 = auto-select, -2 query all platforms (also incompatible))") // TODO: Create some backend-specific options
 AddOption(oclCompileFromSources, bool, false, "", 0, "Compile OpenCL binary from included source code instead of using included spirv code")
 AddOption(oclOverrideSourceBuildFlags, std::string, "", "", 0, "Override OCL build flags for compilation from source, put a space for empty options")
 AddOption(printSettings, bool, false, "", 0, "Print all settings when initializing")
@@ -396,6 +403,7 @@ AddSubConfig(GPUSettingsProcessingRTC, rtc)
 AddSubConfig(GPUSettingsProcessingRTCtechnical, rtctech)
 AddSubConfig(GPUSettingsProcessingParam, param)
 AddSubConfig(GPUSettingsProcessingNNclusterizer, nn)
+AddSubConfig(GPUSettingsProcessingDebug, debug)
 AddHelp("help", 'h')
 EndConfig()
 #endif // __OPENCL__
diff --git a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx
@@ -170,6 +170,9 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput)
     runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::mergedTracks1>({{1, -WarpSize(), 0, deviceType}}, 1);
     runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::mergedTracks2>({{1, -WarpSize(), 0, deviceType}}, 1);
   }
+  if (!doGPU && GetProcessingSettings().debug.mergerMCLabels) {
+    Merger.CreateMCLabels(1, 1, 0, 0);
+  }
   DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingCollectedTracks, doGPU, Merger, &GPUTPCGMMerger::DumpCollected, *mDebugFile);
 
   if (param().rec.tpc.mergeCE) {
diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx
@@ -148,6 +148,7 @@ using namespace o2::gpu::internal;
 
 #include "GPUQA.h"
 #include "GPUMemorySizeScalers.h"
+#include "GPUQAHelper.h"
 
 GPUTPCGMMerger::GPUTPCGMMerger()
 {
@@ -164,7 +165,7 @@ GPUTPCGMMerger::GPUTPCGMMerger()
 }
 
 // DEBUG CODE
-#if !defined(GPUCA_GPUCODE) && (defined(GPUCA_MERGER_BY_MC_LABEL) || defined(GPUCA_CADEBUG_ENABLED) || GPUCA_MERGE_LOOPER_MC)
+#if defined(GPUCA_MERGER_BY_MC_LABEL) || defined(GPUCA_CADEBUG_ENABLED) || GPUCA_MERGE_LOOPER_MC
 #include "GPUQAHelper.h"
 
 template <class T>
@@ -438,6 +439,9 @@ void* GPUTPCGMMerger::SetPointersRefitScratch(void* mem)
 void* GPUTPCGMMerger::SetPointersOutput(void* mem)
 {
   computePointerWithAlignment(mem, mMergedTracks, mNMaxTracks);
+  if (mRec->GetProcessingSettings().debug.mergerMCLabels) {
+    computePointerWithAlignment(mem, mMergedTrackMC, mNMaxTracks);
+  }
   if (mRec->GetParam().dodEdxEnabled) {
     computePointerWithAlignment(mem, mMergedTracksdEdx, mNMaxTracks);
     if (mRec->GetParam().rec.tpc.dEdxClusterRejectionFlagMask != mRec->GetParam().rec.tpc.dEdxClusterRejectionFlagMaskAlt) {
@@ -547,6 +551,34 @@ int32_t GPUTPCGMMerger::CheckSectors()
   return 0;
 }
 
+// Debug helper: fills mMergedTrackMC with one MC label per merged track, derived from the labels of the track's clusters
+// that are not flagged flagReject. Tracks that are not OK() get a default-constructed label instead of stale memory.
+// No-op if cluster MC truth or mMergedTrackMC is missing. NOTE(review): block/thread args presumably feed get_global_id() - confirm.
+void GPUTPCGMMerger::CreateMCLabels(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
+{
+  const o2::tpc::ClusterNativeAccess* GPUrestrict() clusters = GetConstantMem()->ioPtrs.clustersNative;
+  if (clusters == nullptr || clusters->clustersMCTruth == nullptr || mMergedTrackMC == nullptr) {
+    return;
+  }
+
+  auto labelAssigner = GPUTPCTrkLbl(clusters->clustersMCTruth, 0.1f); // NOTE(review): meaning of 0.1f is defined by GPUTPCTrkLbl, not visible here
+  for (int32_t i = get_global_id(0); i < NMergedTracks(); i += get_global_size(0)) {
+    const auto& trk = mMergedTracks[i];
+    if (!trk.OK()) {
+      mMergedTrackMC[i] = o2::MCCompLabel{}; // keep the entry well-defined for readers that do not check OK()
+      continue;
+    }
+    labelAssigner.reset();
+    for (uint32_t j = 0; j < trk.NClusters(); j++) {
+      const auto& cl = mClusters[trk.FirstClusterRef() + j];
+      if (!(cl.state & GPUTPCGMMergedTrackHit::flagReject)) { // rejected clusters do not contribute to the track label
+        labelAssigner.addLabel(cl.num);
+      }
+    }
+    mMergedTrackMC[i] = labelAssigner.computeLabel();
+  }
+}
+
 #endif // GPUCA_GPUCODE
 
 GPUd() void GPUTPCGMMerger::ClearTrackLinks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, bool output)
diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h
@@ -113,6 +113,7 @@ class GPUTPCGMMerger : public GPUProcessor
 
   GPUhdi() int32_t NMergedTracks() const { return mMemory->nMergedTracks; }
   GPUhdi() const GPUTPCGMMergedTrack* MergedTracks() const { return mMergedTracks; }
+  GPUhdi() const o2::MCCompLabel* MergedTrackMC() const { return mMergedTrackMC; }
   GPUhdi() GPUTPCGMMergedTrack* MergedTracks() { return mMergedTracks; }
   GPUhdi() const GPUdEdxInfo* MergedTracksdEdx() const { return mMergedTracksdEdx; }
   GPUhdi() GPUdEdxInfo* MergedTracksdEdx() { return mMergedTracksdEdx; }
@@ -204,6 +205,7 @@ class GPUTPCGMMerger : public GPUProcessor
   GPUd() void ResolveHitWeights1(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iteration);
   GPUd() void ResolveHitWeights2(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread);
   GPUd() void ResolveHitWeightsShared(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread);
+  GPUd() void CreateMCLabels(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread);
 
 #ifndef GPUCA_GPUCODE
   void DumpSectorTracks(std::ostream& out) const;
@@ -284,6 +286,7 @@ class GPUTPCGMMerger : public GPUProcessor
 
   int32_t mNSectorHits = 0;                         // Total number of incoming clusters (from sector tracks)
   GPUTPCGMMergedTrack* mMergedTracks = nullptr;     //* array of output merged tracks
+  o2::MCCompLabel* mMergedTrackMC = nullptr;
   trackCluster* mClusterCandidates = nullptr;
   trackRebuildHelper* mTrackRebuildHelper = nullptr;
   int32_t* mHitWeights = nullptr;
diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx
@@ -14,6 +14,7 @@
 
 #define GPUCA_CADEBUG 0
 #define DEBUG_SINGLE_TRACK -1
+// #define DEBUG_REBUILD_MC
 
 #include "GPUTPCDef.h"
 #include "GPUTPCGMTrackParam.h"
@@ -40,6 +41,11 @@
 #include "AliHLTTPCClusterMCData.h"
 #endif
 
+#ifndef GPUCA_GPUCODE
+#include "SimulationDataFormat/ConstMCTruthContainer.h"
+#include "SimulationDataFormat/MCCompLabel.h"
+#endif
+
 #ifndef GPUCA_GPUCODE_DEVICE
 #include <cmath>
 #include <cstdlib>
@@ -279,6 +285,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger& GPUrestrict() merger, int32_
     if (param.rec.tpc.rebuildTrackInFit && !rebuilt && !(param.rec.tpc.disableRebuildAttachment & 16) && iWay >= nWays - 3 && CAMath::Abs(mP[2]) < maxSinForUpdate && lastUpdateRow != 255) {
       const int32_t up = ((clusters[0].row < clusters[maxN - 1].row) ^ (iWay & 1)) ? 1 : -1;
       int32_t sector = lastSector;
+      CADEBUG(if (merger.MergedTrackMC()) { printf("Extrapolate Start Track %d - sector %2d row %3d %s - fake %d\n", iTrk, sector, (int32_t)lastPropagateRow, up == 1 ? "upwards" : "downwards", (int)merger.MergedTrackMC()[iTrk].isFake()); }); // print only when the merger provides an MC label array (pointer may be null)
       uint8_t rowGapActive = 0, rowGapTotal = 0, missingRowsTotal = 0;
       uint8_t lastGoodRow = lastPropagateRow, lastExtrapolateRow = lastPropagateRow;
       uint8_t consecGoodRows = param.rec.tpc.rebuildTrackExtrMinConsecGoodRows, consecGoodRowsMissing = 0;
@@ -327,6 +334,12 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger& GPUrestrict() merger, int32_
         auto& candidate = merger.ClusterCandidates()[(iTrk * GPUCA_ROW_COUNT + iRow) * param.rec.tpc.rebuildTrackInFitClusterCandidates + 0];
         if (candidate.id >= 2) {
           lastExtrapolateRow = iRow;
+#if defined(DEBUG_REBUILD_MC) && !defined(GPUCA_GPUCODE)
+          if (merger.MergedTrackMC() && merger.GetConstantMem()->ioPtrs.clustersNative->clustersMCTruth) {
+            int32_t labelCorrect = GPUTPCTrkLblSearch(merger.GetConstantMem()->ioPtrs.clustersNative->clustersMCTruth->getLabels(candidate.id - 2), merger.MergedTrackMC()[iTrk]);
+            CADEBUG(printf("\t%21sLabel correct: %d\n", "", labelCorrect));
+          }
+#endif
           float err2Y, err2Z, xx, yy, zz;
           const ClusterNative& GPUrestrict() cl = merger.GetConstantMem()->ioPtrs.clustersNative->clustersLinear[candidate.id - 2];
           merger.GetConstantMem()->calibObjects.fastTransformHelper->Transform(sector, iRow, cl.getPad(), cl.getTime(), xx, yy, zz, mTOffset);
diff --git a/GPU/GPUTracking/qa/GPUQAHelper.h b/GPU/GPUTracking/qa/GPUQAHelper.h
@@ -162,6 +162,17 @@ static inline auto GPUTPCTrkLbl(const AliHLTTPCClusterMCLabel* x, Args... args)
   }
 }
 
+// Returns true if trkLabel.compare() yields a non-negative value for at least one entry of clusterLabels, false otherwise.
+template <class T>
+static inline bool GPUTPCTrkLblSearch(const T& clusterLabels, const MCCompLabel& trkLabel)
+{
+  bool matched = false;
+  for (const auto& candidate : clusterLabels) {
+    matched = matched || trkLabel.compare(candidate) >= 0; // short-circuit: compare() is not called again after a match
+  }
+  return matched;
+}
+
 } // namespace gpu
 } // namespace o2
 

Original file line number	Diff line number	Diff line change
`@@ -215,10 +215,10 @@ int32_t GPUReconstructionCPU::ExitDevice()`
`215`	`215`	`int32_t GPUReconstructionCPU::RunChains()`
`216`	`216`	`{`
`217`	`217`	`mMemoryScalers->temporaryFactor = 1.;`
`218`		`- if (GetProcessingSettings().memoryScalingFuzz) {`
	`218`	`+ if (GetProcessingSettings().debug.memoryScalingFuzz) {`
`219`	`219`	`static std::mt19937 rng;`
`220`	`220`	`static std::uniform_int_distribution<uint64_t> dist(0, 1000000);`
`221`		`- uint64_t fuzzFactor = GetProcessingSettings().memoryScalingFuzz == 1 ? dist(rng) : GetProcessingSettings().memoryScalingFuzz;`
	`221`	`+ uint64_t fuzzFactor = GetProcessingSettings().debug.memoryScalingFuzz == 1 ? dist(rng) : GetProcessingSettings().debug.memoryScalingFuzz;`
`222`	`222`	`GPUInfo("Fuzzing memory scaling factor with %lu", fuzzFactor);`
`223`	`223`	`mMemoryScalers->fuzzScalingFactor(fuzzFactor);`
`224`	`224`	`}`
Original file line number	Diff line number	Diff line change
`@@ -170,6 +170,9 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput)`
`170`	`170`	`runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::mergedTracks1>({{1, -WarpSize(), 0, deviceType}}, 1);`
`171`	`171`	`runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::mergedTracks2>({{1, -WarpSize(), 0, deviceType}}, 1);`
`172`	`172`	`}`
	`173`	`+ if (!doGPU && GetProcessingSettings().debug.mergerMCLabels) {`
	`174`	`+ Merger.CreateMCLabels(1, 1, 0, 0);`
	`175`	`+ }`
`173`	`176`	`DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingCollectedTracks, doGPU, Merger, &GPUTPCGMMerger::DumpCollected, *mDebugFile);`
`174`	`177`
`175`	`178`	`if (param().rec.tpc.mergeCE) {`