@@ -94,7 +94,6 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput)
9494 uint32_t numBlocks = (!mRec ->IsGPU () || doGPU) ? BlockCount () : 1 ;
9595 GPUTPCGMMerger& Merger = processors ()->tpcMerger ;
9696 GPUTPCGMMerger& MergerShadow = doGPU ? processorsShadow ()->tpcMerger : Merger;
97- GPUTPCGMMerger& MergerShadowAll = doGPU ? processorsShadow ()->tpcMerger : Merger;
9897 const int32_t outputStream = OutputStream ();
9998 if (GetProcessingSettings ().debugLevel >= 2 ) {
10099 GPUInfo (" Running TPC Merger" );
@@ -139,28 +138,28 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput)
139138 DoDebugAndDump (RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingSectorTracks, doGPU, Merger, &GPUTPCGMMerger::DumpSectorTracks, *mDebugFile );
140139
141140 runKernel<GPUTPCGMMergerClearLinks>(GetGridAuto (0 , deviceType), false );
142- runKernel<GPUMemClean16>({{1 , -WarpSize (), 0 , deviceType, RecoStep::TPCMerging}}, MergerShadowAll .TmpCounter (), NSECTORS * sizeof (*MergerShadowAll .TmpCounter ()));
141+ runKernel<GPUMemClean16>({{1 , -WarpSize (), 0 , deviceType, RecoStep::TPCMerging}}, MergerShadow .TmpCounter (), NSECTORS * sizeof (*MergerShadow .TmpCounter ()));
143142 runKernel<GPUTPCGMMergerMergeWithinPrepare>(GetGridAuto (0 , deviceType));
144143 RunTPCTrackingMerger_MergeBorderTracks (1 , 0 , deviceType);
145144 RunTPCTrackingMerger_Resolve (0 , 1 , deviceType);
146145 DoDebugAndDump (RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingMatching, doGPU, Merger, &GPUTPCGMMerger::DumpMergedWithinSectors, *mDebugFile );
147146
148147 runKernel<GPUTPCGMMergerClearLinks>(GetGridAuto (0 , deviceType), false );
149- runKernel<GPUMemClean16>({{1 , -WarpSize (), 0 , deviceType, RecoStep::TPCMerging}}, MergerShadowAll .TmpCounter (), 2 * NSECTORS * sizeof (*MergerShadowAll .TmpCounter ()));
148+ runKernel<GPUMemClean16>({{1 , -WarpSize (), 0 , deviceType, RecoStep::TPCMerging}}, MergerShadow .TmpCounter (), 2 * NSECTORS * sizeof (*MergerShadow .TmpCounter ()));
150149 runKernel<GPUTPCGMMergerMergeSectorsPrepare>(GetGridBlk (std::max (2u , numBlocks), 0 , deviceType), 2 , 3 , 0 );
151150 RunTPCTrackingMerger_MergeBorderTracks (0 , 0 , deviceType);
152151 RunTPCTrackingMerger_Resolve (0 , 1 , deviceType);
153- runKernel<GPUMemClean16>({{1 , -WarpSize (), 0 , deviceType, RecoStep::TPCMerging}}, MergerShadowAll .TmpCounter (), 2 * NSECTORS * sizeof (*MergerShadowAll .TmpCounter ()));
152+ runKernel<GPUMemClean16>({{1 , -WarpSize (), 0 , deviceType, RecoStep::TPCMerging}}, MergerShadow .TmpCounter (), 2 * NSECTORS * sizeof (*MergerShadow .TmpCounter ()));
154153 runKernel<GPUTPCGMMergerMergeSectorsPrepare>(GetGridBlk (std::max (2u , numBlocks), 0 , deviceType), 0 , 1 , 0 );
155154 RunTPCTrackingMerger_MergeBorderTracks (0 , 0 , deviceType);
156155 RunTPCTrackingMerger_Resolve (0 , 1 , deviceType);
157- runKernel<GPUMemClean16>({{1 , -WarpSize (), 0 , deviceType, RecoStep::TPCMerging}}, MergerShadowAll .TmpCounter (), 2 * NSECTORS * sizeof (*MergerShadowAll .TmpCounter ()));
156+ runKernel<GPUMemClean16>({{1 , -WarpSize (), 0 , deviceType, RecoStep::TPCMerging}}, MergerShadow .TmpCounter (), 2 * NSECTORS * sizeof (*MergerShadow .TmpCounter ()));
158157 runKernel<GPUTPCGMMergerMergeSectorsPrepare>(GetGridBlk (std::max (2u , numBlocks), 0 , deviceType), 0 , 1 , 1 );
159158 RunTPCTrackingMerger_MergeBorderTracks (0 , -1 , deviceType);
160159 RunTPCTrackingMerger_Resolve (0 , 1 , deviceType);
161160 DoDebugAndDump (RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingMatching, doGPU, Merger, &GPUTPCGMMerger::DumpMergedBetweenSectors, *mDebugFile );
162161
163- runKernel<GPUMemClean16>({{1 , -WarpSize (), 0 , deviceType, RecoStep::TPCMerging}}, MergerShadowAll .TmpCounter (), 2 * NSECTORS * sizeof (*MergerShadowAll .TmpCounter ()));
162+ runKernel<GPUMemClean16>({{1 , -WarpSize (), 0 , deviceType, RecoStep::TPCMerging}}, MergerShadow .TmpCounter (), 2 * NSECTORS * sizeof (*MergerShadow .TmpCounter ()));
164163
165164 runKernel<GPUTPCGMMergerLinkExtrapolatedTracks>(GetGridAuto (0 , deviceType));
166165 if (GetProcessingSettings ().mergerSanityCheck ) {
@@ -200,8 +199,8 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput)
200199 if (maxId > Merger.NMaxClusters ()) {
201200 throw std::runtime_error (" mNMaxClusters too small" );
202201 }
203- runKernel<GPUMemClean16>({{numBlocks, -ThreadCount (), 0 , deviceType, RecoStep::TPCMerging}}, MergerShadowAll .SharedCount (), maxId * sizeof (*MergerShadowAll .SharedCount ()));
204- runKernel<GPUMemClean16>({{numBlocks, -ThreadCount (), 0 , deviceType, RecoStep::TPCMerging}}, MergerShadowAll .ClusterAttachment (), maxId * sizeof (*MergerShadowAll .ClusterAttachment ()));
202+ runKernel<GPUMemClean16>({{numBlocks, -ThreadCount (), 0 , deviceType, RecoStep::TPCMerging}}, MergerShadow .SharedCount (), maxId * sizeof (*MergerShadow .SharedCount ()));
203+ runKernel<GPUMemClean16>({{numBlocks, -ThreadCount (), 0 , deviceType, RecoStep::TPCMerging}}, MergerShadow .ClusterAttachment (), maxId * sizeof (*MergerShadow .ClusterAttachment ()));
205204 runKernel<GPUTPCGMMergerPrepareForFit, 0 >(GetGridAuto (0 , deviceType));
206205 CondWaitEvent (waitForTransfer, &mEvents ->single );
207206 runKernel<GPUTPCGMMergerSortTracksQPt>(GetGridAuto (0 , deviceType));
@@ -226,6 +225,9 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput)
226225 mOutputQueue .clear ();
227226 }
228227
228+ if (param ().rec .tpc .rebuildTrackInFit ) {
229+ runKernel<GPUMemClean16>({{numBlocks, -ThreadCount (), 0 , deviceType, RecoStep::TPCMerging}}, MergerShadow.ClusterCandidates (), Merger.NMergedTracks () * GPUCA_ROW_COUNT * param ().rec .tpc .rebuildTrackInFitClusterCandidates * sizeof (*MergerShadow.ClusterCandidates ()));
230+ }
229231 runKernel<GPUTPCGMMergerTrackFit>(doGPU ? GetGrid (Merger.NMergedTracks (), 0 ) : GetGridAuto (0 ), mergerSortTracks ? 1 : 0 , 0 );
230232 if (param ().rec .tpc .rebuildTrackInFit ) {
231233 runKernel<GPUTPCGMMergerTrackFit>(doGPU ? GetGrid (Merger.NMergedTracks (), 0 ) : GetGridAuto (0 ), mergerSortTracks ? 1 : 0 , 1 );
@@ -260,13 +262,13 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput)
260262 throw std::runtime_error (" QA Scratch buffer exceeded" );
261263 }
262264 }
263- GPUMemCpy (RecoStep::TPCMerging, Merger.MergedTracks (), MergerShadowAll .MergedTracks (), Merger.NMergedTracks () * sizeof (*Merger.MergedTracks ()), outputStream, 0 , nullptr , waitEvent);
265+ GPUMemCpy (RecoStep::TPCMerging, Merger.MergedTracks (), MergerShadow .MergedTracks (), Merger.NMergedTracks () * sizeof (*Merger.MergedTracks ()), outputStream, 0 , nullptr , waitEvent);
264266 waitEvent = nullptr ;
265267 if (param ().dodEdxEnabled ) {
266- GPUMemCpy (RecoStep::TPCMerging, Merger.MergedTracksdEdx (), MergerShadowAll .MergedTracksdEdx (), Merger.NMergedTracks () * sizeof (*Merger.MergedTracksdEdx ()), outputStream, 0 );
268+ GPUMemCpy (RecoStep::TPCMerging, Merger.MergedTracksdEdx (), MergerShadow .MergedTracksdEdx (), Merger.NMergedTracks () * sizeof (*Merger.MergedTracksdEdx ()), outputStream, 0 );
267269 }
268- GPUMemCpy (RecoStep::TPCMerging, Merger.Clusters (), MergerShadowAll .Clusters (), Merger.NMergedTrackClusters () * sizeof (*Merger.Clusters ()), outputStream, 0 );
269- GPUMemCpy (RecoStep::TPCMerging, Merger.ClusterAttachment (), MergerShadowAll .ClusterAttachment (), Merger.NMaxClusters () * sizeof (*Merger.ClusterAttachment ()), outputStream, 0 );
270+ GPUMemCpy (RecoStep::TPCMerging, Merger.Clusters (), MergerShadow .Clusters (), Merger.NMergedTrackClusters () * sizeof (*Merger.Clusters ()), outputStream, 0 );
271+ GPUMemCpy (RecoStep::TPCMerging, Merger.ClusterAttachment (), MergerShadow .ClusterAttachment (), Merger.NMaxClusters () * sizeof (*Merger.ClusterAttachment ()), outputStream, 0 );
270272 }
271273 if (GetProcessingSettings ().outputSharedClusterMap ) {
272274 TransferMemoryResourceLinkToHost (RecoStep::TPCMerging, Merger.MemoryResOutputState (), outputStream, nullptr , waitEvent);
0 commit comments