Skip to content

Commit 15a7e2f

Browse files
davidrohr authored and ktf committed
GPU: Remove obsolete code paths
1 parent 2b593a2 commit 15a7e2f

File tree

12 files changed

+109
-264
lines changed

12 files changed

+109
-264
lines changed

GPU/GPUTracking/Base/GPUReconstruction.cxx

Lines changed: 1 addition & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -282,21 +282,9 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice()
282282
mProcessingSettings.nDeviceHelperThreads = 0;
283283
}
284284

285-
if (param().rec.nonConsecutiveIDs) {
286-
param().rec.tpc.disableRefitAttachment = 0xFF;
287-
}
288-
if (!(mRecoSteps.stepsGPUMask & RecoStep::TPCMerging) || !param().rec.tpc.mergerReadFromTrackerDirectly) {
289-
mProcessingSettings.fullMergerOnGPU = false;
290-
}
291-
if (mProcessingSettings.debugLevel > 3 || !IsGPU() || !mProcessingSettings.fullMergerOnGPU || mProcessingSettings.deterministicGPUReconstruction) {
285+
if (mProcessingSettings.debugLevel > 3 || !IsGPU() || mProcessingSettings.deterministicGPUReconstruction) {
292286
mProcessingSettings.delayedOutput = false;
293287
}
294-
if (!mProcessingSettings.fullMergerOnGPU && (GetRecoStepsGPU() & RecoStep::TPCMerging)) {
295-
param().rec.tpc.looperInterpolationInExtraPass = 0;
296-
if (param().rec.tpc.retryRefit == 1) {
297-
param().rec.tpc.retryRefit = 2;
298-
}
299-
}
300288

301289
UpdateAutomaticProcessingSettings();
302290
GPUCA_GPUReconstructionUpdateDefaults();

GPU/GPUTracking/Definitions/GPUSettingsList.h

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -149,7 +149,6 @@ AddOptionRTC(mergerInterpolateErrors, uint8_t, 1, "", 0, "Use interpolation inst
149149
AddOptionRTC(mergeCE, uint8_t, 1, "", 0, "Merge tracks accross the central electrode")
150150
AddOptionRTC(retryRefit, int8_t, 1, "", 0, "Retry refit with seeding errors and without cluster rejection when fit fails (=2 means retry in same kernel, =1 for separate kernel")
151151
AddOptionRTC(looperInterpolationInExtraPass, int8_t, -1, "", 0, "Perform looper interpolation in an extra pass")
152-
AddOptionRTC(mergerReadFromTrackerDirectly, int8_t, 1, "", 0, "Forward data directly from tracker to merger on GPU")
153152
AddOptionRTC(dropSecondaryLegsInOutput, int8_t, 1, "", 0, "Do not store secondary legs of looping track in TrackTPC")
154153
AddOptionRTC(enablePID, int8_t, 1, "", 0, "Enable PID response")
155154
AddOptionRTC(PID_useNsigma, int8_t, 1, "", 0, "Use nSigma instead of absolute distance in PID response")
@@ -188,7 +187,6 @@ EndConfig()
188187

189188
BeginSubConfig(GPUSettingsRec, rec, configStandalone, "REC", 0, "Reconstruction settings", rec)
190189
AddOptionRTC(maxTrackQPtB5, float, 1.f / GPUCA_MIN_TRACK_PTB5_DEFAULT, "", 0, "required max Q/Pt (==min Pt) of tracks")
191-
AddOptionRTC(nonConsecutiveIDs, int8_t, false, "", 0, "Non-consecutive cluster IDs as in HLT, disables features that need access to slice data in TPC merger")
192190
AddOptionRTC(fwdTPCDigitsAsClusters, uint8_t, 0, "", 0, "Forward TPC digits as clusters (if they pass the ZS threshold)")
193191
AddOptionRTC(bz0Pt10MeV, uint8_t, 60, "", 0, "Nominal Pt to set when bz = 0 (in 10 MeV)")
194192
AddOptionRTC(fitInProjections, int8_t, -1, "", 0, "Fit in projection, -1 to enable full fit for all but passes but the first one")
@@ -261,7 +259,6 @@ AddOption(overrideClusterizerFragmentLen, int32_t, -1, "", 0, "Force the cluster
261259
AddOption(trackletSelectorSlices, int8_t, -1, "", 0, "Number of slices to processes in parallel at max")
262260
AddOption(trackletConstructorInPipeline, int8_t, -1, "", 0, "Run tracklet constructor in the pipeline")
263261
AddOption(trackletSelectorInPipeline, int8_t, -1, "", 0, "Run tracklet selector in the pipeline")
264-
AddOption(fullMergerOnGPU, bool, true, "", 0, "Perform full TPC track merging on GPU instead of only refit")
265262
AddOption(delayedOutput, bool, true, "", 0, "Delay output to be parallel to track fit")
266263
AddOption(mergerSortTracks, int8_t, -1, "", 0, "Sort track indizes for GPU track fit")
267264
AddOption(alternateBorderSort, int8_t, -1, "", 0, "Alternative implementation for sorting of border tracks")

GPU/GPUTracking/Global/GPUChainTracking.cxx

Lines changed: 2 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -185,12 +185,8 @@ bool GPUChainTracking::ValidateSteps()
185185
GPUError("Invalid input, TPC Clusterizer needs TPC raw input");
186186
return false;
187187
}
188-
if (param().rec.tpc.mergerReadFromTrackerDirectly && (GetRecoSteps() & GPUDataTypes::RecoStep::TPCMerging) && ((GetRecoStepsInputs() & GPUDataTypes::InOutType::TPCSectorTracks) || (GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCSectorTracks) || !(GetRecoSteps() & GPUDataTypes::RecoStep::TPCConversion))) {
189-
GPUError("Invalid input / output / step, mergerReadFromTrackerDirectly cannot read/store sectors tracks and needs TPC conversion");
190-
return false;
191-
}
192-
if (!GetProcessingSettings().fullMergerOnGPU && (param().rec.tpc.mergerReadFromTrackerDirectly || GetProcessingSettings().createO2Output) && (GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCMerging)) {
193-
GPUError("createO2Output and mergerReadFromTrackerDirectly works only in combination with fullMergerOnGPU if the merger is to run on GPU");
188+
if ((GetRecoSteps() & GPUDataTypes::RecoStep::TPCMerging) && ((GetRecoStepsInputs() & GPUDataTypes::InOutType::TPCSectorTracks) || (GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCSectorTracks) || !(GetRecoSteps() & GPUDataTypes::RecoStep::TPCConversion))) {
189+
GPUError("Invalid input / output / step, merger cannot read/store sectors tracks and needs TPC conversion");
194190
return false;
195191
}
196192
bool tpcClustersAvail = (GetRecoStepsInputs() & GPUDataTypes::InOutType::TPCClusters) || (GetRecoSteps() & GPUDataTypes::RecoStep::TPCClusterFinding) || (GetRecoSteps() & GPUDataTypes::RecoStep::TPCDecompression);
@@ -265,14 +261,6 @@ bool GPUChainTracking::ValidateSettings()
265261
GPUError("Cannot do error interpolation with NWays = 1!");
266262
return false;
267263
}
268-
if ((param().rec.tpc.mergerReadFromTrackerDirectly || !param().par.earlyTpcTransform) && param().rec.nonConsecutiveIDs) {
269-
GPUError("incompatible settings for non consecutive ids");
270-
return false;
271-
}
272-
if (!param().rec.tpc.mergerReadFromTrackerDirectly && GetProcessingSettings().ompKernels) {
273-
GPUError("OMP Kernels require mergerReadFromTrackerDirectly");
274-
return false;
275-
}
276264
if (param().continuousMaxTimeBin > (int32_t)GPUSettings::TPC_MAX_TF_TIME_BIN) {
277265
GPUError("configured max time bin exceeds 256 orbits");
278266
return false;
@@ -743,10 +731,6 @@ int32_t GPUChainTracking::RunChain()
743731
return 1;
744732
}
745733

746-
for (uint32_t i = 0; i < NSLICES; i++) {
747-
// GPUInfo("slice %d clusters %d tracks %d", i, mClusterData[i].NumberOfClusters(), processors()->tpcTrackers[i].Output()->NTracks());
748-
processors()->tpcMerger.SetSliceData(i, param().rec.tpc.mergerReadFromTrackerDirectly ? nullptr : processors()->tpcTrackers[i].Output());
749-
}
750734
if (runRecoStep(RecoStep::TPCMerging, &GPUChainTracking::RunTPCTrackingMerger, false)) {
751735
return 1;
752736
}

GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx

Lines changed: 32 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -24,14 +24,14 @@ using namespace o2::gpu;
2424
void GPUChainTracking::RunTPCTrackingMerger_MergeBorderTracks(int8_t withinSlice, int8_t mergeMode, GPUReconstruction::krnlDeviceType deviceType)
2525
{
2626
GPUTPCGMMerger& Merger = processors()->tpcMerger;
27-
bool doGPUall = GetRecoStepsGPU() & RecoStep::TPCMerging && GetProcessingSettings().fullMergerOnGPU;
28-
GPUTPCGMMerger& MergerShadow = doGPUall ? processorsShadow()->tpcMerger : Merger;
27+
bool doGPU = GetRecoStepsGPU() & RecoStep::TPCMerging;
28+
GPUTPCGMMerger& MergerShadow = doGPU ? processorsShadow()->tpcMerger : Merger;
2929
if (GetProcessingSettings().deterministicGPUReconstruction) {
3030
uint32_t nBorderTracks = withinSlice == 1 ? NSLICES : (2 * NSLICES);
3131
runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::borderTracks>({{nBorderTracks, -WarpSize(), 0, deviceType}}, 0);
3232
}
3333
uint32_t n = withinSlice == -1 ? NSLICES / 2 : NSLICES;
34-
if (GetProcessingSettings().alternateBorderSort && (!mRec->IsGPU() || doGPUall)) {
34+
if (GetProcessingSettings().alternateBorderSort && (!mRec->IsGPU() || doGPU)) {
3535
TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResMemory(), 0, &mEvents->init);
3636
RecordMarker(&mEvents->single, 0);
3737
for (uint32_t i = 0; i < n; i++) {
@@ -72,7 +72,7 @@ void GPUChainTracking::RunTPCTrackingMerger_MergeBorderTracks(int8_t withinSlice
7272
runKernel<GPUTPCGMMergerMergeBorders, 2>(GetGridAuto(0, deviceType), i, withinSlice, mergeMode);
7373
}
7474
}
75-
DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPUall, Merger, &GPUTPCGMMerger::DumpMergeRanges, *mDebugFile, withinSlice, mergeMode);
75+
DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpMergeRanges, *mDebugFile, withinSlice, mergeMode);
7676
mRec->ReturnVolatileDeviceMemory();
7777
}
7878

@@ -89,12 +89,11 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput)
8989
{
9090
mRec->PushNonPersistentMemory(qStr2Tag("TPCMERGE"));
9191
bool doGPU = GetRecoStepsGPU() & RecoStep::TPCMerging;
92-
bool doGPUall = doGPU && GetProcessingSettings().fullMergerOnGPU;
93-
GPUReconstruction::krnlDeviceType deviceType = doGPUall ? GPUReconstruction::krnlDeviceType::Auto : GPUReconstruction::krnlDeviceType::CPU;
94-
uint32_t numBlocks = (!mRec->IsGPU() || doGPUall) ? BlockCount() : 1;
92+
GPUReconstruction::krnlDeviceType deviceType = doGPU ? GPUReconstruction::krnlDeviceType::Auto : GPUReconstruction::krnlDeviceType::CPU;
93+
uint32_t numBlocks = (!mRec->IsGPU() || doGPU) ? BlockCount() : 1;
9594
GPUTPCGMMerger& Merger = processors()->tpcMerger;
9695
GPUTPCGMMerger& MergerShadow = doGPU ? processorsShadow()->tpcMerger : Merger;
97-
GPUTPCGMMerger& MergerShadowAll = doGPUall ? processorsShadow()->tpcMerger : Merger;
96+
GPUTPCGMMerger& MergerShadowAll = doGPU ? processorsShadow()->tpcMerger : Merger;
9897
const int32_t outputStream = OutputStream();
9998
if (GetProcessingSettings().debugLevel >= 2) {
10099
GPUInfo("Running TPC Merger");
@@ -112,7 +111,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput)
112111

113112
memset(Merger.Memory(), 0, sizeof(*Merger.Memory()));
114113
WriteToConstantMemory(RecoStep::TPCMerging, (char*)&processors()->tpcMerger - (char*)processors(), &MergerShadow, sizeof(MergerShadow), 0);
115-
if (doGPUall) {
114+
if (doGPU) {
116115
TransferMemoryResourcesToGPU(RecoStep::TPCMerging, &Merger, 0);
117116
}
118117

@@ -136,14 +135,14 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput)
136135
if (GetProcessingSettings().deterministicGPUReconstruction) {
137136
runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::sectorTracks>({{GPUCA_NSLICES, -WarpSize(), 0, deviceType}}, 1);
138137
}
139-
DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPUall, Merger, &GPUTPCGMMerger::DumpSliceTracks, *mDebugFile);
138+
DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpSliceTracks, *mDebugFile);
140139

141140
runKernel<GPUTPCGMMergerClearLinks>(GetGridAuto(0, deviceType), false);
142141
runKernel<GPUMemClean16>({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), NSLICES * sizeof(*MergerShadowAll.TmpCounter()));
143142
runKernel<GPUTPCGMMergerMergeWithinPrepare>(GetGridAuto(0, deviceType));
144143
RunTPCTrackingMerger_MergeBorderTracks(1, 0, deviceType);
145144
RunTPCTrackingMerger_Resolve(0, 1, deviceType);
146-
DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPUall, Merger, &GPUTPCGMMerger::DumpMergedWithinSlices, *mDebugFile);
145+
DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpMergedWithinSlices, *mDebugFile);
147146

148147
runKernel<GPUTPCGMMergerClearLinks>(GetGridAuto(0, deviceType), false);
149148
runKernel<GPUMemClean16>({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSLICES * sizeof(*MergerShadowAll.TmpCounter()));
@@ -158,7 +157,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput)
158157
runKernel<GPUTPCGMMergerMergeSlicesPrepare>(GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 0, 1, 1);
159158
RunTPCTrackingMerger_MergeBorderTracks(0, -1, deviceType);
160159
RunTPCTrackingMerger_Resolve(0, 1, deviceType);
161-
DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPUall, Merger, &GPUTPCGMMerger::DumpMergedBetweenSlices, *mDebugFile);
160+
DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpMergedBetweenSlices, *mDebugFile);
162161

163162
runKernel<GPUMemClean16>({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSLICES * sizeof(*MergerShadowAll.TmpCounter()));
164163

@@ -168,17 +167,17 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput)
168167
runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::globalTracks1>({{1, -WarpSize(), 0, deviceType}}, 1);
169168
runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::globalTracks2>({{1, -WarpSize(), 0, deviceType}}, 1);
170169
}
171-
DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPUall, Merger, &GPUTPCGMMerger::DumpCollected, *mDebugFile);
170+
DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpCollected, *mDebugFile);
172171

173172
if (param().rec.tpc.mergeCE) {
174173
runKernel<GPUTPCGMMergerClearLinks>(GetGridAuto(0, deviceType), true);
175174
RunTPCTrackingMerger_MergeBorderTracks(-1, 1, deviceType);
176175
RunTPCTrackingMerger_MergeBorderTracks(-1, 2, deviceType);
177176
runKernel<GPUTPCGMMergerMergeCE>(GetGridAuto(0, deviceType));
178-
DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPUall, Merger, &GPUTPCGMMerger::DumpMergeCE, *mDebugFile);
177+
DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpMergeCE, *mDebugFile);
179178
}
180179
int32_t waitForTransfer = 0;
181-
if (doGPUall) {
180+
if (doGPU) {
182181
TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResMemory(), 0, &mEvents->single);
183182
waitForTransfer = 1;
184183
}
@@ -189,23 +188,21 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput)
189188
runKernel<GPUTPCGMMergerSortTracks>(GetGridAuto(0, deviceType));
190189
}
191190

192-
uint32_t maxId = param().rec.nonConsecutiveIDs ? Merger.Memory()->nOutputTrackClusters : Merger.NMaxClusters();
191+
uint32_t maxId = Merger.NMaxClusters();
193192
if (maxId > Merger.NMaxClusters()) {
194193
throw std::runtime_error("mNMaxClusters too small");
195194
}
196-
if (!param().rec.nonConsecutiveIDs) {
197-
runKernel<GPUMemClean16>({{numBlocks, -ThreadCount(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.SharedCount(), maxId * sizeof(*MergerShadowAll.SharedCount()));
198-
runKernel<GPUMemClean16>({{numBlocks, -ThreadCount(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.ClusterAttachment(), maxId * sizeof(*MergerShadowAll.ClusterAttachment()));
199-
runKernel<GPUTPCGMMergerPrepareClusters, 0>(GetGridAuto(0, deviceType));
200-
CondWaitEvent(waitForTransfer, &mEvents->single);
201-
runKernel<GPUTPCGMMergerSortTracksQPt>(GetGridAuto(0, deviceType));
202-
runKernel<GPUTPCGMMergerPrepareClusters, 1>(GetGridAuto(0, deviceType));
203-
runKernel<GPUTPCGMMergerPrepareClusters, 2>(GetGridAuto(0, deviceType));
204-
}
195+
runKernel<GPUMemClean16>({{numBlocks, -ThreadCount(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.SharedCount(), maxId * sizeof(*MergerShadowAll.SharedCount()));
196+
runKernel<GPUMemClean16>({{numBlocks, -ThreadCount(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.ClusterAttachment(), maxId * sizeof(*MergerShadowAll.ClusterAttachment()));
197+
runKernel<GPUTPCGMMergerPrepareClusters, 0>(GetGridAuto(0, deviceType));
198+
CondWaitEvent(waitForTransfer, &mEvents->single);
199+
runKernel<GPUTPCGMMergerSortTracksQPt>(GetGridAuto(0, deviceType));
200+
runKernel<GPUTPCGMMergerPrepareClusters, 1>(GetGridAuto(0, deviceType));
201+
runKernel<GPUTPCGMMergerPrepareClusters, 2>(GetGridAuto(0, deviceType));
205202

206-
DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPUall, Merger, &GPUTPCGMMerger::DumpFitPrepare, *mDebugFile);
203+
DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpFitPrepare, *mDebugFile);
207204

208-
if (doGPUall) {
205+
if (doGPU) {
209206
CondWaitEvent(waitForTransfer, &mEvents->single);
210207
if (waitForTransfer) {
211208
ReleaseEvent(mEvents->single);
@@ -228,29 +225,23 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput)
228225
if (param().rec.tpc.looperInterpolationInExtraPass) {
229226
runKernel<GPUTPCGMMergerFollowLoopers>(GetGridAuto(0));
230227
}
231-
if (doGPU && !doGPUall) {
232-
TransferMemoryResourcesToHost(RecoStep::TPCMerging, &Merger, 0);
233-
SynchronizeStream(0);
234-
}
235228

236229
DoDebugAndDump(RecoStep::TPCMerging, 2048, Merger, &GPUTPCGMMerger::DumpRefit, *mDebugFile);
237230
runKernel<GPUTPCGMMergerFinalize, 0>(GetGridAuto(0, deviceType));
238-
if (!param().rec.nonConsecutiveIDs) {
239-
runKernel<GPUTPCGMMergerFinalize, 1>(GetGridAuto(0, deviceType));
240-
runKernel<GPUTPCGMMergerFinalize, 2>(GetGridAuto(0, deviceType));
241-
}
231+
runKernel<GPUTPCGMMergerFinalize, 1>(GetGridAuto(0, deviceType));
232+
runKernel<GPUTPCGMMergerFinalize, 2>(GetGridAuto(0, deviceType));
242233
if (param().rec.tpc.mergeLoopersAfterburner) {
243-
runKernel<GPUTPCGMMergerMergeLoopers, 0>(doGPUall ? GetGrid(Merger.NOutputTracks(), 0, deviceType) : GetGridAuto(0, deviceType));
234+
runKernel<GPUTPCGMMergerMergeLoopers, 0>(doGPU ? GetGrid(Merger.NOutputTracks(), 0, deviceType) : GetGridAuto(0, deviceType));
244235
if (doGPU) {
245236
TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResMemory(), 0);
246237
SynchronizeStream(0); // TODO: could probably synchronize on an event after runKernel<GPUTPCGMMergerMergeLoopers, 1>
247238
}
248239
runKernel<GPUTPCGMMergerMergeLoopers, 1>(GetGridAuto(0, deviceType));
249-
runKernel<GPUTPCGMMergerMergeLoopers, 2>(doGPUall ? GetGrid(Merger.Memory()->nLooperMatchCandidates, 0, deviceType) : GetGridAuto(0, deviceType));
240+
runKernel<GPUTPCGMMergerMergeLoopers, 2>(doGPU ? GetGrid(Merger.Memory()->nLooperMatchCandidates, 0, deviceType) : GetGridAuto(0, deviceType));
250241
}
251-
DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPUall, Merger, &GPUTPCGMMerger::DumpFinal, *mDebugFile);
242+
DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpFinal, *mDebugFile);
252243

253-
if (doGPUall) {
244+
if (doGPU) {
254245
RecordMarker(&mEvents->single, 0);
255246
auto* waitEvent = &mEvents->single;
256247
if (GetProcessingSettings().keepDisplayMemory || GetProcessingSettings().createO2Output <= 1 || mFractionalQAEnabled) {
@@ -302,7 +293,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput)
302293
TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResMemory(), 0, &mEvents->single);
303294
runKernel<GPUTPCGMO2Output, GPUTPCGMO2Output::sort>(GetGridAuto(0, deviceType));
304295
mRec->ReturnVolatileDeviceMemory();
305-
SynchronizeEventAndRelease(mEvents->single, doGPUall);
296+
SynchronizeEventAndRelease(mEvents->single, doGPU);
306297

307298
if (GetProcessingSettings().clearO2OutputFromGPU) {
308299
mRec->AllocateVolatileDeviceMemory(0); // make future device memory allocation volatile
@@ -316,7 +307,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput)
316307
AllocateRegisteredMemory(Merger.MemoryResOutputO2MC(), mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::tpcTracksO2Labels)]);
317308
TransferMemoryResourcesToHost(RecoStep::TPCMerging, &Merger, -1, true);
318309
runKernel<GPUTPCGMO2Output, GPUTPCGMO2Output::mc>(GetGridAuto(0, GPUReconstruction::krnlDeviceType::CPU));
319-
} else if (doGPUall) {
310+
} else if (doGPU) {
320311
RecordMarker(&mEvents->single, 0);
321312
TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResOutputO2(), outputStream, nullptr, &mEvents->single);
322313
TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResOutputO2Clus(), outputStream);

GPU/GPUTracking/Global/GPUChainTrackingSliceTracker.cxx

Lines changed: 3 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -30,11 +30,8 @@ int32_t GPUChainTracking::GlobalTracking(uint32_t iSlice, int32_t threadId, bool
3030
GPUInfo("GPU Tracker running Global Tracking for slice %u on thread %d\n", iSlice, threadId);
3131
}
3232

33-
GPUReconstruction::krnlDeviceType deviceType = GetProcessingSettings().fullMergerOnGPU ? GPUReconstruction::krnlDeviceType::Auto : GPUReconstruction::krnlDeviceType::CPU;
34-
runKernel<GPUTPCGlobalTracking>({GetGridBlk(256, iSlice % mRec->NStreams(), deviceType), {iSlice}});
35-
if (GetProcessingSettings().fullMergerOnGPU) {
36-
TransferMemoryResourceLinkToHost(RecoStep::TPCSliceTracking, processors()->tpcTrackers[iSlice].MemoryResCommon(), iSlice % mRec->NStreams());
37-
}
33+
runKernel<GPUTPCGlobalTracking>({GetGridBlk(256, iSlice % mRec->NStreams()), {iSlice}});
34+
TransferMemoryResourceLinkToHost(RecoStep::TPCSliceTracking, processors()->tpcTrackers[iSlice].MemoryResCommon(), iSlice % mRec->NStreams());
3835
if (synchronizeOutput) {
3936
SynchronizeStream(iSlice % mRec->NStreams());
4037
}
@@ -450,7 +447,7 @@ int32_t GPUChainTracking::RunTPCTrackingSlices_internal()
450447
blocking[tmpSlice * mRec->NStreams() + sliceRight % mRec->NStreams()] = true;
451448
}
452449
}
453-
GlobalTracking(tmpSlice, 0, !GetProcessingSettings().fullMergerOnGPU);
450+
GlobalTracking(tmpSlice, 0, false);
454451
}
455452
}
456453
for (uint32_t iSlice = 0; iSlice < NSLICES; iSlice++) {

0 commit comments

Comments
 (0)