Skip to content

Commit 8ee541e

Browse files
committed
GPU: Make memoryStat work from GPUWorkflow
1 parent a114b4b commit 8ee541e

File tree

6 files changed

+39
-31
lines changed

6 files changed

+39
-31
lines changed

GPU/GPUTracking/Base/GPUReconstruction.cxx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -999,7 +999,7 @@ void GPUReconstruction::PrintMemoryStatistics()
999999
}
10001000
printf("%59s CPU / %9s GPU\n", "", "");
10011001
for (auto it = sizes.begin(); it != sizes.end(); it++) {
1002-
printf("Allocation %30s %s: Size %'14zu / %'14zu\n", it->first.c_str(), it->second[2] ? "P" : " ", it->second[0], it->second[1]);
1002+
printf("Allocation %50s %s: Size %'14zu / %'14zu\n", it->first.c_str(), it->second[2] ? "P" : " ", it->second[0], it->second[1]);
10031003
}
10041004
PrintMemoryOverview();
10051005
for (uint32_t i = 0; i < mChains.size(); i++) {

GPU/GPUTracking/Base/GPUReconstructionCPU.cxx

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -231,26 +231,24 @@ int32_t GPUReconstructionCPU::RunChains()
231231
}
232232
mTimerTotal.Start();
233233
const std::clock_t cpuTimerStart = std::clock();
234+
int32_t retVal = 0; // Must be initialized: it is assigned only by EnqueuePipeline() or inside the chain loop (which may run zero times), but is read unconditionally afterwards
234235
if (GetProcessingSettings().doublePipeline) {
235-
int32_t retVal = EnqueuePipeline();
236-
if (retVal) {
237-
return retVal;
238-
}
236+
retVal = EnqueuePipeline();
239237
} else {
240238
if (mSlaves.size() || mMaster) {
241239
WriteConstantParams(); // Reinitialize // TODO: Get this in sync with GPUChainTracking::DoQueuedUpdates, and consider the doublePipeline
242240
}
243241
for (uint32_t i = 0; i < mChains.size(); i++) {
244-
int32_t retVal = mChains[i]->RunChain();
245-
if (retVal) {
246-
return retVal;
247-
}
248-
}
249-
if (GetProcessingSettings().tpcFreeAllocatedMemoryAfterProcessing) {
250-
ClearAllocatedMemory();
242+
retVal = mChains[i]->RunChain();
251243
}
252244
}
245+
if (retVal != 0 && retVal != 2) {
246+
return retVal;
247+
}
253248
mTimerTotal.Stop();
249+
if (GetProcessingSettings().tpcFreeAllocatedMemoryAfterProcessing) {
250+
ClearAllocatedMemory();
251+
}
254252
mStatCPUTime += (double)(std::clock() - cpuTimerStart) / CLOCKS_PER_SEC;
255253
if (GetProcessingSettings().debugLevel >= 3 || GetProcessingSettings().allocDebugLevel) {
256254
GPUInfo("Allocated memory when ending processing %36s", "");
@@ -339,7 +337,13 @@ int32_t GPUReconstructionCPU::RunChains()
339337
mTimerTotal.Reset();
340338
}
341339

342-
return 0;
340+
if (GetProcessingSettings().memoryStat) {
341+
PrintMemoryStatistics();
342+
} else if (GetProcessingSettings().debugLevel >= 2) {
343+
PrintMemoryOverview();
344+
}
345+
346+
return retVal;
343347
}
344348

345349
void GPUReconstructionCPU::ResetDeviceProcessorTypes()

GPU/GPUTracking/Definitions/GPUSettingsList.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -377,6 +377,7 @@ AddOption(debugOnFailureMaxFiles, uint32_t, 0, "", 0, "Max number of files to ha
377377
AddOption(debugOnFailureMaxSize, uint32_t, 0, "", 0, "Max size of existing dumps in the target folder in GB")
378378
AddOption(debugOnFailureDirectory, std::string, ".", "", 0, "Target folder for debug / dump")
379379
AddOption(amdMI100SerializationWorkaround, bool, false, "", 0, "Enable workaround that mitigates MI100 serialization bug")
380+
AddOption(memoryStat, bool, false, "", 0, "Print memory statistics")
380381
AddVariable(eventDisplay, o2::gpu::GPUDisplayFrontendInterface*, nullptr)
381382
AddSubConfig(GPUSettingsProcessingRTC, rtc)
382383
AddSubConfig(GPUSettingsProcessingRTCtechnical, rtctech)
@@ -587,7 +588,6 @@ AddOption(zsVersion, int32_t, 2, "", 0, "ZS Version: 1 = 10-bit ADC row based, 2
587588
AddOption(dumpEvents, bool, false, "", 0, "Dump events (after transformation such as encodeZS")
588589
AddOption(stripDumpedEvents, bool, false, "", 0, "Remove redundant inputs (e.g. digits and ZS) before dumping")
589590
AddOption(printSettings, int32_t, 0, "", 0, "Print all settings", def(1))
590-
AddOption(memoryStat, bool, false, "", 0, "Print memory statistics")
591591
AddOption(testSyncAsync, bool, false, "syncAsync", 0, "Test first synchronous and then asynchronous processing")
592592
AddOption(testSync, bool, false, "sync", 0, "Test settings for synchronous phase")
593593
AddOption(timeFrameTime, bool, false, "tfTime", 0, "Print some debug information about time frame processing time")

GPU/GPUTracking/Global/GPUChainTracking.cxx

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,10 @@ bool GPUChainTracking::ValidateSettings()
278278
return false;
279279
}
280280
if (GetProcessingSettings().doublePipeline) {
281+
if (GetProcessingSettings().tpcFreeAllocatedMemoryAfterProcessing) {
282+
GPUError("Cannot use double pipeline with tpcFreeAllocatedMemoryAfterProcessing");
283+
return false;
284+
}
281285
if (!GetRecoStepsOutputs().isOnlySet(GPUDataTypes::InOutType::TPCMergedTracks, GPUDataTypes::InOutType::TPCCompressedClusters, GPUDataTypes::InOutType::TPCClusters)) {
282286
GPUError("Invalid outputs for double pipeline mode 0x%x", (uint32_t)GetRecoStepsOutputs());
283287
return false;

GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -142,8 +142,10 @@ void GPUChainTracking::PrintMemoryStatistics()
142142
std::map<std::string, GPUChainTrackingMemUsage> usageMap;
143143
for (int32_t i = 0; i < NSECTORS; i++) {
144144
#ifdef GPUCA_TPC_GEOMETRY_O2
145-
addToMap("TPC Clusterer Sector Peaks", usageMap, processors()->tpcClusterer[i].mPmemory->counters.nPeaks, processors()->tpcClusterer[i].mNMaxPeaks);
146-
addToMap("TPC Clusterer Sector Clusters", usageMap, processors()->tpcClusterer[i].mPmemory->counters.nClusters, processors()->tpcClusterer[i].mNMaxClusters);
145+
if (processors()->tpcClusterer[i].mPmemory) {
146+
addToMap("TPC Clusterer Sector Peaks", usageMap, processors()->tpcClusterer[i].mPmemory->counters.nPeaks, processors()->tpcClusterer[i].mNMaxPeaks);
147+
addToMap("TPC Clusterer Sector Clusters", usageMap, processors()->tpcClusterer[i].mPmemory->counters.nClusters, processors()->tpcClusterer[i].mNMaxClusters);
148+
}
147149
#endif
148150
addToMap("TPC Sector Start Hits", usageMap, *processors()->tpcTrackers[i].NStartHits(), processors()->tpcTrackers[i].NMaxStartHits());
149151
addToMap("TPC Sector Tracklets", usageMap, *processors()->tpcTrackers[i].NTracklets(), processors()->tpcTrackers[i].NMaxTracklets());
@@ -152,18 +154,22 @@ void GPUChainTracking::PrintMemoryStatistics()
152154
addToMap("TPC Sector TrackHits", usageMap, *processors()->tpcTrackers[i].NTrackHits(), processors()->tpcTrackers[i].NMaxTrackHits());
153155
}
154156
addToMap("TPC Clusterer Clusters", usageMap, mRec->MemoryScalers()->nTPCHits, mRec->MemoryScalers()->NTPCClusters(mRec->MemoryScalers()->nTPCdigits));
155-
addToMap("TPC Tracks", usageMap, processors()->tpcMerger.NMergedTracks(), processors()->tpcMerger.NMaxTracks());
156-
addToMap("TPC TrackHits", usageMap, processors()->tpcMerger.NMergedTrackClusters(), processors()->tpcMerger.NMaxMergedTrackClusters());
157+
if (processors()->tpcMerger.Memory()) {
158+
addToMap("TPC Tracks", usageMap, processors()->tpcMerger.NMergedTracks(), processors()->tpcMerger.NMaxTracks());
159+
addToMap("TPC TrackHits", usageMap, processors()->tpcMerger.NMergedTrackClusters(), processors()->tpcMerger.NMaxMergedTrackClusters());
160+
}
157161

158162
if (mRec->GetProcessingSettings().createO2Output) {
159163
addToMap("TPC O2 Tracks", usageMap, processors()->tpcMerger.NOutputTracksTPCO2(), processors()->tpcMerger.NOutputTracksTPCO2());
160164
addToMap("TPC O2 ClusRefs", usageMap, processors()->tpcMerger.NOutputClusRefsTPCO2(), processors()->tpcMerger.NOutputClusRefsTPCO2());
161165
}
162166

163167
#ifdef GPUCA_TPC_GEOMETRY_O2
164-
addToMap("TPC ComprCache HitsAttached", usageMap, processors()->tpcCompressor.mOutput->nAttachedClusters, processors()->tpcCompressor.mMaxTrackClusters);
165-
addToMap("TPC ComprCache HitsUnattached", usageMap, processors()->tpcCompressor.mOutput->nUnattachedClusters, processors()->tpcCompressor.mMaxClustersInCache);
166-
addToMap("TPC ComprCache Tracks", usageMap, processors()->tpcCompressor.mOutput->nTracks, processors()->tpcCompressor.mMaxTracks);
168+
if (processors()->tpcCompressor.mOutput) {
169+
addToMap("TPC ComprCache HitsAttached", usageMap, processors()->tpcCompressor.mOutput->nAttachedClusters, processors()->tpcCompressor.mMaxTrackClusters);
170+
addToMap("TPC ComprCache HitsUnattached", usageMap, processors()->tpcCompressor.mOutput->nUnattachedClusters, processors()->tpcCompressor.mMaxClustersInCache);
171+
addToMap("TPC ComprCache Tracks", usageMap, processors()->tpcCompressor.mOutput->nTracks, processors()->tpcCompressor.mMaxTracks);
172+
}
167173
#endif
168174

169175
for (auto& elem : usageMap) {
@@ -180,8 +186,10 @@ void GPUChainTracking::PrintMemoryRelations()
180186
GPUInfo("MEMREL SectorTracks NCl %d NTrk %d", processors()->tpcTrackers[i].NHitsTotal(), *processors()->tpcTrackers[i].NTracks());
181187
GPUInfo("MEMREL SectorTrackHits NCl %d NTrkH %d", processors()->tpcTrackers[i].NHitsTotal(), *processors()->tpcTrackers[i].NTrackHits());
182188
}
183-
GPUInfo("MEMREL Tracks NCl %d NTrk %d", processors()->tpcMerger.NMaxClusters(), processors()->tpcMerger.NMergedTracks());
184-
GPUInfo("MEMREL TrackHitss NCl %d NTrkH %d", processors()->tpcMerger.NMaxClusters(), processors()->tpcMerger.NMergedTrackClusters());
189+
if (processors()->tpcMerger.Memory()) {
190+
GPUInfo("MEMREL Tracks NCl %d NTrk %d", processors()->tpcMerger.NMaxClusters(), processors()->tpcMerger.NMergedTracks());
191+
GPUInfo("MEMREL TrackHitss NCl %d NTrkH %d", processors()->tpcMerger.NMaxClusters(), processors()->tpcMerger.NMergedTrackClusters());
192+
}
185193
}
186194

187195
void GPUChainTracking::PrepareKernelDebugOutput()

GPU/GPUTracking/Standalone/Benchmark/standalone.cxx

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -649,11 +649,6 @@ int32_t RunBenchmark(GPUReconstruction* recUse, GPUChainTracking* chainTrackingU
649649

650650
if (tmpRetVal == 0 || tmpRetVal == 2) {
651651
OutputStat(chainTrackingUse, iRun == 0 ? nTracksTotal : nullptr, iRun == 0 ? nClustersTotal : nullptr);
652-
if (configStandalone.memoryStat) {
653-
recUse->PrintMemoryStatistics();
654-
} else if (configStandalone.proc.debugLevel >= 2) {
655-
recUse->PrintMemoryOverview();
656-
}
657652
}
658653

659654
if (tmpRetVal == 0 && configStandalone.testSyncAsync) {
@@ -685,9 +680,6 @@ int32_t RunBenchmark(GPUReconstruction* recUse, GPUChainTracking* chainTrackingU
685680
tmpRetVal = recAsync->RunChains();
686681
if (tmpRetVal == 0 || tmpRetVal == 2) {
687682
OutputStat(chainTrackingAsync, nullptr, nullptr);
688-
if (configStandalone.memoryStat) {
689-
recAsync->PrintMemoryStatistics();
690-
}
691683
}
692684
recAsync->ClearAllocatedMemory();
693685
}

0 commit comments

Comments
 (0)