Skip to content

Commit 9070674

Browse files
committed
GPU: Improve memory usage debug printout
1 parent 134f5ea commit 9070674

File tree

2 files changed: 15 additions and 8 deletions

GPU/GPUTracking/Base/GPUReconstruction.cxx

Lines changed: 7 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -811,11 +811,9 @@ void GPUReconstruction::PopNonPersistentMemory(RecoStep step, uint64_t tag)
811811
GPUFatal("Tag mismatch when popping non persistent memory from stack : pop %s vs on stack %s", qTag2Str(tag).c_str(), qTag2Str(std::get<3>(mNonPersistentMemoryStack.back())).c_str());
812812
}
813813
if ((mProcessingSettings.debugLevel >= 3 || mProcessingSettings.allocDebugLevel) && (IsGPU() || mProcessingSettings.forceHostMemoryPoolSize)) {
814-
if (IsGPU()) {
815-
printf("Allocated Device memory after %30s (%8s): %'13zd (non temporary %'13zd, blocked %'13zd)\n", GPUDataTypes::RECO_STEP_NAMES[getRecoStepNum(step, true)], qTag2Str(std::get<3>(mNonPersistentMemoryStack.back())).c_str(), ptrDiff(mDeviceMemoryPool, mDeviceMemoryBase) + ptrDiff((char*)mDeviceMemoryBase + mDeviceMemorySize, mDeviceMemoryPoolEnd), ptrDiff(mDeviceMemoryPool, mDeviceMemoryBase), mDeviceMemoryPoolBlocked ? ptrDiff((char*)mDeviceMemoryBase + mDeviceMemorySize, mDeviceMemoryPoolBlocked) : 0);
816-
}
817-
printf("Allocated Host memory after %30s (%8s): %'13zd (non temporary %'13zd, blocked %'13zd)\n", GPUDataTypes::RECO_STEP_NAMES[getRecoStepNum(step, true)], qTag2Str(std::get<3>(mNonPersistentMemoryStack.back())).c_str(), ptrDiff(mHostMemoryPool, mHostMemoryBase) + ptrDiff((char*)mHostMemoryBase + mHostMemorySize, mHostMemoryPoolEnd), ptrDiff(mHostMemoryPool, mHostMemoryBase), mHostMemoryPoolBlocked ? ptrDiff((char*)mHostMemoryBase + mHostMemorySize, mHostMemoryPoolBlocked) : 0);
818-
printf("%16s", "");
814+
printf("Allocated memory after %30s (%8s) (Stack %zu): ", GPUDataTypes::RECO_STEP_NAMES[getRecoStepNum(step, true)], qTag2Str(std::get<3>(mNonPersistentMemoryStack.back())).c_str(), mNonPersistentMemoryStack.size());
815+
PrintMemoryOverview();
816+
printf("%76s", "");
819817
PrintMemoryMax();
820818
}
821819
mHostMemoryPoolEnd = std::get<0>(mNonPersistentMemoryStack.back());
@@ -888,9 +886,10 @@ void GPUReconstruction::PrintMemoryMax()
888886
void GPUReconstruction::PrintMemoryOverview()
889887
{
890888
if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) {
891-
printf("Memory Allocation: Host %'zd / %'zu (Permanent %'zd), Device %'zd / %'zu, (Permanent %'zd) %zu chunks\n",
892-
ptrDiff(mHostMemoryPool, mHostMemoryBase) + ptrDiff((char*)mHostMemoryBase + mHostMemorySize, mHostMemoryPoolEnd), mHostMemorySize, ptrDiff(mHostMemoryPermanent, mHostMemoryBase),
893-
ptrDiff(mDeviceMemoryPool, mDeviceMemoryBase) + ptrDiff((char*)mDeviceMemoryBase + mDeviceMemorySize, mDeviceMemoryPoolEnd), mDeviceMemorySize, ptrDiff(mDeviceMemoryPermanent, mDeviceMemoryBase), mMemoryResources.size());
889+
printf("Memory Allocation: Host %'13zd / %'13zu (Permanent %'13zd, Data %'13zd, Scratch %'13zd), Device %'13zd / %'13zu, (Permanent %'13zd, Data %'13zd, Scratch %'13zd) %zu chunks\n",
890+
ptrDiff(mHostMemoryPool, mHostMemoryBase) + ptrDiff((char*)mHostMemoryBase + mHostMemorySize, mHostMemoryPoolEnd), mHostMemorySize, ptrDiff(mHostMemoryPermanent, mHostMemoryBase), ptrDiff(mHostMemoryPool, mHostMemoryPermanent), ptrDiff((char*)mHostMemoryBase + mHostMemorySize, mHostMemoryPoolEnd),
891+
ptrDiff(mDeviceMemoryPool, mDeviceMemoryBase) + ptrDiff((char*)mDeviceMemoryBase + mDeviceMemorySize, mDeviceMemoryPoolEnd), mDeviceMemorySize, ptrDiff(mDeviceMemoryPermanent, mDeviceMemoryBase), ptrDiff(mDeviceMemoryPool, mDeviceMemoryPermanent), ptrDiff((char*)mDeviceMemoryBase + mDeviceMemorySize, mDeviceMemoryPoolEnd),
892+
mMemoryResources.size());
894893
}
895894
}
896895

GPU/GPUTracking/Base/GPUReconstructionCPU.cxx

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -215,6 +215,10 @@ int32_t GPUReconstructionCPU::RunChains()
215215
mStatNEvents++;
216216
mNEventsProcessed++;
217217

218+
if (mProcessingSettings.debugLevel >= 3 || mProcessingSettings.allocDebugLevel) {
219+
printf("Allocated memory when starting processing %34s", "");
220+
PrintMemoryOverview();
221+
}
218222
mTimerTotal.Start();
219223
const std::clock_t cpuTimerStart = std::clock();
220224
if (mProcessingSettings.doublePipeline) {
@@ -235,6 +239,10 @@ int32_t GPUReconstructionCPU::RunChains()
235239
}
236240
mTimerTotal.Stop();
237241
mStatCPUTime += (double)(std::clock() - cpuTimerStart) / CLOCKS_PER_SEC;
242+
if (mProcessingSettings.debugLevel >= 3 || mProcessingSettings.allocDebugLevel) {
243+
printf("Allocated memory when ending processing %36s", "");
244+
PrintMemoryOverview();
245+
}
238246

239247
mStatWallTime = (mTimerTotal.GetElapsedTime() * 1000000. / mStatNEvents);
240248
std::string nEventReport;

0 commit comments

Comments (0)