Skip to content

Commit 070eaae

Browse files
committed
GPU: Add option to free individual stacked allocations per processor on the host
1 parent 81d282c commit 070eaae

File tree

2 files changed

+16
-9
lines changed

2 files changed

+16
-9
lines changed

GPU/GPUTracking/Base/GPUReconstruction.cxx

Lines changed: 15 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -877,8 +877,11 @@ void GPUReconstruction::PushNonPersistentMemory(uint64_t tag)
877877
mNonPersistentMemoryStack.emplace_back(mHostMemoryPoolEnd, mDeviceMemoryPoolEnd, mNonPersistentIndividualAllocations.size(), mNonPersistentIndividualDirectAllocations.size(), tag);
878878
}
879879

880-
void GPUReconstruction::PopNonPersistentMemory(RecoStep step, uint64_t tag)
880+
void GPUReconstruction::PopNonPersistentMemory(RecoStep step, uint64_t tag, const GPUProcessor* proc)
881881
{
882+
if (proc && GetProcessingSettings().memoryAllocationStrategy != GPUMemoryResource::ALLOCATION_INDIVIDUAL) {
883+
GPUFatal("Processor-depending memory-free works only with allocation strategy ALLOCATION_INDIVIDUAL");
884+
}
882885
if (GetProcessingSettings().keepDisplayMemory || GetProcessingSettings().disableMemoryReuse) {
883886
return;
884887
}
@@ -888,17 +891,17 @@ void GPUReconstruction::PopNonPersistentMemory(RecoStep step, uint64_t tag)
888891
if (tag != 0 && std::get<4>(mNonPersistentMemoryStack.back()) != tag) {
889892
GPUFatal("Tag mismatch when popping non persistent memory from stack : pop %s vs on stack %s", qTag2Str(tag).c_str(), qTag2Str(std::get<4>(mNonPersistentMemoryStack.back())).c_str());
890893
}
891-
if ((GetProcessingSettings().debugLevel >= 3 || GetProcessingSettings().allocDebugLevel) && (IsGPU() || GetProcessingSettings().forceHostMemoryPoolSize)) {
894+
if (!proc && (GetProcessingSettings().debugLevel >= 3 || GetProcessingSettings().allocDebugLevel) && (IsGPU() || GetProcessingSettings().forceHostMemoryPoolSize)) {
892895
printf("Allocated memory after %30s (%8s) (Stack %zu): ", GPUDataTypes::RECO_STEP_NAMES[getRecoStepNum(step, true)], qTag2Str(std::get<4>(mNonPersistentMemoryStack.back())).c_str(), mNonPersistentMemoryStack.size());
893896
PrintMemoryOverview();
894897
printf("%76s", "");
895898
PrintMemoryMax();
896899
}
897-
mHostMemoryPoolEnd = std::get<0>(mNonPersistentMemoryStack.back());
898-
mDeviceMemoryPoolEnd = std::get<1>(mNonPersistentMemoryStack.back());
899-
std::cout << "FOOOO POP " << std::get<2>(mNonPersistentMemoryStack.back()) << " - " << mNonPersistentIndividualAllocations.size();
900900
for (uint32_t i = std::get<2>(mNonPersistentMemoryStack.back()); i < mNonPersistentIndividualAllocations.size(); i++) {
901901
GPUMemoryResource* res = mNonPersistentIndividualAllocations[i];
902+
if (proc && res->mProcessor != proc) {
903+
continue;
904+
}
902905
if (GetProcessingSettings().allocDebugLevel >= 2 && (res->mPtr || res->mPtrDevice)) {
903906
std::cout << "Freeing NonPersistent " << res->mName << ": size " << res->mSize << " (reused " << res->mReuse << ")\n";
904907
}
@@ -908,9 +911,13 @@ void GPUReconstruction::PopNonPersistentMemory(RecoStep step, uint64_t tag)
908911
res->mPtr = nullptr;
909912
res->mPtrDevice = nullptr;
910913
}
911-
mNonPersistentIndividualAllocations.resize(std::get<2>(mNonPersistentMemoryStack.back()));
912-
mNonPersistentIndividualDirectAllocations.resize(std::get<3>(mNonPersistentMemoryStack.back()));
913-
mNonPersistentMemoryStack.pop_back();
914+
if (!proc) {
915+
mHostMemoryPoolEnd = std::get<0>(mNonPersistentMemoryStack.back());
916+
mDeviceMemoryPoolEnd = std::get<1>(mNonPersistentMemoryStack.back());
917+
mNonPersistentIndividualAllocations.resize(std::get<2>(mNonPersistentMemoryStack.back()));
918+
mNonPersistentIndividualDirectAllocations.resize(std::get<3>(mNonPersistentMemoryStack.back()));
919+
mNonPersistentMemoryStack.pop_back();
920+
}
914921
}
915922

916923
void GPUReconstruction::BlockStackedMemory(GPUReconstruction* rec)

GPU/GPUTracking/Base/GPUReconstruction.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -179,7 +179,7 @@ class GPUReconstruction
179179
void ReturnVolatileMemory();
180180
ThrustVolatileAllocator getThrustVolatileDeviceAllocator();
181181
void PushNonPersistentMemory(uint64_t tag);
182-
void PopNonPersistentMemory(RecoStep step, uint64_t tag);
182+
void PopNonPersistentMemory(RecoStep step, uint64_t tag, const GPUProcessor* proc = nullptr);
183183
void BlockStackedMemory(GPUReconstruction* rec);
184184
void UnblockStackedMemory();
185185
void ResetRegisteredMemoryPointers(GPUProcessor* proc);

0 commit comments

Comments
 (0)