Skip to content

Commit 6a06564

Browse files
committed
GPU: Use aligned new/delete for some host allocations
1 parent 1713204 commit 6a06564

File tree

2 files changed

+25
-12
lines changed

2 files changed

+25
-12
lines changed

GPU/GPUTracking/Base/GPUReconstruction.cxx

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -716,8 +716,13 @@ void* GPUReconstruction::AllocateDirectMemory(size_t size, int32_t type)
716716
throw std::runtime_error("Requested invalid memory typo for unmanaged allocation");
717717
}
718718
if (GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL) {
719-
mUnmanagedChunks.emplace_back(new char[size + GPUCA_BUFFER_ALIGNMENT]);
720-
return GPUProcessor::alignPointer<GPUCA_BUFFER_ALIGNMENT>(mUnmanagedChunks.back().get());
719+
char* retVal = new (std::align_val_t(GPUCA_BUFFER_ALIGNMENT)) char[size];
720+
if ((type & GPUMemoryResource::MEMORY_STACK)) {
721+
mNonPersistentIndividualDirectAllocations.emplace_back(retVal, alignedDeleter());
722+
} else {
723+
mDirectMemoryChunks.emplace_back(retVal, alignedDeleter());
724+
}
725+
return retVal;
721726
} else {
722727
if (mVolatileMemoryStart && !mDeviceMemoryAsVolatile && (type & GPUMemoryResource::MEMORY_GPU) && !(type & GPUMemoryResource::MEMORY_STACK)) {
723728
GPUError("Must not allocate direct memory while volatile chunks are allocated");
@@ -765,8 +770,9 @@ void* GPUReconstruction::AllocateVolatileMemory(size_t size, bool device)
765770
if (device) {
766771
return AllocateVolatileDeviceMemory(size);
767772
}
768-
mVolatileChunks.emplace_back(new char[size + GPUCA_BUFFER_ALIGNMENT]);
769-
return GPUProcessor::alignPointer<GPUCA_BUFFER_ALIGNMENT>(mVolatileChunks.back().get());
773+
char* retVal = new (std::align_val_t(GPUCA_BUFFER_ALIGNMENT)) char[size];
774+
mVolatileChunks.emplace_back(retVal, alignedDeleter());
775+
return retVal;
770776
}
771777

772778
void GPUReconstruction::MakeFutureDeviceMemoryAllocationsVolatile()
@@ -851,7 +857,7 @@ void GPUReconstruction::FreeRegisteredMemory(GPUMemoryResource* res)
851857

852858
void GPUReconstruction::PushNonPersistentMemory(uint64_t tag)
853859
{
854-
mNonPersistentMemoryStack.emplace_back(mHostMemoryPoolEnd, mDeviceMemoryPoolEnd, mNonPersistentIndividualAllocations.size(), tag);
860+
mNonPersistentMemoryStack.emplace_back(mHostMemoryPoolEnd, mDeviceMemoryPoolEnd, mNonPersistentIndividualAllocations.size(), mNonPersistentIndividualDirectAllocations.size(), tag);
855861
}
856862

857863
void GPUReconstruction::PopNonPersistentMemory(RecoStep step, uint64_t tag)
@@ -862,11 +868,11 @@ void GPUReconstruction::PopNonPersistentMemory(RecoStep step, uint64_t tag)
862868
if (mNonPersistentMemoryStack.size() == 0) {
863869
GPUFatal("Trying to pop memory state from empty stack");
864870
}
865-
if (tag != 0 && std::get<3>(mNonPersistentMemoryStack.back()) != tag) {
866-
GPUFatal("Tag mismatch when popping non persistent memory from stack : pop %s vs on stack %s", qTag2Str(tag).c_str(), qTag2Str(std::get<3>(mNonPersistentMemoryStack.back())).c_str());
871+
if (tag != 0 && std::get<4>(mNonPersistentMemoryStack.back()) != tag) {
872+
GPUFatal("Tag mismatch when popping non persistent memory from stack : pop %s vs on stack %s", qTag2Str(tag).c_str(), qTag2Str(std::get<4>(mNonPersistentMemoryStack.back())).c_str());
867873
}
868874
if ((GetProcessingSettings().debugLevel >= 3 || GetProcessingSettings().allocDebugLevel) && (IsGPU() || GetProcessingSettings().forceHostMemoryPoolSize)) {
869-
printf("Allocated memory after %30s (%8s) (Stack %zu): ", GPUDataTypes::RECO_STEP_NAMES[getRecoStepNum(step, true)], qTag2Str(std::get<3>(mNonPersistentMemoryStack.back())).c_str(), mNonPersistentMemoryStack.size());
875+
printf("Allocated memory after %30s (%8s) (Stack %zu): ", GPUDataTypes::RECO_STEP_NAMES[getRecoStepNum(step, true)], qTag2Str(std::get<4>(mNonPersistentMemoryStack.back())).c_str(), mNonPersistentMemoryStack.size());
870876
PrintMemoryOverview();
871877
printf("%76s", "");
872878
PrintMemoryMax();
@@ -882,6 +888,7 @@ void GPUReconstruction::PopNonPersistentMemory(RecoStep step, uint64_t tag)
882888
res->mPtrDevice = nullptr;
883889
}
884890
mNonPersistentIndividualAllocations.resize(std::get<2>(mNonPersistentMemoryStack.back()));
891+
mNonPersistentIndividualDirectAllocations.resize(std::get<3>(mNonPersistentMemoryStack.back()));
885892
mNonPersistentMemoryStack.pop_back();
886893
}
887894

@@ -917,9 +924,11 @@ void GPUReconstruction::ClearAllocatedMemory(bool clearOutputs)
917924
FreeRegisteredMemory(i);
918925
}
919926
}
920-
mUnmanagedChunks.clear();
921927
mNonPersistentMemoryStack.clear();
922928
mNonPersistentIndividualAllocations.clear();
929+
mDirectMemoryChunks.clear();
930+
mNonPersistentIndividualDirectAllocations.clear();
931+
mVolatileChunks.clear();
923932
mVolatileMemoryStart = nullptr;
924933
if (GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) {
925934
mHostMemoryPool = GPUProcessor::alignPointer<GPUCA_MEMALIGN>(mHostMemoryPermanent);

GPU/GPUTracking/Base/GPUReconstruction.h

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,6 @@ class GPUReconstruction
6969
class LibraryLoader; // These must be the first members to ensure correct destructor order!
7070
std::shared_ptr<LibraryLoader> mMyLib = nullptr;
7171
std::vector<GPUMemoryResource> mMemoryResources;
72-
std::vector<std::unique_ptr<char[]>> mUnmanagedChunks;
73-
std::vector<std::unique_ptr<char[]>> mVolatileChunks;
7472
std::vector<std::unique_ptr<GPUChain>> mChains;
7573

7674
public:
@@ -373,9 +371,15 @@ class GPUReconstruction
373371
GPUProcessor* proc = nullptr;
374372
std::vector<uint16_t> res;
375373
};
374+
struct alignedDeleter {
375+
void operator()(void* ptr) { ::operator delete(ptr, std::align_val_t(GPUCA_BUFFER_ALIGNMENT)); };
376+
};
376377
std::unordered_map<GPUMemoryReuse::ID, MemoryReuseMeta> mMemoryReuse1to1;
377-
std::vector<std::tuple<void*, void*, size_t, uint64_t>> mNonPersistentMemoryStack;
378+
std::vector<std::tuple<void*, void*, size_t, size_t, uint64_t>> mNonPersistentMemoryStack; // hostPoolAddress, devicePoolAddress, individualAllocationCount, directIndividualAllocationCount, tag
378379
std::vector<GPUMemoryResource*> mNonPersistentIndividualAllocations;
380+
std::vector<std::unique_ptr<char[], alignedDeleter>> mNonPersistentIndividualDirectAllocations;
381+
std::vector<std::unique_ptr<char[], alignedDeleter>> mDirectMemoryChunks;
382+
std::vector<std::unique_ptr<char[], alignedDeleter>> mVolatileChunks;
379383

380384
std::unique_ptr<GPUReconstructionPipelineContext> mPipelineContext;
381385

0 commit comments

Comments
 (0)