
Commit a108346

GPU: Add protections against invalid memory allocations while volatile memory is allocated
1 parent 73a0935 commit a108346

7 files changed, +69 −49 lines changed


GPU/GPUTracking/Base/GPUMemoryResource.h

Lines changed: 17 additions & 17 deletions
@@ -56,24 +56,24 @@ class GPUMemoryResource
 
 public:
  enum MemoryType {
-   MEMORY_HOST = 1,
-   MEMORY_GPU = 2,
-   MEMORY_INPUT_FLAG = 4,
-   MEMORY_INPUT = 7,
-   MEMORY_OUTPUT_FLAG = 8,
-   MEMORY_OUTPUT = 11,
-   MEMORY_INOUT = 15,
-   MEMORY_SCRATCH = 16,
-   MEMORY_SCRATCH_HOST = 17,
-   MEMORY_EXTERNAL = 32,
-   MEMORY_PERMANENT = 64,
-   MEMORY_CUSTOM = 128,
-   MEMORY_CUSTOM_TRANSFER = 256,
-   MEMORY_STACK = 512
+   MEMORY_HOST = 1,              // Memory allocated on host (irrespective of other flags)
+   MEMORY_GPU = 2,               // Memory allocated on GPU (irrespective of other flags)
+   MEMORY_INPUT_FLAG = 4,        // Flag to signal this memory is copied to GPU with TransferMemoryResourcesToGPU and alike
+   MEMORY_INPUT = 7,             // Input data for GPU has the MEMORY_INPUT_FLAG flag and is allocated on host and GPU
+   MEMORY_OUTPUT_FLAG = 8,       // Flag to signal this memory is copied to host with TransferMemoryResourcesToHost and alike
+   MEMORY_OUTPUT = 11,           // Output data for GPU has the MEMORY_OUTPUT_FLAG flag and is allocated on host and GPU
+   MEMORY_INOUT = 15,            // Combination of MEMORY_INPUT and MEMORY_OUTPUT
+   MEMORY_SCRATCH = 16,          // Scratch memory, allocated only on GPU by default if running on GPU, only on host otherwise, if the MEMORY_HOST and MEMORY_GPU flags are not set
+   MEMORY_SCRATCH_HOST = 17,     // Scratch memory only on host
+   MEMORY_EXTERNAL = 32,         // Special flag to signal that memory on host shall not be allocated, but will be provided externally and manually
+   MEMORY_PERMANENT = 64,        // Permanent memory, registered once with AllocateRegisteredPermanentMemory, not per time frame. Only for small sizes!
+   MEMORY_CUSTOM = 128,          // Memory is not allocated automatically with AllocateRegisteredMemory(GPUProcessor), but must be allocated manually via AllocateRegisteredMemory(memoryId)
+   MEMORY_CUSTOM_TRANSFER = 256, // Memory is not transferred automatically with TransferMemoryResourcesTo..., but must be transferred manually with TransferMemoryTo...(memoryId)
+   MEMORY_STACK = 512            // Use memory from the non-persistent stack at the end of the global memory region. Not persistent for the full TF. Use PushNonPersistentMemory and PopNonPersistentMemory to release memory from the stack
  };
- enum AllocationType { ALLOCATION_AUTO = 0,
-                       ALLOCATION_INDIVIDUAL = 1,
-                       ALLOCATION_GLOBAL = 2 };
+ enum AllocationType { ALLOCATION_AUTO = 0,       // --> GLOBAL if GPU is used, INDIVIDUAL otherwise
+                       ALLOCATION_INDIVIDUAL = 1, // Individual memory allocations with malloc (host only)
+                       ALLOCATION_GLOBAL = 2 };   // Allocate memory blocks from a large preallocated memory range with the internal allocator (host and GPU)
 
  GPUMemoryResource(GPUProcessor* proc, void* (GPUProcessor::*setPtr)(void*), MemoryType type, const char* name = "") : mProcessor(proc), mPtr(nullptr), mPtrDevice(nullptr), mSetPointers(setPtr), mName(name), mSize(0), mOverrideSize(0), mReuse(-1), mType(type)
 {
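The new comments make explicit that MemoryType is a bit mask: the combined values are simply ORs of the elementary flags. A minimal standalone sketch (the enum is re-declared here purely for illustration, not copied from the header) checks that decomposition at compile time:

// Standalone sketch: verifies the flag arithmetic implied by the enum values above.
// The enum is re-declared here only for illustration; the real one lives in GPUMemoryResource.h.
#include <cstdint>

namespace sketch {
enum MemoryType : uint32_t {
  MEMORY_HOST = 1,
  MEMORY_GPU = 2,
  MEMORY_INPUT_FLAG = 4,
  MEMORY_INPUT = 7,
  MEMORY_OUTPUT_FLAG = 8,
  MEMORY_OUTPUT = 11,
  MEMORY_INOUT = 15,
  MEMORY_SCRATCH = 16,
  MEMORY_SCRATCH_HOST = 17
};

// Combined types decompose into the elementary flags:
static_assert(MEMORY_INPUT == (MEMORY_HOST | MEMORY_GPU | MEMORY_INPUT_FLAG), "input = host | gpu | input flag");
static_assert(MEMORY_OUTPUT == (MEMORY_HOST | MEMORY_GPU | MEMORY_OUTPUT_FLAG), "output = host | gpu | output flag");
static_assert(MEMORY_INOUT == (MEMORY_INPUT | MEMORY_OUTPUT), "inout = input | output");
static_assert(MEMORY_SCRATCH_HOST == (MEMORY_SCRATCH | MEMORY_HOST), "scratch_host = scratch | host");
} // namespace sketch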

GPU/GPUTracking/Base/GPUReconstruction.cxx

Lines changed: 37 additions & 19 deletions
@@ -538,6 +538,10 @@ size_t GPUReconstruction::AllocateRegisteredPermanentMemory()
  if (GetProcessingSettings().debugLevel >= 5) {
    GPUInfo("Allocating Permanent Memory");
  }
+ if (mVolatileMemoryStart) {
+   GPUError("Must not allocate permanent memory while volatile chunks are allocated");
+   throw std::bad_alloc();
+ }
  int32_t total = 0;
  for (uint32_t i = 0; i < mMemoryResources.size(); i++) {
    if ((mMemoryResources[i].mType & GPUMemoryResource::MEMORY_PERMANENT) && mMemoryResources[i].mPtr == nullptr) {
@@ -669,6 +673,10 @@ void GPUReconstruction::AllocateRegisteredMemoryInternal(GPUMemoryResource* res,
      GPUError("Device Processor not set (%s)", res->mName);
      throw std::bad_alloc();
    }
+   if (mVolatileMemoryStart && !mDeviceMemoryAsVolatile && !(res->mType & GPUMemoryResource::MEMORY_STACK)) {
+     GPUError("Must not allocate non-stacked device memory while volatile chunks are allocated");
+     throw std::bad_alloc();
+   }
    size_t size = AllocateRegisteredMemoryHelper(res, res->mPtrDevice, recPool->mDeviceMemoryPool, recPool->mDeviceMemoryBase, recPool->mDeviceMemorySize, &GPUMemoryResource::SetDevicePointers, recPool->mDeviceMemoryPoolEnd, " gpu");

    if (!(res->mType & GPUMemoryResource::MEMORY_HOST) || (res->mType & GPUMemoryResource::MEMORY_EXTERNAL)) {
@@ -702,7 +710,7 @@ size_t GPUReconstruction::AllocateRegisteredMemory(int16_t ires, GPUOutputContro
  return res->mReuse >= 0 ? 0 : res->mSize;
 }

-void* GPUReconstruction::AllocateUnmanagedMemory(size_t size, int32_t type)
+void* GPUReconstruction::AllocateDirectMemory(size_t size, int32_t type)
 {
  if (type != GPUMemoryResource::MEMORY_HOST && (!IsGPU() || type != GPUMemoryResource::MEMORY_GPU)) {
    throw std::runtime_error("Requested invalid memory typo for unmanaged allocation");
@@ -711,6 +719,10 @@ void* GPUReconstruction::AllocateUnmanagedMemory(size_t size, int32_t type)
    mUnmanagedChunks.emplace_back(new char[size + GPUCA_BUFFER_ALIGNMENT]);
    return GPUProcessor::alignPointer<GPUCA_BUFFER_ALIGNMENT>(mUnmanagedChunks.back().get());
  } else {
+   if (mVolatileMemoryStart && !mDeviceMemoryAsVolatile && (type & GPUMemoryResource::MEMORY_GPU) && !(type & GPUMemoryResource::MEMORY_STACK)) {
+     GPUError("Must not allocate direct memory while volatile chunks are allocated");
+     throw std::bad_alloc();
+   }
    void*& pool = type == GPUMemoryResource::MEMORY_GPU ? mDeviceMemoryPool : mHostMemoryPool;
    void*& poolend = type == GPUMemoryResource::MEMORY_GPU ? mDeviceMemoryPoolEnd : mHostMemoryPoolEnd;
    char* retVal;
@@ -745,7 +757,6 @@ void* GPUReconstruction::AllocateVolatileDeviceMemory(size_t size)
  if (GetProcessingSettings().allocDebugLevel >= 2) {
    std::cout << "Allocated (volatile GPU): " << size << " - available: " << ptrDiff(mDeviceMemoryPoolEnd, mDeviceMemoryPool) << "\n";
  }
-
  return retVal;
 }

@@ -758,6 +769,30 @@ void* GPUReconstruction::AllocateVolatileMemory(size_t size, bool device)
  return GPUProcessor::alignPointer<GPUCA_BUFFER_ALIGNMENT>(mVolatileChunks.back().get());
 }

+void GPUReconstruction::MakeFutureDeviceMemoryAllocationsVolatile()
+{
+  mDeviceMemoryAsVolatile = true;
+  AllocateVolatileDeviceMemory(0);
+}
+
+void GPUReconstruction::ReturnVolatileDeviceMemory()
+{
+  mDeviceMemoryAsVolatile = false;
+  if (mVolatileMemoryStart) {
+    mDeviceMemoryPool = mVolatileMemoryStart;
+    mVolatileMemoryStart = nullptr;
+  }
+  if (GetProcessingSettings().allocDebugLevel >= 2) {
+    std::cout << "Freed (volatile GPU) - available: " << ptrDiff(mDeviceMemoryPoolEnd, mDeviceMemoryPool) << "\n";
+  }
+}
+
+void GPUReconstruction::ReturnVolatileMemory()
+{
+  ReturnVolatileDeviceMemory();
+  mVolatileChunks.clear();
+}
+
 void GPUReconstruction::ResetRegisteredMemoryPointers(GPUProcessor* proc)
 {
  for (uint32_t i = 0; i < mMemoryResources.size(); i++) {
@@ -814,23 +849,6 @@ void GPUReconstruction::FreeRegisteredMemory(GPUMemoryResource* res)
  res->mPtrDevice = nullptr;
 }

-void GPUReconstruction::ReturnVolatileDeviceMemory()
-{
-  if (mVolatileMemoryStart) {
-    mDeviceMemoryPool = mVolatileMemoryStart;
-    mVolatileMemoryStart = nullptr;
-  }
-  if (GetProcessingSettings().allocDebugLevel >= 2) {
-    std::cout << "Freed (volatile GPU) - available: " << ptrDiff(mDeviceMemoryPoolEnd, mDeviceMemoryPool) << "\n";
-  }
-}
-
-void GPUReconstruction::ReturnVolatileMemory()
-{
-  ReturnVolatileDeviceMemory();
-  mVolatileChunks.clear();
-}
-
 void GPUReconstruction::PushNonPersistentMemory(uint64_t tag)
 {
  mNonPersistentMemoryStack.emplace_back(mHostMemoryPoolEnd, mDeviceMemoryPoolEnd, mNonPersistentIndividualAllocations.size(), tag);
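The new checks all enforce one invariant: volatile device memory is carved from the head of the global device pool (mVolatileMemoryStart marks where it begins), and ReturnVolatileDeviceMemory simply rewinds mDeviceMemoryPool to that mark. Any permanent or non-stacked allocation taken from the pool head in between would be wiped out by that rewind, so such requests now fail with std::bad_alloc unless device allocations were explicitly declared volatile (mDeviceMemoryAsVolatile) or the request targets the stack at the other end of the pool. The following self-contained sketch (a hypothetical PoolSketch class, not the O2 implementation) reproduces that invariant:

// Hypothetical bump-pointer pool, illustrating the invariant enforced by the new checks:
// while volatile memory is outstanding, only stack-side (tail) allocations are allowed,
// because returning the volatile region rewinds the head pointer and would invalidate
// anything that had been allocated from the head in the meantime.
#include <cstddef>
#include <new>

class PoolSketch
{
 public:
  explicit PoolSketch(size_t size) : mBase(new char[size]), mHead(mBase), mTail(mBase + size) {}
  ~PoolSketch() { delete[] mBase; }

  void* AllocateFromHead(size_t size) // e.g. permanent or registered non-stack memory
  {
    if (mVolatileStart != nullptr) {  // mirrors the guards added in this commit
      throw std::bad_alloc();
    }
    return Bump(size);
  }

  void* AllocateVolatile(size_t size) // volatile memory also comes from the head ...
  {
    if (mVolatileStart == nullptr) {
      mVolatileStart = mHead;         // ... but remembers where the volatile region starts
    }
    return Bump(size);
  }

  void* AllocateFromStack(size_t size) // stack allocations grow from the tail and remain legal
  {
    if (static_cast<size_t>(mTail - mHead) < size) {
      throw std::bad_alloc();
    }
    mTail -= size;
    return mTail;
  }

  void ReturnVolatile() // rewinding the head releases all volatile chunks at once
  {
    if (mVolatileStart != nullptr) {
      mHead = mVolatileStart;
      mVolatileStart = nullptr;
    }
  }

 private:
  void* Bump(size_t size)
  {
    if (static_cast<size_t>(mTail - mHead) < size) {
      throw std::bad_alloc();
    }
    char* p = mHead;
    mHead += size;
    return p;
  }

  char* mBase;
  char* mHead;
  char* mTail;
  char* mVolatileStart = nullptr;
};

With this model, the sequence AllocateVolatile → AllocateFromHead throws, while AllocateVolatile → AllocateFromStack → ReturnVolatile works, which is exactly the ordering the new GPUError/bad_alloc paths reject and allow.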

GPU/GPUTracking/Base/GPUReconstruction.h

Lines changed: 10 additions & 8 deletions
@@ -166,9 +166,10 @@ class GPUReconstruction
 
  size_t AllocateRegisteredMemory(int16_t res, GPUOutputControl* control = nullptr);
  void AllocateRegisteredForeignMemory(int16_t res, GPUReconstruction* rec, GPUOutputControl* control = nullptr);
- void* AllocateUnmanagedMemory(size_t size, int32_t type);
+ void* AllocateDirectMemory(size_t size, int32_t type);
  void* AllocateVolatileDeviceMemory(size_t size);
  void* AllocateVolatileMemory(size_t size, bool device);
+ void MakeFutureDeviceMemoryAllocationsVolatile();
  void FreeRegisteredMemory(GPUProcessor* proc, bool freeCustom = false, bool freePermanent = false);
  void FreeRegisteredMemory(int16_t res);
  void ClearAllocatedMemory(bool clearOutputs = true);
@@ -326,14 +327,15 @@ class GPUReconstruction
  void* mHostMemoryPoolBlocked = nullptr;   // Ptr to end of pool
  size_t mHostMemorySize = 0;               // Size of host memory buffer
  size_t mHostMemoryUsedMax = 0;            // Maximum host memory size used over time
- void* mDeviceMemoryBase = nullptr;        //
- void* mDeviceMemoryPermanent = nullptr;   //
- void* mDeviceMemoryPool = nullptr;        //
- void* mDeviceMemoryPoolEnd = nullptr;     //
- void* mDeviceMemoryPoolBlocked = nullptr; //
- size_t mDeviceMemorySize = 0;             //
+ void* mDeviceMemoryBase = nullptr;        // Same for device ...
+ void* mDeviceMemoryPermanent = nullptr;   // ...
+ void* mDeviceMemoryPool = nullptr;        // ...
+ void* mDeviceMemoryPoolEnd = nullptr;     // ...
+ void* mDeviceMemoryPoolBlocked = nullptr; // ...
+ size_t mDeviceMemorySize = 0;             // ...
+ size_t mDeviceMemoryUsedMax = 0;          // ...
  void* mVolatileMemoryStart = nullptr;     // Ptr to beginning of temporary volatile memory allocation, nullptr if uninitialized
- size_t mDeviceMemoryUsedMax = 0;          //
+ bool mDeviceMemoryAsVolatile = false;     // Make device memory allocations volatile
 
  std::unordered_set<const void*> mRegisteredMemoryPtrs; // List of pointers registered for GPU
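Taken together, the header changes suggest the following calling convention for the volatile-memory phase of a time frame. This is a hedged sketch assembled from the diff (the include paths and the size arguments are placeholders, and rec is assumed to point to a configured GPUReconstruction), not code copied from O2:

// Hedged sketch of the intended ordering; not taken verbatim from O2 code.
// Assumes the GPUTracking headers are on the include path.
#include "GPUReconstruction.h"
#include "GPUMemoryResource.h"

void volatilePhaseSketch(o2::gpu::GPUReconstruction* rec)
{
  void* direct = rec->AllocateDirectMemory(1024, o2::gpu::GPUMemoryResource::MEMORY_GPU); // OK: no volatile memory yet

  rec->MakeFutureDeviceMemoryAllocationsVolatile();          // sets mDeviceMemoryAsVolatile and marks mVolatileMemoryStart
  void* tmp = rec->AllocateVolatileDeviceMemory(512 * 1024); // carved from the head of the device pool

  // From here until the volatile region is returned, permanent and non-stacked direct or
  // registered device allocations throw std::bad_alloc instead of silently corrupting the pool.

  rec->ReturnVolatileDeviceMemory(); // rewinds mDeviceMemoryPool to mVolatileMemoryStart and clears the flag
  (void)direct;
  (void)tmp;
}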

GPU/GPUTracking/Global/GPUChainITS.cxx

Lines changed: 2 additions & 2 deletions
@@ -28,7 +28,7 @@ class GPUFrameworkExternalAllocator final : public o2::its::ExternalAllocator
 public:
  void* allocate(size_t size) override
  {
-   return mFWReco->AllocateUnmanagedMemory(size, GPUMemoryResource::MEMORY_GPU);
+   return mFWReco->AllocateDirectMemory(size, GPUMemoryResource::MEMORY_GPU);
  }

  void setReconstructionFramework(o2::gpu::GPUReconstruction* fwr) { mFWReco = fwr; }
@@ -86,7 +86,7 @@ o2::its::TimeFrame* GPUChainITS::GetITSTimeframe()
  }
 #if !defined(GPUCA_STANDALONE)
  if (mITSTimeFrame->mIsGPU) {
-   auto doFWExtAlloc = [this](size_t size) -> void* { return rec()->AllocateUnmanagedMemory(size, GPUMemoryResource::MEMORY_GPU); };
+   auto doFWExtAlloc = [this](size_t size) -> void* { return rec()->AllocateDirectMemory(size, GPUMemoryResource::MEMORY_GPU); };

   mFrameworkAllocator.reset(new o2::its::GPUFrameworkExternalAllocator);
   mFrameworkAllocator->setReconstructionFramework(rec());
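The ITS chain only has to follow the rename: both the GPUFrameworkExternalAllocator override and the doFWExtAlloc lambda are thin adapters that forward the ITS tracker's allocation hook to the framework. A stripped-down sketch of that adapter pattern (interface, member names, and the dummy allocation body are illustrative stand-ins, not the o2::its API):

// Simplified adapter sketch: an external component requests memory through an abstract
// allocator interface, and the adapter forwards the request to the reconstruction framework.
// Names and the dummy body are illustrative, not the real o2::its / o2::gpu types.
#include <cstddef>
#include <functional>
#include <new>

struct ExternalAllocatorIface {                 // stand-in for o2::its::ExternalAllocator
  virtual void* allocate(size_t size) = 0;
  virtual ~ExternalAllocatorIface() = default;
};

struct RecoFramework {                          // stand-in for GPUReconstruction
  void* AllocateDirectMemory(size_t size, int /*type*/) { return ::operator new(size); } // dummy body; the real framework allocates from its GPU pool
};

class FrameworkAllocatorAdapter final : public ExternalAllocatorIface
{
 public:
  void* allocate(size_t size) override { return mFWReco->AllocateDirectMemory(size, /*MEMORY_GPU*/ 2); }
  void setReconstructionFramework(RecoFramework* fwr) { mFWReco = fwr; }

 private:
  RecoFramework* mFWReco = nullptr;
};

// The lambda form used in GetITSTimeframe does the same thing without a class:
inline std::function<void*(size_t)> makeAlloc(RecoFramework* rec)
{
  return [rec](size_t size) -> void* { return rec->AllocateDirectMemory(size, /*MEMORY_GPU*/ 2); };
}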

GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx

Lines changed: 1 addition & 1 deletion
@@ -43,7 +43,7 @@ int32_t GPUChainTracking::RunTPCCompression()
  }

  if (gatherMode == 3) {
-   mRec->AllocateVolatileDeviceMemory(0); // make future device memory allocation volatile
+   mRec->MakeFutureDeviceMemoryAllocationsVolatile();
  }
  SetupGPUProcessor(&Compressor, true);
  new (Compressor.mMemory) GPUTPCCompression::memory;
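Here, and in the merger chain further down, the intent that was previously expressed through the side effect of a zero-size volatile allocation is now spelled out by the dedicated method; side by side (paraphrasing the diff):

// Old idiom: a zero-size volatile allocation, used only for its side effect
mRec->AllocateVolatileDeviceMemory(0); // make future device memory allocation volatile

// New explicit API introduced by this commit
mRec->MakeFutureDeviceMemoryAllocationsVolatile();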

GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx

Lines changed: 1 addition & 1 deletion
@@ -34,7 +34,7 @@ static inline uint32_t RGB(uint8_t r, uint8_t g, uint8_t b) { return (uint32_t)r
 int32_t GPUChainTracking::PrepareProfile()
 {
 #ifdef GPUCA_TRACKLET_CONSTRUCTOR_DO_PROFILE
-  char* tmpMem = (char*)mRec->AllocateUnmanagedMemory(PROFILE_MAX_SIZE, GPUMemoryResource::MEMORY_GPU);
+  char* tmpMem = (char*)mRec->AllocateDirectMemory(PROFILE_MAX_SIZE, GPUMemoryResource::MEMORY_GPU);
  processorsShadow()->tpcTrackers[0].mStageAtSync = tmpMem;
  runKernel<GPUMemClean16>({{BlockCount(), ThreadCount(), -1}}, tmpMem, PROFILE_MAX_SIZE);
 #endif

GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx

Lines changed: 1 addition & 1 deletion
@@ -297,7 +297,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput)
  SynchronizeEventAndRelease(mEvents->single, doGPU);

  if (GetProcessingSettings().clearO2OutputFromGPU) {
-   mRec->AllocateVolatileDeviceMemory(0); // make future device memory allocation volatile
+   mRec->MakeFutureDeviceMemoryAllocationsVolatile();
  }
  AllocateRegisteredMemory(Merger.MemoryResOutputO2(), mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::tpcTracksO2)]);
  AllocateRegisteredMemory(Merger.MemoryResOutputO2Clus(), mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::tpcTracksO2ClusRefs)]);
