Skip to content

Commit 0232107

Browse files
committed
GPU: Add protections not to do invalid memory allocations while volatile memory is allocated
1 parent 73a0935 commit 0232107

File tree

5 files changed

+34
-22
lines changed

5 files changed

+34
-22
lines changed

GPU/GPUTracking/Base/GPUMemoryResource.h

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -56,24 +56,24 @@ class GPUMemoryResource
5656

5757
public:
5858
enum MemoryType {
59-
MEMORY_HOST = 1,
60-
MEMORY_GPU = 2,
61-
MEMORY_INPUT_FLAG = 4,
62-
MEMORY_INPUT = 7,
63-
MEMORY_OUTPUT_FLAG = 8,
64-
MEMORY_OUTPUT = 11,
65-
MEMORY_INOUT = 15,
66-
MEMORY_SCRATCH = 16,
67-
MEMORY_SCRATCH_HOST = 17,
68-
MEMORY_EXTERNAL = 32,
69-
MEMORY_PERMANENT = 64,
70-
MEMORY_CUSTOM = 128,
71-
MEMORY_CUSTOM_TRANSFER = 256,
72-
MEMORY_STACK = 512
59+
MEMORY_HOST = 1, // Memory allocated on host (irrespective of other flags)
60+
MEMORY_GPU = 2, // Memory allocated on GPU (irrespective of other flags)
61+
MEMORY_INPUT_FLAG = 4, // Flag to signal this memory is copied to GPU with TransferMemoryResourcesToGPU, and alike
62+
MEMORY_INPUT = 7, // Input data for GPU has the MEMORY_INPUT_FLAG flag and is allocated on host and GPU
63+
MEMORY_OUTPUT_FLAG = 8, // Flag to signal this memory is copied to Host with TransferMemoryResourcesToHost, and alike
64+
MEMORY_OUTPUT = 11, // Output data for GPU has the MEMORY_OUTPUT_FLAG flag and is allocated on host and GPU
65+
MEMORY_INOUT = 15, // Combination of MEMORY_INPUT and MEMORY_OUTPUT
66+
MEMORY_SCRATCH = 16, // Scratch memory. If neither the MEMORY_HOST nor the MEMORY_GPU flag is set, it is by default allocated only on GPU when running on GPU, and only on host otherwise.
67+
MEMORY_SCRATCH_HOST = 17, // Scratch memory only on host
68+
MEMORY_EXTERNAL = 32, // Special flag to signal that memory on host shall not be allocated, but will be provided externally and manually
69+
MEMORY_PERMANENT = 64, // Permanent memory, registered once with AllocateRegisteredPermanentMemory, not per time frame. Only for small sizes!
70+
MEMORY_CUSTOM = 128, // Memory is not allocated automatically with AllocateRegisteredMemory(GPUProcessor), but must be allocated manually via AllocateRegisteredMemory(memoryId)
71+
MEMORY_CUSTOM_TRANSFER = 256, // Memory is not transferred automatically with TransferMemoryResourcesTo..., but must be transferred manually with TransferMemoryTo...(memoryId)
72+
MEMORY_STACK = 512 // Use memory from non-persistent stack at the end of the global memory region. Not persistent for full TF. Use PushNonPersistentMemory and PopNonPersistentMemory to release memory from the stack
7373
};
74-
enum AllocationType { ALLOCATION_AUTO = 0,
75-
ALLOCATION_INDIVIDUAL = 1,
76-
ALLOCATION_GLOBAL = 2 };
74+
enum AllocationType { ALLOCATION_AUTO = 0, // --> GLOBAL if GPU is used, INDIVIDUAL otherwise
75+
ALLOCATION_INDIVIDUAL = 1, // Individual memory allocations with malloc (host only)
76+
ALLOCATION_GLOBAL = 2 }; // Allocate memory blocks from large preallocated memory range with internal allocator (host and GPU)
7777

7878
GPUMemoryResource(GPUProcessor* proc, void* (GPUProcessor::*setPtr)(void*), MemoryType type, const char* name = "") : mProcessor(proc), mPtr(nullptr), mPtrDevice(nullptr), mSetPointers(setPtr), mName(name), mSize(0), mOverrideSize(0), mReuse(-1), mType(type)
7979
{

GPU/GPUTracking/Base/GPUReconstruction.cxx

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -538,6 +538,10 @@ size_t GPUReconstruction::AllocateRegisteredPermanentMemory()
538538
if (GetProcessingSettings().debugLevel >= 5) {
539539
GPUInfo("Allocating Permanent Memory");
540540
}
541+
if (mVolatileMemoryStart) {
542+
GPUError("Must not allocate permanent memory while volatile chunks are allocated");
543+
throw std::bad_alloc();
544+
}
541545
int32_t total = 0;
542546
for (uint32_t i = 0; i < mMemoryResources.size(); i++) {
543547
if ((mMemoryResources[i].mType & GPUMemoryResource::MEMORY_PERMANENT) && mMemoryResources[i].mPtr == nullptr) {
@@ -669,6 +673,10 @@ void GPUReconstruction::AllocateRegisteredMemoryInternal(GPUMemoryResource* res,
669673
GPUError("Device Processor not set (%s)", res->mName);
670674
throw std::bad_alloc();
671675
}
676+
if (mVolatileMemoryStart && !(res->mType & GPUMemoryResource::MEMORY_STACK)) {
677+
GPUError("Must not allocate non-stacked device memory while volatile chunks are allocated");
678+
throw std::bad_alloc();
679+
}
672680
size_t size = AllocateRegisteredMemoryHelper(res, res->mPtrDevice, recPool->mDeviceMemoryPool, recPool->mDeviceMemoryBase, recPool->mDeviceMemorySize, &GPUMemoryResource::SetDevicePointers, recPool->mDeviceMemoryPoolEnd, " gpu");
673681

674682
if (!(res->mType & GPUMemoryResource::MEMORY_HOST) || (res->mType & GPUMemoryResource::MEMORY_EXTERNAL)) {
@@ -702,7 +710,7 @@ size_t GPUReconstruction::AllocateRegisteredMemory(int16_t ires, GPUOutputContro
702710
return res->mReuse >= 0 ? 0 : res->mSize;
703711
}
704712

705-
void* GPUReconstruction::AllocateUnmanagedMemory(size_t size, int32_t type)
713+
void* GPUReconstruction::AllocateDirectMemory(size_t size, int32_t type)
706714
{
707715
if (type != GPUMemoryResource::MEMORY_HOST && (!IsGPU() || type != GPUMemoryResource::MEMORY_GPU)) {
708716
throw std::runtime_error("Requested invalid memory typo for unmanaged allocation");
@@ -711,6 +719,10 @@ void* GPUReconstruction::AllocateUnmanagedMemory(size_t size, int32_t type)
711719
mUnmanagedChunks.emplace_back(new char[size + GPUCA_BUFFER_ALIGNMENT]);
712720
return GPUProcessor::alignPointer<GPUCA_BUFFER_ALIGNMENT>(mUnmanagedChunks.back().get());
713721
} else {
722+
if (mVolatileMemoryStart && (type & GPUMemoryResource::MEMORY_GPU) && !(type & GPUMemoryResource::MEMORY_STACK)) {
723+
GPUError("Must not allocate direct memory while volatile chunks are allocated");
724+
throw std::bad_alloc();
725+
}
714726
void*& pool = type == GPUMemoryResource::MEMORY_GPU ? mDeviceMemoryPool : mHostMemoryPool;
715727
void*& poolend = type == GPUMemoryResource::MEMORY_GPU ? mDeviceMemoryPoolEnd : mHostMemoryPoolEnd;
716728
char* retVal;

GPU/GPUTracking/Base/GPUReconstruction.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@ class GPUReconstruction
166166

167167
size_t AllocateRegisteredMemory(int16_t res, GPUOutputControl* control = nullptr);
168168
void AllocateRegisteredForeignMemory(int16_t res, GPUReconstruction* rec, GPUOutputControl* control = nullptr);
169-
void* AllocateUnmanagedMemory(size_t size, int32_t type);
169+
void* AllocateDirectMemory(size_t size, int32_t type);
170170
void* AllocateVolatileDeviceMemory(size_t size);
171171
void* AllocateVolatileMemory(size_t size, bool device);
172172
void FreeRegisteredMemory(GPUProcessor* proc, bool freeCustom = false, bool freePermanent = false);

GPU/GPUTracking/Global/GPUChainITS.cxx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ class GPUFrameworkExternalAllocator final : public o2::its::ExternalAllocator
2828
public:
2929
void* allocate(size_t size) override
3030
{
31-
return mFWReco->AllocateUnmanagedMemory(size, GPUMemoryResource::MEMORY_GPU);
31+
return mFWReco->AllocateDirectMemory(size, GPUMemoryResource::MEMORY_GPU);
3232
}
3333

3434
void setReconstructionFramework(o2::gpu::GPUReconstruction* fwr) { mFWReco = fwr; }
@@ -86,7 +86,7 @@ o2::its::TimeFrame* GPUChainITS::GetITSTimeframe()
8686
}
8787
#if !defined(GPUCA_STANDALONE)
8888
if (mITSTimeFrame->mIsGPU) {
89-
auto doFWExtAlloc = [this](size_t size) -> void* { return rec()->AllocateUnmanagedMemory(size, GPUMemoryResource::MEMORY_GPU); };
89+
auto doFWExtAlloc = [this](size_t size) -> void* { return rec()->AllocateDirectMemory(size, GPUMemoryResource::MEMORY_GPU); };
9090

9191
mFrameworkAllocator.reset(new o2::its::GPUFrameworkExternalAllocator);
9292
mFrameworkAllocator->setReconstructionFramework(rec());

GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ static inline uint32_t RGB(uint8_t r, uint8_t g, uint8_t b) { return (uint32_t)r
3434
int32_t GPUChainTracking::PrepareProfile()
3535
{
3636
#ifdef GPUCA_TRACKLET_CONSTRUCTOR_DO_PROFILE
37-
char* tmpMem = (char*)mRec->AllocateUnmanagedMemory(PROFILE_MAX_SIZE, GPUMemoryResource::MEMORY_GPU);
37+
char* tmpMem = (char*)mRec->AllocateDirectMemory(PROFILE_MAX_SIZE, GPUMemoryResource::MEMORY_GPU);
3838
processorsShadow()->tpcTrackers[0].mStageAtSync = tmpMem;
3939
runKernel<GPUMemClean16>({{BlockCount(), ThreadCount(), -1}}, tmpMem, PROFILE_MAX_SIZE);
4040
#endif

0 commit comments

Comments
 (0)