@@ -716,8 +716,13 @@ void* GPUReconstruction::AllocateDirectMemory(size_t size, int32_t type)
716716 throw std::runtime_error (" Requested invalid memory typo for unmanaged allocation" );
717717 }
718718 if (GetProcessingSettings ().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL) {
719- mUnmanagedChunks .emplace_back (new char [size + GPUCA_BUFFER_ALIGNMENT]);
720- return GPUProcessor::alignPointer<GPUCA_BUFFER_ALIGNMENT>(mUnmanagedChunks .back ().get ());
719+ char * retVal = new (std::align_val_t (GPUCA_BUFFER_ALIGNMENT)) char [size];
720+ if ((type & GPUMemoryResource::MEMORY_STACK)) {
721+ mNonPersistentIndividualDirectAllocations .emplace_back (retVal, alignedDeleter ());
722+ } else {
723+ mDirectMemoryChunks .emplace_back (retVal, alignedDeleter ());
724+ }
725+ return retVal;
721726 } else {
722727 if (mVolatileMemoryStart && !mDeviceMemoryAsVolatile && (type & GPUMemoryResource::MEMORY_GPU) && !(type & GPUMemoryResource::MEMORY_STACK)) {
723728 GPUError (" Must not allocate direct memory while volatile chunks are allocated" );
@@ -765,8 +770,9 @@ void* GPUReconstruction::AllocateVolatileMemory(size_t size, bool device)
765770 if (device) {
766771 return AllocateVolatileDeviceMemory (size);
767772 }
768- mVolatileChunks .emplace_back (new char [size + GPUCA_BUFFER_ALIGNMENT]);
769- return GPUProcessor::alignPointer<GPUCA_BUFFER_ALIGNMENT>(mVolatileChunks .back ().get ());
773+ char * retVal = new (std::align_val_t (GPUCA_BUFFER_ALIGNMENT)) char [size];
774+ mVolatileChunks .emplace_back (retVal, alignedDeleter ());
775+ return retVal;
770776}
771777
772778void GPUReconstruction::MakeFutureDeviceMemoryAllocationsVolatile ()
@@ -851,7 +857,7 @@ void GPUReconstruction::FreeRegisteredMemory(GPUMemoryResource* res)
851857
// Pushes one entry onto mNonPersistentMemoryStack describing the allocation state at the time of the call.
// The entry is built in place from: the current values of mHostMemoryPoolEnd and mDeviceMemoryPoolEnd,
// the current element count of mNonPersistentIndividualAllocations, and the caller-supplied tag.
// The added ('+') form of the statement additionally stores the current element count of
// mNonPersistentIndividualDirectAllocations immediately before the tag, so the tag moves from the
// fourth to the fifth field of the entry.
// @param tag  value stored as the last field of the pushed entry
852858void GPUReconstruction::PushNonPersistentMemory (uint64_t tag)
853859{
854- mNonPersistentMemoryStack .emplace_back (mHostMemoryPoolEnd , mDeviceMemoryPoolEnd , mNonPersistentIndividualAllocations .size (), tag);
860+ mNonPersistentMemoryStack .emplace_back (mHostMemoryPoolEnd , mDeviceMemoryPoolEnd , mNonPersistentIndividualAllocations .size (), mNonPersistentIndividualDirectAllocations . size (), tag);
855861}
856862
857863void GPUReconstruction::PopNonPersistentMemory (RecoStep step, uint64_t tag)
@@ -862,11 +868,11 @@ void GPUReconstruction::PopNonPersistentMemory(RecoStep step, uint64_t tag)
862868 if (mNonPersistentMemoryStack .size () == 0 ) {
863869 GPUFatal (" Trying to pop memory state from empty stack" );
864870 }
865- if (tag != 0 && std::get<3 >(mNonPersistentMemoryStack .back ()) != tag) {
866- GPUFatal (" Tag mismatch when popping non persistent memory from stack : pop %s vs on stack %s" , qTag2Str (tag).c_str (), qTag2Str (std::get<3 >(mNonPersistentMemoryStack .back ())).c_str ());
871+ if (tag != 0 && std::get<4 >(mNonPersistentMemoryStack .back ()) != tag) {
872+ GPUFatal (" Tag mismatch when popping non persistent memory from stack : pop %s vs on stack %s" , qTag2Str (tag).c_str (), qTag2Str (std::get<4 >(mNonPersistentMemoryStack .back ())).c_str ());
867873 }
868874 if ((GetProcessingSettings ().debugLevel >= 3 || GetProcessingSettings ().allocDebugLevel ) && (IsGPU () || GetProcessingSettings ().forceHostMemoryPoolSize )) {
869- printf (" Allocated memory after %30s (%8s) (Stack %zu): " , GPUDataTypes::RECO_STEP_NAMES[getRecoStepNum (step, true )], qTag2Str (std::get<3 >(mNonPersistentMemoryStack .back ())).c_str (), mNonPersistentMemoryStack .size ());
875+ printf (" Allocated memory after %30s (%8s) (Stack %zu): " , GPUDataTypes::RECO_STEP_NAMES[getRecoStepNum (step, true )], qTag2Str (std::get<4 >(mNonPersistentMemoryStack .back ())).c_str (), mNonPersistentMemoryStack .size ());
870876 PrintMemoryOverview ();
871877 printf (" %76s" , " " );
872878 PrintMemoryMax ();
@@ -882,6 +888,7 @@ void GPUReconstruction::PopNonPersistentMemory(RecoStep step, uint64_t tag)
882888 res->mPtrDevice = nullptr ;
883889 }
884890 mNonPersistentIndividualAllocations .resize (std::get<2 >(mNonPersistentMemoryStack .back ()));
891+ mNonPersistentIndividualDirectAllocations .resize (std::get<3 >(mNonPersistentMemoryStack .back ()));
885892 mNonPersistentMemoryStack .pop_back ();
886893}
887894
@@ -917,9 +924,11 @@ void GPUReconstruction::ClearAllocatedMemory(bool clearOutputs)
917924 FreeRegisteredMemory (i);
918925 }
919926 }
920- mUnmanagedChunks .clear ();
921927 mNonPersistentMemoryStack .clear ();
922928 mNonPersistentIndividualAllocations .clear ();
929+ mDirectMemoryChunks .clear ();
930+ mNonPersistentIndividualDirectAllocations .clear ();
931+ mVolatileChunks .clear ();
923932 mVolatileMemoryStart = nullptr ;
924933 if (GetProcessingSettings ().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) {
925934 mHostMemoryPool = GPUProcessor::alignPointer<GPUCA_MEMALIGN>(mHostMemoryPermanent );
0 commit comments