 #include "GPUTRDTrackletLabels.h"
 #include "GPUMemoryResource.h"
 #include "GPUConstantMem.h"
+#include "GPULogging.h"
 #include "GPUMemorySizeScalers.h"
+#include "GPUReconstructionProcessingKernels.inc"
+
 #include <atomic>
 #include <ctime>

-#define GPUCA_LOGGING_PRINTF
-#include "GPULogging.h"
-
 #ifndef _WIN32
 #include <unistd.h>
 #endif

 using namespace o2::gpu;
-using namespace o2::gpu::gpu_reconstruction_kernels;

 constexpr GPUReconstructionCPU::krnlRunRange GPUReconstructionCPU::krnlRunRangeNone;
 constexpr GPUReconstructionCPU::krnlEvent GPUReconstructionCPU::krnlEventNone;
@@ -55,7 +54,7 @@ GPUReconstructionCPU::~GPUReconstructionCPU()
 }

 template <class T, int32_t I, typename... Args>
-inline void GPUReconstructionCPUBackend::runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... args)
+inline void GPUReconstructionCPU::runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... args)
 {
   auto& x = _xyz.x;
   auto& y = _xyz.y;
@@ -67,7 +66,7 @@ inline void GPUReconstructionCPUBackend::runKernelBackendInternal(const krnlSetu
   }
   int32_t nThreads = getNKernelHostThreads(false);
   if (nThreads > 1) {
-    if (mProcessingSettings.debugLevel >= 5) {
+    if (GetProcessingSettings().debugLevel >= 5) {
       printf("Running %d Threads\n", mThreading->activeThreads->max_concurrency());
     }
     tbb::this_task_arena::isolate([&] {
@@ -89,7 +88,7 @@ inline void GPUReconstructionCPUBackend::runKernelBackendInternal(const krnlSetu
 }

 template <>
-inline void GPUReconstructionCPUBackend::runKernelBackendInternal<GPUMemClean16, 0>(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size)
+inline void GPUReconstructionCPU::runKernelBackendInternal<GPUMemClean16, 0>(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size)
 {
   int32_t nThreads = std::max<int32_t>(1, std::min<int32_t>(size / (16 * 1024 * 1024), getNKernelHostThreads(true)));
   if (nThreads > 1) {
@@ -110,7 +109,7 @@ inline void GPUReconstructionCPUBackend::runKernelBackendInternal<GPUMemClean16,
 }

 template <class T, int32_t I, typename... Args>
-void GPUReconstructionCPUBackend::runKernelBackend(const krnlSetupArgs<T, I, Args...>& args)
+void GPUReconstructionCPU::runKernelBackend(const krnlSetupArgs<T, I, Args...>& args)
 {
 #pragma GCC diagnostic push
 #if defined(__clang__)
@@ -121,14 +120,14 @@ void GPUReconstructionCPUBackend::runKernelBackend(const krnlSetupArgs<T, I, Arg
 }

 template <class S, int32_t I>
-gpu_reconstruction_kernels::krnlProperties GPUReconstructionCPU::getKernelProperties(int gpu)
+GPUReconstructionProcessing::krnlProperties GPUReconstructionCPU::getKernelProperties(int gpu)
 {
   if (gpu == -1) {
     gpu = IsGPU();
   }
   const auto num = GetKernelNum<S, I>();
   const auto* p = gpu ? mParDevice : mParCPU;
-  gpu_reconstruction_kernels::krnlProperties ret = {p->par_LB_maxThreads[num], p->par_LB_minBlocks[num], p->par_LB_forceBlocks[num]};
+  GPUReconstructionProcessing::krnlProperties ret = {p->par_LB_maxThreads[num], p->par_LB_minBlocks[num], p->par_LB_forceBlocks[num]};
   if (ret.nThreads == 0) {
     ret.nThreads = gpu ? mThreadCount : 1u;
   }
@@ -138,9 +137,9 @@ gpu_reconstruction_kernels::krnlProperties GPUReconstructionCPU::getKernelProper
   return ret;
 }

-#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, ...) \
-  template void GPUReconstructionCPUBackend::runKernelBackend<GPUCA_M_KRNL_TEMPLATE(x_class)>(const krnlSetupArgs<GPUCA_M_KRNL_TEMPLATE(x_class) GPUCA_M_STRIP(x_types)>& args); \
-  template krnlProperties GPUReconstructionCPU::getKernelProperties<GPUCA_M_KRNL_TEMPLATE(x_class)>(int gpu);
+#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, ...) \
+  template void GPUReconstructionCPU::runKernelBackend<GPUCA_M_KRNL_TEMPLATE(x_class)>(const krnlSetupArgs<GPUCA_M_KRNL_TEMPLATE(x_class) GPUCA_M_STRIP(x_types)>& args); \
+  template GPUReconstructionProcessing::krnlProperties GPUReconstructionCPU::getKernelProperties<GPUCA_M_KRNL_TEMPLATE(x_class)>(int gpu);
 #include "GPUReconstructionKernelList.h"
 #undef GPUCA_KRNL

@@ -169,7 +168,7 @@ size_t GPUReconstructionCPU::TransferMemoryResourcesHelper(GPUProcessor* proc, i
     if (!(res.mType & GPUMemoryResource::MEMORY_GPU) || (res.mType & GPUMemoryResource::MEMORY_CUSTOM_TRANSFER)) {
       continue;
     }
-    if (!mProcessingSettings.keepAllMemory && !all && (res.mType & exc) && !(res.mType & inc)) {
+    if (!GetProcessingSettings().keepAllMemory && !all && (res.mType & exc) && !(res.mType & inc)) {
       continue;
     }
     if (toGPU) {
@@ -197,7 +196,7 @@ int32_t GPUReconstructionCPU::InitDevice()
 {
   mActiveHostKernelThreads = mMaxHostThreads;
   mThreading->activeThreads = std::make_unique<tbb::task_arena>(mActiveHostKernelThreads);
-  if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) {
+  if (GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) {
     if (mMaster == nullptr) {
       if (mDeviceMemorySize > mHostMemorySize) {
         mHostMemorySize = mDeviceMemorySize;
@@ -207,7 +206,7 @@ int32_t GPUReconstructionCPU::InitDevice()
     mHostMemoryPermanent = mHostMemoryBase;
     ClearAllocatedMemory();
   }
-  if (mProcessingSettings.inKernelParallel) {
+  if (GetProcessingSettings().inKernelParallel) {
     mBlockCount = mMaxHostThreads;
   }
   mProcShadow.mProcessorsProc = processors();
@@ -216,7 +215,7 @@ int32_t GPUReconstructionCPU::InitDevice()

 int32_t GPUReconstructionCPU::ExitDevice()
 {
-  if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) {
+  if (GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) {
     if (mMaster == nullptr) {
       operator delete(mHostMemoryBase, std::align_val_t(GPUCA_BUFFER_ALIGNMENT));
     }
@@ -232,13 +231,13 @@ int32_t GPUReconstructionCPU::RunChains()
   mStatNEvents++;
   mNEventsProcessed++;

-  if (mProcessingSettings.debugLevel >= 3 || mProcessingSettings.allocDebugLevel) {
+  if (GetProcessingSettings().debugLevel >= 3 || GetProcessingSettings().allocDebugLevel) {
     printf("Allocated memory when starting processing %34s", "");
     PrintMemoryOverview();
   }
   mTimerTotal.Start();
   const std::clock_t cpuTimerStart = std::clock();
-  if (mProcessingSettings.doublePipeline) {
+  if (GetProcessingSettings().doublePipeline) {
     int32_t retVal = EnqueuePipeline();
     if (retVal) {
       return retVal;
@@ -259,7 +258,7 @@ int32_t GPUReconstructionCPU::RunChains()
   }
   mTimerTotal.Stop();
   mStatCPUTime += (double)(std::clock() - cpuTimerStart) / CLOCKS_PER_SEC;
-  if (mProcessingSettings.debugLevel >= 3 || mProcessingSettings.allocDebugLevel) {
+  if (GetProcessingSettings().debugLevel >= 3 || GetProcessingSettings().allocDebugLevel) {
     printf("Allocated memory when ending processing %36s", "");
     PrintMemoryOverview();
   }
@@ -281,7 +280,7 @@ int32_t GPUReconstructionCPU::RunChains()
       for (int32_t j = 0; j < mTimers[i]->num; j++) {
         HighResTimer& timer = mTimers[i]->timer[j];
         time += timer.GetElapsedTime();
-        if (mProcessingSettings.resetTimers) {
+        if (GetProcessingSettings().resetTimers) {
           timer.Reset();
         }
       }
@@ -297,7 +296,7 @@ int32_t GPUReconstructionCPU::RunChains()
         snprintf(bandwidth, 256, " (%8.3f GB/s - %'14zu bytes - %'14zu per call)", mTimers[i]->memSize / time * 1e-9, mTimers[i]->memSize / mStatNEvents, mTimers[i]->memSize / mStatNEvents / mTimers[i]->count);
       }
       printf("Execution Time: Task (%c %8ux): %50s Time: %'10.0f us%s\n", type == 0 ? 'K' : 'C', mTimers[i]->count, mTimers[i]->name.c_str(), time * 1000000 / mStatNEvents, bandwidth);
-      if (mProcessingSettings.resetTimers) {
+      if (GetProcessingSettings().resetTimers) {
         mTimers[i]->count = 0;
         mTimers[i]->memSize = 0;
       }
@@ -317,7 +316,7 @@ int32_t GPUReconstructionCPU::RunChains()
       printf("Execution Time: Step (D %8ux): %11s %38s Time: %'10.0f us (%8.3f GB/s - %'14zu bytes - %'14zu per call)\n", mTimersRecoSteps[i].countToHost, "DMA to Host", GPUDataTypes::RECO_STEP_NAMES[i], mTimersRecoSteps[i].timerToHost.GetElapsedTime() * 1000000 / mStatNEvents,
              mTimersRecoSteps[i].bytesToHost / mTimersRecoSteps[i].timerToHost.GetElapsedTime() * 1e-9, mTimersRecoSteps[i].bytesToHost / mStatNEvents, mTimersRecoSteps[i].bytesToHost / mTimersRecoSteps[i].countToHost);
     }
-    if (mProcessingSettings.resetTimers) {
+    if (GetProcessingSettings().resetTimers) {
       mTimersRecoSteps[i].bytesToGPU = mTimersRecoSteps[i].bytesToHost = 0;
       mTimersRecoSteps[i].timerToGPU.Reset();
       mTimersRecoSteps[i].timerToHost.Reset();
@@ -340,7 +339,7 @@ int32_t GPUReconstructionCPU::RunChains()
   } else if (GetProcessingSettings().debugLevel >= 0) {
     GPUInfo("Total Wall Time: %10.0f us%s", mStatWallTime, nEventReport.c_str());
   }
-  if (mProcessingSettings.resetTimers) {
+  if (GetProcessingSettings().resetTimers) {
     mStatNEvents = 0;
     mStatCPUTime = 0;
     mTimerTotal.Reset();
@@ -366,7 +365,7 @@ void GPUReconstructionCPU::UpdateParamOccupancyMap(const uint32_t* mapHost, cons
   if (!((size_t)&param().occupancyTotal - (size_t)&param().occupancyMap == sizeof(param().occupancyMap) && sizeof(param().occupancyMap) == sizeof(size_t) && sizeof(param().occupancyTotal) < sizeof(size_t))) {
     throw std::runtime_error("occupancy data not consecutive in GPUParam");
   }
-  const auto threadContext = GetThreadContext();
+  const auto holdContext = GetThreadContext();
   size_t tmp[2] = {(size_t)mapGPU, 0};
   memcpy(&tmp[1], &occupancyTotal, sizeof(occupancyTotal));
   WriteToConstantMemory((char*)&processors()->param.occupancyMap - (char*)processors(), &tmp, sizeof(param().occupancyMap) + sizeof(param().occupancyTotal), stream);