3333#include " GPUConstantMem.h"
3434#include " GPUMemorySizeScalers.h"
3535#include < atomic>
36+ #include < ctime>
3637
3738#define GPUCA_LOGGING_PRINTF
3839#include " GPULogging.h"
@@ -220,7 +221,8 @@ int32_t GPUReconstructionCPU::RunChains()
220221 mStatNEvents ++;
221222 mNEventsProcessed ++;
222223
223- timerTotal.Start ();
224+ mTimerTotal .Start ();
225+ const std::clock_t cpuTimerStart = std::clock ();
224226 if (mProcessingSettings .doublePipeline ) {
225227 int32_t retVal = EnqueuePipeline ();
226228 if (retVal) {
@@ -237,17 +239,18 @@ int32_t GPUReconstructionCPU::RunChains()
237239 }
238240 }
239241 }
240- timerTotal.Stop ();
242+ mTimerTotal .Stop ();
243+ mStatCPUTime += (double )(std::clock () - cpuTimerStart) / CLOCKS_PER_SEC;
241244
242- mStatWallTime = (timerTotal .GetElapsedTime () * 1000000 . / mStatNEvents );
245+ mStatWallTime = (mTimerTotal .GetElapsedTime () * 1000000 . / mStatNEvents );
243246 std::string nEventReport;
244247 if (GetProcessingSettings ().debugLevel >= 0 && mStatNEvents > 1 ) {
245248 nEventReport += " (avergage of " + std::to_string (mStatNEvents ) + " runs)" ;
246249 }
247- if (GetProcessingSettings ().debugLevel >= 1 ) {
248- double kernelTotal = 0 ;
249- std::vector<double > kernelStepTimes (GPUDataTypes::N_RECO_STEPS);
250+ double kernelTotal = 0 ;
251+ std::vector<double > kernelStepTimes (GPUDataTypes::N_RECO_STEPS, 0 .);
250252
253+ if (GetProcessingSettings ().debugLevel >= 1 ) {
251254 for (uint32_t i = 0 ; i < mTimers .size (); i++) {
252255 double time = 0 ;
253256 if (mTimers [i] == nullptr ) {
@@ -277,9 +280,12 @@ int32_t GPUReconstructionCPU::RunChains()
277280 mTimers [i]->memSize = 0 ;
278281 }
279282 }
283+ }
284+ if (GetProcessingSettings ().recoTaskTiming ) {
280285 for (int32_t i = 0 ; i < GPUDataTypes::N_RECO_STEPS; i++) {
281286 if (kernelStepTimes[i] != 0 . || mTimersRecoSteps [i].timerTotal .GetElapsedTime () != 0 .) {
282- printf (" Execution Time: Step : %11s %38s Time: %'10.0f us %64s ( Total Time : %'14.0f us)\n " , " Tasks" , GPUDataTypes::RECO_STEP_NAMES[i], kernelStepTimes[i] * 1000000 / mStatNEvents , " " , mTimersRecoSteps [i].timerTotal .GetElapsedTime () * 1000000 / mStatNEvents );
287+ printf (" Execution Time: Step : %11s %38s Time: %'10.0f us %64s ( Total Time : %'14.0f us, CPU Time : %'14.0f us, %'7.2fx )\n " , " Tasks" ,
288+ GPUDataTypes::RECO_STEP_NAMES[i], kernelStepTimes[i] * 1000000 / mStatNEvents , " " , mTimersRecoSteps [i].timerTotal .GetElapsedTime () * 1000000 / mStatNEvents , mTimersRecoSteps [i].timerCPU * 1000000 / mStatNEvents , mTimersRecoSteps [i].timerCPU / mTimersRecoSteps [i].timerTotal .GetElapsedTime ());
283289 }
284290 if (mTimersRecoSteps [i].bytesToGPU ) {
285291 printf (" Execution Time: Step (D %8ux): %11s %38s Time: %'10.0f us (%8.3f GB/s - %'14zu bytes - %'14zu per call)\n " , mTimersRecoSteps [i].countToGPU , " DMA to GPU" , GPUDataTypes::RECO_STEP_NAMES[i], mTimersRecoSteps [i].timerToGPU .GetElapsedTime () * 1000000 / mStatNEvents ,
@@ -294,6 +300,7 @@ int32_t GPUReconstructionCPU::RunChains()
294300 mTimersRecoSteps [i].timerToGPU .Reset ();
295301 mTimersRecoSteps [i].timerToHost .Reset ();
296302 mTimersRecoSteps [i].timerTotal .Reset ();
303+ mTimersRecoSteps [i].timerCPU = 0 ;
297304 mTimersRecoSteps [i].countToGPU = 0 ;
298305 mTimersRecoSteps [i].countToHost = 0 ;
299306 }
@@ -303,15 +310,18 @@ int32_t GPUReconstructionCPU::RunChains()
303310 printf (" Execution Time: General Step : %50s Time: %'10.0f us\n " , GPUDataTypes::GENERAL_STEP_NAMES[i], mTimersGeneralSteps [i].GetElapsedTime () * 1000000 / mStatNEvents );
304311 }
305312 }
306- mStatKernelTime = kernelTotal * 1000000 / mStatNEvents ;
307- printf (" Execution Time: Total : %50s Time: %'10.0f us%s\n " , " Total Kernel" , mStatKernelTime , nEventReport.c_str ());
308- printf (" Execution Time: Total : %50s Time: %'10.0f us%s\n " , " Total Wall" , mStatWallTime , nEventReport.c_str ());
313+ if (GetProcessingSettings ().debugLevel >= 1 ) {
314+ mStatKernelTime = kernelTotal * 1000000 / mStatNEvents ;
315+ printf (" Execution Time: Total : %50s Time: %'10.0f us%s\n " , " Total Kernel" , mStatKernelTime , nEventReport.c_str ());
316+ }
317+ printf (" Execution Time: Total : %50s Time: %'10.0f us ( CPU Time : %'10.0f us, %7.2fx ) %s\n " , " Total Wall" , mStatWallTime , mStatCPUTime * 1000000 / mStatNEvents , mStatCPUTime / mTimerTotal .GetElapsedTime (), nEventReport.c_str ());
309318 } else if (GetProcessingSettings ().debugLevel >= 0 ) {
310- GPUInfo (" Total Wall Time: %lu us%s" , ( uint64_t ) mStatWallTime , nEventReport.c_str ());
319+ GPUInfo (" Total Wall Time: %10.0f us%s" , mStatWallTime , nEventReport.c_str ());
311320 }
312321 if (mProcessingSettings .resetTimers ) {
313322 mStatNEvents = 0 ;
314- timerTotal.Reset ();
323+ mStatCPUTime = 0 ;
324+ mTimerTotal .Reset ();
315325 }
316326
317327 return 0 ;
0 commit comments