3535
3636#include < atomic>
3737#include < ctime>
38+ #include < string>
3839
3940#ifndef _WIN32
4041#include < unistd.h>
@@ -230,6 +231,7 @@ int32_t GPUReconstructionCPU::RunChains()
230231 GPUInfo (" Allocated memory when starting processing %34s" , " " );
231232 PrintMemoryOverview ();
232233 }
234+
233235 mTimerTotal .Start ();
234236 const std::clock_t cpuTimerStart = std::clock ();
235237 int32_t retVal = 0 ;
@@ -264,7 +266,10 @@ int32_t GPUReconstructionCPU::RunChains()
264266 double kernelTotal = 0 ;
265267 std::vector<double > kernelStepTimes (gpudatatypes::N_RECO_STEPS, 0 .);
266268
269+ debugWriter writer (GetProcessingSettings ().debugCSV , GetProcessingSettings ().debugMarkdown , mStatNEvents );
270+
267271 if (GetProcessingSettings ().debugLevel >= 1 ) {
272+ writer.header ();
268273 for (uint32_t i = 0 ; i < mTimers .size (); i++) {
269274 double time = 0 ;
270275 if (mTimers [i] == nullptr ) {
@@ -284,11 +289,7 @@ int32_t GPUReconstructionCPU::RunChains()
284289 int32_t stepNum = getRecoStepNum (mTimers [i]->step );
285290 kernelStepTimes[stepNum] += time;
286291 }
287- char bandwidth[256 ] = " " ;
288- if (mTimers [i]->memSize && mStatNEvents && time != 0 .) {
289- snprintf (bandwidth, 256 , " (%8.3f GB/s - %'14zu bytes - %'14zu per call)" , mTimers [i]->memSize / time * 1e-9 , mTimers [i]->memSize / mStatNEvents , mTimers [i]->memSize / mStatNEvents / mTimers [i]->count );
290- }
291- printf (" Execution Time: Task (%c %8ux): %50s Time: %'10.0f us%s\n " , type == 0 ? ' K' : ' C' , mTimers [i]->count , mTimers [i]->name .c_str (), time * 1000000 / mStatNEvents , bandwidth);
292+ writer.row (' K' , mTimers [i]->count , mTimers [i]->name .c_str (), time, -1.0 , -1.0 , mTimers [i]->memSize );
292293 if (GetProcessingSettings ().resetTimers ) {
293294 mTimers [i]->count = 0 ;
294295 mTimers [i]->memSize = 0 ;
@@ -298,16 +299,13 @@ int32_t GPUReconstructionCPU::RunChains()
298299 if (GetProcessingSettings ().recoTaskTiming ) {
299300 for (int32_t i = 0 ; i < gpudatatypes::N_RECO_STEPS; i++) {
300301 if (kernelStepTimes[i] != 0 . || mTimersRecoSteps [i].timerTotal .GetElapsedTime () != 0 .) {
301- printf (" Execution Time: Step : %11s %38s Time: %'10.0f us %64s ( Total Time : %'14.0f us, CPU Time : %'14.0f us, %'7.2fx )\n " , " Tasks" ,
302- gpudatatypes::RECO_STEP_NAMES[i], kernelStepTimes[i] * 1000000 / mStatNEvents , " " , mTimersRecoSteps [i].timerTotal .GetElapsedTime () * 1000000 / mStatNEvents , mTimersRecoSteps [i].timerCPU * 1000000 / mStatNEvents , mTimersRecoSteps [i].timerCPU / mTimersRecoSteps [i].timerTotal .GetElapsedTime ());
302+ writer.row (' ' , 0 , std::string (gpudatatypes::RECO_STEP_NAMES[i]) + " (Tasks)" , kernelStepTimes[i], mTimersRecoSteps [i].timerCPU , mTimersRecoSteps [i].timerTotal .GetElapsedTime (), 0 );
303303 }
304304 if (mTimersRecoSteps [i].bytesToGPU ) {
305- printf (" Execution Time: Step (D %8ux): %11s %38s Time: %'10.0f us (%8.3f GB/s - %'14zu bytes - %'14zu per call)\n " , mTimersRecoSteps [i].countToGPU , " DMA to GPU" , gpudatatypes::RECO_STEP_NAMES[i], mTimersRecoSteps [i].timerToGPU .GetElapsedTime () * 1000000 / mStatNEvents ,
306- mTimersRecoSteps [i].bytesToGPU / mTimersRecoSteps [i].timerToGPU .GetElapsedTime () * 1e-9 , mTimersRecoSteps [i].bytesToGPU / mStatNEvents , mTimersRecoSteps [i].bytesToGPU / mTimersRecoSteps [i].countToGPU );
305+ writer.row (' D' , mTimersRecoSteps [i].countToGPU , std::string (gpudatatypes::RECO_STEP_NAMES[i]) + " (DMA to GPU)" , mTimersRecoSteps [i].timerToGPU .GetElapsedTime (), -1.0 , -1.0 , mTimersRecoSteps [i].bytesToGPU );
307306 }
308307 if (mTimersRecoSteps [i].bytesToHost ) {
309- printf (" Execution Time: Step (D %8ux): %11s %38s Time: %'10.0f us (%8.3f GB/s - %'14zu bytes - %'14zu per call)\n " , mTimersRecoSteps [i].countToHost , " DMA to Host" , gpudatatypes::RECO_STEP_NAMES[i], mTimersRecoSteps [i].timerToHost .GetElapsedTime () * 1000000 / mStatNEvents ,
310- mTimersRecoSteps [i].bytesToHost / mTimersRecoSteps [i].timerToHost .GetElapsedTime () * 1e-9 , mTimersRecoSteps [i].bytesToHost / mStatNEvents , mTimersRecoSteps [i].bytesToHost / mTimersRecoSteps [i].countToHost );
308+ writer.row (' D' , mTimersRecoSteps [i].countToHost , std::string (gpudatatypes::RECO_STEP_NAMES[i]) + " (DMA to Host)" , mTimersRecoSteps [i].timerToHost .GetElapsedTime (), -1.0 , -1.0 , mTimersRecoSteps [i].bytesToHost );
311309 }
312310 if (GetProcessingSettings ().resetTimers ) {
313311 mTimersRecoSteps [i].bytesToGPU = mTimersRecoSteps [i].bytesToHost = 0 ;
@@ -321,14 +319,11 @@ int32_t GPUReconstructionCPU::RunChains()
321319 }
322320 for (int32_t i = 0 ; i < gpudatatypes::N_GENERAL_STEPS; i++) {
323321 if (mTimersGeneralSteps [i].GetElapsedTime () != 0 .) {
324- printf ( " Execution Time: General Step : %50s Time: %'10.0f us \n " , gpudatatypes::GENERAL_STEP_NAMES[i], mTimersGeneralSteps [i].GetElapsedTime () * 1000000 / mStatNEvents );
322+ writer. row ( ' ' , 0 , gpudatatypes::GENERAL_STEP_NAMES[i], mTimersGeneralSteps [i].GetElapsedTime (), - 1.0 , - 1.0 , 0 );
325323 }
326324 }
327- if (GetProcessingSettings ().debugLevel >= 1 ) {
328- mStatKernelTime = kernelTotal * 1000000 / mStatNEvents ;
329- printf (" Execution Time: Total : %50s Time: %'10.0f us%s\n " , " Total Kernel" , mStatKernelTime , nEventReport.c_str ());
330- }
331- printf (" Execution Time: Total : %50s Time: %'10.0f us ( CPU Time : %'10.0f us, %7.2fx ) %s\n " , " Total Wall" , mStatWallTime , mStatCPUTime * 1000000 / mStatNEvents , mStatCPUTime / mTimerTotal .GetElapsedTime (), nEventReport.c_str ());
325+ double gpu_time = GetProcessingSettings ().debugLevel >= 1 ? kernelTotal : -1.0 ;
326+ writer.row (' ' , 0 , " Wall" , gpu_time, mStatCPUTime , mTimerTotal .GetElapsedTime (), 0 , nEventReport);
332327 } else if (GetProcessingSettings ().debugLevel >= 0 ) {
333328 GPUInfo (" Total Wall Time: %10.0f us%s" , mStatWallTime , nEventReport.c_str ());
334329 }
0 commit comments