Skip to content

Commit bdf6292

Browse files
committed
GPU: fix count of CompressionGatherKernel in timing output
1 parent aa60f5e commit bdf6292

File tree

4 files changed

+13
-14
lines changed

4 files changed

+13
-14
lines changed

GPU/GPUTracking/Base/GPUReconstructionCPU.cxx

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -357,15 +357,14 @@ GPUReconstructionCPU::timerMeta* GPUReconstructionCPU::insertTimer(uint32_t id,
357357
return retVal;
358358
}
359359

360-
GPUReconstructionCPU::timerMeta* GPUReconstructionCPU::getTimerById(uint32_t id)
360+
GPUReconstructionCPU::timerMeta* GPUReconstructionCPU::getTimerById(uint32_t id, bool increment)
361361
{
362362
timerMeta* retVal = nullptr;
363363
while (timerFlag.test_and_set()) {
364-
;
365364
}
366365
if (mTimers.size() > id && mTimers[id]) {
367366
retVal = mTimers[id].get();
368-
retVal->count++;
367+
retVal->count += increment;
369368
}
370369
timerFlag.clear();
371370
return retVal;

GPU/GPUTracking/Base/GPUReconstructionCPU.h

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -177,7 +177,7 @@ class GPUReconstructionCPU : public GPUReconstructionKernels<GPUReconstructionCP
177177
RecoStepTimerMeta mTimersRecoSteps[GPUDataTypes::N_RECO_STEPS];
178178
HighResTimer timerTotal;
179179
template <class T, int32_t I = 0>
180-
HighResTimer& getKernelTimer(RecoStep step, int32_t num = 0, size_t addMemorySize = 0);
180+
HighResTimer& getKernelTimer(RecoStep step, int32_t num = 0, size_t addMemorySize = 0, bool increment = true);
181181
template <class T, int32_t J = -1>
182182
HighResTimer& getTimer(const char* name, int32_t num = -1);
183183

@@ -186,7 +186,7 @@ class GPUReconstructionCPU : public GPUReconstructionKernels<GPUReconstructionCP
186186
private:
187187
size_t TransferMemoryResourcesHelper(GPUProcessor* proc, int32_t stream, bool all, bool toGPU);
188188
uint32_t getNextTimerId();
189-
timerMeta* getTimerById(uint32_t id);
189+
timerMeta* getTimerById(uint32_t id, bool increment = true);
190190
timerMeta* insertTimer(uint32_t id, std::string&& name, int32_t J, int32_t num, int32_t type, RecoStep step);
191191
};
192192

@@ -272,10 +272,10 @@ inline void GPUReconstructionCPU::AddGPUEvents(T*& events)
272272
}
273273

274274
template <class T, int32_t I>
275-
HighResTimer& GPUReconstructionCPU::getKernelTimer(RecoStep step, int32_t num, size_t addMemorySize)
275+
HighResTimer& GPUReconstructionCPU::getKernelTimer(RecoStep step, int32_t num, size_t addMemorySize, bool increment)
276276
{
277277
static int32_t id = getNextTimerId();
278-
timerMeta* timer = getTimerById(id);
278+
timerMeta* timer = getTimerById(id, increment);
279279
if (timer == nullptr) {
280280
timer = insertTimer(id, GetKernelName<T, I>(), -1, NSLICES, 0, step);
281281
}

GPU/GPUTracking/Global/GPUChain.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -190,9 +190,9 @@ class GPUChain
190190
}
191191

192192
template <class T, int32_t I = 0>
193-
HighResTimer& getKernelTimer(RecoStep step, int32_t num = 0, size_t addMemorySize = 0)
193+
HighResTimer& getKernelTimer(RecoStep step, int32_t num = 0, size_t addMemorySize = 0, bool increment = true)
194194
{
195-
return mRec->getKernelTimer<T, I>(step, num, addMemorySize);
195+
return mRec->getKernelTimer<T, I>(step, num, addMemorySize, increment);
196196
}
197197
template <class T, int32_t J = -1>
198198
HighResTimer& getTimer(const char* name, int32_t num = -1)

GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -100,24 +100,24 @@ int32_t GPUChainTracking::RunTPCCompression()
100100
switch (ProcessingSettings().tpcCompressionGatherModeKernel) {
101101
case 0:
102102
runKernel<GPUTPCCompressionGatherKernels, GPUTPCCompressionGatherKernels::unbuffered>(GetGridBlkStep(nBlocksDefault, outputStream, RecoStep::TPCCompression));
103-
getKernelTimer<GPUTPCCompressionGatherKernels, GPUTPCCompressionGatherKernels::unbuffered>(RecoStep::TPCCompression, 0, outputSize);
103+
getKernelTimer<GPUTPCCompressionGatherKernels, GPUTPCCompressionGatherKernels::unbuffered>(RecoStep::TPCCompression, 0, outputSize, false);
104104
break;
105105
case 1:
106106
runKernel<GPUTPCCompressionGatherKernels, GPUTPCCompressionGatherKernels::buffered32>(GetGridBlkStep(nBlocksDefault, outputStream, RecoStep::TPCCompression));
107-
getKernelTimer<GPUTPCCompressionGatherKernels, GPUTPCCompressionGatherKernels::buffered32>(RecoStep::TPCCompression, 0, outputSize);
107+
getKernelTimer<GPUTPCCompressionGatherKernels, GPUTPCCompressionGatherKernels::buffered32>(RecoStep::TPCCompression, 0, outputSize, false);
108108
break;
109109
case 2:
110110
runKernel<GPUTPCCompressionGatherKernels, GPUTPCCompressionGatherKernels::buffered64>(GetGridBlkStep(nBlocksDefault, outputStream, RecoStep::TPCCompression));
111-
getKernelTimer<GPUTPCCompressionGatherKernels, GPUTPCCompressionGatherKernels::buffered64>(RecoStep::TPCCompression, 0, outputSize);
111+
getKernelTimer<GPUTPCCompressionGatherKernels, GPUTPCCompressionGatherKernels::buffered64>(RecoStep::TPCCompression, 0, outputSize, false);
112112
break;
113113
case 3:
114114
runKernel<GPUTPCCompressionGatherKernels, GPUTPCCompressionGatherKernels::buffered128>(GetGridBlkStep(nBlocksDefault, outputStream, RecoStep::TPCCompression));
115-
getKernelTimer<GPUTPCCompressionGatherKernels, GPUTPCCompressionGatherKernels::buffered128>(RecoStep::TPCCompression, 0, outputSize);
115+
getKernelTimer<GPUTPCCompressionGatherKernels, GPUTPCCompressionGatherKernels::buffered128>(RecoStep::TPCCompression, 0, outputSize, false);
116116
break;
117117
case 4:
118118
static_assert((nBlocksMulti & 1) && nBlocksMulti >= 3);
119119
runKernel<GPUTPCCompressionGatherKernels, GPUTPCCompressionGatherKernels::multiBlock>(GetGridBlkStep(nBlocksMulti, outputStream, RecoStep::TPCCompression));
120-
getKernelTimer<GPUTPCCompressionGatherKernels, GPUTPCCompressionGatherKernels::multiBlock>(RecoStep::TPCCompression, 0, outputSize);
120+
getKernelTimer<GPUTPCCompressionGatherKernels, GPUTPCCompressionGatherKernels::multiBlock>(RecoStep::TPCCompression, 0, outputSize, false);
121121
break;
122122
default:
123123
GPUError("Invalid compression kernel %d selected.", (int32_t)ProcessingSettings().tpcCompressionGatherModeKernel);

0 commit comments

Comments
 (0)