Skip to content

Commit 5f90f0c

Browse files
committed
GPU: Solve a todo to make the timer atomic flag a member variable
1 parent 95ae41e commit 5f90f0c

File tree

2 files changed

+20
-23
lines changed

2 files changed

+20
-23
lines changed

GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx

Lines changed: 17 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -57,17 +57,24 @@ void GPUReconstructionProcessing::runParallelOuterLoop(bool doGPU, uint32_t nThr
5757
}
5858
}
5959

60-
namespace o2::gpu
61-
{
62-
namespace // anonymous
60+
uint32_t GPUReconstructionProcessing::SetAndGetNActiveThreadsOuterLoop(bool condition, uint32_t max)
6361
{
64-
static std::atomic_flag timerFlag = ATOMIC_FLAG_INIT; // TODO: Should be a class member not global, but cannot be moved to header due to ROOT limitation
65-
} // anonymous namespace
66-
} // namespace o2::gpu
62+
if (condition && mProcessingSettings.inKernelParallel != 1) {
63+
mNActiveThreadsOuterLoop = mProcessingSettings.inKernelParallel == 2 ? std::min<uint32_t>(max, mMaxHostThreads) : mMaxHostThreads;
64+
} else {
65+
mNActiveThreadsOuterLoop = 1;
66+
}
67+
if (mProcessingSettings.debugLevel >= 5) {
68+
printf("Running %d threads in outer loop\n", mNActiveThreadsOuterLoop);
69+
}
70+
return mNActiveThreadsOuterLoop;
71+
}
72+
73+
std::atomic_flag GPUReconstructionProcessing::mTimerFlag = ATOMIC_FLAG_INIT;
6774

6875
GPUReconstructionProcessing::timerMeta* GPUReconstructionProcessing::insertTimer(uint32_t id, std::string&& name, int32_t J, int32_t num, int32_t type, RecoStep step)
6976
{
70-
while (timerFlag.test_and_set()) {
77+
while (mTimerFlag.test_and_set()) {
7178
}
7279
if (mTimers.size() <= id) {
7380
mTimers.resize(id + 1);
@@ -81,20 +88,20 @@ GPUReconstructionProcessing::timerMeta* GPUReconstructionProcessing::insertTimer
8188
mTimers[id]->count++;
8289
}
8390
timerMeta* retVal = mTimers[id].get();
84-
timerFlag.clear();
91+
mTimerFlag.clear();
8592
return retVal;
8693
}
8794

8895
GPUReconstructionProcessing::timerMeta* GPUReconstructionProcessing::getTimerById(uint32_t id, bool increment)
8996
{
9097
timerMeta* retVal = nullptr;
91-
while (timerFlag.test_and_set()) {
98+
while (mTimerFlag.test_and_set()) {
9299
}
93100
if (mTimers.size() > id && mTimers[id]) {
94101
retVal = mTimers[id].get();
95102
retVal->count += increment;
96103
}
97-
timerFlag.clear();
104+
mTimerFlag.clear();
98105
return retVal;
99106
}
100107

@@ -104,19 +111,6 @@ uint32_t GPUReconstructionProcessing::getNextTimerId()
104111
return id.fetch_add(1);
105112
}
106113

107-
uint32_t GPUReconstructionProcessing::SetAndGetNActiveThreadsOuterLoop(bool condition, uint32_t max)
108-
{
109-
if (condition && mProcessingSettings.inKernelParallel != 1) {
110-
mNActiveThreadsOuterLoop = mProcessingSettings.inKernelParallel == 2 ? std::min<uint32_t>(max, mMaxHostThreads) : mMaxHostThreads;
111-
} else {
112-
mNActiveThreadsOuterLoop = 1;
113-
}
114-
if (mProcessingSettings.debugLevel >= 5) {
115-
printf("Running %d threads in outer loop\n", mNActiveThreadsOuterLoop);
116-
}
117-
return mNActiveThreadsOuterLoop;
118-
}
119-
120114
std::unique_ptr<gpu_reconstruction_kernels::threadContext> GPUReconstructionProcessing::GetThreadContext()
121115
{
122116
return std::make_unique<gpu_reconstruction_kernels::threadContext>();

GPU/GPUTracking/Base/GPUReconstructionProcessing.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
#include "utils/timer.h"
2222
#include <functional>
23+
#include <atomic>
2324

2425
namespace o2::gpu
2526
{
@@ -135,6 +136,8 @@ class GPUReconstructionProcessing : public GPUReconstruction
135136
uint32_t getNextTimerId();
136137
timerMeta* getTimerById(uint32_t id, bool increment = true);
137138
timerMeta* insertTimer(uint32_t id, std::string&& name, int32_t J, int32_t num, int32_t type, RecoStep step);
139+
140+
static std::atomic_flag mTimerFlag;
138141
};
139142

140143
template <class T>

0 commit comments

Comments
 (0)