Skip to content

Commit 5835f44

Browse files
committed
GPU: Preparation to make memset on host multi-threaded
1 parent 5399339 commit 5835f44

File tree

2 files changed

+23
-11
lines changed

2 files changed

+23
-11
lines changed

GPU/GPUTracking/Base/GPUReconstructionCPU.cxx

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,21 @@ GPUReconstructionCPU::~GPUReconstructionCPU()
6060
Exit(); // Needs to be identical to GPU backend behavior in order to avoid calling abstract methods later in the destructor
6161
}
6262

63+
// Computes how many OpenMP threads the calling context should use for a host-side kernel.
//
// Returns:
//  - if mProcessingSettings.ompKernels == 2: mProcessingSettings.ompThreads divided by
//    mNestedLoopOmpFactor, plus one for callers whose getOMPThreadNum() is below the
//    division remainder, and never less than 1;
//  - otherwise: mProcessingSettings.ompThreads when ompKernels is non-zero, else 1.
//
// NOTE(review): presumably ompKernels == 2 means kernels run inside an outer parallel loop
// of mNestedLoopOmpFactor threads, so the thread budget is shared among them - confirm.
// NOTE(review): mNestedLoopOmpFactor is used as a divisor; its in-class default is 1, but
// nothing here guards against it being set to 0 - verify against the code that sets it.
int32_t GPUReconstructionCPUBackend::getNOMPThreads()
64+
{
65+
int32_t ompThreads = 0;
66+
if (mProcessingSettings.ompKernels == 2) {
67+
// Base share: integer division of the configured thread count by the nesting factor.
ompThreads = mProcessingSettings.ompThreads / mNestedLoopOmpFactor;
68+
// Hand out the remainder: callers with a thread number below it take one extra thread.
if ((uint32_t)getOMPThreadNum() < mProcessingSettings.ompThreads % mNestedLoopOmpFactor) {
69+
ompThreads++;
70+
}
71+
// The division can yield 0 when the factor exceeds the thread count; always use at least one thread.
ompThreads = std::max(1, ompThreads);
72+
} else {
73+
// Any other non-zero ompKernels value uses the full configured thread count; zero means single-threaded.
ompThreads = mProcessingSettings.ompKernels ? mProcessingSettings.ompThreads : 1;
74+
}
75+
return ompThreads;
76+
}
77+
6378
template <class T, int32_t I, typename... Args>
6479
inline int32_t GPUReconstructionCPUBackend::runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... args)
6580
{
@@ -73,16 +88,7 @@ inline int32_t GPUReconstructionCPUBackend::runKernelBackendInternal(const krnlS
7388
}
7489
uint32_t num = y.num == 0 || y.num == -1 ? 1 : y.num;
7590
for (uint32_t k = 0; k < num; k++) {
76-
int32_t ompThreads = 0;
77-
if (mProcessingSettings.ompKernels == 2) {
78-
ompThreads = mProcessingSettings.ompThreads / mNestedLoopOmpFactor;
79-
if ((uint32_t)getOMPThreadNum() < mProcessingSettings.ompThreads % mNestedLoopOmpFactor) {
80-
ompThreads++;
81-
}
82-
ompThreads = std::max(1, ompThreads);
83-
} else {
84-
ompThreads = mProcessingSettings.ompKernels ? mProcessingSettings.ompThreads : 1;
85-
}
91+
int32_t ompThreads = getNOMPThreads();
8692
if (ompThreads > 1) {
8793
if (mProcessingSettings.debugLevel >= 5) {
8894
printf("Running %d ompThreads\n", ompThreads);
@@ -105,7 +111,12 @@ inline int32_t GPUReconstructionCPUBackend::runKernelBackendInternal(const krnlS
105111
// Specialization of runKernelBackendInternal for the GPUMemClean16 kernel: zero-fills
// `size` bytes starting at `ptr` on the host and returns 0.
//
// A thread count is derived as min(size / 16 MiB, getNOMPThreads()), clamped to at least 1.
// Both branches on that count currently perform the same single memset, so the result does
// not depend on it yet; per the commit title this is preparation for a multi-threaded memset.
// `_xyz` is not used in this specialization.
template <>
106112
inline int32_t GPUReconstructionCPUBackend::runKernelBackendInternal<GPUMemClean16, 0>(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size)
107113
{
108-
memset(ptr, 0, size);
114+
// Allow at most one thread per 16 MiB of memory to clear, capped by the available OpenMP thread count.
int32_t ompThreads = std::max<int32_t>(1, std::min<int32_t>(size / (16 * 1024 * 1024), getNOMPThreads()));
115+
if (ompThreads > 1) {
116+
// Placeholder for the multi-threaded path: identical to the single-threaded branch for now.
memset(ptr, 0, size);
117+
} else {
118+
memset(ptr, 0, size);
119+
}
109120
return 0;
110121
}
111122

GPU/GPUTracking/Base/GPUReconstructionCPU.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ class GPUReconstructionCPUBackend : public GPUReconstruction
4646
uint32_t mNestedLoopOmpFactor = 1;
4747
static int32_t getOMPThreadNum();
4848
static int32_t getOMPMaxThreads();
49+
int32_t getNOMPThreads();
4950
};
5051

5152
class GPUReconstructionCPU : public GPUReconstructionKernels<GPUReconstructionCPUBackend>

0 commit comments

Comments
 (0)