@@ -60,6 +60,21 @@ GPUReconstructionCPU::~GPUReconstructionCPU()
6060 Exit (); // Needs to be identical to GPU backend bahavior in order to avoid calling abstract methods later in the destructor
6161}
6262
63+ int32_t GPUReconstructionCPUBackend::getNOMPThreads ()
64+ {
65+ int32_t ompThreads = 0 ;
66+ if (mProcessingSettings .ompKernels == 2 ) {
67+ ompThreads = mProcessingSettings .ompThreads / mNestedLoopOmpFactor ;
68+ if ((uint32_t )getOMPThreadNum () < mProcessingSettings .ompThreads % mNestedLoopOmpFactor ) {
69+ ompThreads++;
70+ }
71+ ompThreads = std::max (1 , ompThreads);
72+ } else {
73+ ompThreads = mProcessingSettings .ompKernels ? mProcessingSettings .ompThreads : 1 ;
74+ }
75+ return ompThreads;
76+ }
77+
6378template <class T , int32_t I, typename ... Args>
6479inline int32_t GPUReconstructionCPUBackend::runKernelBackendInternal (const krnlSetupTime& _xyz, const Args&... args)
6580{
@@ -73,16 +88,7 @@ inline int32_t GPUReconstructionCPUBackend::runKernelBackendInternal(const krnlS
7388 }
7489 uint32_t num = y.num == 0 || y.num == -1 ? 1 : y.num ;
7590 for (uint32_t k = 0 ; k < num; k++) {
76- int32_t ompThreads = 0 ;
77- if (mProcessingSettings .ompKernels == 2 ) {
78- ompThreads = mProcessingSettings .ompThreads / mNestedLoopOmpFactor ;
79- if ((uint32_t )getOMPThreadNum () < mProcessingSettings .ompThreads % mNestedLoopOmpFactor ) {
80- ompThreads++;
81- }
82- ompThreads = std::max (1 , ompThreads);
83- } else {
84- ompThreads = mProcessingSettings .ompKernels ? mProcessingSettings .ompThreads : 1 ;
85- }
91+ int32_t ompThreads = getNOMPThreads ();
8692 if (ompThreads > 1 ) {
8793 if (mProcessingSettings .debugLevel >= 5 ) {
8894 printf (" Running %d ompThreads\n " , ompThreads);
@@ -105,7 +111,12 @@ inline int32_t GPUReconstructionCPUBackend::runKernelBackendInternal(const krnlS
105111template <>
106112inline int32_t GPUReconstructionCPUBackend::runKernelBackendInternal<GPUMemClean16, 0 >(const krnlSetupTime& _xyz, void * const & ptr, uint64_t const & size)
107113{
108- memset (ptr, 0 , size);
114+ int32_t ompThreads = std::max<int32_t >(1 , std::min<int32_t >(size / (16 * 1024 * 1024 ), getNOMPThreads ()));
115+ if (ompThreads > 1 ) {
116+ memset (ptr, 0 , size);
117+ } else {
118+ memset (ptr, 0 , size);
119+ }
109120 return 0 ;
110121}
111122
0 commit comments