Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 22 additions & 11 deletions GPU/GPUTracking/Base/GPUReconstructionCPU.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,21 @@ GPUReconstructionCPU::~GPUReconstructionCPU()
Exit(); // Needs to be identical to GPU backend behavior in order to avoid calling abstract methods later in the destructor
}

// Computes the thread count from mProcessingSettings:
//  - ompKernels == 2: ompThreads is divided by mNestedLoopOmpFactor; callers whose
//    getOMPThreadNum() is below the division remainder get one extra thread, and the
//    result is never less than 1.
//  - any other non-zero ompKernels: ompThreads is returned as is.
//  - ompKernels == 0: 1 is returned.
int32_t GPUReconstructionCPUBackend::getNOMPThreads()
{
  if (mProcessingSettings.ompKernels != 2) {
    // No nested splitting: either the full configured count or a single thread.
    return mProcessingSettings.ompKernels ? mProcessingSettings.ompThreads : 1;
  }

  // Even share of the configured threads per nested-loop slot.
  int32_t nThreads = mProcessingSettings.ompThreads / mNestedLoopOmpFactor;
  // Hand out the remainder one by one to the lowest thread numbers.
  if (static_cast<uint32_t>(getOMPThreadNum()) < mProcessingSettings.ompThreads % mNestedLoopOmpFactor) {
    ++nThreads;
  }
  return std::max(1, nThreads);
}

template <class T, int32_t I, typename... Args>
inline int32_t GPUReconstructionCPUBackend::runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... args)
{
Expand All @@ -73,16 +88,7 @@ inline int32_t GPUReconstructionCPUBackend::runKernelBackendInternal(const krnlS
}
uint32_t num = y.num == 0 || y.num == -1 ? 1 : y.num;
for (uint32_t k = 0; k < num; k++) {
int32_t ompThreads = 0;
if (mProcessingSettings.ompKernels == 2) {
ompThreads = mProcessingSettings.ompThreads / mNestedLoopOmpFactor;
if ((uint32_t)getOMPThreadNum() < mProcessingSettings.ompThreads % mNestedLoopOmpFactor) {
ompThreads++;
}
ompThreads = std::max(1, ompThreads);
} else {
ompThreads = mProcessingSettings.ompKernels ? mProcessingSettings.ompThreads : 1;
}
int32_t ompThreads = getNOMPThreads();
if (ompThreads > 1) {
if (mProcessingSettings.debugLevel >= 5) {
printf("Running %d ompThreads\n", ompThreads);
Expand All @@ -105,7 +111,12 @@ inline int32_t GPUReconstructionCPUBackend::runKernelBackendInternal(const krnlS
// Specialization of runKernelBackendInternal for GPUMemClean16:
// zero-fills `size` bytes starting at `ptr` and always returns 0.
// The `_xyz` argument is not used in this body.
template <>
inline int32_t GPUReconstructionCPUBackend::runKernelBackendInternal<GPUMemClean16, 0>(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size)
{
// NOTE(review): this unconditional memset already clears the whole buffer; the
// memset calls in the branches below repeat it. Presumably this line is the
// pre-change statement shown next to its replacement — confirm before relying on it.
memset(ptr, 0, size);
// One thread per full 16 MiB of `size`, capped by getNOMPThreads(), and at least 1.
int32_t ompThreads = std::max<int32_t>(1, std::min<int32_t>(size / (16 * 1024 * 1024), getNOMPThreads()));
// NOTE(review): both branches perform the same full-buffer memset, so ompThreads
// does not currently change what is executed here — looks like a placeholder for a
// multi-threaded clear; confirm.
if (ompThreads > 1) {
memset(ptr, 0, size);
} else {
memset(ptr, 0, size);
}
return 0;
}

Expand Down
1 change: 1 addition & 0 deletions GPU/GPUTracking/Base/GPUReconstructionCPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ class GPUReconstructionCPUBackend : public GPUReconstruction
uint32_t mNestedLoopOmpFactor = 1;
static int32_t getOMPThreadNum();
static int32_t getOMPMaxThreads();
int32_t getNOMPThreads();
};

class GPUReconstructionCPU : public GPUReconstructionKernels<GPUReconstructionCPUBackend>
Expand Down
2 changes: 1 addition & 1 deletion GPU/GPUTracking/Base/cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ elseif(GPUCA_CUDA_COMPILE_MODE STREQUAL "perkernel")
add_custom_command(
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/GPUTrackingCUDAKernelModules.o
COMMAND cp -u $<TARGET_OBJECTS:GPUTrackingCUDAKernels> ${CMAKE_CURRENT_BINARY_DIR}/cuda_kernel_module_fatbin/
COMMAND ${CMAKE_LINKER} --relocatable --format binary --output ${CMAKE_CURRENT_BINARY_DIR}/GPUTrackingCUDAKernelModules.o $<PATH:RELATIVE_PATH,$<LIST:TRANSFORM,$<PATH:GET_FILENAME,$<TARGET_OBJECTS:GPUTrackingCUDAKernels>>,PREPEND,${CMAKE_CURRENT_BINARY_DIR}/cuda_kernel_module_fatbin/>,${CMAKE_CURRENT_BINARY_DIR}>
COMMAND ${CMAKE_LINKER} -z noexecstack --relocatable --format binary --output ${CMAKE_CURRENT_BINARY_DIR}/GPUTrackingCUDAKernelModules.o $<PATH:RELATIVE_PATH,$<LIST:TRANSFORM,$<PATH:GET_FILENAME,$<TARGET_OBJECTS:GPUTrackingCUDAKernels>>,PREPEND,${CMAKE_CURRENT_BINARY_DIR}/cuda_kernel_module_fatbin/>,${CMAKE_CURRENT_BINARY_DIR}>
DEPENDS GPUTrackingCUDAKernels $<TARGET_OBJECTS:GPUTrackingCUDAKernels>
COMMENT "Compiling fatbin kernels ${CMAKE_CURRENT_BINARY_DIR}/GPUTrackingCUDAKernelModules.o"
VERBATIM
Expand Down
2 changes: 1 addition & 1 deletion GPU/GPUTracking/Base/hip/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ elseif(GPUCA_HIP_COMPILE_MODE STREQUAL "perkernel")
add_custom_command(
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/GPUTrackingHIPKernelModules.o
COMMAND cp -u $<TARGET_OBJECTS:GPUTrackingHIPKernels> ${CMAKE_CURRENT_BINARY_DIR}/hip_kernel_module_fatbin/
COMMAND ${CMAKE_LINKER} --relocatable --format binary --output ${CMAKE_CURRENT_BINARY_DIR}/GPUTrackingHIPKernelModules.o $<PATH:RELATIVE_PATH,$<LIST:TRANSFORM,$<PATH:GET_FILENAME,$<TARGET_OBJECTS:GPUTrackingHIPKernels>>,PREPEND,${CMAKE_CURRENT_BINARY_DIR}/hip_kernel_module_fatbin/>,${CMAKE_CURRENT_BINARY_DIR}>
COMMAND ${CMAKE_LINKER} -z noexecstack --relocatable --format binary --output ${CMAKE_CURRENT_BINARY_DIR}/GPUTrackingHIPKernelModules.o $<PATH:RELATIVE_PATH,$<LIST:TRANSFORM,$<PATH:GET_FILENAME,$<TARGET_OBJECTS:GPUTrackingHIPKernels>>,PREPEND,${CMAKE_CURRENT_BINARY_DIR}/hip_kernel_module_fatbin/>,${CMAKE_CURRENT_BINARY_DIR}>
DEPENDS GPUTrackingHIPKernels $<TARGET_OBJECTS:GPUTrackingHIPKernels>
COMMENT "Compiling fatbin kernels ${CMAKE_CURRENT_BINARY_DIR}/GPUTrackingHIPKernelModules.o"
VERBATIM
Expand Down
2 changes: 1 addition & 1 deletion GPU/GPUTracking/cmake/helpers.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ function(create_binary_resource RESOURCE OUTPUTFILE)
FILE(RELATIVE_PATH input-file-rel ${CMAKE_CURRENT_BINARY_DIR} ${input-file-abs})
add_custom_command(
OUTPUT ${OUTPUTFILE}
COMMAND ${CMAKE_LINKER} --relocatable --format binary --output ${OUTPUTFILE} ${input-file-rel}
COMMAND ${CMAKE_LINKER} -z noexecstack --relocatable --format binary --output ${OUTPUTFILE} ${input-file-rel}
DEPENDS ${input-file-rel}
COMMENT "Adding binary resource ${input-file-rel}"
VERBATIM
Expand Down