Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions GPU/GPUTracking/Base/GPUReconstructionCPU.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ inline void GPUReconstructionCPUBackend::runKernelBackendInternal(const krnlSetu
int32_t nThreads = getNKernelHostThreads(false);
if (nThreads > 1) {
if (mProcessingSettings.debugLevel >= 5) {
printf("Running %d Threads\n", nThreads);
printf("Running %d Threads\n", mThreading->activeThreads->max_concurrency());
}
tbb::this_task_arena::isolate([&] {
mThreading->activeThreads->execute([&] {
Expand All @@ -91,10 +91,10 @@ inline void GPUReconstructionCPUBackend::runKernelBackendInternal(const krnlSetu
template <>
inline void GPUReconstructionCPUBackend::runKernelBackendInternal<GPUMemClean16, 0>(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size)
{
int32_t nnThreads = std::max<int32_t>(1, std::min<int32_t>(size / (16 * 1024 * 1024), getNKernelHostThreads(true)));
if (nnThreads > 1) {
tbb::parallel_for(0, nnThreads, [&](int iThread) {
size_t threadSize = size / nnThreads;
int32_t nThreads = std::max<int32_t>(1, std::min<int32_t>(size / (16 * 1024 * 1024), getNKernelHostThreads(true)));
if (nThreads > 1) {
tbb::parallel_for(0, nThreads, [&](int iThread) {
size_t threadSize = size / nThreads;
if (threadSize % 4096) {
threadSize += 4096 - threadSize % 4096;
}
Expand Down
4 changes: 2 additions & 2 deletions GPU/GPUTracking/Base/cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,8 @@ add_custom_command(
create_binary_resource(${GPU_RTC_BIN}.src ${GPU_RTC_BIN}.src.o)

add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${MODULE}_CUDA_SRC_CHK.done
COMMAND ! grep "# [0-9]* \"\\(/usr/\\|.*GCC-Toolchain\\)" ${GPU_RTC_BIN}.src > ${CMAKE_CURRENT_BINARY_DIR}/${MODULE}_CUDA_SRC_CHK.done || bash -c "echo ERROR: CUDA RTC sources contain standard headers 1>&2 && exit 1"
COMMENT "Checking CUDA RTC File ${GPU_RTC_BIN}.src"
COMMAND ! grep "# [0-9]* \"\\(/usr/\\|.*GCC-Toolchain\\)" ${GPU_RTC_BIN}.src > ${CMAKE_CURRENT_BINARY_DIR}/${MODULE}_CUDA_SRC_CHK.done || bash -c "echo ERROR: CUDA RTC sources contain system headers 1>&2 && exit 1"
COMMENT "Checking CUDA RTC File ${GPU_RTC_BIN}.src for system headers"
DEPENDS ${GPU_RTC_BIN}.src VERBATIM)
add_custom_target(${MODULE}_CUDA_SRC_CHK ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${MODULE}_CUDA_SRC_CHK.done)

Expand Down
4 changes: 2 additions & 2 deletions GPU/GPUTracking/Base/hip/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,8 @@ add_custom_command(
create_binary_resource(${GPU_RTC_BIN}.src ${GPU_RTC_BIN}.src.o)

add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${MODULE}_HIP_SRC_CHK.done
COMMAND ! grep "# [0-9]* \"\\(/usr/\\|.*GCC-Toolchain\\)" ${GPU_RTC_BIN}.src > ${CMAKE_CURRENT_BINARY_DIR}/${MODULE}_HIP_SRC_CHK.done || bash -c "echo ERROR: HIP RTC sources contain standard headers 1>&2 && exit 1"
COMMENT "Checking HIP RTC File ${GPU_RTC_BIN}.src"
COMMAND ! grep "# [0-9]* \"\\(/usr/\\|.*GCC-Toolchain\\)" ${GPU_RTC_BIN}.src > ${CMAKE_CURRENT_BINARY_DIR}/${MODULE}_HIP_SRC_CHK.done || bash -c "echo ERROR: HIP RTC sources contain system headers 1>&2 && exit 1"
COMMENT "Checking HIP RTC File ${GPU_RTC_BIN}.src for system headers"
DEPENDS ${GPU_RTC_BIN}.src VERBATIM)
add_custom_target(${MODULE}_HIP_SRC_CHK ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${MODULE}_HIP_SRC_CHK.done)

Expand Down
2 changes: 1 addition & 1 deletion GPU/GPUTracking/Global/GPUChainTracking.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ bool GPUChainTracking::ValidateSettings()
return false;
}
if ((GetRecoStepsGPU() & RecoStep::TPCClusterFinding) && std::max(GetProcessingSettings().nTPCClustererLanes + 1, GetProcessingSettings().nTPCClustererLanes * 2) + (GetProcessingSettings().doublePipeline ? 1 : 0) > (int32_t)mRec->NStreams()) {
GPUError("NStreams (%d) must be > nTPCClustererLanes (%d)", mRec->NStreams(), (int32_t)GetProcessingSettings().nTPCClustererLanes);
GPUError("NStreams of %d insufficient for %d nTPCClustererLanes", mRec->NStreams(), (int32_t)GetProcessingSettings().nTPCClustererLanes);
return false;
}
if (GetProcessingSettings().noGPUMemoryRegistration && GetProcessingSettings().tpcCompressionGatherMode != 3) {
Expand Down
2 changes: 1 addition & 1 deletion GPU/GPUTracking/cmake/kernel_helpers.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,6 @@ function(o2_gpu_add_kernel kernel_name kernel_files)
set_property(TARGET O2_GPU_KERNELS APPEND PROPERTY O2_GPU_KERNEL_NAMES "${kernel_name}")
set_property(TARGET O2_GPU_KERNELS APPEND PROPERTY O2_GPU_KERNEL_INCLUDES "${TMP_KERNEL_CLASS_FILE}")
set_property(TARGET O2_GPU_KERNELS APPEND PROPERTY O2_GPU_KERNEL_FILES "${TMP_KERNEL_CLASS_FILE}.cxx")
# add_custom_command OUTPUT option does not support target-dependent generator expressions, thus this workaround

set(O2_GPU_KERNEL_TEMPLATE_FILES "GPUConstantMem.h")
if (GPUCA_BUILD_DEBUG)
Expand All @@ -102,6 +101,7 @@ function(o2_gpu_add_kernel kernel_name kernel_files)
list(TRANSFORM O2_GPU_KERNEL_TEMPLATE_FILES PREPEND "#include \"")
list(JOIN O2_GPU_KERNEL_TEMPLATE_FILES "\n" O2_GPU_KERNEL_TEMPLATE_FILES)

# add_custom_command OUTPUT option does not support target-dependent generator expressions, thus this workaround to create CUDA and HIP files
string(REPLACE ", " "_" TMP_FILENAME "${kernel_name}")
if(CUDA_ENABLED)
set(TMP_FILENAMEA "${O2_GPU_KERNEL_WRAPPER_FOLDER}/krnl_${TMP_FILENAME}.cu")
Expand Down