Skip to content

Commit e382946

Browse files
committed
GPU: Add GPUCA_RTC_NO_COMPILED_KERNELS debug option
1 parent 396c94a commit e382946

File tree

4 files changed: 55 additions and 39 deletions.

GPU/GPUTracking/Base/cuda/CMakeLists.txt

Lines changed: 22 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -159,25 +159,29 @@ endif()
159159
if(GPUCA_CUDA_COMPILE_MODE STREQUAL "onefile")
160160
target_compile_definitions(${targetName} PRIVATE GPUCA_KERNEL_COMPILE_MODE=0)
161161
elseif(GPUCA_CUDA_COMPILE_MODE STREQUAL "perkernel")
162-
add_library(GPUTrackingCUDAKernels OBJECT $<JOIN:$<LIST:TRANSFORM,$<LIST:TRANSFORM,$<LIST:TRANSFORM,$<TARGET_PROPERTY:O2_GPU_KERNELS,O2_GPU_KERNEL_NAMES>,REPLACE,[^A-Za-z0-9]+,_>,PREPEND,${O2_GPU_KERNEL_WRAPPER_FOLDER}/krnl_>,APPEND,.cu>, >)
163-
set_property(TARGET GPUTrackingCUDAKernels PROPERTY CUDA_FATBIN_COMPILATION ON)
164-
set_property(TARGET GPUTrackingCUDAKernels PROPERTY CUDA_SEPARABLE_COMPILATION OFF)
165162
target_compile_definitions(${targetName} PRIVATE GPUCA_KERNEL_COMPILE_MODE=1)
166-
target_compile_definitions(GPUTrackingCUDAKernels PRIVATE $<TARGET_PROPERTY:${targetName},COMPILE_DEFINITIONS>)
167-
target_include_directories(GPUTrackingCUDAKernels PRIVATE $<TARGET_PROPERTY:${targetName},INCLUDE_DIRECTORIES>)
168-
target_link_libraries(GPUTrackingCUDAKernels PRIVATE $<TARGET_PROPERTY:${targetName},LINK_LIBRARIES>)
169-
file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/cuda_kernel_module_fatbin)
170-
add_custom_command(
171-
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/GPUTrackingCUDAKernelModules.o
172-
COMMAND cp -u $<TARGET_OBJECTS:GPUTrackingCUDAKernels> ${CMAKE_CURRENT_BINARY_DIR}/cuda_kernel_module_fatbin/
173-
COMMAND ${CMAKE_LINKER} -z noexecstack --relocatable --format binary --output ${CMAKE_CURRENT_BINARY_DIR}/GPUTrackingCUDAKernelModules.o $<PATH:RELATIVE_PATH,$<LIST:TRANSFORM,$<PATH:GET_FILENAME,$<TARGET_OBJECTS:GPUTrackingCUDAKernels>>,PREPEND,${CMAKE_CURRENT_BINARY_DIR}/cuda_kernel_module_fatbin/>,${CMAKE_CURRENT_BINARY_DIR}>
174-
DEPENDS GPUTrackingCUDAKernels $<TARGET_OBJECTS:GPUTrackingCUDAKernels>
175-
COMMENT "Compiling fatbin kernels ${CMAKE_CURRENT_BINARY_DIR}/GPUTrackingCUDAKernelModules.o"
176-
VERBATIM
177-
COMMAND_EXPAND_LISTS
178-
)
179-
target_sources(${targetName} PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/GPUTrackingCUDAKernelModules.o)
180-
set_source_files_properties(${CMAKE_CURRENT_BINARY_DIR}/GPUTrackingCUDAKernelModules.o PROPERTIES EXTERNAL_OBJECT true GENERATED true)
163+
if(NOT DEFINED GPUCA_RTC_NO_COMPILED_KERNELS OR NOT GPUCA_RTC_NO_COMPILED_KERNELS)
164+
add_library(GPUTrackingCUDAKernels OBJECT $<JOIN:$<LIST:TRANSFORM,$<LIST:TRANSFORM,$<LIST:TRANSFORM,$<TARGET_PROPERTY:O2_GPU_KERNELS,O2_GPU_KERNEL_NAMES>,REPLACE,[^A-Za-z0-9]+,_>,PREPEND,${O2_GPU_KERNEL_WRAPPER_FOLDER}/krnl_>,APPEND,.cu>, >)
165+
set_property(TARGET GPUTrackingCUDAKernels PROPERTY CUDA_FATBIN_COMPILATION ON)
166+
set_property(TARGET GPUTrackingCUDAKernels PROPERTY CUDA_SEPARABLE_COMPILATION OFF)
167+
target_compile_definitions(GPUTrackingCUDAKernels PRIVATE $<TARGET_PROPERTY:${targetName},COMPILE_DEFINITIONS>)
168+
target_include_directories(GPUTrackingCUDAKernels PRIVATE $<TARGET_PROPERTY:${targetName},INCLUDE_DIRECTORIES>)
169+
target_link_libraries(GPUTrackingCUDAKernels PRIVATE $<TARGET_PROPERTY:${targetName},LINK_LIBRARIES>)
170+
file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/cuda_kernel_module_fatbin)
171+
add_custom_command(
172+
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/GPUTrackingCUDAKernelModules.o
173+
COMMAND cp -u $<TARGET_OBJECTS:GPUTrackingCUDAKernels> ${CMAKE_CURRENT_BINARY_DIR}/cuda_kernel_module_fatbin/
174+
COMMAND ${CMAKE_LINKER} -z noexecstack --relocatable --format binary --output ${CMAKE_CURRENT_BINARY_DIR}/GPUTrackingCUDAKernelModules.o $<PATH:RELATIVE_PATH,$<LIST:TRANSFORM,$<PATH:GET_FILENAME,$<TARGET_OBJECTS:GPUTrackingCUDAKernels>>,PREPEND,${CMAKE_CURRENT_BINARY_DIR}/cuda_kernel_module_fatbin/>,${CMAKE_CURRENT_BINARY_DIR}>
175+
DEPENDS GPUTrackingCUDAKernels $<TARGET_OBJECTS:GPUTrackingCUDAKernels>
176+
COMMENT "Compiling fatbin kernels ${CMAKE_CURRENT_BINARY_DIR}/GPUTrackingCUDAKernelModules.o"
177+
VERBATIM
178+
COMMAND_EXPAND_LISTS
179+
)
180+
target_sources(${targetName} PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/GPUTrackingCUDAKernelModules.o)
181+
set_source_files_properties(${CMAKE_CURRENT_BINARY_DIR}/GPUTrackingCUDAKernelModules.o PROPERTIES EXTERNAL_OBJECT true GENERATED true)
182+
else()
183+
set_source_files_properties(GPUReconstructionCUDA.cu PROPERTIES COMPILE_DEFINITIONS GPUCA_RTC_NO_COMPILED_KERNELS)
184+
endif()
181185

182186
elseif(GPUCA_CUDA_COMPILE_MODE STREQUAL "rdc")
183187
message(FATAL_ERROR "CUDA RDC compilation of GPUReconstruction ios not yet working!")

GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,11 @@
3434
#else // HIP
3535
#define PER_KERNEL_OBJECT_EXT _hip_o
3636
#endif
37+
#ifdef GPUCA_RTC_NO_COMPILED_KERNELS
38+
#define GPUCA_KRNL(x_class, ...) static void* GPUCA_M_CAT3(_binary_cuda_kernel_module_fatbin_krnl_, GPUCA_M_KRNL_NAME(x_class), GPUCA_M_CAT(PER_KERNEL_OBJECT_EXT, _start)) = nullptr;
39+
#else
3740
#define GPUCA_KRNL(x_class, ...) QGET_LD_BINARY_SYMBOLS(GPUCA_M_CAT3(cuda_kernel_module_fatbin_krnl_, GPUCA_M_KRNL_NAME(x_class), PER_KERNEL_OBJECT_EXT))
41+
#endif
3842
#include "GPUReconstructionKernelList.h"
3943
#undef GPUCA_KRNL
4044
#endif
@@ -335,6 +339,9 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime()
335339
}
336340
#if defined(GPUCA_KERNEL_COMPILE_MODE) && GPUCA_KERNEL_COMPILE_MODE == 1
337341
else {
342+
#ifdef GPUCA_RTC_NO_COMPILED_KERNELS
343+
GPUFatal("Compiled with GPUCA_RTC_NO_COMPILED_KERNELS, must run RTC mode!");
344+
#endif
338345
#define GPUCA_KRNL(x_class, ...) \
339346
mInternals->kernelModules.emplace_back(std::make_unique<CUmodule>()); \
340347
GPUChkErr(cuModuleLoadData(mInternals->kernelModules.back().get(), GPUCA_M_CAT3(_binary_cuda_kernel_module_fatbin_krnl_, GPUCA_M_KRNL_NAME(x_class), GPUCA_M_CAT(PER_KERNEL_OBJECT_EXT, _start))));

GPU/GPUTracking/Base/hip/CMakeLists.txt

Lines changed: 25 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -223,29 +223,33 @@ endif()
223223
if(GPUCA_HIP_COMPILE_MODE STREQUAL "onefile")
224224
target_compile_definitions(${targetName} PRIVATE GPUCA_KERNEL_COMPILE_MODE=0)
225225
elseif(GPUCA_HIP_COMPILE_MODE STREQUAL "perkernel")
226-
add_library(GPUTrackingHIPKernels OBJECT $<JOIN:$<LIST:TRANSFORM,$<LIST:TRANSFORM,$<LIST:TRANSFORM,$<TARGET_PROPERTY:O2_GPU_KERNELS,O2_GPU_KERNEL_NAMES>,REPLACE,[^A-Za-z0-9]+,_>,PREPEND,${O2_GPU_KERNEL_WRAPPER_FOLDER}/krnl_>,APPEND,.hip>, >)
227-
target_compile_options(GPUTrackingHIPKernels PRIVATE "--cuda-device-only")
228-
target_compile_options(GPUTrackingHIPKernels PRIVATE $<$<COMPILE_LANGUAGE:HIP>:-fno-gpu-rdc>)
229-
target_link_options(GPUTrackingHIPKernels PRIVATE $<$<COMPILE_LANGUAGE:HIP>:-fno-gpu-rdc>)
230226
target_compile_definitions(${targetName} PRIVATE GPUCA_KERNEL_COMPILE_MODE=1)
231-
target_compile_definitions(GPUTrackingHIPKernels PRIVATE $<TARGET_PROPERTY:${targetName},COMPILE_DEFINITIONS>)
232-
target_include_directories(GPUTrackingHIPKernels PRIVATE $<TARGET_PROPERTY:${targetName},INCLUDE_DIRECTORIES>)
233-
target_link_libraries(GPUTrackingHIPKernels PRIVATE $<TARGET_PROPERTY:${targetName},LINK_LIBRARIES>)
234-
if(NOT DEFINED GPUCA_HIP_HIPIFY_FROM_CUDA OR "${GPUCA_HIP_HIPIFY_FROM_CUDA}")
235-
add_dependencies(GPUTrackingHIPKernels ${MODULE}_HIPIFIED)
227+
if(NOT DEFINED GPUCA_RTC_NO_COMPILED_KERNELS OR NOT GPUCA_RTC_NO_COMPILED_KERNELS)
228+
add_library(GPUTrackingHIPKernels OBJECT $<JOIN:$<LIST:TRANSFORM,$<LIST:TRANSFORM,$<LIST:TRANSFORM,$<TARGET_PROPERTY:O2_GPU_KERNELS,O2_GPU_KERNEL_NAMES>,REPLACE,[^A-Za-z0-9]+,_>,PREPEND,${O2_GPU_KERNEL_WRAPPER_FOLDER}/krnl_>,APPEND,.hip>, >)
229+
target_compile_options(GPUTrackingHIPKernels PRIVATE "--cuda-device-only")
230+
target_compile_options(GPUTrackingHIPKernels PRIVATE $<$<COMPILE_LANGUAGE:HIP>:-fno-gpu-rdc>)
231+
target_link_options(GPUTrackingHIPKernels PRIVATE $<$<COMPILE_LANGUAGE:HIP>:-fno-gpu-rdc>)
232+
target_compile_definitions(GPUTrackingHIPKernels PRIVATE $<TARGET_PROPERTY:${targetName},COMPILE_DEFINITIONS>)
233+
target_include_directories(GPUTrackingHIPKernels PRIVATE $<TARGET_PROPERTY:${targetName},INCLUDE_DIRECTORIES>)
234+
target_link_libraries(GPUTrackingHIPKernels PRIVATE $<TARGET_PROPERTY:${targetName},LINK_LIBRARIES>)
235+
if(NOT DEFINED GPUCA_HIP_HIPIFY_FROM_CUDA OR "${GPUCA_HIP_HIPIFY_FROM_CUDA}")
236+
add_dependencies(GPUTrackingHIPKernels ${MODULE}_HIPIFIED)
237+
endif()
238+
file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/hip_kernel_module_fatbin)
239+
add_custom_command(
240+
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/GPUTrackingHIPKernelModules.o
241+
COMMAND cp -u $<TARGET_OBJECTS:GPUTrackingHIPKernels> ${CMAKE_CURRENT_BINARY_DIR}/hip_kernel_module_fatbin/
242+
COMMAND ${CMAKE_LINKER} -z noexecstack --relocatable --format binary --output ${CMAKE_CURRENT_BINARY_DIR}/GPUTrackingHIPKernelModules.o $<PATH:RELATIVE_PATH,$<LIST:TRANSFORM,$<PATH:GET_FILENAME,$<TARGET_OBJECTS:GPUTrackingHIPKernels>>,PREPEND,${CMAKE_CURRENT_BINARY_DIR}/hip_kernel_module_fatbin/>,${CMAKE_CURRENT_BINARY_DIR}>
243+
DEPENDS GPUTrackingHIPKernels $<TARGET_OBJECTS:GPUTrackingHIPKernels>
244+
COMMENT "Compiling fatbin kernels ${CMAKE_CURRENT_BINARY_DIR}/GPUTrackingHIPKernelModules.o"
245+
VERBATIM
246+
COMMAND_EXPAND_LISTS
247+
)
248+
target_sources(${targetName} PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/GPUTrackingHIPKernelModules.o)
249+
set_source_files_properties(${CMAKE_CURRENT_BINARY_DIR}/GPUTrackingHIPKernelModules.o PROPERTIES EXTERNAL_OBJECT true GENERATED true)
250+
else()
251+
set_source_files_properties(${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIP.hip PROPERTIES COMPILE_DEFINITIONS GPUCA_RTC_NO_COMPILED_KERNELS)
236252
endif()
237-
file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/hip_kernel_module_fatbin)
238-
add_custom_command(
239-
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/GPUTrackingHIPKernelModules.o
240-
COMMAND cp -u $<TARGET_OBJECTS:GPUTrackingHIPKernels> ${CMAKE_CURRENT_BINARY_DIR}/hip_kernel_module_fatbin/
241-
COMMAND ${CMAKE_LINKER} -z noexecstack --relocatable --format binary --output ${CMAKE_CURRENT_BINARY_DIR}/GPUTrackingHIPKernelModules.o $<PATH:RELATIVE_PATH,$<LIST:TRANSFORM,$<PATH:GET_FILENAME,$<TARGET_OBJECTS:GPUTrackingHIPKernels>>,PREPEND,${CMAKE_CURRENT_BINARY_DIR}/hip_kernel_module_fatbin/>,${CMAKE_CURRENT_BINARY_DIR}>
242-
DEPENDS GPUTrackingHIPKernels $<TARGET_OBJECTS:GPUTrackingHIPKernels>
243-
COMMENT "Compiling fatbin kernels ${CMAKE_CURRENT_BINARY_DIR}/GPUTrackingHIPKernelModules.o"
244-
VERBATIM
245-
COMMAND_EXPAND_LISTS
246-
)
247-
target_sources(${targetName} PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/GPUTrackingHIPKernelModules.o)
248-
set_source_files_properties(${CMAKE_CURRENT_BINARY_DIR}/GPUTrackingHIPKernelModules.o PROPERTIES EXTERNAL_OBJECT true GENERATED true)
249253

250254
elseif(GPUCA_HIP_COMPILE_MODE STREQUAL "rdc")
251255
message(FATAL_ERROR "HIP RDC compilation of GPUReconstruction ios not yet working!")

GPU/GPUTracking/Standalone/cmake/config.cmake

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,7 @@ set(HIP_AMDGPUTARGET "default") # "gfx906;gfx908;gfx90a"
3535
set(CUDA_COMPUTETARGET "default") # 86 89
3636
#set(GPUCA_CUDA_COMPILE_MODE perkernel) # onefile / perkernel / rtc
3737
#set(GPUCA_HIP_COMPILE_MODE perkernel)
38+
#set(GPUCA_RTC_NO_COMPILED_KERNELS 1)
3839
#set(GPUCA_KERNEL_RESOURCE_USAGE_VERBOSE 1)
3940
#set(GPUCA_CONFIG_COMPILER gcc) # gcc / clang
4041
#add_definitions(-DGPUCA_GPU_DEBUG_PRINT)

0 commit comments

Comments
 (0)