Skip to content

Commit bccffa5

Browse files
committed
GPU RTC: Add deterministic mode
1 parent d4a5ca7 commit bccffa5

File tree

5 files changed

+29
-12
lines changed

5 files changed

+29
-12
lines changed

GPU/GPUTracking/Base/cuda/CMakeLists.txt

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -85,22 +85,28 @@ add_custom_target(${MODULE}_CUDA_SRC_CHK ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}
8585
add_custom_command(
8686
OUTPUT ${GPU_RTC_BIN}.command
8787
COMMAND echo -n "${CMAKE_CUDA_COMPILER} ${GPU_RTC_FLAGS_SEPARATED} ${GPU_RTC_DEFINES} -fatbin" > ${GPU_RTC_BIN}.command
88-
COMMAND_EXPAND_LISTS
89-
VERBATIM
88+
COMMAND_EXPAND_LISTS VERBATIM
9089
COMMENT "Preparing CUDA RTC command file ${GPU_RTC_BIN}.command"
9190
)
9291
create_binary_resource(${GPU_RTC_BIN}.command ${GPU_RTC_BIN}.command.o)
9392

9493
add_custom_command(
9594
OUTPUT ${GPU_RTC_BIN}.command.arch
9695
COMMAND echo -n "${GPU_RTC_FLAGS_ARCH}" > ${GPU_RTC_BIN}.command.arch
97-
COMMAND_EXPAND_LISTS
98-
VERBATIM
99-
COMMENT "Preparing CUDA RTC ARCH file ${GPU_RTC_BIN}.command.arch"
96+
COMMAND_EXPAND_LISTS VERBATIM
97+
COMMENT "Preparing CUDA RTC ARCH command file ${GPU_RTC_BIN}.command.arch"
10098
)
10199
create_binary_resource(${GPU_RTC_BIN}.command.arch ${GPU_RTC_BIN}.command.arch.o)
102100

103-
set(SRCS ${SRCS} ${GPU_RTC_BIN}.src.o ${GPU_RTC_BIN}.command.o ${GPU_RTC_BIN}.command.arch.o)
101+
add_custom_command(
102+
OUTPUT ${GPU_RTC_BIN}.command.no_fast_math
103+
COMMAND echo -n "${GPUCA_CUDA_NO_FAST_MATH_FLAGS}" > ${GPU_RTC_BIN}.command.no_fast_math
104+
COMMAND_EXPAND_LISTS VERBATIM
105+
COMMENT "Preparing CUDA RTC NO_FAST_MATH command file ${GPU_RTC_BIN}.command.no_fast_math"
106+
)
107+
create_binary_resource(${GPU_RTC_BIN}.command.no_fast_math ${GPU_RTC_BIN}.command.no_fast_math.o)
108+
109+
set(SRCS ${SRCS} ${GPU_RTC_BIN}.src.o ${GPU_RTC_BIN}.command.o ${GPU_RTC_BIN}.command.arch.o ${GPU_RTC_BIN}.command.no_fast_math.o)
104110
# -------------------------------- End RTC -------------------------------------------------------
105111

106112
if(ALIGPU_BUILD_TYPE STREQUAL "O2")

GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,13 @@ using namespace o2::gpu;
2929
QGET_LD_BINARY_SYMBOLS(GPUReconstructionCUDArtc_src);
3030
QGET_LD_BINARY_SYMBOLS(GPUReconstructionCUDArtc_command);
3131
QGET_LD_BINARY_SYMBOLS(GPUReconstructionCUDArtc_command_arch);
32+
QGET_LD_BINARY_SYMBOLS(GPUReconstructionCUDArtc_command_no_fast_math);
3233

3334
int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile)
3435
{
3536
std::string rtcparam = std::string("#define GPUCA_RTC_CODE\n") +
3637
std::string(mProcessingSettings.rtc.optSpecialCode ? "#define GPUCA_RTC_SPECIAL_CODE(...) __VA_ARGS__\n" : "#define GPUCA_RTC_SPECIAL_CODE(...)\n") +
38+
// Emit a definition in both modes: the RTC source keeps GPUCA_DETERMINISTIC_CODE(...) calls unexpanded, so the macro must resolve at RTC compile time either way.
std::string(mProcessingSettings.rtc.deterministic ? "#define GPUCA_DETERMINISTIC_CODE(det, indet) det\n" : "#define GPUCA_DETERMINISTIC_CODE(det, indet) indet\n") +
3739
GPUParamRTC::generateRTCCode(param(), mProcessingSettings.rtc.optConstexpr);
3840
if (filename == "") {
3941
filename = "/tmp/o2cagpu_rtc_";
@@ -52,6 +54,7 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile)
5254
std::string baseCommand = (mProcessingSettings.RTCprependCommand != "" ? (mProcessingSettings.RTCprependCommand + " ") : "");
5355
baseCommand += (getenv("O2_GPU_RTC_OVERRIDE_CMD") ? std::string(getenv("O2_GPU_RTC_OVERRIDE_CMD")) : std::string(_binary_GPUReconstructionCUDArtc_command_start, _binary_GPUReconstructionCUDArtc_command_len));
5456
baseCommand += std::string(" ") + (mProcessingSettings.RTCoverrideArchitecture != "" ? mProcessingSettings.RTCoverrideArchitecture : std::string(_binary_GPUReconstructionCUDArtc_command_arch_start, _binary_GPUReconstructionCUDArtc_command_arch_len));
57+
baseCommand += mProcessingSettings.rtc.deterministic ? (std::string(" ") + std::string(_binary_GPUReconstructionCUDArtc_command_no_fast_math_start, _binary_GPUReconstructionCUDArtc_command_no_fast_math_len)) : std::string("");
5558

5659
char shasource[21], shaparam[21], shacmd[21], shakernels[21];
5760
if (mProcessingSettings.rtc.cacheOutput) {

GPU/GPUTracking/Base/cuda/GPUReconstructionCUDArtc.cu

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#define GPUCA_GPUCODE_GENRTC
1616
#define GPUCA_GPUCODE_COMPILEKERNELS
1717
#define GPUCA_RTC_SPECIAL_CODE(...) GPUCA_RTC_SPECIAL_CODE(__VA_ARGS__)
18+
#define GPUCA_DETERMINISTIC_CODE(...) GPUCA_DETERMINISTIC_CODE(__VA_ARGS__)
1819
#include "GPUReconstructionCUDADef.h"
1920
#include "GPUReconstructionIncludesDeviceAll.h"
2021

GPU/GPUTracking/Base/hip/CMakeLists.txt

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -123,22 +123,28 @@ add_custom_target(${MODULE}_HIP_SRC_CHK ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/
123123
add_custom_command(
124124
OUTPUT ${GPU_RTC_BIN}.command
125125
COMMAND echo -n "${CMAKE_HIP_COMPILER} ${GPU_RTC_FLAGS_SEPARATED} ${GPU_RTC_DEFINES} -x hip --cuda-device-only" > ${GPU_RTC_BIN}.command
126-
COMMAND_EXPAND_LISTS
127-
VERBATIM
126+
COMMAND_EXPAND_LISTS VERBATIM
128127
COMMENT "Preparing HIP RTC command file ${GPU_RTC_BIN}.command"
129128
)
130129
create_binary_resource(${GPU_RTC_BIN}.command ${GPU_RTC_BIN}.command.o)
131130

132131
add_custom_command(
133132
OUTPUT ${GPU_RTC_BIN}.command.arch
134133
COMMAND echo -n "${GPU_RTC_FLAGS_ARCH}" > ${GPU_RTC_BIN}.command.arch
135-
COMMAND_EXPAND_LISTS
136-
VERBATIM
137-
COMMENT "Preparing HIP RTC ARCH file ${GPU_RTC_BIN}.command.arch"
134+
COMMAND_EXPAND_LISTS VERBATIM
135+
COMMENT "Preparing HIP RTC ARCH command file ${GPU_RTC_BIN}.command.arch"
138136
)
139137
create_binary_resource(${GPU_RTC_BIN}.command.arch ${GPU_RTC_BIN}.command.arch.o)
140138

141-
set(SRCS ${SRCS} ${GPU_RTC_BIN}.src.o ${GPU_RTC_BIN}.command.o ${GPU_RTC_BIN}.command.arch.o)
139+
add_custom_command(
140+
OUTPUT ${GPU_RTC_BIN}.command.no_fast_math
141+
COMMAND echo -n "${GPUCA_CXX_NO_FAST_MATH_FLAGS}" > ${GPU_RTC_BIN}.command.no_fast_math
142+
COMMAND_EXPAND_LISTS VERBATIM
143+
COMMENT "Preparing HIP RTC NO_FAST_MATH command file ${GPU_RTC_BIN}.command.no_fast_math"
144+
)
145+
create_binary_resource(${GPU_RTC_BIN}.command.no_fast_math ${GPU_RTC_BIN}.command.no_fast_math.o)
146+
147+
set(SRCS ${SRCS} ${GPU_RTC_BIN}.src.o ${GPU_RTC_BIN}.command.o ${GPU_RTC_BIN}.command.arch.o ${GPU_RTC_BIN}.command.no_fast_math.o)
142148
# -------------------------------- End RTC -------------------------------------------------------
143149

144150
if(ALIGPU_BUILD_TYPE STREQUAL "O2")

GPU/GPUTracking/Definitions/GPUSettingsList.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,7 @@ BeginSubConfig(GPUSettingsProcessingRTC, rtc, configStandalone.proc, "RTC", 0, "
208208
AddOption(cacheOutput, bool, false, "", 0, "Cache RTC compilation results")
209209
AddOption(optConstexpr, bool, true, "", 0, "Replace constant variables by static constexpr expressions")
210210
AddOption(optSpecialCode, int8_t, -1, "", 0, "Insert GPUCA_RTC_SPECIAL_CODE special code during RTC")
211+
AddOption(deterministic, bool, false, "", 0, "Compile RTC in deterministic mode, with NO_FAST_MATH flags and GPUCA_DETERMINISTIC_MODE define")
211212
AddOption(compilePerKernel, bool, true, "", 0, "Run one RTC compilation per kernel")
212213
AddOption(enable, bool, false, "", 0, "Use RTC to optimize GPU code")
213214
AddOption(runTest, int32_t, 0, "", 0, "Do not run the actual benchmark, but just test RTC compilation (1 full test, 2 test only compilation)")

0 commit comments

Comments
 (0)