Skip to content

Commit 19cdb4c

Browse files
committed
GPU: Propagate list of noFastMathKernels to GPU RTC and apply special compile settings
1 parent c6c4adc commit 19cdb4c

File tree

4 files changed

+57
-23
lines changed

4 files changed

+57
-23
lines changed

GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx

Lines changed: 10 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -31,11 +31,12 @@ QGET_LD_BINARY_SYMBOLS(GPUReconstructionCUDArtc_command);
3131
QGET_LD_BINARY_SYMBOLS(GPUReconstructionCUDArtc_command_arch);
3232
QGET_LD_BINARY_SYMBOLS(GPUReconstructionCUDArtc_command_no_fast_math);
3333

34+
#include "GPUNoFastMathKernels.h"
35+
3436
int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile)
3537
{
3638
std::string rtcparam = std::string("#define GPUCA_RTC_CODE\n") +
3739
std::string(mProcessingSettings.rtc.optSpecialCode ? "#define GPUCA_RTC_SPECIAL_CODE(...) __VA_ARGS__\n" : "#define GPUCA_RTC_SPECIAL_CODE(...)\n") +
38-
std::string(mProcessingSettings.rtc.deterministic ? "#define GPUCA_DETERMINISTIC_CODE(det, indet) det\n" : "#define GPUCA_DETERMINISTIC_CODE(det, indet) indet\n") +
3940
GPUParamRTC::generateRTCCode(param(), mProcessingSettings.rtc.optConstexpr);
4041
if (filename == "") {
4142
filename = "/tmp/o2cagpu_rtc_";
@@ -54,7 +55,6 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile)
5455
std::string baseCommand = (mProcessingSettings.RTCprependCommand != "" ? (mProcessingSettings.RTCprependCommand + " ") : "");
5556
baseCommand += (getenv("O2_GPU_RTC_OVERRIDE_CMD") ? std::string(getenv("O2_GPU_RTC_OVERRIDE_CMD")) : std::string(_binary_GPUReconstructionCUDArtc_command_start, _binary_GPUReconstructionCUDArtc_command_len));
5657
baseCommand += std::string(" ") + (mProcessingSettings.RTCoverrideArchitecture != "" ? mProcessingSettings.RTCoverrideArchitecture : std::string(_binary_GPUReconstructionCUDArtc_command_arch_start, _binary_GPUReconstructionCUDArtc_command_arch_len));
57-
baseCommand += mProcessingSettings.rtc.deterministic ? (std::string(" ") + std::string(_binary_GPUReconstructionCUDArtc_command_no_fast_math_start, _binary_GPUReconstructionCUDArtc_command_no_fast_math_len)) : std::string("");
5858

5959
char shasource[21], shaparam[21], shacmd[21], shakernels[21];
6060
if (mProcessingSettings.rtc.cacheOutput) {
@@ -169,13 +169,20 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile)
169169
kernel += mProcessingSettings.rtc.compilePerKernel ? kernels[i] : kernelsall;
170170
kernel += "}";
171171

172-
if (fwrite(rtcparam.c_str(), 1, rtcparam.size(), fp) != rtcparam.size() ||
172+
bool deterministic = mProcessingSettings.rtc.deterministic || o2::gpu::internal::noFastMathKernels.find(GetKernelName(i)) != o2::gpu::internal::noFastMathKernels.end();
173+
const std::string deterministicStr = std::string(deterministic ? "#define GPUCA_DETERMINISTIC_CODE(det, indet) det\n" : "#define GPUCA_DETERMINISTIC_CODE(det, indet) indet\n");
174+
175+
if (fwrite(deterministicStr.c_str(), 1, deterministicStr.size(), fp) != deterministicStr.size() ||
176+
fwrite(rtcparam.c_str(), 1, rtcparam.size(), fp) != rtcparam.size() ||
173177
fwrite(_binary_GPUReconstructionCUDArtc_src_start, 1, _binary_GPUReconstructionCUDArtc_src_len, fp) != _binary_GPUReconstructionCUDArtc_src_len ||
174178
fwrite(kernel.c_str(), 1, kernel.size(), fp) != kernel.size()) {
175179
throw std::runtime_error("Error writing file");
176180
}
177181
fclose(fp);
178182
std::string command = baseCommand;
183+
if (deterministic) {
184+
command += std::string(" ") + std::string(_binary_GPUReconstructionCUDArtc_command_no_fast_math_start, _binary_GPUReconstructionCUDArtc_command_no_fast_math_len);
185+
}
179186
command += " -c " + filename + "_" + std::to_string(i) + mRtcSrcExtension + " -o " + filename + "_" + std::to_string(i) + mRtcBinExtension;
180187
if (mProcessingSettings.debugLevel < 0) {
181188
command += " &> /dev/null";

GPU/GPUTracking/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -242,6 +242,10 @@ file(GENERATE
242242
OUTPUT include_gpu_onthefly/GPUReconstructionIncludesDeviceAll.h
243243
INPUT Base/GPUReconstructionIncludesDeviceAll.template.h
244244
)
245+
# Produce include_gpu_onthefly/GPUNoFastMathKernels.h from its template.
# file(GENERATE) evaluates generator expressions in the INPUT file, which is
# how the $<...> placeholder inside the template gets replaced by the list of
# kernel names.
file(GENERATE
246+
OUTPUT include_gpu_onthefly/GPUNoFastMathKernels.h
247+
INPUT cmake/GPUNoFastMathKernels.template.h
248+
)
245249
if(NOT ALIGPU_BUILD_TYPE STREQUAL "O2")
246250
include_directories(${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly)
247251
endif()
Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,23 @@
1+
// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2+
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3+
// All rights not expressly granted are reserved.
4+
//
5+
// This software is distributed under the terms of the GNU General Public
6+
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7+
//
8+
// In applying this license CERN does not waive the privileges and immunities
9+
// granted to it by virtue of its status as an Intergovernmental Organization
10+
// or submit itself to any jurisdiction.
11+
12+
/// \file GPUNoFastMathKernels.h
13+
/// \author David Rohr
14+
15+
#include <unordered_set>
16+
#include <string>
17+
18+
// Set of kernel names that genRTC() looks up by GetKernelName(i); a kernel
// found here is compiled in deterministic mode with the no-fast-math command
// line appended, even when rtc.deterministic is off.
//
// This file is a CMake template, not plain C++: the $<...> initializer below
// is a generator expression that is expanded by file(GENERATE).
// NOTE(review): the header has no include guard and the variable is `static`,
// so every translation unit including it gets its own copy - confirm it is
// only meant to be included from a single source file.
namespace o2::gpu::internal
19+
{
20+
// clang-format off
21+
// Expansion: take the O2_GPU_KERNEL_NO_FAST_MATH property of the target
// O2_GPU_KERNELS, drop duplicate entries, wrap every entry in double quotes
// and join the entries with ", " to form a braced initializer list.
static const std::unordered_set<std::string> noFastMathKernels = {$<JOIN:$<LIST:TRANSFORM,$<LIST:TRANSFORM,$<LIST:REMOVE_DUPLICATES,$<TARGET_PROPERTY:O2_GPU_KERNELS,O2_GPU_KERNEL_NO_FAST_MATH>>,APPEND,">,PREPEND,">,$<COMMA> >};
22+
// clang-format on
23+
} // namespace o2::gpu::internal

GPU/GPUTracking/cmake/kernel_helpers.cmake

Lines changed: 20 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@ define_property(TARGET PROPERTY O2_GPU_KERNELS)
1717
define_property(TARGET PROPERTY O2_GPU_KERNEL_NAMES)
1818
define_property(TARGET PROPERTY O2_GPU_KERNEL_INCLUDES)
1919
define_property(TARGET PROPERTY O2_GPU_KERNEL_FILES)
20+
define_property(TARGET PROPERTY O2_GPU_KERNEL_NO_FAST_MATH)
2021
set(O2_GPU_KERNEL_WRAPPER_FOLDER "${CMAKE_CURRENT_BINARY_DIR}/GPU/include_gpu_onthefly")
2122
file(MAKE_DIRECTORY ${O2_GPU_KERNEL_WRAPPER_FOLDER})
2223
set(O2_GPU_BASE_DIR "${CMAKE_CURRENT_LIST_DIR}/../")
@@ -144,24 +145,23 @@ function(o2_gpu_kernel_file_list list)
144145
endfunction()
145146

146147
# o2_gpu_kernel_set_deterministic(<kernel>...)
# Marks each kernel name passed as an argument as "no fast math".
# For every argument the function
#   - (added by this commit) appends the name to the O2_GPU_KERNEL_NO_FAST_MATH
#     property of the O2_GPU_KERNELS target, and
#   - when CUDA / HIP is enabled and the respective compile mode is undefined
#     or "perkernel", sets the no-fast-math COMPILE_FLAGS and the
#     GPUCA_DETERMINISTIC_MODE definition on the generated krnl_<kernel>.cu /
#     krnl_<kernel>.hip wrapper source.
# The lines marked "-" are the removed version, the lines marked "+" the new one.
# NOTE(review): when called without arguments n becomes -1; confirm how
# foreach(i RANGE 0 -1) behaves in the CMake versions in use.
function(o2_gpu_kernel_set_deterministic)
147-
# Removed version: the whole body was skipped when GPUCA_DETERMINISTIC_MODE was
# already >= GPUCA_DETERMINISTIC_MODE_MAP_GPU.
if(NOT GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_GPU})
148-
list(LENGTH ARGV n)
149-
math(EXPR n "${n} - 1")
150-
foreach(i RANGE 0 ${n})
151-
if(CUDA_ENABLED AND (NOT DEFINED GPUCA_CUDA_COMPILE_MODE OR GPUCA_CUDA_COMPILE_MODE STREQUAL "perkernel"))
152-
set_source_files_properties("${O2_GPU_KERNEL_WRAPPER_FOLDER}/krnl_${ARGV${i}}.cu"
153-
TARGET_DIRECTORY O2::GPUTrackingCUDA
154-
PROPERTIES
155-
COMPILE_FLAGS "${GPUCA_CUDA_NO_FAST_MATH_FLAGS}"
156-
COMPILE_DEFINITIONS "GPUCA_DETERMINISTIC_MODE")
157-
endif()
158-
if(HIP_ENABLED AND (NOT DEFINED GPUCA_HIP_COMPILE_MODE OR GPUCA_HIP_COMPILE_MODE STREQUAL "perkernel"))
159-
set_source_files_properties("${O2_GPU_KERNEL_WRAPPER_FOLDER}/krnl_${ARGV${i}}.hip"
160-
TARGET_DIRECTORY O2::GPUTrackingHIP
161-
PROPERTIES
162-
COMPILE_FLAGS "${GPUCA_CXX_NO_FAST_MATH_FLAGS}"
163-
COMPILE_DEFINITIONS "GPUCA_DETERMINISTIC_MODE")
164-
endif()
165-
endforeach()
166-
endif()
# New version: no outer GPUCA_DETERMINISTIC_MODE guard; the loop always runs and
# additionally records every kernel name in O2_GPU_KERNEL_NO_FAST_MATH.
148+
list(LENGTH ARGV n)
149+
math(EXPR n "${n} - 1")
150+
foreach(i RANGE 0 ${n})
151+
set_property(TARGET O2_GPU_KERNELS APPEND PROPERTY O2_GPU_KERNEL_NO_FAST_MATH "${ARGV${i}}")
152+
if(CUDA_ENABLED AND (NOT DEFINED GPUCA_CUDA_COMPILE_MODE OR GPUCA_CUDA_COMPILE_MODE STREQUAL "perkernel"))
153+
set_source_files_properties("${O2_GPU_KERNEL_WRAPPER_FOLDER}/krnl_${ARGV${i}}.cu"
154+
TARGET_DIRECTORY O2::GPUTrackingCUDA
155+
PROPERTIES
156+
COMPILE_FLAGS "${GPUCA_CUDA_NO_FAST_MATH_FLAGS}"
157+
COMPILE_DEFINITIONS "GPUCA_DETERMINISTIC_MODE")
158+
endif()
159+
if(HIP_ENABLED AND (NOT DEFINED GPUCA_HIP_COMPILE_MODE OR GPUCA_HIP_COMPILE_MODE STREQUAL "perkernel"))
160+
set_source_files_properties("${O2_GPU_KERNEL_WRAPPER_FOLDER}/krnl_${ARGV${i}}.hip"
161+
TARGET_DIRECTORY O2::GPUTrackingHIP
162+
PROPERTIES
163+
COMPILE_FLAGS "${GPUCA_CXX_NO_FAST_MATH_FLAGS}"
164+
COMPILE_DEFINITIONS "GPUCA_DETERMINISTIC_MODE")
165+
endif()
166+
endforeach()
167167
endfunction()

0 commit comments

Comments
 (0)