Skip to content

Commit 78d0f9c

Browse files
committed
GPU Compile Flags: Set denormals to zero unconditionally
Previously, I kept denormals enabled in NO_FAST_MATH mode, but this yields a warning from nvcc which, for some stupid reason, cannot be disabled. In principle, with denormals globally disabled, the results are deterministic again, and disabling denormals everywhere is as simple as enabling them everywhere.
1 parent fa403b4 commit 78d0f9c

File tree

4 files changed

+18
-12
lines changed

4 files changed

+18
-12
lines changed

GPU/GPUTracking/Base/opencl/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ set(OCL_FLAGS -Dcl_clang_storage_class_specifiers -cl-std=CLC++2021)
2727
if(NOT GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH})
2828
set(OCL_FLAGS ${OCL_FLAGS} -cl-denorms-are-zero -cl-mad-enable -cl-no-signed-zeros -cl-fast-relaxed-math)
2929
else()
30-
set(OCL_FLAGS ${OCL_FLAGS} -cl-fp32-correctly-rounded-divide-sqrt)
30+
set(OCL_FLAGS ${OCL_FLAGS} ${GPUCA_OCL_NO_FAST_MATH_FLAGS})
3131
endif()
3232
set(OCL_DEFINECL "-D$<JOIN:$<TARGET_PROPERTY:O2::GPUTracking,COMPILE_DEFINITIONS>,$<SEMICOLON>-D>"
3333
"-I$<JOIN:$<FILTER:$<TARGET_PROPERTY:O2::GPUTracking,INCLUDE_DIRECTORIES>,EXCLUDE,^/usr/include/?>,$<SEMICOLON>-I>"

GPU/GPUTracking/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,10 @@ set(MODULE GPUTracking)
1717
if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH})
1818
set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${GPUCA_CXX_NO_FAST_MATH_FLAGS}")
1919
if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_OPTO2})
20-
set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O2")
20+
set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O2 ${GPUCA_CLANG_FTZ}")
2121
endif()
2222
elseif(NOT CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG")
23-
set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O3 -ffast-math")
23+
set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O3 -ffast-math ${GPUCA_CLANG_FTZ}")
2424
endif()
2525

2626
include(cmake/helpers.cmake)

GPU/GPUTracking/Definitions/GPUDefGPUParameters.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -603,11 +603,11 @@
603603
// #define GPUCA_KERNEL_DEBUGGER_OUTPUT
604604

605605
// Some assertions to make sure our parameters are not invalid
606-
static_assert(GPUCA_MAXN >= GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP, "Invalid GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP");
607-
static_assert(GPUCA_ROW_COUNT >= GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE, "Invalid GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE");
608-
#ifdef GPUCA_GPUCODE
609-
static_assert(GPUCA_M_FIRST(GPUCA_LB_GPUTPCCompressionKernels_step1unattached) * 2 <= GPUCA_TPC_COMP_CHUNK_SIZE, "Invalid GPUCA_TPC_COMP_CHUNK_SIZE");
610-
#endif
606+
static_assert(GPUCA_MAXN >= GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP, "Invalid GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP");
607+
static_assert(GPUCA_ROW_COUNT >= GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE, "Invalid GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE");
608+
#ifdef GPUCA_GPUCODE
609+
static_assert(GPUCA_M_FIRST(GPUCA_LB_GPUTPCCompressionKernels_step1unattached) * 2 <= GPUCA_TPC_COMP_CHUNK_SIZE, "Invalid GPUCA_TPC_COMP_CHUNK_SIZE");
610+
#endif
611611

612612
// Derived parameters
613613
#ifdef GPUCA_USE_TEXTURES
@@ -621,5 +621,5 @@
621621
#define GPUCA_NEW_ALIGNMENT (std::align_val_t{GPUCA_BUFFER_ALIGNMENT})
622622
#define GPUCA_OPERATOR_NEW_ALIGNMENT ,GPUCA_NEW_ALIGNMENT
623623

624-
// clang-format on
624+
// clang-format on
625625
#endif

dependencies/FindO2GPU.cmake

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,14 @@ elseif(NOT GPUCA_DETERMINISTIC_MODE MATCHES "^[0-9]+$")
8484
set(GPUCA_DETERMINISTIC_MODE ${GPUCA_DETERMINISTIC_MODE_MAP_${GPUCA_DETERMINISTIC_MODE}})
8585
message(STATUS "Set to ${GPUCA_DETERMINISTIC_MODE}")
8686
endif()
87-
set(GPUCA_CXX_NO_FAST_MATH_FLAGS "-fno-fast-math -ffp-contract=off")
88-
set(GPUCA_CUDA_NO_FAST_MATH_FLAGS "--ftz=false --prec-div=true --prec-sqrt=true --fmad false")
87+
if (CMAKE_SYSTEM_NAME MATCHES Darwin OR NOT CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)")
88+
set(GPUCA_CLANG_FTZ "")
89+
else()
90+
set(GPUCA_CLANG_FTZ "-mdaz-ftz")
91+
endif()
92+
set(GPUCA_CXX_NO_FAST_MATH_FLAGS "-fno-fast-math -ffp-contract=off ${GPUCA_CLANG_FTZ}")
93+
set(GPUCA_CUDA_NO_FAST_MATH_FLAGS "--ftz=true --prec-div=true --prec-sqrt=true --fmad false")
94+
set(GPUCA_OCL_NO_FAST_MATH_FLAGS -cl-fp32-correctly-rounded-divide-sqrt -cl-denorms-are-zero)
8995
if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_WHOLEO2})
9096
add_definitions(-DGPUCA_DETERMINISTIC_MODE)
9197
set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${GPUCA_CXX_NO_FAST_MATH_FLAGS}")
@@ -172,7 +178,7 @@ if(ENABLE_CUDA)
172178
endif()
173179
endif()
174180

175-
# ---------------------------------- HIP ----------------------------------
181+
# ---------------------------------- OpenCL ----------------------------------
176182
if(ENABLE_OPENCL)
177183
find_package(OpenCL)
178184
if(ENABLE_OPENCL AND NOT ENABLE_OPENCL STREQUAL "AUTO")

0 commit comments

Comments
 (0)