Skip to content

Commit d9d78bc

Browse files
committed
GPU CMake: Cleanup and do not use Clang x86 denormals flag for device compilation
1 parent: 81dad27 · commit: d9d78bc

File tree

3 files changed

+33
-31
lines changed

3 files changed

+33
-31
lines changed

GPU/GPUTracking/Base/opencl/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -23,9 +23,9 @@ endif()
2323
set(CL_SRC ${GPUDIR}/Base/opencl/GPUReconstructionOCL.cl)
2424
set(CL_BIN ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionOCLCode)
2525

26-
set(OCL_FLAGS -Dcl_clang_storage_class_specifiers -cl-std=CLC++2021)
26+
set(OCL_FLAGS -Dcl_clang_storage_class_specifiers -cl-std=CLC++2021 ${GPUCA_OCL_DENORMALS_FLAGS})
2727
if(NOT GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH})
28-
set(OCL_FLAGS ${OCL_FLAGS} -cl-denorms-are-zero -cl-mad-enable -cl-no-signed-zeros -cl-fast-relaxed-math)
28+
set(OCL_FLAGS ${OCL_FLAGS} -cl-mad-enable -cl-no-signed-zeros -cl-fast-relaxed-math)
2929
else()
3030
set(OCL_FLAGS ${OCL_FLAGS} ${GPUCA_OCL_NO_FAST_MATH_FLAGS})
3131
endif()

GPU/GPUTracking/CMakeLists.txt

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -17,11 +17,12 @@ set(MODULE GPUTracking)
1717
if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH})
1818
set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${GPUCA_CXX_NO_FAST_MATH_FLAGS}")
1919
if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_OPTO2})
20-
set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O2 ${GPUCA_CLANG_FTZ}")
20+
set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O2")
2121
endif()
2222
elseif(NOT CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG")
23-
set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O3 -ffast-math ${GPUCA_CLANG_FTZ}")
23+
set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O3 -ffast-math")
2424
endif()
25+
set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${GPUCA_CXX_DENORMALS_FLAGS}")
2526

2627
include(cmake/helpers.cmake)
2728

dependencies/FindO2GPU.cmake

Lines changed: 28 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -85,17 +85,20 @@ elseif(NOT GPUCA_DETERMINISTIC_MODE MATCHES "^[0-9]+$")
8585
message(STATUS "Set to ${GPUCA_DETERMINISTIC_MODE}")
8686
endif()
8787
if (CMAKE_SYSTEM_NAME MATCHES Darwin OR NOT CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)")
88-
set(GPUCA_CLANG_FTZ "")
88+
set(GPUCA_CXX_DENORMALS_FLAGS "")
8989
else()
90-
set(GPUCA_CLANG_FTZ "-mdaz-ftz")
90+
set(GPUCA_CXX_DENORMALS_FLAGS "-mdaz-ftz")
9191
endif()
92-
set(GPUCA_CXX_NO_FAST_MATH_FLAGS "-fno-fast-math -ffp-contract=off ${GPUCA_CLANG_FTZ}")
93-
set(GPUCA_CUDA_NO_FAST_MATH_FLAGS "--ftz=true --prec-div=true --prec-sqrt=true --fmad false")
94-
set(GPUCA_OCL_NO_FAST_MATH_FLAGS -cl-fp32-correctly-rounded-divide-sqrt -cl-denorms-are-zero)
92+
set(GPUCA_CUDA_DENORMALS_FLAGS "--ftz=true")
93+
set(GPUCA_OCL_DENORMALS_FLAGS "-cl-denorms-are-zero")
94+
set(GPUCA_HIP_DENORMALS_FLAGS "-fgpu-flush-denormals-to-zero")
95+
set(GPUCA_CXX_NO_FAST_MATH_FLAGS "-fno-fast-math -ffp-contract=off")
96+
set(GPUCA_CUDA_NO_FAST_MATH_FLAGS "--prec-div=true --prec-sqrt=true --fmad false")
97+
set(GPUCA_OCL_NO_FAST_MATH_FLAGS -cl-fp32-correctly-rounded-divide-sqrt )
9598
if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_WHOLEO2})
9699
add_definitions(-DGPUCA_DETERMINISTIC_MODE)
97-
set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${GPUCA_CXX_NO_FAST_MATH_FLAGS}")
98-
set(CMAKE_C_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_C_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${GPUCA_CXX_NO_FAST_MATH_FLAGS}")
100+
string(APPEND CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} " ${GPUCA_CXX_NO_FAST_MATH_FLAGS}")
101+
string(APPEND CMAKE_C_FLAGS_${CMAKE_BUILD_TYPE_UPPER} " ${GPUCA_CXX_NO_FAST_MATH_FLAGS}")
99102
endif()
100103

101104

@@ -143,27 +146,27 @@ if(ENABLE_CUDA)
143146
endif()
144147
endif()
145148
if(CMAKE_CUDA_COMPILER)
146-
set(CMAKE_CUDA_FLAGS "-Xcompiler \"${O2_GPU_CMAKE_CXX_FLAGS_NOSTD}\" ${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr --extended-lambda -Xcompiler -Wno-attributes")
149+
set(CMAKE_CUDA_FLAGS "-Xcompiler \"${O2_GPU_CMAKE_CXX_FLAGS_NOSTD}\" ${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr --extended-lambda -Xcompiler -Wno-attributes ${GPUCA_CUDA_DENORMALS_FLAGS}")
150+
set(CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "-Xcompiler \"${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}}\" ${CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER}}")
147151
if(GPUCA_KERNEL_RESOURCE_USAGE_VERBOSE)
148152
string(APPEND CMAKE_CUDA_FLAGS " -Xptxas -v")
149153
endif()
150154
string(APPEND CMAKE_CUDA_FLAGS " -Xcudafe --diag_suppress=114")
151155
if (NOT ENABLE_CUDA STREQUAL "AUTO")
152156
string(APPEND CMAKE_CUDA_FLAGS " --allow-unsupported-compiler")
153157
endif()
154-
set(CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "-Xcompiler \"${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}}\" ${CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER}}")
155158
if(CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG")
156-
set(CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -lineinfo -Xptxas -O0 -Xcompiler -O0")
159+
string(APPEND CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} " -lineinfo -Xptxas -O0")
157160
else()
158-
set(CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -Xptxas -O4 -Xcompiler -O4")
161+
string(APPEND CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} " -Xptxas -O4 -Xcompiler -O4")
159162
endif()
160163
if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH})
161-
set(CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${GPUCA_CUDA_NO_FAST_MATH_FLAGS}")
164+
string(APPEND CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} " ${GPUCA_CUDA_NO_FAST_MATH_FLAGS}")
162165
elseif(NOT CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG")
163-
set(CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -use_fast_math --ftz=true")#
166+
string(APPEND CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} " -use_fast_math --ftz=true")
164167
endif()
165168
if(CMAKE_CXX_FLAGS MATCHES "(^| )-Werror( |$)")
166-
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Werror=cross-execution-space-call")
169+
string(APPEND CMAKE_CUDA_FLAGS " -Werror=cross-execution-space-call")
167170
endif()
168171
if(GPUCA_CUDA_GCCBIN)
169172
list(FILTER CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES EXCLUDE REGEX "^/usr/lib.*/gcc/") # Workaround, since CMake adds old GCC lib paths implicitly if we request that gcc for CUDA
@@ -285,27 +288,25 @@ if(ENABLE_HIP)
285288
set(HIP_ENABLED ON)
286289
set_target_properties(roc::rocthrust PROPERTIES IMPORTED_GLOBAL TRUE)
287290
message(STATUS "HIP Found (${hip_HIPCC_EXECUTABLE} version ${hip_VERSION})")
288-
set(O2_HIP_CMAKE_CXX_FLAGS "-fgpu-defer-diag -mllvm -amdgpu-enable-lower-module-lds=false -mllvm -amdgpu-function-calls=true -Wno-invalid-command-line-argument -Wno-unused-command-line-argument -Wno-invalid-constexpr -Wno-ignored-optimization-argument -Wno-unused-private-field -Wno-pass-failed")
291+
set(CMAKE_HIP_FLAGS "${O2_GPU_CMAKE_CXX_FLAGS_NOSTD} ${CMAKE_HIP_FLAGS} ${GPUCA_HIP_DENORMALS_FLAGS}")
292+
set(CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER}}")
293+
string(APPEND CMAKE_HIP_FLAGS " -fgpu-defer-diag -mllvm -amdgpu-enable-lower-module-lds=false -mllvm -amdgpu-function-calls=true -Wno-invalid-command-line-argument -Wno-unused-command-line-argument -Wno-invalid-constexpr -Wno-ignored-optimization-argument -Wno-unused-private-field -Wno-pass-failed ")
289294
if(hip_VERSION VERSION_GREATER_EQUAL "6.0" AND NOT hip_VERSION VERSION_GREATER_EQUAL "6.2")
290-
string(APPEND O2_HIP_CMAKE_CXX_FLAGS " -mllvm -amdgpu-legacy-sgpr-spill-lowering=true") # TODO: Cleanup
295+
string(APPEND CMAKE_HIP_FLAGS " -mllvm -amdgpu-legacy-sgpr-spill-lowering=true") # TODO: Cleanup
291296
endif()
292297
if(GPUCA_KERNEL_RESOURCE_USAGE_VERBOSE)
293-
string(APPEND O2_HIP_CMAKE_CXX_FLAGS " -Rpass-analysis=kernel-resource-usage")
298+
string(APPEND CMAKE_HIP_FLAGS " -Rpass-analysis=kernel-resource-usage")
299+
endif()
300+
if(NOT GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH})
301+
string(APPEND CMAKE_HIP_FLAGS " -ffast-math")
302+
endif()
303+
if(NOT CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG")
304+
string(APPEND CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER} " -O3")
294305
endif()
295306
string(REGEX REPLACE "(gfx1[0-9]+;?)" "" CMAKE_HIP_ARCHITECTURES "${CMAKE_HIP_ARCHITECTURES}") # ROCm currently doesn’t support integrated graphics
296307
if(HIP_AMDGPUTARGET)
297308
set(CMAKE_HIP_ARCHITECTURES "${HIP_AMDGPUTARGET}") # If GPU build is enforced we override autodetection
298309
endif()
299-
if(NOT GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH})
300-
string(APPEND O2_HIP_CMAKE_CXX_FLAGS " -fgpu-flush-denormals-to-zero -ffast-math")
301-
endif()
302-
set(CMAKE_HIP_FLAGS "${O2_GPU_CMAKE_CXX_FLAGS_NOSTD} ${CMAKE_HIP_FLAGS} ${O2_HIP_CMAKE_CXX_FLAGS}")
303-
set(CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER}}")
304-
if(CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG")
305-
set(CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O0 -ggdb")
306-
else()
307-
set(CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O3")
308-
endif()
309310
else()
310311
set(HIP_ENABLED OFF)
311312
endif()

0 commit comments

Comments
 (0)