@@ -85,17 +85,20 @@ elseif(NOT GPUCA_DETERMINISTIC_MODE MATCHES "^[0-9]+$")
8585 message (STATUS "Set to ${GPUCA_DETERMINISTIC_MODE} " )
8686endif ()
8787if (CMAKE_SYSTEM_NAME MATCHES Darwin OR NOT CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)" )
88- set (GPUCA_CLANG_FTZ "" )
88+ set (GPUCA_CXX_DENORMALS_FLAGS "" )
8989else ()
90- set (GPUCA_CLANG_FTZ "-mdaz-ftz" )
90+ set (GPUCA_CXX_DENORMALS_FLAGS "-mdaz-ftz" )
9191endif ()
92- set (GPUCA_CXX_NO_FAST_MATH_FLAGS "-fno-fast-math -ffp-contract=off ${GPUCA_CLANG_FTZ} " )
93- set (GPUCA_CUDA_NO_FAST_MATH_FLAGS "--ftz=true --prec-div=true --prec-sqrt=true --fmad false" )
94- set (GPUCA_OCL_NO_FAST_MATH_FLAGS -cl-fp32-correctly-rounded-divide-sqrt -cl-denorms-are-zero)
92+ set (GPUCA_CUDA_DENORMALS_FLAGS "--ftz=true" )
93+ set (GPUCA_OCL_DENORMALS_FLAGS "-cl-denorms-are-zero" )
94+ set (GPUCA_HIP_DENORMALS_FLAGS "-fgpu-flush-denormals-to-zero" )
95+ set (GPUCA_CXX_NO_FAST_MATH_FLAGS "-fno-fast-math -ffp-contract=off" )
96+ set (GPUCA_CUDA_NO_FAST_MATH_FLAGS "--prec-div=true --prec-sqrt=true --fmad false" )
97+ set (GPUCA_OCL_NO_FAST_MATH_FLAGS -cl-fp32-correctly-rounded-divide-sqrt )
9598if (GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_WHOLEO2} )
9699 add_definitions (-DGPUCA_DETERMINISTIC_MODE)
97- set ( CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_ ${CMAKE_BUILD_TYPE_UPPER} } ${GPUCA_CXX_NO_FAST_MATH_FLAGS} " )
98- set ( CMAKE_C_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_C_FLAGS_ ${CMAKE_BUILD_TYPE_UPPER} } ${GPUCA_CXX_NO_FAST_MATH_FLAGS} " )
100+ string ( APPEND CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} " ${GPUCA_CXX_NO_FAST_MATH_FLAGS} " )
101+ string ( APPEND CMAKE_C_FLAGS_${CMAKE_BUILD_TYPE_UPPER} " ${GPUCA_CXX_NO_FAST_MATH_FLAGS} " )
99102endif ()
100103
101104
@@ -143,27 +146,27 @@ if(ENABLE_CUDA)
143146 endif ()
144147 endif ()
145148 if (CMAKE_CUDA_COMPILER)
146- set (CMAKE_CUDA_FLAGS "-Xcompiler \" ${O2_GPU_CMAKE_CXX_FLAGS_NOSTD} \" ${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr --extended-lambda -Xcompiler -Wno-attributes" )
149+ set (CMAKE_CUDA_FLAGS "-Xcompiler \" ${O2_GPU_CMAKE_CXX_FLAGS_NOSTD} \" ${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr --extended-lambda -Xcompiler -Wno-attributes ${GPUCA_CUDA_DENORMALS_FLAGS} " )
150+ set (CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "-Xcompiler \" ${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} }\" ${CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} }" )
147151 if (GPUCA_KERNEL_RESOURCE_USAGE_VERBOSE)
148152 string (APPEND CMAKE_CUDA_FLAGS " -Xptxas -v" )
149153 endif ()
150154 string (APPEND CMAKE_CUDA_FLAGS " -Xcudafe --diag_suppress=114" )
151155 if (NOT ENABLE_CUDA STREQUAL "AUTO" )
152156 string (APPEND CMAKE_CUDA_FLAGS " --allow-unsupported-compiler" )
153157 endif ()
154- set (CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "-Xcompiler \" ${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} }\" ${CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} }" )
155158 if (CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG" )
156- set ( CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CUDA_FLAGS_ ${CMAKE_BUILD_TYPE_UPPER} } -lineinfo -Xptxas -O0 -Xcompiler -O0" )
159+ string ( APPEND CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} " -lineinfo -Xptxas -O0" )
157160 else ()
158- set ( CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CUDA_FLAGS_ ${CMAKE_BUILD_TYPE_UPPER} } -Xptxas -O4 -Xcompiler -O4" )
161+ string ( APPEND CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} " -Xptxas -O4 -Xcompiler -O4" )
159162 endif ()
160163 if (GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH} )
161- set ( CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CUDA_FLAGS_ ${CMAKE_BUILD_TYPE_UPPER} } ${GPUCA_CUDA_NO_FAST_MATH_FLAGS} " )
164+ string ( APPEND CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} " ${GPUCA_CUDA_NO_FAST_MATH_FLAGS} " )
162165 elseif (NOT CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG" )
163- set ( CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CUDA_FLAGS_ ${CMAKE_BUILD_TYPE_UPPER} } -use_fast_math --ftz=true" )#
166+ string ( APPEND CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} " -use_fast_math --ftz=true" )
164167 endif ()
165168 if (CMAKE_CXX_FLAGS MATCHES "(^| )-Werror( |$)" )
166- set ( CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Werror=cross-execution-space-call" )
169+ string ( APPEND CMAKE_CUDA_FLAGS " -Werror=cross-execution-space-call" )
167170 endif ()
168171 if (GPUCA_CUDA_GCCBIN)
169172 list (FILTER CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES EXCLUDE REGEX "^/usr/lib.*/gcc/" ) # Workaround, since CMake adds old GCC lib paths implicitly if we request that gcc for CUDA
@@ -285,27 +288,25 @@ if(ENABLE_HIP)
285288 set (HIP_ENABLED ON )
286289 set_target_properties (roc::rocthrust PROPERTIES IMPORTED_GLOBAL TRUE )
287290 message (STATUS "HIP Found (${hip_HIPCC_EXECUTABLE} version ${hip_VERSION} )" )
288- set (O2_HIP_CMAKE_CXX_FLAGS "-fgpu-defer-diag -mllvm -amdgpu-enable-lower-module-lds=false -mllvm -amdgpu-function-calls=true -Wno-invalid-command-line-argument -Wno-unused-command-line-argument -Wno-invalid-constexpr -Wno-ignored-optimization-argument -Wno-unused-private-field -Wno-pass-failed" )
291+ set (CMAKE_HIP_FLAGS "${O2_GPU_CMAKE_CXX_FLAGS_NOSTD} ${CMAKE_HIP_FLAGS} ${GPUCA_HIP_DENORMALS_FLAGS} " )
292+ set (CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} } ${CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER} }" )
293+ string (APPEND CMAKE_HIP_FLAGS " -fgpu-defer-diag -mllvm -amdgpu-enable-lower-module-lds=false -mllvm -amdgpu-function-calls=true -Wno-invalid-command-line-argument -Wno-unused-command-line-argument -Wno-invalid-constexpr -Wno-ignored-optimization-argument -Wno-unused-private-field -Wno-pass-failed " )
289294 if (hip_VERSION VERSION_GREATER_EQUAL "6.0" AND NOT hip_VERSION VERSION_GREATER_EQUAL "6.2" )
290- string (APPEND O2_HIP_CMAKE_CXX_FLAGS " -mllvm -amdgpu-legacy-sgpr-spill-lowering=true" ) # TODO: Cleanup
295+ string (APPEND CMAKE_HIP_FLAGS " -mllvm -amdgpu-legacy-sgpr-spill-lowering=true" ) # TODO: Cleanup
291296 endif ()
292297 if (GPUCA_KERNEL_RESOURCE_USAGE_VERBOSE)
293- string (APPEND O2_HIP_CMAKE_CXX_FLAGS " -Rpass-analysis=kernel-resource-usage" )
298+ string (APPEND CMAKE_HIP_FLAGS " -Rpass-analysis=kernel-resource-usage" )
299+ endif ()
300+ if (NOT GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH} )
301+ string (APPEND CMAKE_HIP_FLAGS " -ffast-math" )
302+ endif ()
303+ if (NOT CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG" )
304+ string (APPEND CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER} " -O3" )
294305 endif ()
295306 string (REGEX REPLACE "(gfx1[0-9]+;?)" "" CMAKE_HIP_ARCHITECTURES "${CMAKE_HIP_ARCHITECTURES} " ) # ROCm currently doesn’t support integrated graphics
296307 if (HIP_AMDGPUTARGET)
297308 set (CMAKE_HIP_ARCHITECTURES "${HIP_AMDGPUTARGET} " ) # If GPU build is enforced we override autodetection
298309 endif ()
299- if (NOT GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH} )
300- string (APPEND O2_HIP_CMAKE_CXX_FLAGS " -fgpu-flush-denormals-to-zero -ffast-math" )
301- endif ()
302- set (CMAKE_HIP_FLAGS "${O2_GPU_CMAKE_CXX_FLAGS_NOSTD} ${CMAKE_HIP_FLAGS} ${O2_HIP_CMAKE_CXX_FLAGS} " )
303- set (CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} } ${CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER} }" )
304- if (CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG" )
305- set (CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER} } -O0 -ggdb" )
306- else ()
307- set (CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER} } -O3" )
308- endif ()
309310 else ()
310311 set (HIP_ENABLED OFF )
311312 endif ()
0 commit comments