99# granted to it by virtue of its status as an Intergovernmental Organization
1010# or submit itself to any jurisdiction.
1111
12+ # NOTE!!!! - Whenever this file is changed, move it over to alidist/resources
13+ # FindO2GPU.cmake Version 1
14+
1215if (NOT DEFINED ENABLE_CUDA)
1316 set (ENABLE_CUDA "AUTO" )
1417endif ()
15- if (NOT DEFINED ENABLE_OPENCL1)
16- set (ENABLE_OPENCL1 "AUTO" )
17- endif ()
18- if (NOT DEFINED ENABLE_OPENCL2)
19- set (ENABLE_OPENCL2 "AUTO" )
18+ if (NOT DEFINED ENABLE_OPENCL)
19+ set (ENABLE_OPENCL "AUTO" )
2020endif ()
2121if (NOT DEFINED ENABLE_HIP)
2222 set (ENABLE_HIP "AUTO" )
2323endif ()
2424string (TOUPPER "${ENABLE_CUDA} " ENABLE_CUDA)
25- string (TOUPPER "${ENABLE_OPENCL1} " ENABLE_OPENCL1)
26- string (TOUPPER "${ENABLE_OPENCL2} " ENABLE_OPENCL2)
25+ string (TOUPPER "${ENABLE_OPENCL} " ENABLE_OPENCL)
2726string (TOUPPER "${ENABLE_HIP} " ENABLE_HIP)
2827if (NOT DEFINED CMAKE_BUILD_TYPE_UPPER)
2928 string (TOUPPER "${CMAKE_BUILD_TYPE} " CMAKE_BUILD_TYPE_UPPER)
3029endif ()
30+ if (CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG" )
31+ set (GPUCA_BUILD_DEBUG 1)
32+ endif ()
3133
3234if (CUDA_COMPUTETARGET AND CUDA_COMPUTETARGET STREQUAL "default" )
3335 set (CUDA_COMPUTETARGET 86 89)
@@ -65,10 +67,50 @@ function(set_target_hip_arch target)
6567 endif ()
6668endfunction ()
6769
68- # Detect and enable CUDA
69- STRING (REGEX REPLACE "\- std=[^ ]*" "" O2_GPU_CMAKE_CXX_FLAGS_NOSTD "${CMAKE_CXX_FLAGS} " ) # Need to strip c++17 imposed by alidist defaults
70+ # Need to strip c++17 imposed by alidist defaults
71+ STRING (REGEX REPLACE "\- std=[^ ]*" "" O2_GPU_CMAKE_CXX_FLAGS_NOSTD "${CMAKE_CXX_FLAGS} " )
7072
73+ # ================================== Fast Math / Deterministic Mode ==================================
74+ # set(GPUCA_DETERMINISTIC_MODE WHOLEO2) # Override
75+ set (GPUCA_DETERMINISTIC_MODE_MAP_OFF 0)
76+ set (GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH 1) # No -ffast-math and similar compile flags for GPU folder
77+ set (GPUCA_DETERMINISTIC_MODE_MAP_OPTO2 2) # In addition, -O2 optimization on host for GPU folder
78+ set (GPUCA_DETERMINISTIC_MODE_MAP_GPU 3) # In addition, GPUCA_DETERMINISTIC_MODE define for GPU folder
79+ set (GPUCA_DETERMINISTIC_MODE_MAP_ON 3) # Synonym for GPU
80+ set (GPUCA_DETERMINISTIC_MODE_MAP_WHOLEO2 4) # As GPU but for whole O2 code
81+ if (NOT DEFINED GPUCA_DETERMINISTIC_MODE)
82+ set (GPUCA_DETERMINISTIC_MODE 0)
83+ elseif (NOT GPUCA_DETERMINISTIC_MODE MATCHES "^[0-9]+$" )
84+ if (NOT DEFINED GPUCA_DETERMINISTIC_MODE_MAP_${GPUCA_DETERMINISTIC_MODE} )
85+ message (FATAL_ERROR "Invalid setting ${GPUCA_DETERMINISTIC_MODE} for GPUCA_DETERMINISTIC_MODE" )
86+ endif ()
87+ set (GPUCA_DETERMINISTIC_MODE ${GPUCA_DETERMINISTIC_MODE_MAP_${GPUCA_DETERMINISTIC_MODE} })
88+ endif ()
89+ if (CMAKE_SYSTEM_NAME MATCHES Darwin OR NOT CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)" )
90+ set (GPUCA_CXX_DENORMALS_FLAGS "" )
91+ else ()
92+ set (GPUCA_CXX_DENORMALS_FLAGS "-mdaz-ftz" )
93+ endif ()
94+ set (GPUCA_CUDA_DENORMALS_FLAGS "--ftz=true" )
95+ set (GPUCA_OCL_DENORMALS_FLAGS "-cl-denorms-are-zero" )
96+ set (GPUCA_HIP_DENORMALS_FLAGS "-fgpu-flush-denormals-to-zero" )
97+ set (GPUCA_CXX_NO_FAST_MATH_FLAGS "-fno-fast-math -ffp-contract=off" )
98+ set (GPUCA_CUDA_NO_FAST_MATH_FLAGS "--prec-div=true --prec-sqrt=true --fmad false" )
99+ set (GPUCA_OCL_NO_FAST_MATH_FLAGS -cl-fp32-correctly-rounded-divide-sqrt )
100+ if (GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_WHOLEO2} )
101+ add_definitions (-DGPUCA_DETERMINISTIC_MODE)
102+ string (APPEND CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} " ${GPUCA_CXX_NO_FAST_MATH_FLAGS} " )
103+ string (APPEND CMAKE_C_FLAGS_${CMAKE_BUILD_TYPE_UPPER} " ${GPUCA_CXX_NO_FAST_MATH_FLAGS} " )
104+ endif ()
105+
106+
107+ # ================================== CUDA ==================================
71108if (ENABLE_CUDA)
109+ if (CUDA_COMPUTETARGET)
110+ set (CMAKE_CUDA_ARCHITECTURES ${CUDA_COMPUTETARGET} )
111+ else ()
112+ set (CMAKE_CUDA_ARCHITECTURES 61-virtual)
113+ endif ()
72114 set (CMAKE_CUDA_STANDARD ${CMAKE_CXX_STANDARD} )
73115 set (CMAKE_CUDA_STANDARD_REQUIRED TRUE )
74116 include (CheckLanguage)
@@ -84,11 +126,6 @@ if(ENABLE_CUDA)
84126 message (STATUS "Using as CUDA GCC version: ${GPUCA_CUDA_GCCBIN} " )
85127 set (CMAKE_CUDA_HOST_COMPILER "${GPUCA_CUDA_GCCBIN} " )
86128 endif ()
87- if (CUDA_COMPUTETARGET)
88- set (CMAKE_CUDA_ARCHITECTURES ${CUDA_COMPUTETARGET} CACHE STRING "" FORCE)
89- else ()
90- set (CMAKE_CUDA_ARCHITECTURES 61-virtual CACHE STRING "" FORCE)
91- endif ()
92129 enable_language (CUDA)
93130 get_property (LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES )
94131 if (ENABLE_CUDA STREQUAL "AUTO" )
@@ -105,30 +142,36 @@ if(ENABLE_CUDA)
105142 message (${FAILURE_SEVERITY} "CUDA found but thrust not available" )
106143 set (CMAKE_CUDA_COMPILER OFF )
107144 endif ()
108- if (NOT CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "11.4 " )
109- message (${FAILURE_SEVERITY} "CUDA Version too old: ${CMAKE_CUDA_COMPILER_VERSION} , 11.4 required" )
145+ if (NOT CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "12.8 " )
146+ message (${FAILURE_SEVERITY} "CUDA Version too old: ${CMAKE_CUDA_COMPILER_VERSION} , 12.8 required" )
110147 set (CMAKE_CUDA_COMPILER OFF )
111148 endif ()
112149 endif ()
113150 if (CMAKE_CUDA_COMPILER)
114- set (CMAKE_CUDA_FLAGS "-Xcompiler \" ${O2_GPU_CMAKE_CXX_FLAGS_NOSTD} \" ${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr --extended-lambda --allow-unsupported-compiler -Xptxas -v -Xcompiler -Wno-attributes" )
115- if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "12.3" )
116- string (APPEND CMAKE_CUDA_FLAGS " -Xcudafe --diag_suppress=20257" ) # TODO: Cleanup
117- endif ()
151+ set (CMAKE_CUDA_FLAGS "-Xcompiler \" ${O2_GPU_CMAKE_CXX_FLAGS_NOSTD} \" ${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr --extended-lambda -Xcompiler -Wno-attributes -Wno-deprecated-gpu-targets ${GPUCA_CUDA_DENORMALS_FLAGS} " )
118152 set (CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "-Xcompiler \" ${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} }\" ${CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} }" )
153+ if (GPUCA_KERNEL_RESOURCE_USAGE_VERBOSE)
154+ string (APPEND CMAKE_CUDA_FLAGS " -Xptxas -v" )
155+ endif ()
156+ string (APPEND CMAKE_CUDA_FLAGS " -Xcudafe --diag_suppress=114" )
157+ if (NOT ENABLE_CUDA STREQUAL "AUTO" )
158+ string (APPEND CMAKE_CUDA_FLAGS " --allow-unsupported-compiler" )
159+ endif ()
119160 if (CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG" )
120- set ( CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CUDA_FLAGS_ ${CMAKE_BUILD_TYPE_UPPER} } -lineinfo -Xptxas -O0 -Xcompiler -O0" )
161+ string ( APPEND CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} " -lineinfo -Xptxas -O0" )
121162 else ()
122- set ( CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CUDA_FLAGS_ ${CMAKE_BUILD_TYPE_UPPER} } -Xptxas -O4 -Xcompiler -O4" )
163+ string ( APPEND CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} " -Xptxas -O4 -Xcompiler -O4" )
123164 endif ()
124- set (GPUCA_CUDA_NO_FAST_MATH_FLAGS "--ftz=false --prec-div=true --prec-sqrt=true --fmad false" )
125- if (DEFINED GPUCA_NO_FAST_MATH AND "${GPUCA_NO_FAST_MATH} " )
126- set (CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} } ${GPUCA_CUDA_NO_FAST_MATH_FLAGS} " )
165+ if (GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH} )
166+ string (APPEND CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} " ${GPUCA_CUDA_NO_FAST_MATH_FLAGS} " )
127167 elseif (NOT CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG" )
128- set ( CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CUDA_FLAGS_ ${CMAKE_BUILD_TYPE_UPPER} } -use_fast_math --ftz=true" ) #
168+ string ( APPEND CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} " -use_fast_math ${GPUCA_CUDA_DENORMALS_FLAGS} " )
129169 endif ()
130170 if (CMAKE_CXX_FLAGS MATCHES "(^| )-Werror( |$)" )
131- set (CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Werror=cross-execution-space-call" )
171+ string (APPEND CMAKE_CUDA_FLAGS " -Werror=cross-execution-space-call" )
172+ endif ()
173+ if (GPUCA_CUDA_GCCBIN)
174+ list (FILTER CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES EXCLUDE REGEX "^/usr/lib.*/gcc/" ) # Workaround, since CMake adds old GCC lib paths implicitly if we request that gcc for CUDA
132175 endif ()
133176
134177 set (CUDA_ENABLED ON )
@@ -140,44 +183,23 @@ if(ENABLE_CUDA)
140183 endif ()
141184endif ()
142185
143- # Detect and enable OpenCL 1.2 from AMD
144- if (ENABLE_OPENCL1 OR ENABLE_OPENCL2 )
186+ # ================================== OpenCL ==================================
187+ if (ENABLE_OPENCL )
145188 find_package (OpenCL)
146- if ((ENABLE_OPENCL1 AND NOT ENABLE_OPENCL1 STREQUAL "AUTO" )
147- OR (ENABLE_OPENCL2 AND NOT ENABLE_OPENCL2 STREQUAL "AUTO" ))
189+ if (ENABLE_OPENCL AND NOT ENABLE_OPENCL STREQUAL "AUTO" )
148190 set_package_properties(OpenCL PROPERTIES TYPE REQUIRED)
149191 else ()
150192 set_package_properties(OpenCL PROPERTIES TYPE OPTIONAL )
151193 endif ()
152- endif ()
153- if (ENABLE_OPENCL1)
154- if (NOT AMDAPPSDKROOT)
155- set (AMDAPPSDKROOT "$ENV{AMDAPPSDKROOT} " )
156- endif ()
157-
158- if (OpenCL_FOUND
159- AND OpenCL_VERSION_STRING VERSION_GREATER_EQUAL 1.2
160- AND AMDAPPSDKROOT
161- AND EXISTS "${AMDAPPSDKROOT} " )
162- set (OPENCL1_ENABLED ON )
163- message (STATUS "Found AMD OpenCL 1.2" )
164- elseif (NOT ENABLE_OPENCL1 STREQUAL "AUTO" )
165- message (FATAL_ERROR "AMD OpenCL 1.2 not available" )
166- else ()
167- set (OPENCL1_ENABLED OFF )
168- endif ()
169- endif ()
170-
171- # Detect and enable OpenCL 2.x
172- if (ENABLE_OPENCL2)
173- find_package (OpenCL)
174- find_package (LLVM)
175- if (LLVM_FOUND)
176- find_package (Clang)
194+ if (NOT OPENCL_COMPATIBLE_CLANG_FOUND)
195+ find_package (LLVM)
196+ if (LLVM_FOUND)
197+ find_package (Clang)
198+ endif ()
177199 endif ()
178200 if (GPUCA_OPENCL_CLANGBIN)
179201 set (LLVM_CLANG ${GPUCA_OPENCL_CLANGBIN} )
180- execute_process (COMMAND "which" "/usr/lib/llvm/15/bin/clang-15 " OUTPUT_VARIABLE TMP_LLVM_SPIRV_PATH COMMAND_ERROR_IS_FATAL ANY)
202+ execute_process (COMMAND "which" "${GPUCA_OPENCL_CLANGBIN} " OUTPUT_VARIABLE TMP_LLVM_SPIRV_PATH COMMAND_ERROR_IS_FATAL ANY)
181203 cmake_path(GET TMP_LLVM_SPIRV_PATH PARENT_PATH TMP_LLVM_SPIRV_PATH)
182204 find_program (LLVM_SPIRV llvm-spirv HINTS "${TMP_LLVM_SPIRV_PATH} " )
183205 else ()
@@ -187,38 +209,40 @@ if(ENABLE_OPENCL2)
187209 if (Clang_FOUND
188210 AND LLVM_FOUND
189211 AND NOT LLVM_CLANG STREQUAL "LLVM_CLANG-NOTFOUND"
190- AND LLVM_PACKAGE_VERSION VERSION_GREATER_EQUAL 13 .0)
191- set (OPENCL2_COMPATIBLE_CLANG_FOUND ON )
212+ AND LLVM_PACKAGE_VERSION VERSION_GREATER_EQUAL 18 .0)
213+ set (OPENCL_COMPATIBLE_CLANG_FOUND ON )
192214 endif ()
193215 if (OpenCL_VERSION_STRING VERSION_GREATER_EQUAL 2.2
194216 AND NOT LLVM_SPIRV STREQUAL "LLVM_SPIRV-NOTFOUND"
195- AND OPENCL2_COMPATIBLE_CLANG_FOUND )
196- set (OPENCL2_ENABLED_SPIRV ON )
217+ AND OPENCL_COMPATIBLE_CLANG_FOUND )
218+ set (OPENCL_ENABLED_SPIRV ON )
197219 message (STATUS "Using CLANG ${LLVM_CLANG} and ${LLVM_SPIRV} for SPIR-V compilation" )
198220 endif ()
199- if (OPENCL2_COMPATIBLE_CLANG_FOUND AND
221+ if (OPENCL_COMPATIBLE_CLANG_FOUND AND
200222 (OpenCL_VERSION_STRING VERSION_GREATER_EQUAL 2.2
201- OR OPENCL2_ENABLED_SPIRV ))
202- set (OPENCL2_ENABLED ON )
203- message (STATUS "Found OpenCL 2 (${OpenCL_VERSION_STRING} SPIR-V ${OPENCL2_ENABLED_SPIRV } with CLANG ${LLVM_PACKAGE_VERSION} )" )
204- elseif (NOT ENABLE_OPENCL2 STREQUAL "AUTO" )
223+ OR OPENCL_ENABLED_SPIRV ))
224+ set (OPENCL_ENABLED ON )
225+ message (STATUS "Found OpenCL 2 (${OpenCL_VERSION_STRING} SPIR-V ${OPENCL_ENABLED_SPIRV } with CLANG ${LLVM_PACKAGE_VERSION} )" )
226+ elseif (NOT ENABLE_OPENCL STREQUAL "AUTO" )
205227 message (FATAL_ERROR "OpenCL 2.x not available" )
206228 else ()
207- set (OPENCL2_ENABLED OFF )
229+ set (OPENCL_ENABLED OFF )
208230 endif ()
209231endif ()
210232
211- # Detect and enable HIP
233+ # ================================== HIP ==================================
212234if (ENABLE_HIP)
213- if ("$ENV{CMAKE_PREFIX_PATH} " MATCHES "rocm" )
235+ if (HIP_AMDGPUTARGET)
236+ set (CMAKE_HIP_ARCHITECTURES "${HIP_AMDGPUTARGET} " )
237+ set (GPU_TARGETS "${HIP_AMDGPUTARGET} " )
238+ endif ()
239+ if (NOT "$ENV{CMAKE_PREFIX_PATH} " MATCHES "rocm" AND NOT CMAKE_PREFIX_PATH MATCHES "rocm" AND EXISTS "/opt/rocm/lib/cmake/" )
240+ list (APPEND CMAKE_PREFIX_PATH "/opt/rocm/lib/cmake" )
241+ endif ()
242+ if ("$ENV{CMAKE_PREFIX_PATH} " MATCHES "rocm" OR CMAKE_PREFIX_PATH MATCHES "rocm" )
214243 set (CMAKE_HIP_STANDARD ${CMAKE_CXX_STANDARD} )
215244 set (CMAKE_HIP_STANDARD_REQUIRED TRUE )
216- if (HIP_AMDGPUTARGET)
217- set (AMDGPU_TARGETS "${HIP_AMDGPUTARGET} " CACHE STRING "AMD GPU targets to compile for" FORCE)
218- set (GPU_TARGETS "${HIP_AMDGPUTARGET} " CACHE STRING "AMD GPU targets to compile for" FORCE)
219- set (CMAKE_HIP_ARCHITECTURES "${HIP_AMDGPUTARGET} " CACHE STRING "AMD GPU targets to compile for" FORCE)
220- endif ()
221- set (TMP_ROCM_DIR_LIST $ENV{CMAKE_PREFIX_PATH} )
245+ set (TMP_ROCM_DIR_LIST "${CMAKE_PREFIX_PATH} :$ENV{CMAKE_PREFIX_PATH} " )
222246 string (REPLACE ":" ";" TMP_ROCM_DIR_LIST "${TMP_ROCM_DIR_LIST} " )
223247 list (FILTER TMP_ROCM_DIR_LIST INCLUDE REGEX rocm)
224248 list (POP_FRONT TMP_ROCM_DIR_LIST TMP_ROCM_DIR)
@@ -260,36 +284,26 @@ if(ENABLE_HIP)
260284 elseif (NOT ENABLE_HIP STREQUAL "AUTO" )
261285 message (FATAL_ERROR "HIP requested, but CMAKE_PREFIX_PATH env variable does not contain rocm folder!" )
262286 endif ()
287+ if (hip_FOUND AND NOT hip_VERSION VERSION_GREATER_EQUAL "6.3" )
288+ set (hip_FOUND 0)
289+ endif ()
263290 if (hip_FOUND AND hipcub_FOUND AND rocthrust_FOUND AND rocprim_FOUND AND hip_HIPCC_EXECUTABLE AND hip_HIPIFY_PERL_EXECUTABLE)
264291 set (HIP_ENABLED ON )
265292 set_target_properties (roc::rocthrust PROPERTIES IMPORTED_GLOBAL TRUE )
266293 message (STATUS "HIP Found (${hip_HIPCC_EXECUTABLE} version ${hip_VERSION} )" )
267- set (O2_HIP_CMAKE_CXX_FLAGS "-fgpu-defer-diag -mllvm -amdgpu-enable-lower-module-lds=false -Wno-invalid-command-line-argument -Wno-unused-command-line-argument -Wno-invalid-constexpr -Wno-ignored-optimization-argument -Wno-unused-private-field -Wno-pass-failed" )
268- if (hip_VERSION VERSION_GREATER_EQUAL "6.0" )
269- set (O2_HIP_CMAKE_CXX_FLAGS "${O2_HIP_CMAKE_CXX_FLAGS} -mllvm -amdgpu-legacy-sgpr-spill-lowering=true" ) # TODO: Cleanup
294+ set (CMAKE_HIP_FLAGS "${O2_GPU_CMAKE_CXX_FLAGS_NOSTD} ${CMAKE_HIP_FLAGS} ${GPUCA_HIP_DENORMALS_FLAGS} " )
295+ set (CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} } ${CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER} }" )
296+ if (GPUCA_KERNEL_RESOURCE_USAGE_VERBOSE)
297+ string (APPEND CMAKE_HIP_FLAGS " -Rpass-analysis=kernel-resource-usage" )
298+ endif ()
299+ if (GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH} )
300+ string (APPEND CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER} " ${GPUCA_CXX_NO_FAST_MATH_FLAGS} " )
301+ elseif (NOT CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG" )
302+ string (APPEND CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER} " -ffast-math -O3" )
270303 endif ()
271- set (O2_HIP_CMAKE_LINK_FLAGS "-Wno-pass-failed" )
272304 string (REGEX REPLACE "(gfx1[0-9]+;?)" "" CMAKE_HIP_ARCHITECTURES "${CMAKE_HIP_ARCHITECTURES} " ) # ROCm currently doesn’t support integrated graphics
273305 if (HIP_AMDGPUTARGET)
274- foreach (HIP_ARCH ${HIP_AMDGPUTARGET} )
275- set (O2_HIP_CMAKE_CXX_FLAGS "${O2_HIP_CMAKE_CXX_FLAGS} --offload-arch=${HIP_ARCH} " )
276- set (O2_HIP_CMAKE_LINK_FLAGS "${O2_HIP_CMAKE_LINK_FLAGS} --offload-arch=${HIP_ARCH} " )
277- endforeach ()
278- set (CMAKE_HIP_ARCHITECTURES "${HIP_AMDGPUTARGET} " ) # If GPU build is enforced we override autodetection
279- endif ()
280- if (NOT DEFINED GPUCA_NO_FAST_MATH OR NOT ${GPUCA_NO_FAST_MATH} )
281- set (O2_HIP_CMAKE_CXX_FLAGS "${O2_HIP_CMAKE_CXX_FLAGS} -fgpu-flush-denormals-to-zero -ffast-math" )
282- endif ()
283- if (CMAKE_CXX_COMPILER MATCHES "bin/c\\ +\\ +\$ " AND NOT CMAKE_CXX_COMPILER MATCHES "^/usr/bin" )
284- string (REGEX REPLACE "(.*)bin/c\\ +\\ +\$ " "\\ 1" HIP_GCC_TOOLCHAIN_PATH "${CMAKE_CXX_COMPILER} " )
285- set (O2_HIP_CMAKE_CXX_FLAGS "${O2_HIP_CMAKE_CXX_FLAGS} --gcc-toolchain=${HIP_GCC_TOOLCHAIN_PATH} " )
286- endif ()
287- set (CMAKE_HIP_FLAGS "${O2_GPU_CMAKE_CXX_FLAGS_NOSTD} ${CMAKE_HIP_FLAGS} ${O2_HIP_CMAKE_CXX_FLAGS} " )
288- set (CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} } ${CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER} }" )
289- if (CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG" )
290- set (CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER} } -O0 -ggdb" )
291- else ()
292- set (CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER} } -O3" )
306+ set (CMAKE_HIP_ARCHITECTURES "${HIP_AMDGPUTARGET} " )
293307 endif ()
294308 else ()
295309 set (HIP_ENABLED OFF )
@@ -315,9 +329,10 @@ if(ENABLE_HIP)
315329 endif ()
316330 message (FATAL_ERROR "HIP requested but some of the above packages are not found" )
317331 endif ()
318-
319332endif ()
320333
321334# if we end up here without a FATAL, it means we have found the "O2GPU" package
322335set (O2GPU_FOUND TRUE )
323- include ("${CMAKE_CURRENT_LIST_DIR} /../GPU/GPUTracking/cmake/kernel_helpers.cmake" )
336+ if (NOT GPUCA_FINDO2GPU_CHECK_ONLY)
337+ include ("${CMAKE_CURRENT_LIST_DIR} /../GPU/GPUTracking/cmake/kernel_helpers.cmake" )
338+ endif ()
0 commit comments