Skip to content

Commit 014c3dd

Browse files
committed
fixed file
1 parent 29f8cb5 commit 014c3dd

File tree

1 file changed

+115
-100
lines changed

1 file changed

+115
-100
lines changed

dependencies/FindO2GPU.cmake

Lines changed: 115 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -9,25 +9,27 @@
99
# granted to it by virtue of its status as an Intergovernmental Organization
1010
# or submit itself to any jurisdiction.
1111

12+
# NOTE!!!! - Whenever this file is changed, move it over to alidist/resources
13+
# FindO2GPU.cmake Version 1
14+
1215
# Backend switches: each ENABLE_<backend> variable falls back to "AUTO" when the
# caller has not defined it. The values are upper-cased afterwards, so that the
# later STREQUAL "AUTO" comparisons do not depend on the case the user typed.
if(NOT DEFINED ENABLE_CUDA)
1316
set(ENABLE_CUDA "AUTO")
1417
endif()
15-
if(NOT DEFINED ENABLE_OPENCL1)
16-
set(ENABLE_OPENCL1 "AUTO")
17-
endif()
18-
if(NOT DEFINED ENABLE_OPENCL2)
19-
set(ENABLE_OPENCL2 "AUTO")
18+
if(NOT DEFINED ENABLE_OPENCL)
19+
set(ENABLE_OPENCL "AUTO")
2020
endif()
2121
if(NOT DEFINED ENABLE_HIP)
2222
set(ENABLE_HIP "AUTO")
2323
endif()
2424
string(TOUPPER "${ENABLE_CUDA}" ENABLE_CUDA)
25-
string(TOUPPER "${ENABLE_OPENCL1}" ENABLE_OPENCL1)
26-
string(TOUPPER "${ENABLE_OPENCL2}" ENABLE_OPENCL2)
25+
string(TOUPPER "${ENABLE_OPENCL}" ENABLE_OPENCL)
2726
string(TOUPPER "${ENABLE_HIP}" ENABLE_HIP)
2827
# Upper-cased copy of CMAKE_BUILD_TYPE; only computed when the caller has not
# already defined CMAKE_BUILD_TYPE_UPPER. It is used below as the suffix of the
# per-configuration flag variables (CMAKE_<LANG>_FLAGS_<CONFIG>).
if(NOT DEFINED CMAKE_BUILD_TYPE_UPPER)
2928
string(TOUPPER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE_UPPER)
3029
endif()
30+
# GPUCA_BUILD_DEBUG is set to 1 for DEBUG builds and is left untouched otherwise.
if(CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG")
31+
set(GPUCA_BUILD_DEBUG 1)
32+
endif()
3133

3234
if(CUDA_COMPUTETARGET AND CUDA_COMPUTETARGET STREQUAL "default")
3335
set(CUDA_COMPUTETARGET 86 89)
@@ -65,10 +67,50 @@ function(set_target_hip_arch target)
6567
endif()
6668
endfunction()
6769

68-
# Detect and enable CUDA
69-
STRING(REGEX REPLACE "\-std=[^ ]*" "" O2_GPU_CMAKE_CXX_FLAGS_NOSTD "${CMAKE_CXX_FLAGS}") # Need to strip c++17 imposed by alidist defaults
70+
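# Strip any -std=... option from CMAKE_CXX_FLAGS (the alidist defaults impose c++17), so that the remaining flags can be reused for the GPU compilers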
71+
STRING(REGEX REPLACE "\-std=[^ ]*" "" O2_GPU_CMAKE_CXX_FLAGS_NOSTD "${CMAKE_CXX_FLAGS}")
7072

73+
# ================================== Fast Math / Deterministic Mode ==================================
# This section resolves GPUCA_DETERMINISTIC_MODE to a numeric level and defines
# the per-backend denormal / no-fast-math flag strings (GPUCA_<backend>_*_FLAGS).
74+
# set(GPUCA_DETERMINISTIC_MODE WHOLEO2) # Override
75+
# Symbolic names accepted for GPUCA_DETERMINISTIC_MODE and the level each maps to.
# Levels are cumulative: later code tests them with GREATER_EQUAL.
set(GPUCA_DETERMINISTIC_MODE_MAP_OFF 0)
76+
set(GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH 1) # No -ffast-math and similar compile flags for GPU folder
77+
set(GPUCA_DETERMINISTIC_MODE_MAP_OPTO2 2) # In addition, -O2 optimization on host for GPU folder
78+
set(GPUCA_DETERMINISTIC_MODE_MAP_GPU 3) # In addition, GPUCA_DETERMINISTIC_MODE define for GPU folder
79+
set(GPUCA_DETERMINISTIC_MODE_MAP_ON 3) # Synonym for GPU
80+
set(GPUCA_DETERMINISTIC_MODE_MAP_WHOLEO2 4) # As GPU but for whole O2 code
81+
# Normalize the setting to a number:
#  - unset                -> 0
#  - purely numeric value -> kept as given (no range check is performed here)
#  - anything else        -> must match one of the GPUCA_DETERMINISTIC_MODE_MAP_<name>
#                            variables above (no upper-casing is applied, so the
#                            name has to match exactly), otherwise FATAL_ERROR
if(NOT DEFINED GPUCA_DETERMINISTIC_MODE)
82+
set(GPUCA_DETERMINISTIC_MODE 0)
83+
elseif(NOT GPUCA_DETERMINISTIC_MODE MATCHES "^[0-9]+$")
84+
if(NOT DEFINED GPUCA_DETERMINISTIC_MODE_MAP_${GPUCA_DETERMINISTIC_MODE})
85+
message(FATAL_ERROR "Invalid setting ${GPUCA_DETERMINISTIC_MODE} for GPUCA_DETERMINISTIC_MODE")
86+
endif()
87+
set(GPUCA_DETERMINISTIC_MODE ${GPUCA_DETERMINISTIC_MODE_MAP_${GPUCA_DETERMINISTIC_MODE}})
88+
endif()
89+
# Host compiler denormals flag: left empty on Darwin and whenever
# CMAKE_SYSTEM_PROCESSOR does not match x86/X86/amd64/AMD64; -mdaz-ftz otherwise.
if (CMAKE_SYSTEM_NAME MATCHES Darwin OR NOT CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)")
90+
set(GPUCA_CXX_DENORMALS_FLAGS "")
91+
else()
92+
set(GPUCA_CXX_DENORMALS_FLAGS "-mdaz-ftz")
93+
endif()
94+
# Denormals flags for the GPU backends (CUDA, OpenCL, HIP).
set(GPUCA_CUDA_DENORMALS_FLAGS "--ftz=true")
95+
set(GPUCA_OCL_DENORMALS_FLAGS "-cl-denorms-are-zero")
96+
set(GPUCA_HIP_DENORMALS_FLAGS "-fgpu-flush-denormals-to-zero")
97+
# Flags that disable fast-math style optimizations, per compiler family.
set(GPUCA_CXX_NO_FAST_MATH_FLAGS "-fno-fast-math -ffp-contract=off")
98+
set(GPUCA_CUDA_NO_FAST_MATH_FLAGS "--prec-div=true --prec-sqrt=true --fmad false")
99+
set(GPUCA_OCL_NO_FAST_MATH_FLAGS -cl-fp32-correctly-rounded-divide-sqrt )
100+
# From level WHOLEO2 upward: add the GPUCA_DETERMINISTIC_MODE define at directory
# scope and append the host no-fast-math flags to the C and C++ flags of the
# current build type.
# NOTE(review): if CMAKE_BUILD_TYPE_UPPER is empty, these appends go to
# CMAKE_CXX_FLAGS_ / CMAKE_C_FLAGS_ - confirm that a build type is always set.
if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_WHOLEO2})
101+
add_definitions(-DGPUCA_DETERMINISTIC_MODE)
102+
string(APPEND CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} " ${GPUCA_CXX_NO_FAST_MATH_FLAGS}")
103+
string(APPEND CMAKE_C_FLAGS_${CMAKE_BUILD_TYPE_UPPER} " ${GPUCA_CXX_NO_FAST_MATH_FLAGS}")
104+
endif()
105+
106+
107+
# ================================== CUDA ==================================
71108
if(ENABLE_CUDA)
109+
if(CUDA_COMPUTETARGET)
110+
set(CMAKE_CUDA_ARCHITECTURES ${CUDA_COMPUTETARGET})
111+
else()
112+
set(CMAKE_CUDA_ARCHITECTURES 61-virtual)
113+
endif()
72114
set(CMAKE_CUDA_STANDARD ${CMAKE_CXX_STANDARD})
73115
set(CMAKE_CUDA_STANDARD_REQUIRED TRUE)
74116
include(CheckLanguage)
@@ -84,11 +126,6 @@ if(ENABLE_CUDA)
84126
message(STATUS "Using as CUDA GCC version: ${GPUCA_CUDA_GCCBIN}")
85127
set(CMAKE_CUDA_HOST_COMPILER "${GPUCA_CUDA_GCCBIN}")
86128
endif()
87-
if(CUDA_COMPUTETARGET)
88-
set(CMAKE_CUDA_ARCHITECTURES ${CUDA_COMPUTETARGET} CACHE STRING "" FORCE)
89-
else()
90-
set(CMAKE_CUDA_ARCHITECTURES 61-virtual CACHE STRING "" FORCE)
91-
endif()
92129
enable_language(CUDA)
93130
get_property(LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES)
94131
if (ENABLE_CUDA STREQUAL "AUTO")
@@ -105,30 +142,36 @@ if(ENABLE_CUDA)
105142
message(${FAILURE_SEVERITY} "CUDA found but thrust not available")
106143
set(CMAKE_CUDA_COMPILER OFF)
107144
endif()
108-
if (NOT CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "11.4")
109-
message(${FAILURE_SEVERITY} "CUDA Version too old: ${CMAKE_CUDA_COMPILER_VERSION}, 11.4 required")
145+
if (NOT CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "12.8")
146+
message(${FAILURE_SEVERITY} "CUDA Version too old: ${CMAKE_CUDA_COMPILER_VERSION}, 12.8 required")
110147
set(CMAKE_CUDA_COMPILER OFF)
111148
endif()
112149
endif()
113150
if(CMAKE_CUDA_COMPILER)
114-
set(CMAKE_CUDA_FLAGS "-Xcompiler \"${O2_GPU_CMAKE_CXX_FLAGS_NOSTD}\" ${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr --extended-lambda --allow-unsupported-compiler -Xptxas -v -Xcompiler -Wno-attributes")
115-
if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "12.3")
116-
string(APPEND CMAKE_CUDA_FLAGS " -Xcudafe --diag_suppress=20257") # TODO: Cleanup
117-
endif()
151+
set(CMAKE_CUDA_FLAGS "-Xcompiler \"${O2_GPU_CMAKE_CXX_FLAGS_NOSTD}\" ${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr --extended-lambda -Xcompiler -Wno-attributes -Wno-deprecated-gpu-targets ${GPUCA_CUDA_DENORMALS_FLAGS}")
118152
set(CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "-Xcompiler \"${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}}\" ${CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER}}")
153+
if(GPUCA_KERNEL_RESOURCE_USAGE_VERBOSE)
154+
string(APPEND CMAKE_CUDA_FLAGS " -Xptxas -v")
155+
endif()
156+
string(APPEND CMAKE_CUDA_FLAGS " -Xcudafe --diag_suppress=114")
157+
if (NOT ENABLE_CUDA STREQUAL "AUTO")
158+
string(APPEND CMAKE_CUDA_FLAGS " --allow-unsupported-compiler")
159+
endif()
119160
if(CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG")
120-
set(CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -lineinfo -Xptxas -O0 -Xcompiler -O0")
161+
string(APPEND CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} " -lineinfo -Xptxas -O0")
121162
else()
122-
set(CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -Xptxas -O4 -Xcompiler -O4")
163+
string(APPEND CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} " -Xptxas -O4 -Xcompiler -O4")
123164
endif()
124-
set(GPUCA_CUDA_NO_FAST_MATH_FLAGS "--ftz=false --prec-div=true --prec-sqrt=true --fmad false")
125-
if(DEFINED GPUCA_NO_FAST_MATH AND "${GPUCA_NO_FAST_MATH}")
126-
set(CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${GPUCA_CUDA_NO_FAST_MATH_FLAGS}")
165+
if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH})
166+
string(APPEND CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} " ${GPUCA_CUDA_NO_FAST_MATH_FLAGS}")
127167
elseif(NOT CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG")
128-
set(CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -use_fast_math --ftz=true")#
168+
string(APPEND CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} " -use_fast_math ${GPUCA_CUDA_DENORMALS_FLAGS}")
129169
endif()
130170
if(CMAKE_CXX_FLAGS MATCHES "(^| )-Werror( |$)")
131-
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Werror=cross-execution-space-call")
171+
string(APPEND CMAKE_CUDA_FLAGS " -Werror=cross-execution-space-call")
172+
endif()
173+
if(GPUCA_CUDA_GCCBIN)
174+
list(FILTER CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES EXCLUDE REGEX "^/usr/lib.*/gcc/") # Workaround, since CMake adds old GCC lib paths implicitly if we request that gcc for CUDA
132175
endif()
133176

134177
set(CUDA_ENABLED ON)
@@ -140,44 +183,23 @@ if(ENABLE_CUDA)
140183
endif()
141184
endif()
142185

143-
# Detect and enable OpenCL 1.2 from AMD
144-
if(ENABLE_OPENCL1 OR ENABLE_OPENCL2)
186+
# ================================== OpenCL ==================================
187+
if(ENABLE_OPENCL)
145188
find_package(OpenCL)
146-
if((ENABLE_OPENCL1 AND NOT ENABLE_OPENCL1 STREQUAL "AUTO")
147-
OR (ENABLE_OPENCL2 AND NOT ENABLE_OPENCL2 STREQUAL "AUTO"))
189+
if(ENABLE_OPENCL AND NOT ENABLE_OPENCL STREQUAL "AUTO")
148190
set_package_properties(OpenCL PROPERTIES TYPE REQUIRED)
149191
else()
150192
set_package_properties(OpenCL PROPERTIES TYPE OPTIONAL)
151193
endif()
152-
endif()
153-
if(ENABLE_OPENCL1)
154-
if(NOT AMDAPPSDKROOT)
155-
set(AMDAPPSDKROOT "$ENV{AMDAPPSDKROOT}")
156-
endif()
157-
158-
if(OpenCL_FOUND
159-
AND OpenCL_VERSION_STRING VERSION_GREATER_EQUAL 1.2
160-
AND AMDAPPSDKROOT
161-
AND EXISTS "${AMDAPPSDKROOT}")
162-
set(OPENCL1_ENABLED ON)
163-
message(STATUS "Found AMD OpenCL 1.2")
164-
elseif(NOT ENABLE_OPENCL1 STREQUAL "AUTO")
165-
message(FATAL_ERROR "AMD OpenCL 1.2 not available")
166-
else()
167-
set(OPENCL1_ENABLED OFF)
168-
endif()
169-
endif()
170-
171-
# Detect and enable OpenCL 2.x
172-
if(ENABLE_OPENCL2)
173-
find_package(OpenCL)
174-
find_package(LLVM)
175-
if(LLVM_FOUND)
176-
find_package(Clang)
194+
if(NOT OPENCL_COMPATIBLE_CLANG_FOUND)
195+
find_package(LLVM)
196+
if(LLVM_FOUND)
197+
find_package(Clang)
198+
endif()
177199
endif()
178200
if (GPUCA_OPENCL_CLANGBIN)
179201
set(LLVM_CLANG ${GPUCA_OPENCL_CLANGBIN})
180-
execute_process(COMMAND "which" "/usr/lib/llvm/15/bin/clang-15" OUTPUT_VARIABLE TMP_LLVM_SPIRV_PATH COMMAND_ERROR_IS_FATAL ANY)
202+
execute_process(COMMAND "which" "${GPUCA_OPENCL_CLANGBIN}" OUTPUT_VARIABLE TMP_LLVM_SPIRV_PATH COMMAND_ERROR_IS_FATAL ANY)
181203
cmake_path(GET TMP_LLVM_SPIRV_PATH PARENT_PATH TMP_LLVM_SPIRV_PATH)
182204
find_program(LLVM_SPIRV llvm-spirv HINTS "${TMP_LLVM_SPIRV_PATH}")
183205
else()
@@ -187,38 +209,40 @@ if(ENABLE_OPENCL2)
187209
if(Clang_FOUND
188210
AND LLVM_FOUND
189211
AND NOT LLVM_CLANG STREQUAL "LLVM_CLANG-NOTFOUND"
190-
AND LLVM_PACKAGE_VERSION VERSION_GREATER_EQUAL 13.0)
191-
set(OPENCL2_COMPATIBLE_CLANG_FOUND ON)
212+
AND LLVM_PACKAGE_VERSION VERSION_GREATER_EQUAL 18.0)
213+
set(OPENCL_COMPATIBLE_CLANG_FOUND ON)
192214
endif()
193215
if(OpenCL_VERSION_STRING VERSION_GREATER_EQUAL 2.2
194216
AND NOT LLVM_SPIRV STREQUAL "LLVM_SPIRV-NOTFOUND"
195-
AND OPENCL2_COMPATIBLE_CLANG_FOUND)
196-
set(OPENCL2_ENABLED_SPIRV ON)
217+
AND OPENCL_COMPATIBLE_CLANG_FOUND)
218+
set(OPENCL_ENABLED_SPIRV ON)
197219
message(STATUS "Using CLANG ${LLVM_CLANG} and ${LLVM_SPIRV} for SPIR-V compilation")
198220
endif ()
199-
if(OPENCL2_COMPATIBLE_CLANG_FOUND AND
221+
if(OPENCL_COMPATIBLE_CLANG_FOUND AND
200222
(OpenCL_VERSION_STRING VERSION_GREATER_EQUAL 2.2
201-
OR OPENCL2_ENABLED_SPIRV))
202-
set(OPENCL2_ENABLED ON)
203-
message(STATUS "Found OpenCL 2 (${OpenCL_VERSION_STRING} SPIR-V ${OPENCL2_ENABLED_SPIRV} with CLANG ${LLVM_PACKAGE_VERSION})")
204-
elseif(NOT ENABLE_OPENCL2 STREQUAL "AUTO")
223+
OR OPENCL_ENABLED_SPIRV))
224+
set(OPENCL_ENABLED ON)
225+
message(STATUS "Found OpenCL 2 (${OpenCL_VERSION_STRING} SPIR-V ${OPENCL_ENABLED_SPIRV} with CLANG ${LLVM_PACKAGE_VERSION})")
226+
elseif(NOT ENABLE_OPENCL STREQUAL "AUTO")
205227
message(FATAL_ERROR "OpenCL 2.x not available")
206228
else()
207-
set(OPENCL2_ENABLED OFF)
229+
set(OPENCL_ENABLED OFF)
208230
endif()
209231
endif()
210232

211-
# Detect and enable HIP
233+
# ================================== HIP ==================================
212234
if(ENABLE_HIP)
213-
if("$ENV{CMAKE_PREFIX_PATH}" MATCHES "rocm")
235+
if(HIP_AMDGPUTARGET)
236+
set(CMAKE_HIP_ARCHITECTURES "${HIP_AMDGPUTARGET}")
237+
set(GPU_TARGETS "${HIP_AMDGPUTARGET}")
238+
endif()
239+
if(NOT "$ENV{CMAKE_PREFIX_PATH}" MATCHES "rocm" AND NOT CMAKE_PREFIX_PATH MATCHES "rocm" AND EXISTS "/opt/rocm/lib/cmake/")
240+
list(APPEND CMAKE_PREFIX_PATH "/opt/rocm/lib/cmake")
241+
endif()
242+
if("$ENV{CMAKE_PREFIX_PATH}" MATCHES "rocm" OR CMAKE_PREFIX_PATH MATCHES "rocm")
214243
set(CMAKE_HIP_STANDARD ${CMAKE_CXX_STANDARD})
215244
set(CMAKE_HIP_STANDARD_REQUIRED TRUE)
216-
if(HIP_AMDGPUTARGET)
217-
set(AMDGPU_TARGETS "${HIP_AMDGPUTARGET}" CACHE STRING "AMD GPU targets to compile for" FORCE)
218-
set(GPU_TARGETS "${HIP_AMDGPUTARGET}" CACHE STRING "AMD GPU targets to compile for" FORCE)
219-
set(CMAKE_HIP_ARCHITECTURES "${HIP_AMDGPUTARGET}" CACHE STRING "AMD GPU targets to compile for" FORCE)
220-
endif()
221-
set(TMP_ROCM_DIR_LIST $ENV{CMAKE_PREFIX_PATH})
245+
set(TMP_ROCM_DIR_LIST "${CMAKE_PREFIX_PATH}:$ENV{CMAKE_PREFIX_PATH}")
222246
string(REPLACE ":" ";" TMP_ROCM_DIR_LIST "${TMP_ROCM_DIR_LIST}")
223247
list(FILTER TMP_ROCM_DIR_LIST INCLUDE REGEX rocm)
224248
list(POP_FRONT TMP_ROCM_DIR_LIST TMP_ROCM_DIR)
@@ -260,36 +284,26 @@ if(ENABLE_HIP)
260284
elseif(NOT ENABLE_HIP STREQUAL "AUTO")
261285
message(FATAL_ERROR "HIP requested, but CMAKE_PREFIX_PATH env variable does not contain rocm folder!")
262286
endif()
287+
if(hip_FOUND AND NOT hip_VERSION VERSION_GREATER_EQUAL "6.3")
288+
set(hip_FOUND 0)
289+
endif()
263290
if(hip_FOUND AND hipcub_FOUND AND rocthrust_FOUND AND rocprim_FOUND AND hip_HIPCC_EXECUTABLE AND hip_HIPIFY_PERL_EXECUTABLE)
264291
set(HIP_ENABLED ON)
265292
set_target_properties(roc::rocthrust PROPERTIES IMPORTED_GLOBAL TRUE)
266293
message(STATUS "HIP Found (${hip_HIPCC_EXECUTABLE} version ${hip_VERSION})")
267-
set(O2_HIP_CMAKE_CXX_FLAGS "-fgpu-defer-diag -mllvm -amdgpu-enable-lower-module-lds=false -Wno-invalid-command-line-argument -Wno-unused-command-line-argument -Wno-invalid-constexpr -Wno-ignored-optimization-argument -Wno-unused-private-field -Wno-pass-failed")
268-
if(hip_VERSION VERSION_GREATER_EQUAL "6.0")
269-
set(O2_HIP_CMAKE_CXX_FLAGS "${O2_HIP_CMAKE_CXX_FLAGS} -mllvm -amdgpu-legacy-sgpr-spill-lowering=true") # TODO: Cleanup
294+
set(CMAKE_HIP_FLAGS "${O2_GPU_CMAKE_CXX_FLAGS_NOSTD} ${CMAKE_HIP_FLAGS} ${GPUCA_HIP_DENORMALS_FLAGS}")
295+
set(CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER}}")
296+
if(GPUCA_KERNEL_RESOURCE_USAGE_VERBOSE)
297+
string(APPEND CMAKE_HIP_FLAGS " -Rpass-analysis=kernel-resource-usage")
298+
endif()
299+
if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH})
300+
string(APPEND CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER} " ${GPUCA_CXX_NO_FAST_MATH_FLAGS}")
301+
elseif(NOT CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG")
302+
string(APPEND CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER} " -ffast-math -O3")
270303
endif()
271-
set(O2_HIP_CMAKE_LINK_FLAGS "-Wno-pass-failed")
272304
string(REGEX REPLACE "(gfx1[0-9]+;?)" "" CMAKE_HIP_ARCHITECTURES "${CMAKE_HIP_ARCHITECTURES}") # ROCm currently doesn’t support integrated graphics
273305
if(HIP_AMDGPUTARGET)
274-
foreach(HIP_ARCH ${HIP_AMDGPUTARGET})
275-
set(O2_HIP_CMAKE_CXX_FLAGS "${O2_HIP_CMAKE_CXX_FLAGS} --offload-arch=${HIP_ARCH}")
276-
set(O2_HIP_CMAKE_LINK_FLAGS "${O2_HIP_CMAKE_LINK_FLAGS} --offload-arch=${HIP_ARCH}")
277-
endforeach()
278-
set(CMAKE_HIP_ARCHITECTURES "${HIP_AMDGPUTARGET}") # If GPU build is enforced we override autodetection
279-
endif()
280-
if(NOT DEFINED GPUCA_NO_FAST_MATH OR NOT ${GPUCA_NO_FAST_MATH})
281-
set(O2_HIP_CMAKE_CXX_FLAGS "${O2_HIP_CMAKE_CXX_FLAGS} -fgpu-flush-denormals-to-zero -ffast-math")
282-
endif()
283-
if (CMAKE_CXX_COMPILER MATCHES "bin/c\\+\\+\$" AND NOT CMAKE_CXX_COMPILER MATCHES "^/usr/bin")
284-
string(REGEX REPLACE "(.*)bin/c\\+\\+\$" "\\1" HIP_GCC_TOOLCHAIN_PATH "${CMAKE_CXX_COMPILER}")
285-
set(O2_HIP_CMAKE_CXX_FLAGS "${O2_HIP_CMAKE_CXX_FLAGS} --gcc-toolchain=${HIP_GCC_TOOLCHAIN_PATH}")
286-
endif()
287-
set(CMAKE_HIP_FLAGS "${O2_GPU_CMAKE_CXX_FLAGS_NOSTD} ${CMAKE_HIP_FLAGS} ${O2_HIP_CMAKE_CXX_FLAGS}")
288-
set(CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER}}")
289-
if(CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG")
290-
set(CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O0 -ggdb")
291-
else()
292-
set(CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O3")
306+
set(CMAKE_HIP_ARCHITECTURES "${HIP_AMDGPUTARGET}")
293307
endif()
294308
else()
295309
set(HIP_ENABLED OFF)
@@ -315,9 +329,10 @@ if(ENABLE_HIP)
315329
endif()
316330
message(FATAL_ERROR "HIP requested but some of the above packages are not found")
317331
endif()
318-
319332
endif()
320333

321334
# If we reach this point without a FATAL_ERROR, the "O2GPU" package has been found
322335
set(O2GPU_FOUND TRUE)
323-
include("${CMAKE_CURRENT_LIST_DIR}/../GPU/GPUTracking/cmake/kernel_helpers.cmake")
336+
# Include kernel_helpers.cmake (path relative to this file's directory) unless
# GPUCA_FINDO2GPU_CHECK_ONLY is set to a true value.
if (NOT GPUCA_FINDO2GPU_CHECK_ONLY)
337+
include("${CMAKE_CURRENT_LIST_DIR}/../GPU/GPUTracking/cmake/kernel_helpers.cmake")
338+
endif()

0 commit comments

Comments
 (0)