Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
#define THRUST_NAMESPACE thrust::hip
#endif

#ifdef GPUCA_NO_FAST_MATH
#ifdef GPUCA_DETERMINISTIC_MODE
#define GPU_BLOCKS 1
#define GPU_THREADS 1
#else
Expand Down Expand Up @@ -1452,4 +1452,4 @@ template void processNeighboursHandler<7>(const int startLayer,
const o2::base::PropagatorF::MatCorrType matCorrType,
const int nBlocks,
const int nThreads);
} // namespace o2::its
} // namespace o2::its
8 changes: 0 additions & 8 deletions GPU/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,6 @@
# CMake, variables are defined for Sources / Headers first. Then, the actual
# CMake build scripts use these variables.

if(NOT DEFINED GPUCA_NO_FAST_MATH)
set(GPUCA_NO_FAST_MATH 0)
endif()
set(GPUCA_CXX_NO_FAST_MATH_FLAGS "-fno-fast-math -ffp-contract=off")
if(${GPUCA_NO_FAST_MATH})
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GPUCA_CXX_NO_FAST_MATH_FLAGS}")
endif()

add_subdirectory(Common)
add_subdirectory(Utils)
add_subdirectory(TPCFastTransformation)
Expand Down
6 changes: 3 additions & 3 deletions GPU/Common/GPUCommonMath.h
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@ GPUdi() uint32_t GPUCommonMath::Float2UIntReint(const float& x)
#endif
}

#ifdef GPUCA_NO_FAST_MATH
#ifdef GPUCA_DETERMINISTIC_MODE
GPUdi() constexpr float GPUCommonMath::Round(float x) { return GPUCA_CHOICE(roundf(x), roundf(x), round(x)); }
GPUdi() constexpr int32_t GPUCommonMath::Float2IntRn(float x) { return (int32_t)Round(x); }
GPUhdi() constexpr float GPUCommonMath::Sqrt(float x) { return GPUCA_CHOICE(sqrtf(x), (float)sqrt((double)x), sqrt(x)); }
Expand Down Expand Up @@ -286,7 +286,7 @@ GPUdi() constexpr bool GPUCommonMath::IsNaN(float x) { return false; }

GPUhdi() void GPUCommonMath::SinCos(float x, float& s, float& c)
{
#if defined(GPUCA_NO_FAST_MATH) && !defined(__OPENCL__)
#if defined(GPUCA_DETERMINISTIC_MODE) && !defined(__OPENCL__)
s = sin((double)x);
c = cos((double)x);
#elif !defined(GPUCA_GPUCODE_DEVICE) && defined(__APPLE__)
Expand Down Expand Up @@ -392,7 +392,7 @@ GPUdi() T GPUCommonMath::MaxWithRef(T x, T y, T z, T w, S refX, S refY, S refZ,

GPUdi() float GPUCommonMath::InvSqrt(float _x)
{
#if defined(GPUCA_NO_FAST_MATH) || defined(__OPENCL__)
#if defined(GPUCA_DETERMINISTIC_MODE) || defined(__OPENCL__)
return 1.f / Sqrt(_x);
#elif defined(__CUDACC__) || defined(__HIPCC__)
return __frsqrt_rn(_x);
Expand Down
4 changes: 2 additions & 2 deletions GPU/GPUTracking/Base/GPUReconstruction.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -261,8 +261,8 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice()
mProcessingSettings.deterministicGPUReconstruction = mProcessingSettings.debugLevel >= 6;
}
if (mProcessingSettings.deterministicGPUReconstruction) {
#ifndef GPUCA_NO_FAST_MATH
GPUError("Warning, deterministicGPUReconstruction needs GPUCA_NO_FAST_MATH for being fully deterministic, without only most indeterminism by concurrency is removed, but floating point effects remain!");
#ifndef GPUCA_DETERMINISTIC_MODE
GPUError("Warning, deterministicGPUReconstruction needs GPUCA_DETERMINISTIC_MODE for being fully deterministic, without only most indeterminism by concurrency is removed, but floating point effects remain!");
#endif
mProcessingSettings.overrideClusterizerFragmentLen = TPC_MAX_FRAGMENT_LEN_GPU;
param().rec.tpc.nWaysOuter = true;
Expand Down
2 changes: 1 addition & 1 deletion GPU/GPUTracking/Base/cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ elseif(GPUCA_CUDA_COMPILE_MODE STREQUAL "perkernel")
TARGET_DIRECTORY ${targetName}
PROPERTIES
COMPILE_FLAGS "${GPUCA_CUDA_NO_FAST_MATH_FLAGS}"
COMPILE_DEFINITIONS "GPUCA_NO_FAST_MATH")
COMPILE_DEFINITIONS "GPUCA_DETERMINISTIC_MODE")
elseif(GPUCA_CUDA_COMPILE_MODE STREQUAL "rdc")
message(FATAL_ERROR "CUDA RDC compilation of GPUReconstruction ios not yet working!")
target_compile_definitions(${targetName} PRIVATE GPUCA_KERNEL_COMPILE_MODE=2)
Expand Down
2 changes: 1 addition & 1 deletion GPU/GPUTracking/Base/hip/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ elseif(GPUCA_HIP_COMPILE_MODE STREQUAL "perkernel")
TARGET_DIRECTORY ${targetName}
PROPERTIES
COMPILE_FLAGS "${GPUCA_CXX_NO_FAST_MATH_FLAGS}"
COMPILE_DEFINITIONS "GPUCA_NO_FAST_MATH")
COMPILE_DEFINITIONS "GPUCA_DETERMINISTIC_MODE")
elseif(GPUCA_HIP_COMPILE_MODE STREQUAL "rdc")
message(FATAL_ERROR "HIP RDC compilation of GPUReconstruction ios not yet working!")
target_compile_definitions(${targetName} PRIVATE GPUCA_KERNEL_COMPILE_MODE=2)
Expand Down
4 changes: 2 additions & 2 deletions GPU/GPUTracking/Base/opencl/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,10 @@ set(CL_SRC ${GPUDIR}/Base/opencl/GPUReconstructionOCL.cl)
set(CL_BIN ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionOCLCode)

set(OCL_FLAGS -Dcl_clang_storage_class_specifiers -cl-std=CLC++2021)
if(NOT DEFINED GPUCA_NO_FAST_MATH OR NOT ${GPUCA_NO_FAST_MATH})
if(NOT GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH})
set(OCL_FLAGS ${OCL_FLAGS} -cl-denorms-are-zero -cl-mad-enable -cl-no-signed-zeros -cl-fast-relaxed-math)
else()
set(OCL_FLAGS ${OCL_FLAGS} -cl-fp32-correctly-rounded-divide-sqrt)
set(OCL_FLAGS ${OCL_FLAGS} -cl-fp32-correctly-rounded-divide-sqrt)
endif()
set(OCL_DEFINECL "-D$<JOIN:$<TARGET_PROPERTY:O2::GPUTracking,COMPILE_DEFINITIONS>,$<SEMICOLON>-D>"
"-I$<JOIN:$<FILTER:$<TARGET_PROPERTY:O2::GPUTracking,INCLUDE_DIRECTORIES>,EXCLUDE,^/usr/include/?>,$<SEMICOLON>-I>"
Expand Down
15 changes: 10 additions & 5 deletions GPU/GPUTracking/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,13 @@ set(MODULE GPUTracking)
# set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O0") # to uncomment if needed, tired of typing this...
# set(GPUCA_BUILD_DEBUG 1)

if(NOT "${GPUCA_NO_FAST_MATH}" AND NOT CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffast-math")
if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH})
set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${GPUCA_CXX_NO_FAST_MATH_FLAGS}")
if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_OPTO2})
set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O2")
endif()
elseif(NOT CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG")
set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O3 -ffast-math")
endif()

include(cmake/helpers.cmake)
Expand Down Expand Up @@ -375,7 +380,7 @@ set_source_files_properties(DataCompression/GPUTPCCompressionTrackModel.cxx
TARGET_DIRECTORY ${targetName}
PROPERTIES
COMPILE_FLAGS "${GPUCA_CXX_NO_FAST_MATH_FLAGS}"
COMPILE_DEFINITIONS "GPUCA_NO_FAST_MATH")
COMPILE_DEFINITIONS "GPUCA_DETERMINISTIC_MODE")

# GPUReconstructionLibrary needs to know which GPU backends are enabled for proper error messages
configure_file(Base/GPUReconstructionAvailableBackends.template.h ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionAvailableBackends.h)
Expand Down Expand Up @@ -417,6 +422,6 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2" OR ALIGPU_BUILD_TYPE STREQUAL "Standalone")
endif()
endif()

if(${GPUCA_NO_FAST_MATH})
target_compile_definitions(${targetName} PUBLIC GPUCA_NO_FAST_MATH)
if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_GPU})
target_compile_definitions(${targetName} PUBLIC GPUCA_DETERMINISTIC_MODE)
endif()
2 changes: 1 addition & 1 deletion GPU/GPUTracking/Definitions/GPUDefGPUParameters.h
Original file line number Diff line number Diff line change
Expand Up @@ -566,7 +566,7 @@
#ifndef GPUCA_MERGER_INTERPOLATION_ERROR_TYPE
#define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE float
#endif
#ifdef GPUCA_NO_FAST_MATH
#ifdef GPUCA_DETERMINISTIC_MODE
#undef GPUCA_MERGER_INTERPOLATION_ERROR_TYPE
#define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE float
#undef GPUCA_DEDX_STORAGE_TYPE
Expand Down
24 changes: 12 additions & 12 deletions GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -723,13 +723,13 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks<1>(int32_t nBlocks, int32_t nThrea

if (iThread == 0) {
if (iBlock == 0) {
#ifdef GPUCA_NO_FAST_MATH // TODO: Use a better define as swith
#ifdef GPUCA_DETERMINISTIC_MODE
GPUCommonAlgorithm::sortDeviceDynamic(range1, range1 + N1, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return (a.fMin != b.fMin) ? (a.fMin < b.fMin) : (a.fId < b.fId); });
#else
GPUCommonAlgorithm::sortDeviceDynamic(range1, range1 + N1, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return a.fMin < b.fMin; });
#endif
} else if (iBlock == 1) {
#ifdef GPUCA_NO_FAST_MATH // TODO: Use a better define as swith
#ifdef GPUCA_DETERMINISTIC_MODE
GPUCommonAlgorithm::sortDeviceDynamic(range2, range2 + N2, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return (a.fMax != b.fMax) ? (a.fMax < b.fMax) : (a.fId < b.fId); });
#else
GPUCommonAlgorithm::sortDeviceDynamic(range2, range2 + N2, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return a.fMax < b.fMax; });
Expand All @@ -749,7 +749,7 @@ namespace // anonymous
struct MergeBorderTracks_compMax {
GPUd() bool operator()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b)
{
#ifdef GPUCA_NO_FAST_MATH // TODO: Use a better define as swith
#ifdef GPUCA_DETERMINISTIC_MODE
return (a.fMax != b.fMax) ? (a.fMax < b.fMax) : (a.fId < b.fId);
#else
return a.fMax < b.fMax;
Expand All @@ -759,7 +759,7 @@ struct MergeBorderTracks_compMax {
struct MergeBorderTracks_compMin {
GPUd() bool operator()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b)
{
#ifdef GPUCA_NO_FAST_MATH // TODO: Use a better define as swith
#ifdef GPUCA_DETERMINISTIC_MODE
return (a.fMin != b.fMin) ? (a.fMin < b.fMin) : (a.fId < b.fId);
#else
return a.fMin < b.fMin;
Expand Down Expand Up @@ -906,7 +906,7 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks<2>(int32_t nBlocks, int32_t nThrea

mTrackLinks[b1.TrackID()] = iBest2;
if (mergeMode > 0) {
#if defined(GPUCA_NO_FAST_MATH) // TODO: Use a better define as swith
#ifdef GPUCA_DETERMINISTIC_MODE
CAMath::AtomicMax(&mTrackLinks[iBest2], b1.TrackID());
#else
mTrackLinks[iBest2] = b1.TrackID();
Expand Down Expand Up @@ -1469,7 +1469,7 @@ struct GPUTPCGMMerger_CompareClusterIdsLooper {
if (a1.row != b1.row) {
return ((a1.row > b1.row) ^ ((a.leg - leg) & 1) ^ outwards);
}
#ifdef GPUCA_NO_FAST_MATH // TODO: Use a better define as swith
#ifdef GPUCA_DETERMINISTIC_MODE
if (a1.id != b1.id) {
return (a1.id > b1.id);
}
Expand All @@ -1490,7 +1490,7 @@ struct GPUTPCGMMerger_CompareClusterIds {
if (a.row != b.row) {
return (a.row > b.row);
}
#ifdef GPUCA_NO_FAST_MATH // TODO: Use a better define as swith
#ifdef GPUCA_DETERMINISTIC_MODE
if (a.id != b.id) {
return (a.id > b.id);
}
Expand Down Expand Up @@ -1569,7 +1569,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread
// unpack and sort clusters
if (nParts > 1 && leg == 0) {
GPUCommonAlgorithm::sort(trackParts, trackParts + nParts, [](const GPUTPCGMSectorTrack* a, const GPUTPCGMSectorTrack* b) {
#ifdef GPUCA_NO_FAST_MATH // TODO: Use a better define as swith
#ifdef GPUCA_DETERMINISTIC_MODE
if (a->X() != b->X()) {
return (a->X() > b->X());
}
Expand Down Expand Up @@ -1834,7 +1834,7 @@ struct GPUTPCGMMergerSortTracks_comp {
if (a.Legs() != b.Legs()) {
return a.Legs() > b.Legs();
}
#ifdef GPUCA_NO_FAST_MATH // TODO: Use a better define as swith
#ifdef GPUCA_DETERMINISTIC_MODE
if (a.NClusters() != b.NClusters()) {
return a.NClusters() > b.NClusters();
}
Expand All @@ -1858,7 +1858,7 @@ struct GPUTPCGMMergerSortTracksQPt_comp {
{
const GPUTPCGMMergedTrack& GPUrestrict() a = mCmp[aa];
const GPUTPCGMMergedTrack& GPUrestrict() b = mCmp[bb];
#ifdef GPUCA_NO_FAST_MATH // TODO: Use a better define as swith
#ifdef GPUCA_DETERMINISTIC_MODE
if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) {
return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt());
}
Expand Down Expand Up @@ -1907,7 +1907,7 @@ GPUd() void GPUTPCGMMerger::SortTracks(int32_t nBlocks, int32_t nThreads, int32_
if (a.Legs() != b.Legs()) {
return a.Legs() > b.Legs();
}
#ifdef GPUCA_NO_FAST_MATH // TODO: Use a better define as swith
#ifdef GPUCA_DETERMINISTIC_MODE
if (a.NClusters() != b.NClusters()) {
return a.NClusters() > b.NClusters();
}
Expand Down Expand Up @@ -1937,7 +1937,7 @@ GPUd() void GPUTPCGMMerger::SortTracksQPt(int32_t nBlocks, int32_t nThreads, int
auto comp = [cmp = mOutputTracks](const int32_t aa, const int32_t bb) {
const GPUTPCGMMergedTrack& GPUrestrict() a = cmp[aa];
const GPUTPCGMMergedTrack& GPUrestrict() b = cmp[bb];
#ifdef GPUCA_NO_FAST_MATH // TODO: Use a better define as swith
#ifdef GPUCA_DETERMINISTIC_MODE
if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) {
return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt());
}
Expand Down
10 changes: 1 addition & 9 deletions GPU/GPUTracking/Standalone/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -52,15 +52,7 @@ if(GPUCA_BUILD_DEBUG)
set(CMAKE_CXX_FLAGS "-O0 -ggdb")
set(CMAKE_BUILD_TYPE DEBUG)
else()
set(CMAKE_CXX_FLAGS "-O3 -march=native -ggdb -minline-all-stringops -funroll-loops -fno-stack-protector")
if(DEFINED GPUCA_NO_FAST_MATH AND ${GPUCA_NO_FAST_MATH})
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-fast-math -ffp-contract=off")
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffast-math")
endif()
if (NOT CMAKE_CXX_COMPILER STREQUAL "clang++")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ftracer -fprefetch-loop-arrays")
endif()
set(CMAKE_CXX_FLAGS "-O3 -march=native -ggdb")
set(CMAKE_BUILD_TYPE RELEASE)
add_definitions(-DNDEBUG)
endif()
Expand Down
16 changes: 8 additions & 8 deletions GPU/GPUTracking/Standalone/cmake/config.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,13 @@ set(GPUCA_CONFIG_GL3W 0)
set(GPUCA_CONFIG_O2 1)
set(GPUCA_BUILD_DEBUG 0)
set(GPUCA_BUILD_DEBUG_SANITIZE 0)
set(GPUCA_NO_FAST_MATH 0)
#set(GPUCA_CUDA_GCCBIN c++-13)
#set(GPUCA_OPENCL_CLANGBIN clang-18)
#set(HIP_AMDGPUTARGET "gfx906;gfx908;gfx90a")
set(HIP_AMDGPUTARGET "default")
#set(CUDA_COMPUTETARGET 86 89)
set(CUDA_COMPUTETARGET "default")
#set(GPUCA_CUDA_COMPILE_MODE perkernel)
set(GPUCA_DETERMINISTIC_MODE 0) # OFF / NO_FAST_MATH / OPTO2 / GPU / WHOLEO2
#set(GPUCA_CUDA_GCCBIN c++-14)
#set(GPUCA_OPENCL_CLANGBIN clang-19)
set(HIP_AMDGPUTARGET "default") # "gfx906;gfx908;gfx90a"
set(CUDA_COMPUTETARGET "default") # 86 89
#set(GPUCA_CUDA_COMPILE_MODE perkernel) # onefile / perkernel / rtc
#set(GPUCA_HIP_COMPILE_MODE perkernel)
#set(GPUCA_KERNEL_RESOURCE_USAGE_VERBOSE 1)
#set(GPUCA_CONFIG_COMPILER gcc) # gcc / clang
#add_definitions(-DGPUCA_GPU_DEBUG_PRINT)
46 changes: 33 additions & 13 deletions dependencies/FindO2GPU.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,36 @@ function(set_target_hip_arch target)
endif()
endfunction()

# Detect and enable CUDA
STRING(REGEX REPLACE "\-std=[^ ]*" "" O2_GPU_CMAKE_CXX_FLAGS_NOSTD "${CMAKE_CXX_FLAGS}") # Need to strip c++17 imposed by alidist defaults
# Need to strip c++17 imposed by alidist defaults
STRING(REGEX REPLACE "\-std=[^ ]*" "" O2_GPU_CMAKE_CXX_FLAGS_NOSTD "${CMAKE_CXX_FLAGS}")

# ---------------------------------- Fast Math / Deterministic Mode ----------------------------------
# set(GPUCA_DETERMINISTIC_MODE WHOLEO2) # Override
set(GPUCA_DETERMINISTIC_MODE_MAP_OFF 0)
set(GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH 1) # No -ffast-math and similar compile flags for GPU folder
set(GPUCA_DETERMINISTIC_MODE_MAP_OPTO2 2) # In addition, -O2 optimization on host for GPU folder
set(GPUCA_DETERMINISTIC_MODE_MAP_GPU 3) # In addition, GPUCA_DETERMINISTIC_MODE define for GPU folder
set(GPUCA_DETERMINISTIC_MODE_MAP_ON 3) # Synonym for GPU
set(GPUCA_DETERMINISTIC_MODE_MAP_WHOLEO2 4) # As GPU but for whole O2 code
if(NOT DEFINED GPUCA_DETERMINISTIC_MODE)
set(GPUCA_DETERMINISTIC_MODE 0)
elseif(NOT GPUCA_DETERMINISTIC_MODE MATCHES "^[0-9]+$")
if(NOT DEFINED GPUCA_DETERMINISTIC_MODE_MAP_${GPUCA_DETERMINISTIC_MODE})
message(FATAL_ERROR "Invalid setting ${GPUCA_DETERMINISTIC_MODE} for GPUCA_DETERMINISTIC_MODE")
endif()
set(GPUCA_DETERMINISTIC_MODE ${GPUCA_DETERMINISTIC_MODE_MAP_${GPUCA_DETERMINISTIC_MODE}})
message(STATUS "Set to ${GPUCA_DETERMINISTIC_MODE}")
endif()
set(GPUCA_CXX_NO_FAST_MATH_FLAGS "-fno-fast-math -ffp-contract=off")
set(GPUCA_CUDA_NO_FAST_MATH_FLAGS "--ftz=false --prec-div=true --prec-sqrt=true --fmad false")
if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_WHOLEO2})
add_definitions(-DGPUCA_DETERMINISTIC_MODE)
set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${GPUCA_CXX_NO_FAST_MATH_FLAGS}")
set(CMAKE_C_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_C_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${GPUCA_CXX_NO_FAST_MATH_FLAGS}")
endif()


# ---------------------------------- CUDA ----------------------------------
if(ENABLE_CUDA)
set(CMAKE_CUDA_STANDARD ${CMAKE_CXX_STANDARD})
set(CMAKE_CUDA_STANDARD_REQUIRED TRUE)
Expand Down Expand Up @@ -124,8 +151,7 @@ if(ENABLE_CUDA)
else()
set(CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -Xptxas -O4 -Xcompiler -O4")
endif()
set(GPUCA_CUDA_NO_FAST_MATH_FLAGS "--ftz=false --prec-div=true --prec-sqrt=true --fmad false")
if(DEFINED GPUCA_NO_FAST_MATH AND "${GPUCA_NO_FAST_MATH}")
if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH})
set(CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${GPUCA_CUDA_NO_FAST_MATH_FLAGS}")
elseif(NOT CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG")
set(CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -use_fast_math --ftz=true")#
Expand All @@ -146,19 +172,14 @@ if(ENABLE_CUDA)
endif()
endif()

# Detect and enable OpenCL 1.2 from AMD
# ---------------------------------- HIP ----------------------------------
if(ENABLE_OPENCL)
find_package(OpenCL)
if(ENABLE_OPENCL AND NOT ENABLE_OPENCL STREQUAL "AUTO")
set_package_properties(OpenCL PROPERTIES TYPE REQUIRED)
else()
set_package_properties(OpenCL PROPERTIES TYPE OPTIONAL)
endif()
endif()

# Detect and enable OpenCL 2.x
if(ENABLE_OPENCL)
find_package(OpenCL)
find_package(LLVM)
if(LLVM_FOUND)
find_package(Clang)
Expand Down Expand Up @@ -196,7 +217,7 @@ if(ENABLE_OPENCL)
endif()
endif()

# Detect and enable HIP
# ---------------------------------- HIP ----------------------------------
if(ENABLE_HIP)
if(NOT "$ENV{CMAKE_PREFIX_PATH}" MATCHES "rocm" AND NOT CMAKE_PREFIX_PATH MATCHES "rocm" AND EXISTS "/opt/rocm/lib/cmake/")
list(APPEND CMAKE_PREFIX_PATH "/opt/rocm/lib/cmake")
Expand Down Expand Up @@ -269,7 +290,7 @@ if(ENABLE_HIP)
if(HIP_AMDGPUTARGET)
set(CMAKE_HIP_ARCHITECTURES "${HIP_AMDGPUTARGET}") # If GPU build is enforced we override autodetection
endif()
if(NOT DEFINED GPUCA_NO_FAST_MATH OR NOT ${GPUCA_NO_FAST_MATH})
if(NOT GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH})
string(APPEND O2_HIP_CMAKE_CXX_FLAGS " -fgpu-flush-denormals-to-zero -ffast-math")
endif()
set(CMAKE_HIP_FLAGS "${O2_GPU_CMAKE_CXX_FLAGS_NOSTD} ${CMAKE_HIP_FLAGS} ${O2_HIP_CMAKE_CXX_FLAGS}")
Expand Down Expand Up @@ -303,7 +324,6 @@ if(ENABLE_HIP)
endif()
message(FATAL_ERROR "HIP requested but some of the above packages are not found")
endif()

endif()

# if we end up here without a FATAL, it means we have found the "O2GPU" package
Expand Down
7 changes: 0 additions & 7 deletions dependencies/O2CompileFlags.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -138,11 +138,4 @@ if(DEFINED ENV{O2_CXXFLAGS_OVERRIDE})
message(STATUS "Setting CXXFLAGS Override $ENV{O2_CXXFLAGS_OVERRIDE}")
endif()

if(GPUCA_NO_FAST_MATH_WHOLEO2)
set(GPUCA_NO_FAST_MATH 1)
add_definitions(-DGPUCA_NO_FAST_MATH)
set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -fno-fast-math -ffp-contract=off")
set(CMAKE_C_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_C_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -fno-fast-math -ffp-contract=off")
endif()

message(STATUS "Using build type: ${CMAKE_BUILD_TYPE} - CXXFLAGS: ${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}}")