Skip to content

Commit e836e3f

Browse files
committed
GPU: Replace GPUCA_NO_FAST_MATH by more fine-grain GPUCA_DETERMINISTIC_MODE
1 parent ee497d7 commit e836e3f

File tree

13 files changed

+66
-60
lines changed

13 files changed

+66
-60
lines changed

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
#define THRUST_NAMESPACE thrust::hip
4444
#endif
4545

46-
#ifdef GPUCA_NO_FAST_MATH
46+
#ifdef GPUCA_DETERMINISTIC_MODE
4747
#define GPU_BLOCKS 1
4848
#define GPU_THREADS 1
4949
#else
@@ -1452,4 +1452,4 @@ template void processNeighboursHandler<7>(const int startLayer,
14521452
const o2::base::PropagatorF::MatCorrType matCorrType,
14531453
const int nBlocks,
14541454
const int nThreads);
1455-
} // namespace o2::its
1455+
} // namespace o2::its

GPU/CMakeLists.txt

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,6 @@
1313
# CMake, variables are defined for Sources / Headers first. Then, the actual
1414
# CMake build scripts use these variables.
1515

16-
if(NOT DEFINED GPUCA_NO_FAST_MATH)
17-
set(GPUCA_NO_FAST_MATH 0)
18-
endif()
19-
if(${GPUCA_NO_FAST_MATH})
20-
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GPUCA_CXX_NO_FAST_MATH_FLAGS}")
21-
endif()
22-
2316
add_subdirectory(Common)
2417
add_subdirectory(Utils)
2518
add_subdirectory(TPCFastTransformation)

GPU/Common/GPUCommonMath.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,7 @@ GPUdi() uint32_t GPUCommonMath::Float2UIntReint(const float& x)
250250
#endif
251251
}
252252

253-
#ifdef GPUCA_NO_FAST_MATH
253+
#ifdef GPUCA_DETERMINISTIC_MODE
254254
GPUdi() constexpr float GPUCommonMath::Round(float x) { return GPUCA_CHOICE(roundf(x), roundf(x), round(x)); }
255255
GPUdi() constexpr int32_t GPUCommonMath::Float2IntRn(float x) { return (int32_t)Round(x); }
256256
GPUhdi() constexpr float GPUCommonMath::Sqrt(float x) { return GPUCA_CHOICE(sqrtf(x), (float)sqrt((double)x), sqrt(x)); }
@@ -286,7 +286,7 @@ GPUdi() constexpr bool GPUCommonMath::IsNaN(float x) { return false; }
286286

287287
GPUhdi() void GPUCommonMath::SinCos(float x, float& s, float& c)
288288
{
289-
#if defined(GPUCA_NO_FAST_MATH) && !defined(__OPENCL__)
289+
#if defined(GPUCA_DETERMINISTIC_MODE) && !defined(__OPENCL__)
290290
s = sin((double)x);
291291
c = cos((double)x);
292292
#elif !defined(GPUCA_GPUCODE_DEVICE) && defined(__APPLE__)
@@ -392,7 +392,7 @@ GPUdi() T GPUCommonMath::MaxWithRef(T x, T y, T z, T w, S refX, S refY, S refZ,
392392

393393
GPUdi() float GPUCommonMath::InvSqrt(float _x)
394394
{
395-
#if defined(GPUCA_NO_FAST_MATH) || defined(__OPENCL__)
395+
#if defined(GPUCA_DETERMINISTIC_MODE) || defined(__OPENCL__)
396396
return 1.f / Sqrt(_x);
397397
#elif defined(__CUDACC__) || defined(__HIPCC__)
398398
return __frsqrt_rn(_x);

GPU/GPUTracking/Base/GPUReconstruction.cxx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -261,8 +261,8 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice()
261261
mProcessingSettings.deterministicGPUReconstruction = mProcessingSettings.debugLevel >= 6;
262262
}
263263
if (mProcessingSettings.deterministicGPUReconstruction) {
264-
#ifndef GPUCA_NO_FAST_MATH
265-
GPUError("Warning, deterministicGPUReconstruction needs GPUCA_NO_FAST_MATH for being fully deterministic, without only most indeterminism by concurrency is removed, but floating point effects remain!");
264+
#ifndef GPUCA_DETERMINISTIC_MODE
265+
GPUError("Warning, deterministicGPUReconstruction needs GPUCA_DETERMINISTIC_MODE for being fully deterministic, without only most indeterminism by concurrency is removed, but floating point effects remain!");
266266
#endif
267267
mProcessingSettings.overrideClusterizerFragmentLen = TPC_MAX_FRAGMENT_LEN_GPU;
268268
param().rec.tpc.nWaysOuter = true;

GPU/GPUTracking/Base/cuda/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ elseif(GPUCA_CUDA_COMPILE_MODE STREQUAL "perkernel")
172172
TARGET_DIRECTORY ${targetName}
173173
PROPERTIES
174174
COMPILE_FLAGS "${GPUCA_CUDA_NO_FAST_MATH_FLAGS}"
175-
COMPILE_DEFINITIONS "GPUCA_NO_FAST_MATH")
175+
COMPILE_DEFINITIONS "GPUCA_DETERMINISTIC_MODE")
176176
elseif(GPUCA_CUDA_COMPILE_MODE STREQUAL "rdc")
177177
message(FATAL_ERROR "CUDA RDC compilation of GPUReconstruction ios not yet working!")
178178
target_compile_definitions(${targetName} PRIVATE GPUCA_KERNEL_COMPILE_MODE=2)

GPU/GPUTracking/Base/hip/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ elseif(GPUCA_HIP_COMPILE_MODE STREQUAL "perkernel")
230230
TARGET_DIRECTORY ${targetName}
231231
PROPERTIES
232232
COMPILE_FLAGS "${GPUCA_CXX_NO_FAST_MATH_FLAGS}"
233-
COMPILE_DEFINITIONS "GPUCA_NO_FAST_MATH")
233+
COMPILE_DEFINITIONS "GPUCA_DETERMINISTIC_MODE")
234234
elseif(GPUCA_HIP_COMPILE_MODE STREQUAL "rdc")
235235
message(FATAL_ERROR "HIP RDC compilation of GPUReconstruction ios not yet working!")
236236
target_compile_definitions(${targetName} PRIVATE GPUCA_KERNEL_COMPILE_MODE=2)

GPU/GPUTracking/Base/opencl/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,10 @@ set(CL_SRC ${GPUDIR}/Base/opencl/GPUReconstructionOCL.cl)
2424
set(CL_BIN ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionOCLCode)
2525

2626
set(OCL_FLAGS -Dcl_clang_storage_class_specifiers -cl-std=CLC++2021)
27-
if(NOT DEFINED GPUCA_NO_FAST_MATH OR NOT ${GPUCA_NO_FAST_MATH})
27+
if(NOT GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH})
2828
set(OCL_FLAGS ${OCL_FLAGS} -cl-denorms-are-zero -cl-mad-enable -cl-no-signed-zeros -cl-fast-relaxed-math)
2929
else()
30-
set(OCL_FLAGS ${OCL_FLAGS} -cl-fp32-correctly-rounded-divide-sqrt)
30+
set(OCL_FLAGS ${OCL_FLAGS} -cl-fp32-correctly-rounded-divide-sqrt)
3131
endif()
3232
set(OCL_DEFINECL "-D$<JOIN:$<TARGET_PROPERTY:O2::GPUTracking,COMPILE_DEFINITIONS>,$<SEMICOLON>-D>"
3333
"-I$<JOIN:$<FILTER:$<TARGET_PROPERTY:O2::GPUTracking,INCLUDE_DIRECTORIES>,EXCLUDE,^/usr/include/?>,$<SEMICOLON>-I>"

GPU/GPUTracking/CMakeLists.txt

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,13 @@ set(MODULE GPUTracking)
1414
# set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O0") # to uncomment if needed, tired of typing this...
1515
# set(GPUCA_BUILD_DEBUG 1)
1616

17-
if(NOT "${GPUCA_NO_FAST_MATH}" AND NOT CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG")
18-
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffast-math")
17+
if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH})
18+
set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${GPUCA_CXX_NO_FAST_MATH_FLAGS}")
19+
if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_OPTO2})
20+
set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O2")
21+
endif()
22+
elseif(NOT CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG")
23+
set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O3 -ffast-math")
1924
endif()
2025

2126
include(cmake/helpers.cmake)
@@ -375,7 +380,7 @@ set_source_files_properties(DataCompression/GPUTPCCompressionTrackModel.cxx
375380
TARGET_DIRECTORY ${targetName}
376381
PROPERTIES
377382
COMPILE_FLAGS "${GPUCA_CXX_NO_FAST_MATH_FLAGS}"
378-
COMPILE_DEFINITIONS "GPUCA_NO_FAST_MATH")
383+
COMPILE_DEFINITIONS "GPUCA_DETERMINISTIC_MODE")
379384

380385
# GPUReconstructionLibrary needs to know which GPU backends are enabled for proper error messages
381386
configure_file(Base/GPUReconstructionAvailableBackends.template.h ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionAvailableBackends.h)
@@ -417,6 +422,6 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2" OR ALIGPU_BUILD_TYPE STREQUAL "Standalone")
417422
endif()
418423
endif()
419424

420-
if(${GPUCA_NO_FAST_MATH})
421-
target_compile_definitions(${targetName} PUBLIC GPUCA_NO_FAST_MATH)
425+
if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_GPU})
426+
target_compile_definitions(${targetName} PUBLIC GPUCA_DETERMINISTIC_MODE)
422427
endif()

GPU/GPUTracking/Definitions/GPUDefGPUParameters.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -566,7 +566,7 @@
566566
#ifndef GPUCA_MERGER_INTERPOLATION_ERROR_TYPE
567567
#define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE float
568568
#endif
569-
#ifdef GPUCA_NO_FAST_MATH
569+
#ifdef GPUCA_DETERMINISTIC_MODE
570570
#undef GPUCA_MERGER_INTERPOLATION_ERROR_TYPE
571571
#define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE float
572572
#undef GPUCA_DEDX_STORAGE_TYPE

GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -723,13 +723,13 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks<1>(int32_t nBlocks, int32_t nThrea
723723

724724
if (iThread == 0) {
725725
if (iBlock == 0) {
726-
#ifdef GPUCA_NO_FAST_MATH // TODO: Use a better define as swith
726+
#ifdef GPUCA_DETERMINISTIC_MODE
727727
GPUCommonAlgorithm::sortDeviceDynamic(range1, range1 + N1, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return (a.fMin != b.fMin) ? (a.fMin < b.fMin) : (a.fId < b.fId); });
728728
#else
729729
GPUCommonAlgorithm::sortDeviceDynamic(range1, range1 + N1, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return a.fMin < b.fMin; });
730730
#endif
731731
} else if (iBlock == 1) {
732-
#ifdef GPUCA_NO_FAST_MATH // TODO: Use a better define as swith
732+
#ifdef GPUCA_DETERMINISTIC_MODE
733733
GPUCommonAlgorithm::sortDeviceDynamic(range2, range2 + N2, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return (a.fMax != b.fMax) ? (a.fMax < b.fMax) : (a.fId < b.fId); });
734734
#else
735735
GPUCommonAlgorithm::sortDeviceDynamic(range2, range2 + N2, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return a.fMax < b.fMax; });
@@ -749,7 +749,7 @@ namespace // anonymous
749749
struct MergeBorderTracks_compMax {
750750
GPUd() bool operator()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b)
751751
{
752-
#ifdef GPUCA_NO_FAST_MATH // TODO: Use a better define as swith
752+
#ifdef GPUCA_DETERMINISTIC_MODE
753753
return (a.fMax != b.fMax) ? (a.fMax < b.fMax) : (a.fId < b.fId);
754754
#else
755755
return a.fMax < b.fMax;
@@ -759,7 +759,7 @@ struct MergeBorderTracks_compMax {
759759
struct MergeBorderTracks_compMin {
760760
GPUd() bool operator()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b)
761761
{
762-
#ifdef GPUCA_NO_FAST_MATH // TODO: Use a better define as swith
762+
#ifdef GPUCA_DETERMINISTIC_MODE
763763
return (a.fMin != b.fMin) ? (a.fMin < b.fMin) : (a.fId < b.fId);
764764
#else
765765
return a.fMin < b.fMin;
@@ -906,7 +906,7 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks<2>(int32_t nBlocks, int32_t nThrea
906906

907907
mTrackLinks[b1.TrackID()] = iBest2;
908908
if (mergeMode > 0) {
909-
#if defined(GPUCA_NO_FAST_MATH) // TODO: Use a better define as swith
909+
#ifdef GPUCA_DETERMINISTIC_MODE
910910
CAMath::AtomicMax(&mTrackLinks[iBest2], b1.TrackID());
911911
#else
912912
mTrackLinks[iBest2] = b1.TrackID();
@@ -1469,7 +1469,7 @@ struct GPUTPCGMMerger_CompareClusterIdsLooper {
14691469
if (a1.row != b1.row) {
14701470
return ((a1.row > b1.row) ^ ((a.leg - leg) & 1) ^ outwards);
14711471
}
1472-
#ifdef GPUCA_NO_FAST_MATH // TODO: Use a better define as swith
1472+
#ifdef GPUCA_DETERMINISTIC_MODE
14731473
if (a1.id != b1.id) {
14741474
return (a1.id > b1.id);
14751475
}
@@ -1490,7 +1490,7 @@ struct GPUTPCGMMerger_CompareClusterIds {
14901490
if (a.row != b.row) {
14911491
return (a.row > b.row);
14921492
}
1493-
#ifdef GPUCA_NO_FAST_MATH // TODO: Use a better define as swith
1493+
#ifdef GPUCA_DETERMINISTIC_MODE
14941494
if (a.id != b.id) {
14951495
return (a.id > b.id);
14961496
}
@@ -1569,7 +1569,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread
15691569
// unpack and sort clusters
15701570
if (nParts > 1 && leg == 0) {
15711571
GPUCommonAlgorithm::sort(trackParts, trackParts + nParts, [](const GPUTPCGMSectorTrack* a, const GPUTPCGMSectorTrack* b) {
1572-
#ifdef GPUCA_NO_FAST_MATH // TODO: Use a better define as swith
1572+
#ifdef GPUCA_DETERMINISTIC_MODE
15731573
if (a->X() != b->X()) {
15741574
return (a->X() > b->X());
15751575
}
@@ -1834,7 +1834,7 @@ struct GPUTPCGMMergerSortTracks_comp {
18341834
if (a.Legs() != b.Legs()) {
18351835
return a.Legs() > b.Legs();
18361836
}
1837-
#ifdef GPUCA_NO_FAST_MATH // TODO: Use a better define as swith
1837+
#ifdef GPUCA_DETERMINISTIC_MODE
18381838
if (a.NClusters() != b.NClusters()) {
18391839
return a.NClusters() > b.NClusters();
18401840
}
@@ -1858,7 +1858,7 @@ struct GPUTPCGMMergerSortTracksQPt_comp {
18581858
{
18591859
const GPUTPCGMMergedTrack& GPUrestrict() a = mCmp[aa];
18601860
const GPUTPCGMMergedTrack& GPUrestrict() b = mCmp[bb];
1861-
#ifdef GPUCA_NO_FAST_MATH // TODO: Use a better define as swith
1861+
#ifdef GPUCA_DETERMINISTIC_MODE
18621862
if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) {
18631863
return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt());
18641864
}
@@ -1907,7 +1907,7 @@ GPUd() void GPUTPCGMMerger::SortTracks(int32_t nBlocks, int32_t nThreads, int32_
19071907
if (a.Legs() != b.Legs()) {
19081908
return a.Legs() > b.Legs();
19091909
}
1910-
#ifdef GPUCA_NO_FAST_MATH // TODO: Use a better define as swith
1910+
#ifdef GPUCA_DETERMINISTIC_MODE
19111911
if (a.NClusters() != b.NClusters()) {
19121912
return a.NClusters() > b.NClusters();
19131913
}
@@ -1937,7 +1937,7 @@ GPUd() void GPUTPCGMMerger::SortTracksQPt(int32_t nBlocks, int32_t nThreads, int
19371937
auto comp = [cmp = mOutputTracks](const int32_t aa, const int32_t bb) {
19381938
const GPUTPCGMMergedTrack& GPUrestrict() a = cmp[aa];
19391939
const GPUTPCGMMergedTrack& GPUrestrict() b = cmp[bb];
1940-
#ifdef GPUCA_NO_FAST_MATH // TODO: Use a better define as swith
1940+
#ifdef GPUCA_DETERMINISTIC_MODE
19411941
if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) {
19421942
return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt());
19431943
}

0 commit comments

Comments
 (0)