Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 2 additions & 7 deletions Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,8 @@
#define THRUST_NAMESPACE thrust::hip
#endif

#ifdef GPUCA_DETERMINISTIC_MODE
#define GPU_BLOCKS 1
#define GPU_THREADS 1
#else
#define GPU_BLOCKS 99999
#define GPU_THREADS 99999
#endif
#define GPU_BLOCKS GPUCA_DETERMINISTIC_CODE(1, 99999)
#define GPU_THREADS GPUCA_DETERMINISTIC_CODE(1, 99999)

// O2 track model
#include "ReconstructionDataFormats/Track.h"
Expand Down
10 changes: 9 additions & 1 deletion GPU/Common/GPUCommonDef.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,10 +68,18 @@
#define GPUCA_DEBUG_STREAMER_CHECK(...)
#endif

#ifndef GPUCA_RTC_SPECIAL_CODE
#ifndef GPUCA_RTC_SPECIAL_CODE // By default, we ignore special RTC code
#define GPUCA_RTC_SPECIAL_CODE(...)
#endif

#ifndef GPUCA_DETERMINISTIC_CODE
#ifdef GPUCA_DETERMINISTIC_MODE
#define GPUCA_DETERMINISTIC_CODE(det, indet) det // In deterministic mode, take deterministic code path
#else
#define GPUCA_DETERMINISTIC_CODE(det, indet) indet // otherwise the fast default code path
#endif
#endif

// API Definitions for GPU Compilation
#include "GPUCommonDefAPI.h"

Expand Down
54 changes: 30 additions & 24 deletions GPU/Common/GPUCommonMath.h
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ GPUdi() uint32_t GPUCommonMath::Float2UIntReint(const float& x)
#endif
}

#ifdef GPUCA_DETERMINISTIC_MODE
GPUCA_DETERMINISTIC_CODE( // clang-format off
GPUdi() constexpr float GPUCommonMath::Round(float x) { return GPUCA_CHOICE(roundf(x), roundf(x), round(x)); }
GPUdi() constexpr int32_t GPUCommonMath::Float2IntRn(float x) { return (int32_t)Round(x); }
GPUhdi() constexpr float GPUCommonMath::Sqrt(float x) { return GPUCA_CHOICE(sqrtf(x), (float)sqrt((double)x), sqrt(x)); }
Expand All @@ -264,7 +264,7 @@ GPUdi() constexpr float GPUCommonMath::Log(float x) { return GPUCA_CHOICE((float
GPUdi() constexpr float GPUCommonMath::Exp(float x) { return GPUCA_CHOICE((float)exp((double)x), (float)exp((double)x), exp(x)); }
GPUdi() constexpr bool GPUCommonMath::Finite(float x) { return GPUCA_CHOICE(std::isfinite(x), isfinite(x), isfinite(x)); }
GPUdi() constexpr bool GPUCommonMath::IsNaN(float x) { return GPUCA_CHOICE(std::isnan(x), isnan(x), isnan(x)); }
#else
, // !GPUCA_DETERMINISTIC_CODE
GPUdi() constexpr float GPUCommonMath::Round(float x) { return GPUCA_CHOICE(roundf(x), rintf(x), rint(x)); }
GPUdi() constexpr int32_t GPUCommonMath::Float2IntRn(float x) { return GPUCA_CHOICE((int32_t)Round(x), __float2int_rn(x), (int32_t)Round(x)); }
GPUhdi() constexpr float GPUCommonMath::Sqrt(float x) { return GPUCA_CHOICE(sqrtf(x), sqrtf(x), sqrt(x)); }
Expand All @@ -280,20 +280,22 @@ GPUdi() constexpr float GPUCommonMath::Log(float x) { return GPUCA_CHOICE(logf(x
GPUdi() constexpr float GPUCommonMath::Exp(float x) { return GPUCA_CHOICE(expf(x), expf(x), exp(x)); }
GPUdi() constexpr bool GPUCommonMath::Finite(float x) { return true; }
GPUdi() constexpr bool GPUCommonMath::IsNaN(float x) { return false; }
#endif
) // clang-format on

GPUhdi() void GPUCommonMath::SinCos(float x, float& s, float& c)
{
#if defined(GPUCA_DETERMINISTIC_MODE) && !defined(__OPENCL__)
s = sin((double)x);
c = cos((double)x);
#elif !defined(GPUCA_GPUCODE_DEVICE) && defined(__APPLE__)
__sincosf(x, &s, &c);
GPUCA_DETERMINISTIC_CODE( // clang-format off
s = sin((double)x);
c = cos((double)x);
, // !GPUCA_DETERMINISTIC_CODE
#if !defined(GPUCA_GPUCODE_DEVICE) && defined(__APPLE__)
__sincosf(x, &s, &c);
#elif !defined(GPUCA_GPUCODE_DEVICE) && (defined(__GNU_SOURCE__) || defined(_GNU_SOURCE) || defined(GPUCA_GPUCODE))
sincosf(x, &s, &c);
sincosf(x, &s, &c);
#else
GPUCA_CHOICE((void)((s = sinf(x)) + (c = cosf(x))), sincosf(x, &s, &c), s = sincos(x, &c));
GPUCA_CHOICE((void)((s = sinf(x)) + (c = cosf(x))), sincosf(x, &s, &c), s = sincos(x, &c));
#endif
) // clang-format on
}

GPUhdi() void GPUCommonMath::SinCosd(double x, double& s, double& c)
Expand Down Expand Up @@ -390,22 +392,26 @@ GPUdi() T GPUCommonMath::MaxWithRef(T x, T y, T z, T w, S refX, S refY, S refZ,

GPUdi() float GPUCommonMath::InvSqrt(float _x)
{
#if defined(GPUCA_DETERMINISTIC_MODE) || defined(__OPENCL__)
return 1.f / Sqrt(_x);
#elif defined(__CUDACC__) || defined(__HIPCC__)
return __frsqrt_rn(_x);
#elif defined(__FAST_MATH__)
return 1.f / sqrtf(_x);
GPUCA_DETERMINISTIC_CODE( // clang-format off
return 1.f / Sqrt(_x);
, // !GPUCA_DETERMINISTIC_CODE
#if defined(__CUDACC__) || defined(__HIPCC__)
return __frsqrt_rn(_x);
#elif defined(__OPENCL__) && defined(__clang__)
return 1.f / sqrt(_x);
#elif !defined(__OPENCL__) && (defined(__FAST_MATH__) || defined(__clang__))
return 1.f / sqrtf(_x);
#else
union {
float f;
int32_t i;
} x = {_x};
const float xhalf = 0.5f * x.f;
x.i = 0x5f3759df - (x.i >> 1);
x.f = x.f * (1.5f - xhalf * x.f * x.f);
return x.f;
union {
float f;
int32_t i;
} x = {_x};
const float xhalf = 0.5f * x.f;
x.i = 0x5f3759df - (x.i >> 1);
x.f = x.f * (1.5f - xhalf * x.f * x.f);
return x.f;
#endif
) // clang-format on
}

template <>
Expand Down
9 changes: 3 additions & 6 deletions GPU/GPUTracking/Definitions/GPUDefGPUParameters.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#error Please include GPUDef.h
#endif

#include "GPUCommonDef.h"
#include "GPUDefMacros.h"

// GPU Run Configuration
Expand Down Expand Up @@ -566,12 +567,8 @@
#ifndef GPUCA_MERGER_INTERPOLATION_ERROR_TYPE
#define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE float
#endif
#ifdef GPUCA_DETERMINISTIC_MODE
#undef GPUCA_MERGER_INTERPOLATION_ERROR_TYPE
#define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE float
#undef GPUCA_DEDX_STORAGE_TYPE
#define GPUCA_DEDX_STORAGE_TYPE float
#endif
#define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE_A GPUCA_DETERMINISTIC_CODE(float, GPUCA_MERGER_INTERPOLATION_ERROR_TYPE)
#define GPUCA_DEDX_STORAGE_TYPE_A GPUCA_DETERMINISTIC_CODE(float, GPUCA_DEDX_STORAGE_TYPE)

#ifndef GPUCA_WARP_SIZE
#ifdef GPUCA_GPUCODE
Expand Down
160 changes: 60 additions & 100 deletions GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -723,17 +723,9 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks<1>(int32_t nBlocks, int32_t nThrea

if (iThread == 0) {
if (iBlock == 0) {
#ifdef GPUCA_DETERMINISTIC_MODE
GPUCommonAlgorithm::sortDeviceDynamic(range1, range1 + N1, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return (a.fMin != b.fMin) ? (a.fMin < b.fMin) : (a.fId < b.fId); });
#else
GPUCommonAlgorithm::sortDeviceDynamic(range1, range1 + N1, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return a.fMin < b.fMin; });
#endif
GPUCommonAlgorithm::sortDeviceDynamic(range1, range1 + N1, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return GPUCA_DETERMINISTIC_CODE((a.fMin != b.fMin) ? (a.fMin < b.fMin) : (a.fId < b.fId), a.fMin < b.fMin); });
} else if (iBlock == 1) {
#ifdef GPUCA_DETERMINISTIC_MODE
GPUCommonAlgorithm::sortDeviceDynamic(range2, range2 + N2, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return (a.fMax != b.fMax) ? (a.fMax < b.fMax) : (a.fId < b.fId); });
#else
GPUCommonAlgorithm::sortDeviceDynamic(range2, range2 + N2, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return a.fMax < b.fMax; });
#endif
GPUCommonAlgorithm::sortDeviceDynamic(range2, range2 + N2, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return GPUCA_DETERMINISTIC_CODE((a.fMax != b.fMax) ? (a.fMax < b.fMax) : (a.fId < b.fId), a.fMax < b.fMax); });
}
}
#else
Expand All @@ -749,21 +741,13 @@ namespace // anonymous
struct MergeBorderTracks_compMax {
GPUd() bool operator()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b)
{
#ifdef GPUCA_DETERMINISTIC_MODE
return (a.fMax != b.fMax) ? (a.fMax < b.fMax) : (a.fId < b.fId);
#else
return a.fMax < b.fMax;
#endif
return GPUCA_DETERMINISTIC_CODE((a.fMax != b.fMax) ? (a.fMax < b.fMax) : (a.fId < b.fId), a.fMax < b.fMax);
}
};
struct MergeBorderTracks_compMin {
GPUd() bool operator()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b)
{
#ifdef GPUCA_DETERMINISTIC_MODE
return (a.fMin != b.fMin) ? (a.fMin < b.fMin) : (a.fId < b.fId);
#else
return a.fMin < b.fMin;
#endif
return GPUCA_DETERMINISTIC_CODE((a.fMin != b.fMin) ? (a.fMin < b.fMin) : (a.fId < b.fId), a.fMin < b.fMin);
}
};
} // anonymous namespace
Expand Down Expand Up @@ -904,11 +888,7 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks<2>(int32_t nBlocks, int32_t nThrea

mTrackLinks[b1.TrackID()] = iBest2;
if (mergeMode > 0) {
#ifdef GPUCA_DETERMINISTIC_MODE
CAMath::AtomicMax(&mTrackLinks[iBest2], b1.TrackID());
#else
mTrackLinks[iBest2] = b1.TrackID();
#endif
GPUCA_DETERMINISTIC_CODE(CAMath::AtomicMax(&mTrackLinks[iBest2], b1.TrackID()), mTrackLinks[iBest2] = b1.TrackID());
}
}
// GPUInfo("STAT: sectors %d, %d: all %d merged %d", iSector1, iSector2, statAll, statMerged);
Expand Down Expand Up @@ -1467,14 +1447,7 @@ struct GPUTPCGMMerger_CompareClusterIdsLooper {
if (a1.row != b1.row) {
return ((a1.row > b1.row) ^ ((a.leg - leg) & 1) ^ outwards);
}
#ifdef GPUCA_DETERMINISTIC_MODE
if (a1.id != b1.id) {
return (a1.id > b1.id);
}
return aa > bb;
#else
return a1.id > b1.id;
#endif
return GPUCA_DETERMINISTIC_CODE((a1.id != b1.id) ? (a1.id > b1.id) : (aa > bb), a1.id > b1.id);
}
};

Expand All @@ -1488,14 +1461,7 @@ struct GPUTPCGMMerger_CompareClusterIds {
if (a.row != b.row) {
return (a.row > b.row);
}
#ifdef GPUCA_DETERMINISTIC_MODE
if (a.id != b.id) {
return (a.id > b.id);
}
return aa > bb;
#else
return (a.id > b.id);
#endif
return GPUCA_DETERMINISTIC_CODE((a.id != b.id) ? (a.id > b.id) : (aa > bb), a.id > b.id);
}
};
} // anonymous namespace
Expand Down Expand Up @@ -1567,20 +1533,20 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread
// unpack and sort clusters
if (nParts > 1 && leg == 0) {
GPUCommonAlgorithm::sort(trackParts, trackParts + nParts, [](const GPUTPCGMSectorTrack* a, const GPUTPCGMSectorTrack* b) {
#ifdef GPUCA_DETERMINISTIC_MODE
if (a->X() != b->X()) {
GPUCA_DETERMINISTIC_CODE( // clang-format off
if (a->X() != b->X()) {
return (a->X() > b->X());
}
if (a->Y() != b->Y()) {
return (a->Y() > b->Y());
}
if (a->Z() != b->Z()) {
return (a->Z() > b->Z());
}
return a->QPt() > b->QPt();
, // !GPUCA_DETERMINISTIC_CODE
return (a->X() > b->X());
}
if (a->Y() != b->Y()) {
return (a->Y() > b->Y());
}
if (a->Z() != b->Z()) {
return (a->Z() > b->Z());
}
return a->QPt() > b->QPt();
#else
return (a->X() > b->X());
#endif
) // clang-format on
});
}

Expand Down Expand Up @@ -1832,20 +1798,18 @@ struct GPUTPCGMMergerSortTracks_comp {
if (a.Legs() != b.Legs()) {
return a.Legs() > b.Legs();
}
#ifdef GPUCA_DETERMINISTIC_MODE
if (a.NClusters() != b.NClusters()) {
GPUCA_DETERMINISTIC_CODE( // clang-format off
if (a.NClusters() != b.NClusters()) {
return a.NClusters() > b.NClusters();
} if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) {
return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt());
} if (a.GetParam().GetY() != b.GetParam().GetY()) {
return a.GetParam().GetY() > b.GetParam().GetY();
}
return aa > bb;
, // !GPUCA_DETERMINISTIC_CODE
return a.NClusters() > b.NClusters();
}
if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) {
return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt());
}
if (a.GetParam().GetY() != b.GetParam().GetY()) {
return a.GetParam().GetY() > b.GetParam().GetY();
}
return aa > bb;
#else
return a.NClusters() > b.NClusters();
#endif
) // clang-format on
}
};

Expand All @@ -1856,17 +1820,16 @@ struct GPUTPCGMMergerSortTracksQPt_comp {
{
const GPUTPCGMMergedTrack& GPUrestrict() a = mCmp[aa];
const GPUTPCGMMergedTrack& GPUrestrict() b = mCmp[bb];
#ifdef GPUCA_DETERMINISTIC_MODE
if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) {
GPUCA_DETERMINISTIC_CODE( // clang-format off
if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) {
return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt());
} if (a.GetParam().GetY() != b.GetParam().GetY()) {
return a.GetParam().GetY() > b.GetParam().GetY();
}
return a.GetParam().GetZ() > b.GetParam().GetZ();
, // !GPUCA_DETERMINISTIC_CODE
return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt());
}
if (a.GetParam().GetY() != b.GetParam().GetY()) {
return a.GetParam().GetY() > b.GetParam().GetY();
}
return a.GetParam().GetZ() > b.GetParam().GetZ();
#else
return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt());
#endif
) // clang-format on
}
};
} // anonymous namespace
Expand Down Expand Up @@ -1901,20 +1864,18 @@ GPUd() void GPUTPCGMMerger::SortTracks(int32_t nBlocks, int32_t nThreads, int32_
if (a.Legs() != b.Legs()) {
return a.Legs() > b.Legs();
}
#ifdef GPUCA_DETERMINISTIC_MODE
if (a.NClusters() != b.NClusters()) {
GPUCA_DETERMINISTIC_CODE( // clang-format off
if (a.NClusters() != b.NClusters()) {
return a.NClusters() > b.NClusters();
} if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) {
return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt());
} if (a.GetParam().GetY() != b.GetParam().GetY()) {
return a.GetParam().GetY() > b.GetParam().GetY();
}
return aa > bb;
, // !GPUCA_DETERMINISTIC_CODE
return a.NClusters() > b.NClusters();
}
if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) {
return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt());
}
if (a.GetParam().GetY() != b.GetParam().GetY()) {
return a.GetParam().GetY() > b.GetParam().GetY();
}
return aa > bb;
#else
return a.NClusters() > b.NClusters();
#endif
) // clang-format on
};

GPUCommonAlgorithm::sortDeviceDynamic(mTrackOrderProcess, mTrackOrderProcess + mMemory->nOutputTracks, comp);
Expand All @@ -1931,17 +1892,16 @@ GPUd() void GPUTPCGMMerger::SortTracksQPt(int32_t nBlocks, int32_t nThreads, int
auto comp = [cmp = mOutputTracks](const int32_t aa, const int32_t bb) {
const GPUTPCGMMergedTrack& GPUrestrict() a = cmp[aa];
const GPUTPCGMMergedTrack& GPUrestrict() b = cmp[bb];
#ifdef GPUCA_DETERMINISTIC_MODE
if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) {
GPUCA_DETERMINISTIC_CODE( // clang-format off
if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) {
return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt());
} if (a.GetParam().GetY() != b.GetParam().GetY()) {
return a.GetParam().GetY() > b.GetParam().GetY();
}
return a.GetParam().GetZ() > b.GetParam().GetZ();
, // !GPUCA_DETERMINISTIC_CODE
return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt());
}
if (a.GetParam().GetY() != b.GetParam().GetY()) {
return a.GetParam().GetY() > b.GetParam().GetY();
}
return a.GetParam().GetZ() > b.GetParam().GetZ();
#else
return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt());
#endif
) // clang-format on
};

GPUCommonAlgorithm::sortDeviceDynamic(mTrackSort, mTrackSort + mMemory->nOutputTracks, comp);
Expand Down
2 changes: 1 addition & 1 deletion GPU/GPUTracking/Merger/GPUTPCGMMergerTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ enum attachTypes { attachAttached = 0x40000000,

struct InterpolationErrorHit {
float posY, posZ;
GPUCA_MERGER_INTERPOLATION_ERROR_TYPE errorY, errorZ;
GPUCA_MERGER_INTERPOLATION_ERROR_TYPE_A errorY, errorZ;
};

struct InterpolationErrors {
Expand Down
Loading