Skip to content

Commit e451741

Browse files
authored
Merge branch 'AliceO2Group:dev' into new-detector4
2 parents 3976bb1 + 8b6d22e commit e451741

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

43 files changed

+380
-379
lines changed

CODEOWNERS

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
/DataFormats/Detectors/GlobalTracking @shahor02
3535
/DataFormats/Detectors/GlobalTrackingWorkflow @shahor02
3636
/DataFormats/Detectors/HMPID @gvolpe79
37-
/DataFormats/Detectors/ITSMFT @mcoquet642 @mconcas @shahor02
37+
/DataFormats/Detectors/ITSMFT @fprino @mcoquet642 @mconcas @shahor02
3838
/DataFormats/Detectors/MUON @AliceO2Group/muon-experts @shahor02
3939
/DataFormats/Detectors/PHOS @peressounko @kharlov
4040
/DataFormats/Detectors/Passive @sawenzel
@@ -65,7 +65,7 @@
6565
/Detectors/GlobalTracking @shahor02
6666
/Detectors/GlobalTrackingWorkflow @shahor02
6767
/Detectors/HMPID @gvolpe79
68-
/Detectors/ITSMFT @mcoquet642 @mconcas @shahor02
68+
/Detectors/ITSMFT @fprino @mcoquet642 @mconcas @shahor02
6969
/Detectors/MUON @AliceO2Group/muon-experts @shahor02
7070
/Detectors/PHOS @peressounko @kharlov
7171
/Detectors/Passive @sawenzel

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -43,13 +43,8 @@
4343
#define THRUST_NAMESPACE thrust::hip
4444
#endif
4545

46-
#ifdef GPUCA_DETERMINISTIC_MODE
47-
#define GPU_BLOCKS 1
48-
#define GPU_THREADS 1
49-
#else
50-
#define GPU_BLOCKS 99999
51-
#define GPU_THREADS 99999
52-
#endif
46+
#define GPU_BLOCKS GPUCA_DETERMINISTIC_CODE(1, 99999)
47+
#define GPU_THREADS GPUCA_DETERMINISTIC_CODE(1, 99999)
5348

5449
// O2 track model
5550
#include "ReconstructionDataFormats/Track.h"

GPU/Common/GPUCommonAlgorithm.h

Lines changed: 14 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,7 @@
2424

2525
// ----------------------------- SORTING -----------------------------
2626

27-
namespace o2
28-
{
29-
namespace gpu
27+
namespace o2::gpu
3028
{
3129
class GPUCommonAlgorithm
3230
{
@@ -43,6 +41,10 @@ class GPUCommonAlgorithm
4341
GPUd() static void sortInBlock(T* begin, T* end, const S& comp);
4442
template <class T, class S>
4543
GPUd() static void sortDeviceDynamic(T* begin, T* end, const S& comp);
44+
#ifndef __OPENCL__
45+
template <class T, class S>
46+
GPUh() static void sortOnDevice(auto* rec, int32_t stream, T* begin, size_t N, const S& comp);
47+
#endif
4648
template <class T>
4749
GPUd() static void swap(T& a, T& b);
4850

@@ -71,13 +73,6 @@ class GPUCommonAlgorithm
7173
template <typename I>
7274
GPUd() static void IterSwap(I a, I b) noexcept;
7375
};
74-
} // namespace gpu
75-
} // namespace o2
76-
77-
namespace o2
78-
{
79-
namespace gpu
80-
{
8176

8277
#ifndef GPUCA_ALGORITHM_STD
8378
template <typename I>
@@ -217,18 +212,15 @@ GPUdi() void GPUCommonAlgorithm::QuickSort(I f, I l) noexcept
217212

218213
typedef GPUCommonAlgorithm CAAlgo;
219214

220-
} // namespace gpu
221-
} // namespace o2
215+
} // namespace o2::gpu
222216

223217
#if (((defined(__CUDACC__) && !defined(__clang__)) || defined(__HIPCC__))) && !defined(GPUCA_GPUCODE_GENRTC) && !defined(GPUCA_GPUCODE_HOSTONLY)
224218

225219
#include "GPUCommonAlgorithmThrust.h"
226220

227221
#else
228222

229-
namespace o2
230-
{
231-
namespace gpu
223+
namespace o2::gpu
232224
{
233225

234226
template <class T>
@@ -247,15 +239,12 @@ GPUdi() void GPUCommonAlgorithm::sortDeviceDynamic(T* begin, T* end, const S& co
247239
GPUCommonAlgorithm::sort(begin, end, comp);
248240
}
249241

250-
} // namespace gpu
251-
} // namespace o2
242+
} // namespace o2::gpu
252243

253244
#endif // THRUST
254245
// sort and sortInBlock below are not taken from Thrust, since our implementations are faster
255246

256-
namespace o2
257-
{
258-
namespace gpu
247+
namespace o2::gpu
259248
{
260249

261250
template <class T>
@@ -328,8 +317,7 @@ GPUdi() void GPUCommonAlgorithm::swap(T& a, T& b)
328317
}
329318
#endif
330319

331-
} // namespace gpu
332-
} // namespace o2
320+
} // namespace o2::gpu
333321

334322
// ----------------------------- WORK GROUP FUNCTIONS -----------------------------
335323

@@ -458,4 +446,8 @@ GPUdi() T warp_broadcast(T v, int32_t i)
458446

459447
#endif
460448

449+
#ifdef GPUCA_ALGORITHM_STD
450+
#undef GPUCA_ALGORITHM_STD
451+
#endif
452+
461453
#endif

GPU/Common/GPUCommonAlgorithmThrust.h

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -23,16 +23,19 @@
2323
#pragma GCC diagnostic pop
2424

2525
#include "GPUCommonDef.h"
26+
#include "GPUCommonHelpers.h"
2627

27-
#ifdef __CUDACC__
28+
#ifndef __HIPCC__ // CUDA
2829
#define GPUCA_THRUST_NAMESPACE thrust::cuda
29-
#else
30+
#define GPUCA_CUB_NAMESPACE cub
31+
#include <cub/cub.cuh>
32+
#else // HIP
3033
#define GPUCA_THRUST_NAMESPACE thrust::hip
34+
#define GPUCA_CUB_NAMESPACE hipcub
35+
#include <hipcub/hipcub.hpp>
3136
#endif
3237

33-
namespace o2
34-
{
35-
namespace gpu
38+
namespace o2::gpu
3639
{
3740

3841
// - Our quicksort and bubble sort implementations are faster
@@ -54,7 +57,7 @@ GPUdi() void GPUCommonAlgorithm::sort(T* begin, T* end, const S& comp)
5457
}
5558
5659
template <class T>
57-
GPUdi() void GPUCommonAlgorithm::sortInBlock(T* begin, T* end)
60+
GPUdi() void GPUCommonAlgorithm::sortInBlock(T* begin, T* end) // TODO: Try cub::BlockMergeSort
5861
{
5962
if (get_local_id(0) == 0) {
6063
sortDeviceDynamic(begin, end);
@@ -87,7 +90,24 @@ GPUdi() void GPUCommonAlgorithm::sortDeviceDynamic(T* begin, T* end, const S& co
8790
thrust::sort(GPUCA_THRUST_NAMESPACE::par, thrustBegin, thrustEnd, comp);
8891
}
8992

90-
} // namespace gpu
91-
} // namespace o2
93+
template <class T, class S>
94+
GPUhi() void GPUCommonAlgorithm::sortOnDevice(auto* rec, int32_t stream, T* begin, size_t N, const S& comp)
95+
{
96+
thrust::device_ptr<T> p(begin);
97+
#if 0 // Use Thrust
98+
auto alloc = rec->getThrustVolatileDeviceAllocator();
99+
thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(rec->mInternals->Streams[stream]), p, p + N, comp);
100+
#else // Use CUB
101+
size_t tempSize = 0;
102+
void* tempMem = nullptr;
103+
GPUChkErrS(GPUCA_CUB_NAMESPACE::DeviceMergeSort::SortKeys(tempMem, tempSize, begin, N, comp, rec->mInternals->Streams[stream]));
104+
tempMem = rec->AllocateVolatileDeviceMemory(tempSize);
105+
GPUChkErrS(GPUCA_CUB_NAMESPACE::DeviceMergeSort::SortKeys(tempMem, tempSize, begin, N, comp, rec->mInternals->Streams[stream]));
106+
#endif
107+
}
108+
} // namespace o2::gpu
109+
110+
#undef GPUCA_THRUST_NAMESPACE
111+
#undef GPUCA_CUB_NAMESPACE
92112

93113
#endif

GPU/Common/GPUCommonDef.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,10 +68,18 @@
6868
#define GPUCA_DEBUG_STREAMER_CHECK(...)
6969
#endif
7070

71-
#ifndef GPUCA_RTC_SPECIAL_CODE
71+
#ifndef GPUCA_RTC_SPECIAL_CODE // By default, we ignore special RTC code
7272
#define GPUCA_RTC_SPECIAL_CODE(...)
7373
#endif
7474

75+
#ifndef GPUCA_DETERMINISTIC_CODE
76+
#ifdef GPUCA_DETERMINISTIC_MODE
77+
#define GPUCA_DETERMINISTIC_CODE(det, indet) det // In deterministic mode, take deterministic code path
78+
#else
79+
#define GPUCA_DETERMINISTIC_CODE(det, indet) indet // otherwise the fast default code path
80+
#endif
81+
#endif
82+
7583
// API Definitions for GPU Compilation
7684
#include "GPUCommonDefAPI.h"
7785

GPU/Common/GPUCommonHelpers.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
#include "GPUCommonDef.h"
3636
#include "GPUCommonLogger.h"
3737
#include <cstdint>
38+
#include <functional>
3839

3940
namespace o2::gpu::internal
4041
{
@@ -60,4 +61,22 @@ static inline int32_t GPUReconstructionChkErr(const int64_t error, const char* f
6061
#undef GPUCOMMON_INTERNAL_CAT
6162
} // namespace o2::gpu::internal
6263

64+
namespace o2::gpu
65+
{
66+
class GPUReconstruction;
67+
class ThrustVolatileAllocator
68+
{
69+
public:
70+
typedef char value_type;
71+
72+
char* allocate(std::ptrdiff_t n);
73+
void deallocate(char* ptr, size_t);
74+
75+
private:
76+
ThrustVolatileAllocator(GPUReconstruction* r);
77+
std::function<char*(size_t)> mAlloc;
78+
friend class GPUReconstruction;
79+
};
80+
} // namespace o2::gpu
81+
6382
#endif

GPU/Common/GPUCommonMath.h

Lines changed: 32 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,7 @@
4242
#define GPUCA_CHOICE(c1, c2, c3) (c1) // Select first option for Host
4343
#endif // clang-format on
4444

45-
namespace o2
46-
{
47-
namespace gpu
45+
namespace o2::gpu
4846
{
4947

5048
class GPUCommonMath
@@ -250,7 +248,7 @@ GPUdi() uint32_t GPUCommonMath::Float2UIntReint(const float& x)
250248
#endif
251249
}
252250

253-
#ifdef GPUCA_DETERMINISTIC_MODE
251+
GPUCA_DETERMINISTIC_CODE( // clang-format off
254252
GPUdi() constexpr float GPUCommonMath::Round(float x) { return GPUCA_CHOICE(roundf(x), roundf(x), round(x)); }
255253
GPUdi() constexpr int32_t GPUCommonMath::Float2IntRn(float x) { return (int32_t)Round(x); }
256254
GPUhdi() constexpr float GPUCommonMath::Sqrt(float x) { return GPUCA_CHOICE(sqrtf(x), (float)sqrt((double)x), sqrt(x)); }
@@ -266,7 +264,7 @@ GPUdi() constexpr float GPUCommonMath::Log(float x) { return GPUCA_CHOICE((float
266264
GPUdi() constexpr float GPUCommonMath::Exp(float x) { return GPUCA_CHOICE((float)exp((double)x), (float)exp((double)x), exp(x)); }
267265
GPUdi() constexpr bool GPUCommonMath::Finite(float x) { return GPUCA_CHOICE(std::isfinite(x), isfinite(x), isfinite(x)); }
268266
GPUdi() constexpr bool GPUCommonMath::IsNaN(float x) { return GPUCA_CHOICE(std::isnan(x), isnan(x), isnan(x)); }
269-
#else
267+
, // !GPUCA_DETERMINISTIC_CODE
270268
GPUdi() constexpr float GPUCommonMath::Round(float x) { return GPUCA_CHOICE(roundf(x), rintf(x), rint(x)); }
271269
GPUdi() constexpr int32_t GPUCommonMath::Float2IntRn(float x) { return GPUCA_CHOICE((int32_t)Round(x), __float2int_rn(x), (int32_t)Round(x)); }
272270
GPUhdi() constexpr float GPUCommonMath::Sqrt(float x) { return GPUCA_CHOICE(sqrtf(x), sqrtf(x), sqrt(x)); }
@@ -282,20 +280,22 @@ GPUdi() constexpr float GPUCommonMath::Log(float x) { return GPUCA_CHOICE(logf(x
282280
GPUdi() constexpr float GPUCommonMath::Exp(float x) { return GPUCA_CHOICE(expf(x), expf(x), exp(x)); }
283281
GPUdi() constexpr bool GPUCommonMath::Finite(float x) { return true; }
284282
GPUdi() constexpr bool GPUCommonMath::IsNaN(float x) { return false; }
285-
#endif
283+
) // clang-format on
286284

287285
GPUhdi() void GPUCommonMath::SinCos(float x, float& s, float& c)
288286
{
289-
#if defined(GPUCA_DETERMINISTIC_MODE) && !defined(__OPENCL__)
290-
s = sin((double)x);
291-
c = cos((double)x);
292-
#elif !defined(GPUCA_GPUCODE_DEVICE) && defined(__APPLE__)
293-
__sincosf(x, &s, &c);
287+
GPUCA_DETERMINISTIC_CODE( // clang-format off
288+
s = sin((double)x);
289+
c = cos((double)x);
290+
, // !GPUCA_DETERMINISTIC_CODE
291+
#if !defined(GPUCA_GPUCODE_DEVICE) && defined(__APPLE__)
292+
__sincosf(x, &s, &c);
294293
#elif !defined(GPUCA_GPUCODE_DEVICE) && (defined(__GNU_SOURCE__) || defined(_GNU_SOURCE) || defined(GPUCA_GPUCODE))
295-
sincosf(x, &s, &c);
294+
sincosf(x, &s, &c);
296295
#else
297-
GPUCA_CHOICE((void)((s = sinf(x)) + (c = cosf(x))), sincosf(x, &s, &c), s = sincos(x, &c));
296+
GPUCA_CHOICE((void)((s = sinf(x)) + (c = cosf(x))), sincosf(x, &s, &c), s = sincos(x, &c));
298297
#endif
298+
) // clang-format on
299299
}
300300

301301
GPUhdi() void GPUCommonMath::SinCosd(double x, double& s, double& c)
@@ -392,22 +392,26 @@ GPUdi() T GPUCommonMath::MaxWithRef(T x, T y, T z, T w, S refX, S refY, S refZ,
392392

393393
GPUdi() float GPUCommonMath::InvSqrt(float _x)
394394
{
395-
#if defined(GPUCA_DETERMINISTIC_MODE) || defined(__OPENCL__)
396-
return 1.f / Sqrt(_x);
397-
#elif defined(__CUDACC__) || defined(__HIPCC__)
398-
return __frsqrt_rn(_x);
399-
#elif defined(__FAST_MATH__)
400-
return 1.f / sqrtf(_x);
395+
GPUCA_DETERMINISTIC_CODE( // clang-format off
396+
return 1.f / Sqrt(_x);
397+
, // !GPUCA_DETERMINISTIC_CODE
398+
#if defined(__CUDACC__) || defined(__HIPCC__)
399+
return __frsqrt_rn(_x);
400+
#elif defined(__OPENCL__) && defined(__clang__)
401+
return 1.f / sqrt(_x);
402+
#elif !defined(__OPENCL__) && (defined(__FAST_MATH__) || defined(__clang__))
403+
return 1.f / sqrtf(_x);
401404
#else
402-
union {
403-
float f;
404-
int32_t i;
405-
} x = {_x};
406-
const float xhalf = 0.5f * x.f;
407-
x.i = 0x5f3759df - (x.i >> 1);
408-
x.f = x.f * (1.5f - xhalf * x.f * x.f);
409-
return x.f;
405+
union {
406+
float f;
407+
int32_t i;
408+
} x = {_x};
409+
const float xhalf = 0.5f * x.f;
410+
x.i = 0x5f3759df - (x.i >> 1);
411+
x.f = x.f * (1.5f - xhalf * x.f * x.f);
412+
return x.f;
410413
#endif
414+
) // clang-format on
411415
}
412416

413417
template <>
@@ -540,7 +544,6 @@ GPUdii() void GPUCommonMath::AtomicMinInternal(GPUglobalref() GPUgeneric() GPUAt
540544

541545
#undef GPUCA_CHOICE
542546

543-
} // namespace gpu
544-
} // namespace o2
547+
} // namespace o2::gpu
545548

546549
#endif // GPUCOMMONMATH_H

GPU/Common/GPUCommonTransform3D.h

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,7 @@
1717

1818
#include "GPUCommonDef.h"
1919

20-
namespace o2
21-
{
22-
namespace gpu
20+
namespace o2::gpu
2321
{
2422
class Transform3D
2523
{
@@ -79,7 +77,6 @@ class Transform3D
7977
kZZ = 10,
8078
kDZ = 11 };
8179
};
82-
} // namespace gpu
83-
} // namespace o2
80+
} // namespace o2::gpu
8481

8582
#endif

GPU/Common/GPUROOTCartesianFwd.h

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -46,9 +46,7 @@ class DefaultCoordinateSystemTag;
4646
} // namespace Math
4747
} // namespace ROOT
4848

49-
namespace o2
50-
{
51-
namespace math_utils
49+
namespace o2::math_utils
5250
{
5351

5452
namespace detail
@@ -79,7 +77,6 @@ template <typename T>
7977
using Vector3D = detail::GPUPoint3D<T, 1>;
8078
#endif
8179

82-
} // namespace math_utils
83-
} // namespace o2
80+
} // namespace o2::math_utils
8481

8582
#endif

0 commit comments

Comments
 (0)