3333#include < cstdint>
3434#endif
3535
36+ // GPUCA_CHOICE Syntax: GPUCA_CHOICE(Host, CUDA&HIP, OpenCL) - expands to exactly one of the three expressions (parenthesized); the two unselected arguments are dropped by the preprocessor
37+ #if defined(GPUCA_GPUCODE_DEVICE) && (defined(__CUDACC__) || defined(__HIPCC__)) // clang-format off
38+ #define GPUCA_CHOICE (c1, c2, c3 ) (c2) // Select second option for CUDA and HIP
39+ #elif defined(GPUCA_GPUCODE_DEVICE) && defined (__OPENCL__)
40+ #define GPUCA_CHOICE (c1, c2, c3 ) (c3) // Select third option for OpenCL
41+ #else
42+ #define GPUCA_CHOICE (c1, c2, c3 ) (c1) // Select first option for Host
43+ #endif // clang-format on
44+
3645namespace o2
3746{
3847namespace gpu
@@ -44,9 +53,15 @@ class GPUCommonMath
4453 GPUd () static float2 MakeFloat2 (float x, float y); // TODO: Find better approach that is constexpr
4554
4655 template <class T >
47- GPUhd () static T Min (const T x, const T y);
56+ GPUhd () static T Min (const T x, const T y) // Returns the smaller of x and y; now defined inline in the class body
57+ {
58+ return GPUCA_CHOICE (std::min (x, y), min (x, y), min (x, y)); // host: std::min; CUDA/HIP and OpenCL device code: unqualified min
59+ }
4860 template <class T >
49- GPUhd () static T Max (const T x, const T y);
61+ GPUhd () static T Max (const T x, const T y) // Returns the larger of x and y; now defined inline in the class body
62+ {
63+ return GPUCA_CHOICE (std::max (x, y), max (x, y), max (x, y)); // host: std::max; CUDA/HIP and OpenCL device code: unqualified max
64+ }
5065 template <class T , class S , class R >
5166 GPUd () static T MinWithRef (T x, T y, S refX, S refY, R& r);
5267 template <class T , class S , class R >
@@ -74,28 +89,28 @@ class GPUCommonMath
7489 GPUd () static float Pow (float x, float y);
7590 GPUd () static float Log (float x);
7691 GPUd () static float Exp (float x);
77- GPUhdni () static float Copysign (float x, float y);
92+ GPUhdni () static float Copysign (float x, float y) { return GPUCA_CHOICE ( std::copysignf (x, y), copysignf (x, y), copysign (x, y)); } // host: std::copysignf; CUDA/HIP: copysignf; OpenCL: copysign
7893 GPUd () static constexpr float TwoPi () { return 6 .2831853f ; }
7994 GPUd () static constexpr float Pi () { return 3 .1415927f ; }
8095 GPUd () static float Round (float x);
81- GPUd () static float Floor (float x);
96+ GPUd () static float Floor (float x) { return GPUCA_CHOICE ( floorf (x), floorf (x), floor (x)); }
8297 GPUd () static uint32_t Float2UIntReint (const float & x);
83- GPUd () static uint32_t Float2UIntRn (float x);
98+ GPUd () static uint32_t Float2UIntRn (float x) { return ( uint32_t )( int32_t )(x + 0 . 5f ); } // adds 0.5f, converts to int32_t (truncating toward zero), then casts to uint32_t; NOTE(review): presumably only intended for non-negative x that fits in int32_t - confirm
8499 GPUd () static int32_t Float2IntRn (float x);
85100 GPUd () static float Modf (float x, float y);
86- GPUd () static bool Finite (float x);
87- GPUd () static bool IsNaN (float x);
101+ GPUd () static bool Finite (float x) { return GPUCA_CHOICE ( std::isfinite (x), isfinite (x), isfinite (x)); } // host: std::isfinite; device code: unqualified isfinite
102+ GPUd () static bool IsNaN (float x) { return GPUCA_CHOICE ( std::isnan (x), isnan (x), isnan (x)); } // host: std::isnan; device code: unqualified isnan
88103 GPUd () static bool FiniteRelaxed (float x); // always true if not using NO_FAST_MATH
89104 GPUd () static bool IsNaNRelaxed (float x); // always false if not using NO_FAST_MATH, otherwise forwards to IsNaN
90- GPUd () static float QuietNaN ();
105+ GPUd () static float QuietNaN () { return GPUCA_CHOICE (std::numeric_limits< float >:: quiet_NaN (), __builtin_nanf ( " " ), nan ( 0u )); } // host: std::numeric_limits<float>::quiet_NaN(); CUDA/HIP: __builtin_nanf; OpenCL: nan(0u)
91106 GPUd () static uint32_t Clz (uint32_t val);
92107 GPUd () static uint32_t Popcount (uint32_t val);
93108
94109 GPUd () static void memcpy (void * dst, const void * src, size_t size);
95110
96- GPUhdni () static float Hypot (float x, float y);
97- GPUhdni () static float Hypot (float x, float y, float z);
98- GPUhdni () static float Hypot (float x, float y, float z, float w);
111+ GPUhdi () static float Hypot (float x, float y) { return Sqrt (x * x + y * y); } // 2D Euclidean norm as Sqrt of the plain sum of squares (no rescaling of the operands)
112+ GPUhdi () static float Hypot (float x, float y, float z) { return Sqrt (x * x + y * y + z * z); } // 3D variant, same formula
113+ GPUhdi () static float Hypot (float x, float y, float z, float w) { return Sqrt (x * x + y * y + z * z + w * w); } // 4D variant, same formula
99114
100115 template <typename T>
101116 GPUhd () static void Swap (T& a, T& b);
@@ -154,15 +169,7 @@ class GPUCommonMath
154169 GPUd () constexpr static T nextMultipleOf (T val);
155170
156171 template <typename ... Args>
157- GPUdi () static float Sum2 (float w, Args... args)
158- {
159- if constexpr (sizeof ...(Args) == 0 ) {
160- return w * w;
161- } else {
162- return w * w + Sum2 (args...);
163- }
164- return 0 ;
165- }
172+ GPUhdni () static float Sum2 (float w, Args... args);
166173
167174 private:
168175 template <class S , class T >
@@ -179,14 +186,16 @@ class GPUCommonMath
179186
180187typedef GPUCommonMath CAMath;
181188
182- // CHOICE Syntax: CHOICE(Host, CUDA&HIP, OpenCL)
183- #if defined(GPUCA_GPUCODE_DEVICE) && (defined(__CUDACC__) || defined(__HIPCC__)) // clang-format off
184- #define CHOICE (c1, c2, c3 ) (c2) // Select second option for CUDA and HIP
185- #elif defined(GPUCA_GPUCODE_DEVICE) && defined (__OPENCL__)
186- #define CHOICE (c1, c2, c3 ) (c3) // Select third option for OpenCL
187- #else
188- #define CHOICE (c1, c2, c3 ) (c1) // Select first option for Host
189- #endif // clang-format on
189+ template <typename ... Args>
190+ GPUhdi () float GPUCommonMath::Sum2 (float w, Args... args) // Sum of the squares of all arguments, evaluated by recursion over the parameter pack
191+ {
192+ if constexpr (sizeof ...(Args) == 0 ) { // last remaining argument: recursion ends here
193+ return w * w;
194+ } else {
195+ return w * w + Sum2 (args...); // square the head and recurse on the remaining arguments
196+ }
197+ return 0 ; // never reached, both constexpr branches return; NOTE(review): presumably kept to silence a missing-return warning - confirm
198+ }
190199
191200GPUdi () void GPUCommonMath::memcpy (void * dst, const void * src, size_t size)
192201{
@@ -230,7 +239,7 @@ GPUdi() float2 GPUCommonMath::MakeFloat2(float x, float y)
230239#endif // GPUCA_GPUCODE
231240}
232241
233- GPUdi () float GPUCommonMath::Modf (float x, float y) { return CHOICE (fmodf (x, y), fmodf (x, y), fmod (x, y)); }
242+ GPUdi () float GPUCommonMath::Modf (float x, float y) { return GPUCA_CHOICE (fmodf (x, y), fmodf (x, y), fmod (x, y)); } // floating-point remainder: fmodf on host and CUDA/HIP, fmod on OpenCL
234243
235244GPUdi () uint32_t GPUCommonMath::Float2UIntReint (const float & x)
236245{
@@ -243,42 +252,36 @@ GPUdi() uint32_t GPUCommonMath::Float2UIntReint(const float& x)
243252#endif
244253}
245254
246- GPUdi () uint32_t GPUCommonMath::Float2UIntRn (float x) { return (uint32_t )(int32_t )(x + 0 .5f ); }
247- GPUdi () float GPUCommonMath::Floor (float x) { return CHOICE (floorf (x), floorf (x), floor (x)); }
248-
249- GPUdi () bool GPUCommonMath::Finite (float x) { return CHOICE (std::isfinite (x), isfinite (x), isfinite (x)); }
250- GPUdi () bool GPUCommonMath::IsNaN (float x) { return CHOICE (std::isnan (x), isnan (x), isnan (x)); }
251- GPUdi () float GPUCommonMath::QuietNaN () { return CHOICE (std::numeric_limits<float >::quiet_NaN (), __builtin_nanf (" " ), nan (0u )); }
252255#ifdef GPUCA_NO_FAST_MATH
253- GPUdi () float GPUCommonMath::Round(float x) { return CHOICE (roundf (x), roundf (x), round (x)); }
256+ GPUdi () float GPUCommonMath::Round(float x) { return GPUCA_CHOICE (roundf (x), roundf (x), round (x)); }
254257GPUdi () int32_t GPUCommonMath::Float2IntRn(float x) { return (int32_t )Round (x); }
255- GPUhdi () float GPUCommonMath::Sqrt(float x) { return CHOICE (sqrtf (x), (float )sqrt ((double )x), sqrt (x)); }
256- GPUdi () float GPUCommonMath::ATan(float x) { return CHOICE ((float )atan ((double )x), (float )atan ((double )x), atan (x)); }
257- GPUhdi () float GPUCommonMath::ATan2(float y, float x) { return CHOICE ((float )atan2 ((double )y, (double )x), (float )atan2 ((double )y, (double )x), atan2 (y, x)); }
258- GPUdi () float GPUCommonMath::Sin(float x) { return CHOICE ((float )sin ((double )x), (float )sin ((double )x), sin (x)); }
259- GPUdi () float GPUCommonMath::Cos(float x) { return CHOICE ((float )cos ((double )x), (float )cos ((double )x), cos (x)); }
260- GPUdi () float GPUCommonMath::Tan(float x) { return CHOICE ((float )tanf ((double )x), (float )tanf ((double )x), tan (x)); }
261- GPUdi () float GPUCommonMath::Pow(float x, float y) { return CHOICE ((float )pow ((double )x, (double )y), pow ((double )x, (double )y), pow (x, y)); }
262- GPUdi () float GPUCommonMath::ASin(float x) { return CHOICE ((float )asin ((double )x), (float )asin ((double )x), asin (x)); }
263- GPUdi () float GPUCommonMath::ACos(float x) { return CHOICE ((float )acos ((double )x), (float )acos ((double )x), acos (x)); }
264- GPUdi () float GPUCommonMath::Log(float x) { return CHOICE ((float )log ((double )x), (float )log ((double )x), log (x)); }
265- GPUdi () float GPUCommonMath::Exp(float x) { return CHOICE ((float )exp ((double )x), (float )exp ((double )x), exp (x)); }
258+ GPUhdi () float GPUCommonMath::Sqrt(float x) { return GPUCA_CHOICE (sqrtf (x), (float )sqrt ((double )x), sqrt (x)); }
259+ GPUdi () float GPUCommonMath::ATan(float x) { return GPUCA_CHOICE ((float )atan ((double )x), (float )atan ((double )x), atan (x)); }
260+ GPUhdi () float GPUCommonMath::ATan2(float y, float x) { return GPUCA_CHOICE ((float )atan2 ((double )y, (double )x), (float )atan2 ((double )y, (double )x), atan2 (y, x)); }
261+ GPUdi () float GPUCommonMath::Sin(float x) { return GPUCA_CHOICE ((float )sin ((double )x), (float )sin ((double )x), sin (x)); }
262+ GPUdi () float GPUCommonMath::Cos(float x) { return GPUCA_CHOICE ((float )cos ((double )x), (float )cos ((double )x), cos (x)); }
263+ GPUdi () float GPUCommonMath::Tan(float x) { return GPUCA_CHOICE ((float )tan ((double )x), (float )tan ((double )x), tan (x)); } // GPUCA_NO_FAST_MATH: tangent evaluated in double precision on host and CUDA/HIP like the sibling Sin/Cos/ATan (tanf would convert the double argument straight back to float); OpenCL uses tan
264+ GPUdi () float GPUCommonMath::Pow(float x, float y) { return GPUCA_CHOICE ((float )pow ((double )x, (double )y), pow ((double )x, (double )y), pow (x, y)); }
265+ GPUdi () float GPUCommonMath::ASin(float x) { return GPUCA_CHOICE ((float )asin ((double )x), (float )asin ((double )x), asin (x)); }
266+ GPUdi () float GPUCommonMath::ACos(float x) { return GPUCA_CHOICE ((float )acos ((double )x), (float )acos ((double )x), acos (x)); }
267+ GPUdi () float GPUCommonMath::Log(float x) { return GPUCA_CHOICE ((float )log ((double )x), (float )log ((double )x), log (x)); }
268+ GPUdi () float GPUCommonMath::Exp(float x) { return GPUCA_CHOICE ((float )exp ((double )x), (float )exp ((double )x), exp (x)); }
266269GPUdi () bool GPUCommonMath::FiniteRelaxed(float x) { return Finite (x); }
267270GPUdi () bool GPUCommonMath::IsNaNRelaxed(float x) { return IsNaN (x); }
268271#else
269- GPUdi () float GPUCommonMath::Round(float x) { return CHOICE (roundf (x), rintf (x), rint (x)); }
270- GPUdi () int32_t GPUCommonMath::Float2IntRn(float x) { return CHOICE ((int32_t )Round (x), __float2int_rn (x), (int32_t )Round (x)); }
271- GPUhdi () float GPUCommonMath::Sqrt(float x) { return CHOICE (sqrtf (x), sqrtf (x), sqrt (x)); }
272- GPUdi () float GPUCommonMath::ATan(float x) { return CHOICE (atanf (x), atanf (x), atan (x)); }
273- GPUhdi () float GPUCommonMath::ATan2(float y, float x) { return CHOICE (atan2f (y, x), atan2f (y, x), atan2 (y, x)); }
274- GPUdi () float GPUCommonMath::Sin(float x) { return CHOICE (sinf (x), sinf (x), sin (x)); }
275- GPUdi () float GPUCommonMath::Cos(float x) { return CHOICE (cosf (x), cosf (x), cos (x)); }
276- GPUdi () float GPUCommonMath::Tan(float x) { return CHOICE (tanf (x), tanf (x), tan (x)); }
277- GPUdi () float GPUCommonMath::Pow(float x, float y) { return CHOICE (powf (x, y), powf (x, y), pow (x, y)); }
278- GPUdi () float GPUCommonMath::ASin(float x) { return CHOICE (asinf (x), asinf (x), asin (x)); }
279- GPUdi () float GPUCommonMath::ACos(float x) { return CHOICE (acosf (x), acosf (x), acos (x)); }
280- GPUdi () float GPUCommonMath::Log(float x) { return CHOICE (logf (x), logf (x), log (x)); }
281- GPUdi () float GPUCommonMath::Exp(float x) { return CHOICE (expf (x), expf (x), exp (x)); }
272+ GPUdi () float GPUCommonMath::Round(float x) { return GPUCA_CHOICE (roundf (x), rintf (x), rint (x)); }
273+ GPUdi () int32_t GPUCommonMath::Float2IntRn(float x) { return GPUCA_CHOICE ((int32_t )Round (x), __float2int_rn (x), (int32_t )Round (x)); }
274+ GPUhdi () float GPUCommonMath::Sqrt(float x) { return GPUCA_CHOICE (sqrtf (x), sqrtf (x), sqrt (x)); }
275+ GPUdi () float GPUCommonMath::ATan(float x) { return GPUCA_CHOICE (atanf (x), atanf (x), atan (x)); }
276+ GPUhdi () float GPUCommonMath::ATan2(float y, float x) { return GPUCA_CHOICE (atan2f (y, x), atan2f (y, x), atan2 (y, x)); }
277+ GPUdi () float GPUCommonMath::Sin(float x) { return GPUCA_CHOICE (sinf (x), sinf (x), sin (x)); }
278+ GPUdi () float GPUCommonMath::Cos(float x) { return GPUCA_CHOICE (cosf (x), cosf (x), cos (x)); }
279+ GPUdi () float GPUCommonMath::Tan(float x) { return GPUCA_CHOICE (tanf (x), tanf (x), tan (x)); }
280+ GPUdi () float GPUCommonMath::Pow(float x, float y) { return GPUCA_CHOICE (powf (x, y), powf (x, y), pow (x, y)); }
281+ GPUdi () float GPUCommonMath::ASin(float x) { return GPUCA_CHOICE (asinf (x), asinf (x), asin (x)); }
282+ GPUdi () float GPUCommonMath::ACos(float x) { return GPUCA_CHOICE (acosf (x), acosf (x), acos (x)); }
283+ GPUdi () float GPUCommonMath::Log(float x) { return GPUCA_CHOICE (logf (x), logf (x), log (x)); }
284+ GPUdi () float GPUCommonMath::Exp(float x) { return GPUCA_CHOICE (expf (x), expf (x), exp (x)); }
282285GPUdi () bool GPUCommonMath::FiniteRelaxed(float x) { return true ; }
283286GPUdi () bool GPUCommonMath::IsNaNRelaxed(float x) { return false ; }
284287#endif
@@ -293,7 +296,7 @@ GPUhdi() void GPUCommonMath::SinCos(float x, float& s, float& c)
293296#elif !defined(GPUCA_GPUCODE_DEVICE) && (defined(__GNU_SOURCE__) || defined(_GNU_SOURCE) || defined(GPUCA_GPUCODE))
294297 sincosf (x, &s, &c);
295298#else
296- CHOICE ((void )((s = sinf (x)) + (c = cosf (x))), sincosf (x, &s, &c), s = sincos (x, &c));
299+ GPUCA_CHOICE ((void )((s = sinf (x)) + (c = cosf (x))), sincosf (x, &s, &c), s = sincos (x, &c));
297300#endif
298301}
299302
@@ -304,14 +307,14 @@ GPUhdi() void GPUCommonMath::SinCosd(double x, double& s, double& c)
304307#elif !defined(GPUCA_GPUCODE_DEVICE) && (defined(__GNU_SOURCE__) || defined(_GNU_SOURCE) || defined(GPUCA_GPUCODE))
305308 sincos (x, &s, &c);
306309#else
307- CHOICE ((void )((s = sin (x)) + (c = cos (x))), sincos (x, &s, &c), s = sincos (x, &c));
310+ GPUCA_CHOICE ((void )((s = sin (x)) + (c = cos (x))), sincos (x, &s, &c), s = sincos (x, &c));
308311#endif
309312}
310313
311314GPUdi () uint32_t GPUCommonMath::Clz (uint32_t x)
312315{
313316#if (defined(__GNUC__) || defined(__clang__) || defined(__CUDACC__) || defined(__HIPCC__))
314- return x == 0 ? 32 : CHOICE (__builtin_clz (x), __clz (x), __builtin_clz (x)); // use builtin if available
317+ return x == 0 ? 32 : GPUCA_CHOICE (__builtin_clz (x), __clz (x), __builtin_clz (x)); // use builtin if available
315318#else
316319 for (int32_t i = 31 ; i >= 0 ; i--) {
317320 if (x & (1u << i)) {
@@ -326,53 +329,24 @@ GPUdi() uint32_t GPUCommonMath::Popcount(uint32_t x)
326329{
327330#if (defined(__GNUC__) || defined(__clang__) || defined(__CUDACC__) || defined(__HIPCC__)) && !defined(__OPENCL__) // TODO: remove OPENCL when reported SPIR-V bug is fixed
328331 // use builtin if available
329- return CHOICE (__builtin_popcount (x), __popc (x), __builtin_popcount (x));
332+ return GPUCA_CHOICE (__builtin_popcount (x), __popc (x), __builtin_popcount (x));
330333#else
331334 x = x - ((x >> 1 ) & 0x55555555 );
332335 x = (x & 0x33333333 ) + ((x >> 2 ) & 0x33333333 );
333336 return (((x + (x >> 4 )) & 0x0F0F0F0F ) * 0x01010101 ) >> 24 ;
334337#endif
335338}
336339
337- GPUhdi () float GPUCommonMath::Hypot (float x, float y)
338- {
339- return Sqrt (x * x + y * y);
340- }
341-
342- GPUhdi () float GPUCommonMath::Hypot (float x, float y, float z)
343- {
344- return Sqrt (x * x + y * y + z * z);
345- }
346-
347- GPUhdi () float GPUCommonMath::Hypot (float x, float y, float z, float w)
348- {
349- return Sqrt (x * x + y * y + z * z + w * w);
350- }
351-
352340template <typename T>
353- GPUd () void _swap (T& a, T& b)
341+ GPUhdi () void GPUCommonMath::Swap (T& a, T& b) // Exchanges the values of a and b
354342{
343+ #ifndef GPUCA_GPUCODE_DEVICE // not compiling device code: use std::swap
344+ std::swap (a, b);
345+ #else // device code: swap by hand through a temporary copy
355346 T tmp = a;
356347 a = b;
357348 b = tmp;
358- }
359-
360- template <typename T>
361- GPUhdi () void GPUCommonMath::Swap (T& a, T& b)
362- {
363- CHOICE (std::swap (a, b), _swap<T>(a, b), _swap<T>(a, b));
364- }
365-
366- template <class T >
367- GPUhdi () T GPUCommonMath::Min (const T x, const T y)
368- {
369- return CHOICE (std::min (x, y), min (x, y), min (x, y));
370- }
371-
372- template <class T >
373- GPUhdi () T GPUCommonMath::Max (const T x, const T y)
374- {
375- return CHOICE (std::max (x, y), max (x, y), max (x, y));
349+ #endif
376350}
377351
378352template <class T , class S , class R >
@@ -441,32 +415,19 @@ GPUdi() float GPUCommonMath::InvSqrt(float _x)
441415template <>
442416GPUhdi () float GPUCommonMath::Abs<float >(float x)
443417{
444- return CHOICE (fabsf (x), fabsf (x), fabs (x));
418+ return GPUCA_CHOICE (fabsf (x), fabsf (x), fabs (x));
445419}
446420
447- #if !defined(__OPENCL__) || defined(cl_khr_fp64)
448421template <>
449422GPUhdi () double GPUCommonMath::Abs<double >(double x)
450423{
451- return CHOICE (fabs (x), fabs (x), fabs (x));
424+ return GPUCA_CHOICE (fabs (x), fabs (x), fabs (x));
452425}
453- #endif
454426
455427template <>
456428GPUhdi () int32_t GPUCommonMath::Abs<int32_t >(int32_t x)
457429{
458- return CHOICE (abs (x), abs (x), abs (x));
459- }
460-
461- GPUhdi () float GPUCommonMath::Copysign (float x, float y)
462- {
463- #if defined(__OPENCL__)
464- return copysign (x, y);
465- #elif defined(GPUCA_GPUCODE) && !defined(__OPENCL__)
466- return copysignf (x, y);
467- #else
468- return std::copysignf (x, y);
469- #endif // GPUCA_GPUCODE
430+ return GPUCA_CHOICE (abs (x), abs (x), abs (x));
470431}
471432
472433template <class S , class T >
@@ -579,7 +540,7 @@ GPUdii() void GPUCommonMath::AtomicMinInternal(GPUglobalref() GPUgeneric() GPUAt
579540}
580541#endif
581542
582- #undef CHOICE
543+ #undef GPUCA_CHOICE // remove the helper macro so it is no longer defined after this point
583544
584545} // namespace gpu
585546} // namespace o2
0 commit comments