@@ -275,7 +275,7 @@ union float32_bits {
275275}; // namespace detail
276276
277277template <class Derived >
278- GPUd () inline constexpr uint16_t Float16Impl<Derived>::ToUint16Impl(float v) noexcept
278+ GPUdi () constexpr uint16_t Float16Impl<Derived>::ToUint16Impl(float v) noexcept
279279{
280280 detail::float32_bits f{};
281281 f.f = v;
@@ -324,7 +324,7 @@ GPUd() inline constexpr uint16_t Float16Impl<Derived>::ToUint16Impl(float v) noe
324324}
325325
326326template <class Derived >
327- GPUd () inline float Float16Impl<Derived>::ToFloatImpl() const noexcept
327+ GPUdi () float Float16Impl<Derived>::ToFloatImpl() const noexcept
328328{
329329 constexpr detail::float32_bits magic = {113 << 23 };
330330 constexpr unsigned int shifted_exp = 0x7c00 << 13 ; // exponent mask after shift
@@ -528,7 +528,7 @@ struct BFloat16Impl {
528528};
529529
530530template <class Derived >
531- GPUd () inline uint16_t BFloat16Impl<Derived>::ToUint16Impl(float v) noexcept
531+ GPUdi () uint16_t BFloat16Impl<Derived>::ToUint16Impl(float v) noexcept
532532{
533533 uint16_t result;
534534 if (o2::gpu::CAMath::IsNaN (v)) {
@@ -537,7 +537,7 @@ GPUd() inline uint16_t BFloat16Impl<Derived>::ToUint16Impl(float v) noexcept
537537 auto get_msb_half = [](float fl) {
538538 uint16_t result;
539539#ifdef GPUCA_GPUCODE
540- result = 0 ;
540+ o2::gpu::CAMath::memcpy (& result, reinterpret_cast < char *>(&fl) + sizeof ( uint16_t ), sizeof ( uint16_t )) ;
541541#else
542542#ifdef __cpp_if_constexpr
543543 if constexpr (detail::endian::native == detail::endian::little)
@@ -547,6 +547,7 @@ GPUd() inline uint16_t BFloat16Impl<Derived>::ToUint16Impl(float v) noexcept
547547 {
548548 std::memcpy (&result, reinterpret_cast <char *>(&fl) + sizeof (uint16_t ), sizeof (uint16_t ));
549549 } else {
550+ static_assert (false , " ERROR!!!" );
550551 std::memcpy (&result, &fl, sizeof (uint16_t ));
551552 }
552553 return result;
@@ -566,17 +567,18 @@ GPUd() inline uint16_t BFloat16Impl<Derived>::ToUint16Impl(float v) noexcept
566567 }
567568
568569 template <class Derived >
569- GPUd () inline float BFloat16Impl<Derived>::ToFloatImpl () const noexcept
570+ GPUdi () float BFloat16Impl<Derived>::ToFloatImpl () const noexcept
570571 {
571572 if (IsNaN ()) {
572- return std::numeric_limits< float >:: quiet_NaN ();
573+ return o2::gpu::CAMath::QuietNaN ();
573574 }
574575 float result;
576+ char * const first = reinterpret_cast <char *>(&result);
577+ char * const second = first + sizeof (uint16_t );
575578#ifdef GPUCA_GPUCODE
576- result = 0 ; // Fixme: implement memcpy
579+ first[0 ] = first[1 ] = 0 ;
580+ o2::gpu::CAMath::memcpy (second, &val, sizeof (uint16_t ));
577581#else
578- char * const first = reinterpret_cast <char *>(&result);
579- char * const second = first + sizeof (uint16_t );
580582#ifdef __cpp_if_constexpr
581583 if constexpr (detail::endian::native == detail::endian::little)
582584#else
@@ -726,7 +728,7 @@ GPUd() inline uint16_t BFloat16Impl<Derived>::ToUint16Impl(float v) noexcept
726728 // / <summary>
727729 // / User defined conversion operator. Converts Float16_t to float.
728730 // / </summary>
729- explicit operator float () const noexcept { return ToFloat (); }
731+ GPUdi () explicit operator float () const noexcept { return ToFloat (); }
730732
731733 using Base::operator ==;
732734 using Base::operator !=;
@@ -867,7 +869,7 @@ GPUd() inline uint16_t BFloat16Impl<Derived>::ToUint16Impl(float v) noexcept
867869 // / <summary>
868870 // / User defined conversion operator. Converts BFloat16_t to float.
869871 // / </summary>
870- explicit operator float () const noexcept { return ToFloat (); }
872+ GPUdi () explicit operator float () const noexcept { return ToFloat (); }
871873
872874 // We do not have an inherited impl for the below operators
873875 // as the internal class implements them a little differently
0 commit comments