Skip to content

Commit cc6c05c

Browse files
committed
Adjusting for comments
1 parent bd3c8d1 commit cc6c05c

File tree

2 files changed

+16
-12
lines changed

2 files changed

+16
-12
lines changed

Common/ML/include/ML/3rdparty/GPUORTFloat16.h

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -275,7 +275,7 @@ union float32_bits {
275275
}; // namespace detail
276276

277277
template <class Derived>
278-
GPUd() inline constexpr uint16_t Float16Impl<Derived>::ToUint16Impl(float v) noexcept
278+
GPUdi() constexpr uint16_t Float16Impl<Derived>::ToUint16Impl(float v) noexcept
279279
{
280280
detail::float32_bits f{};
281281
f.f = v;
@@ -324,7 +324,7 @@ GPUd() inline constexpr uint16_t Float16Impl<Derived>::ToUint16Impl(float v) noe
324324
}
325325

326326
template <class Derived>
327-
GPUd() inline float Float16Impl<Derived>::ToFloatImpl() const noexcept
327+
GPUdi() float Float16Impl<Derived>::ToFloatImpl() const noexcept
328328
{
329329
constexpr detail::float32_bits magic = {113 << 23};
330330
constexpr unsigned int shifted_exp = 0x7c00 << 13; // exponent mask after shift
@@ -528,7 +528,7 @@ struct BFloat16Impl {
528528
};
529529

530530
template <class Derived>
531-
GPUd() inline uint16_t BFloat16Impl<Derived>::ToUint16Impl(float v) noexcept
531+
GPUdi() uint16_t BFloat16Impl<Derived>::ToUint16Impl(float v) noexcept
532532
{
533533
uint16_t result;
534534
if (o2::gpu::CAMath::IsNaN(v)) {
@@ -537,7 +537,7 @@ GPUd() inline uint16_t BFloat16Impl<Derived>::ToUint16Impl(float v) noexcept
537537
auto get_msb_half = [](float fl) {
538538
uint16_t result;
539539
#ifdef GPUCA_GPUCODE
540-
result = 0;
540+
o2::gpu::CAMath::memcpy(&result, reinterpret_cast<char*>(&fl) + sizeof(uint16_t), sizeof(uint16_t));
541541
#else
542542
#ifdef __cpp_if_constexpr
543543
if constexpr (detail::endian::native == detail::endian::little)
@@ -547,6 +547,7 @@ GPUd() inline uint16_t BFloat16Impl<Derived>::ToUint16Impl(float v) noexcept
547547
{
548548
std::memcpy(&result, reinterpret_cast<char*>(&fl) + sizeof(uint16_t), sizeof(uint16_t));
549549
} else {
550+
static_assert(false, "ERROR!!!");
550551
std::memcpy(&result, &fl, sizeof(uint16_t));
551552
}
552553
return result;
@@ -566,17 +567,18 @@ GPUd() inline uint16_t BFloat16Impl<Derived>::ToUint16Impl(float v) noexcept
566567
}
567568

568569
template <class Derived>
569-
GPUd() inline float BFloat16Impl<Derived>::ToFloatImpl() const noexcept
570+
GPUdi() float BFloat16Impl<Derived>::ToFloatImpl() const noexcept
570571
{
571572
if (IsNaN()) {
572-
return std::numeric_limits<float>::quiet_NaN();
573+
return o2::gpu::CAMath::QuietNaN();
573574
}
574575
float result;
576+
char* const first = reinterpret_cast<char*>(&result);
577+
char* const second = first + sizeof(uint16_t);
575578
#ifdef GPUCA_GPUCODE
576-
result = 0; // Fixme: implement memcpy
579+
first[0] = first[1] = 0;
580+
o2::gpu::CAMath::memcpy(second, &val, sizeof(uint16_t));
577581
#else
578-
char* const first = reinterpret_cast<char*>(&result);
579-
char* const second = first + sizeof(uint16_t);
580582
#ifdef __cpp_if_constexpr
581583
if constexpr (detail::endian::native == detail::endian::little)
582584
#else
@@ -726,7 +728,7 @@ GPUd() inline uint16_t BFloat16Impl<Derived>::ToUint16Impl(float v) noexcept
726728
/// <summary>
727729
/// User-defined conversion operator. Converts Float16_t to float.
728730
/// </summary>
729-
explicit operator float() const noexcept { return ToFloat(); }
731+
GPUdi() explicit operator float() const noexcept { return ToFloat(); }
730732

731733
using Base::operator==;
732734
using Base::operator!=;
@@ -867,7 +869,7 @@ GPUd() inline uint16_t BFloat16Impl<Derived>::ToUint16Impl(float v) noexcept
867869
/// <summary>
868870
/// User-defined conversion operator. Converts BFloat16_t to float.
869871
/// </summary>
870-
explicit operator float() const noexcept { return ToFloat(); }
872+
GPUdi() explicit operator float() const noexcept { return ToFloat(); }
871873

872874
// We do not have an inherited impl for the below operators
873875
// as the internal class implements them a little differently

GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -899,6 +899,9 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
899899
clustererNN.nnClusterizerVerbosity = nn_settings.nnClusterizerVerbosity;
900900
}
901901

902+
int evalDtype = nn_settings.nnInferenceDtype.find("32") != std::string::npos;
903+
clustererNN.nnClusterizerDtype = evalDtype;
904+
902905
// Settings for the NN evaluation
903906
clustererNN.nnClassThreshold = nn_settings.nnClassThreshold;
904907
clustererNN.nnSigmoidTrafoClassThreshold = nn_settings.nnSigmoidTrafoClassThreshold;
@@ -920,7 +923,6 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
920923
}
921924

922925
float time_clusterizer = 0, time_fill = 0;
923-
int evalDtype = nn_settings.nnInferenceDtype.find("32") != std::string::npos;
924926

925927
for (int batch = 0; batch < std::ceil((float)clusterer.mPmemory->counters.nClusters / clustererNN.nnClusterizerBatchedMode); batch++) {
926928
uint batchStart = batch * clustererNN.nnClusterizerBatchedMode;

0 commit comments

Comments
 (0)