Skip to content

Commit 80e4a6c

Browse files
authored
Refactor float_to_float16_untyped_slow for clarity and optimization (#330)
This pull request refactors the `float_to_float16_untyped_slow` function in `float16.h` to improve code clarity and maintainability. The logic is now expressed as explicit conditional branches instead of a single complex expression, which makes it easier to understand and maintain while preserving the original functionality.

> Note: This change was inspired by #324 by @mnorris11.

**Refactoring and code clarity:**

* Replaced the single complex return statement in `float_to_float16_untyped_slow` with explicit conditional branches for saturation, normalization, denormalization, and sign extraction, improving readability and maintainability.
* The refactored code is clear and simple, and modern C++ compilers generate better-optimized assembly for it than for the previous version. See the QuickBench [results](https://quick-bench.com/q/AUjkLSDctpsAf6tKavxwDzxrFsM); the benchmark code is available here: [float_to_float16_bench.cpp](https://github.com/user-attachments/files/27627387/float_to_float16_bench.cpp)
1 parent dc52d34 commit 80e4a6c

1 file changed

Lines changed: 20 additions & 6 deletions

File tree

include/svs/lib/float16.h

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -56,12 +56,26 @@ inline uint16_t float_to_float16_untyped_slow(const float x) {
5656
const uint32_t b = bitcast_float_to_uint32(x) + 0x00001000;
5757
const uint32_t e = (b & 0x7F800000) >> 23; // exponent
5858
const uint32_t m = b & 0x007FFFFF; // mantissa
59-
return (b & 0x80000000) >> 16 |
60-
static_cast<uint32_t>(e > 112) * ((((e - 112) << 10) & 0x7C00) | m >> 13) |
61-
static_cast<uint32_t>((e < 113) && (e > 101)) *
62-
((((0x007FF000 + m) >> (125 - e)) + 1) >> 1) |
63-
static_cast<uint32_t>(e > 143) *
64-
0x7FFF; // sign : normalized : denormalized : saturate
59+
60+
// Code below is clear and simple, so modern C++ compilers will optimize it pretty well.
61+
const uint32_t sign = static_cast<uint32_t>((b & 0x80000000) >> 16);
62+
if (e > 143) {
63+
return static_cast<uint16_t>(sign | 0x7FFF); // saturate
64+
}
65+
66+
if (e > 112) {
67+
const uint32_t normalized = (((e - 112) << 10) & 0x7C00) | (m >> 13);
68+
return static_cast<uint16_t>(sign | normalized);
69+
}
70+
71+
if (e > 101) {
72+
// Safe: for e in [102, 112], shift is in [13, 23].
73+
const uint32_t shift = 125 - e;
74+
const uint32_t denormalized = ((((0x007FF000 + m) >> shift) + 1) >> 1);
75+
return static_cast<uint16_t>(sign | denormalized);
76+
}
77+
78+
return static_cast<uint16_t>(sign);
6579
}
6680

6781
inline float float16_to_float_untyped(const uint16_t x) {

0 commit comments

Comments
 (0)