Refactor float_to_float16_untyped_slow for clarity and optimization (#330)

rfsaliev · web-flow · commit 80e4a6c7ed71 · 2026-05-14T16:03:05.000Z
This pull request refactors the `float_to_float16_untyped_slow` function in `float16.h` to improve code clarity and maintainability. The logic is now expressed as explicit conditional branches instead of a single complex expression, which makes it easier to understand and maintain while preserving the original functionality. > Note: This change was inspired by #324 by @mnorris11. **Refactoring and code clarity:** * Replaced the single complex return statement in `float_to_float16_untyped_slow` with explicit conditional branches for handling saturation, normalization, denormalization, and sign extraction, improving readability and maintainability. * The refactored code is clear and simple, and modern C++ compilers generate better-optimized assembly for it than for the previous version. See the QuickBench [results](https://quick-bench.com/q/AUjkLSDctpsAf6tKavxwDzxrFsM) for the benchmark code in [float_to_float16_bench.cpp](https://github.com/user-attachments/files/27627387/float_to_float16_bench.cpp).
diff --git a/include/svs/lib/float16.h b/include/svs/lib/float16.h
@@ -56,12 +56,26 @@ inline uint16_t float_to_float16_untyped_slow(const float x) {
     const uint32_t b = bitcast_float_to_uint32(x) + 0x00001000;
     const uint32_t e = (b & 0x7F800000) >> 23; // exponent
     const uint32_t m = b & 0x007FFFFF;         // mantissa
-    return (b & 0x80000000) >> 16 |
-           static_cast<uint32_t>(e > 112) * ((((e - 112) << 10) & 0x7C00) | m >> 13) |
-           static_cast<uint32_t>((e < 113) && (e > 101)) *
-               ((((0x007FF000 + m) >> (125 - e)) + 1) >> 1) |
-           static_cast<uint32_t>(e > 143) *
-               0x7FFF; // sign : normalized : denormalized : saturate
+
+    // Code below is clear and simple, so modern C++ compilers will optimize it pretty well.
+    const uint32_t sign = static_cast<uint32_t>((b & 0x80000000) >> 16);
+    if (e > 143) {
+        return static_cast<uint16_t>(sign | 0x7FFF); // saturate
+    }
+
+    if (e > 112) {
+        const uint32_t normalized = (((e - 112) << 10) & 0x7C00) | (m >> 13);
+        return static_cast<uint16_t>(sign | normalized);
+    }
+
+    if (e > 101) {
+        // Safe: for e in [102, 112], shift is in [13, 23].
+        const uint32_t shift = 125 - e;
+        const uint32_t denormalized = ((((0x007FF000 + m) >> shift) + 1) >> 1);
+        return static_cast<uint16_t>(sign | denormalized);
+    }
+
+    return static_cast<uint16_t>(sign);
 }
 
 inline float float16_to_float_untyped(const uint16_t x) {