Skip to content
26 changes: 26 additions & 0 deletions benchmarks/src/integer_to_string.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include <algorithm>
#include <array>
#include <benchmark/benchmark.h>
#include <charconv>
#include <cmath>
#include <cstdint>
#include <limits>
Expand Down Expand Up @@ -38,6 +39,26 @@ auto generate_array() {
return a;
}

// Benchmark body: repeatedly formats integers with to_chars() into a fixed stack buffer.
// The inputs come from generate_array<T, M, S>() and are visited cyclically, one per benchmark iteration.
template <class T, double M, double S>
void integer_to_chars(benchmark::State& state) {
    auto inputs = generate_array<T, M, S>();
    char out[20]; // large enough for the decimal text of both -2^63 and 2^64 - 1

    auto cursor = inputs.begin();
    for (auto _ : state) {
        auto value = *cursor;
        benchmark::DoNotOptimize(value); // keep the compiler from assuming anything about the input

        auto result = to_chars(begin(out), end(out), value);
        // Observe both members of the result so the conversion cannot be discarded as dead code.
        benchmark::DoNotOptimize(result.ec);
        benchmark::DoNotOptimize(result.ptr);

        // Step to the next input, wrapping back to the first one after the last.
        if (++cursor == inputs.end()) {
            cursor = inputs.begin();
        }
    }
}

template <class CharT, class T, double M, double S>
void internal_integer_to_buff(benchmark::State& state) {
auto a = generate_array<T, M, S>();
Expand Down Expand Up @@ -77,6 +98,11 @@ void integer_to_string(benchmark::State& state) {
}
}

// Register the to_chars benchmark once per unsigned integer width (8, 16, 32 and 64 bits).
// The two floating-point template arguments are passed straight through to generate_array<T, M, S>(),
// which produces the input values for the run.
BENCHMARK(integer_to_chars<uint8_t, 2.5, 1.5>);
BENCHMARK(integer_to_chars<uint16_t, 5.0, 3.0>);
BENCHMARK(integer_to_chars<uint32_t, 10.0, 6.0>);
BENCHMARK(integer_to_chars<uint64_t, 20.0, 12.0>);

BENCHMARK(internal_integer_to_buff<char, uint8_t, 2.5, 1.5>);
BENCHMARK(internal_integer_to_buff<char, uint16_t, 5.0, 3.0>);
BENCHMARK(internal_integer_to_buff<char, uint32_t, 10.0, 6.0>);
Expand Down
68 changes: 50 additions & 18 deletions stl/inc/charconv
Original file line number Diff line number Diff line change
Expand Up @@ -64,31 +64,63 @@ _NODISCARD _CONSTEXPR23 to_chars_result _Integer_to_chars(
switch (_Base) {
case 10:
{ // Derived from _UIntegral_to_buff()
// Performance note: Ryu's digit table should be faster here.
constexpr bool _Use_chunks = sizeof(_Unsigned) > sizeof(size_t);

if constexpr (_Use_chunks) { // For 64-bit numbers on 32-bit platforms, work in chunks to avoid 64-bit
// divisions.
while (_Value > 0xFFFF'FFFFU) {
// Performance note: Ryu's division workaround would be faster here.
unsigned long _Chunk = static_cast<unsigned long>(_Value % 1'000'000'000);
_Value = static_cast<_Unsigned>(_Value / 1'000'000'000);

for (int _Idx = 0; _Idx != 9; ++_Idx) {
*--_RNext = static_cast<char>('0' + _Chunk % 10);
_Chunk /= 10;
#ifdef _WIN64
auto _Trunc = _Value;
#else // ^^^ 64-bit / 32-bit vvv
if constexpr (sizeof(_Unsigned) > 4) { // For 64-bit numbers, work in chunks to avoid 64-bit divisions.
while (_Value > 0xFFFFFFFFU) {
auto _Value_chunk = static_cast<unsigned long>(_Value % 100000000);
_Value /= 100000000;

for (int _Idx = 0; _Idx != 3; ++_Idx) {
const unsigned long _Value_chunk_part = _Value_chunk % 100;
_Value_chunk /= 100;
_RNext -= 2;
if (!_STD _Is_constant_evaluated()) {
_CSTD memcpy(_RNext, _Digit_pairs<char>._Data[_Value_chunk_part], 2);
} else {
_RNext[0] = _Digit_pairs<char>._Data[_Value_chunk_part][0];
_RNext[1] = _Digit_pairs<char>._Data[_Value_chunk_part][1];
}
}

_RNext -= 2;
if (!_STD _Is_constant_evaluated()) {
_CSTD memcpy(_RNext, _Digit_pairs<char>._Data[_Value_chunk], 2);
} else {
_RNext[0] = _Digit_pairs<char>._Data[_Value_chunk][0];
_RNext[1] = _Digit_pairs<char>._Data[_Value_chunk][1];
}
}
}

using _Truncated = conditional_t<_Use_chunks, unsigned long, _Unsigned>;
auto _Trunc = static_cast<unsigned long>(_Value);
#endif // ^^^ 32-bit ^^^

_Truncated _Trunc = static_cast<_Truncated>(_Value);
// If we have a single digit, print [0, 9] and return. (This is necessary to correctly handle 0.)
if (_Trunc < 10) {
*--_RNext = static_cast<char>('0' + _Trunc);
break;
}

// Print one or more pairs of digits.
do {
*--_RNext = static_cast<char>('0' + _Trunc % 10);
_Trunc /= 10;
} while (_Trunc != 0);
const auto _Trunc_part = static_cast<unsigned long>(_Trunc % 100);
_Trunc /= 100;
_RNext -= 2;
if (!_STD _Is_constant_evaluated()) {
_CSTD memcpy(_RNext, _Digit_pairs<char>._Data[_Trunc_part], 2);
} else {
_RNext[0] = _Digit_pairs<char>._Data[_Trunc_part][0];
_RNext[1] = _Digit_pairs<char>._Data[_Trunc_part][1];
}
} while (_Trunc >= 10);

// If we have an unpaired digit, print it.
// For example, 1729 is printed as 17 29, and 19937 is printed as 1 99 37.
if (_Trunc != 0) {
*--_RNext = static_cast<char>('0' + _Trunc);
}
break;
}

Expand Down
20 changes: 1 addition & 19 deletions stl/inc/xmemory
Original file line number Diff line number Diff line change
Expand Up @@ -2775,24 +2775,6 @@ template <class>
constexpr bool _Has_guaranteed_append_range = false; // N5032 [sequence.reqmts]/112; used by flat_(multi)set::insert.
#endif // _HAS_CXX23

// Lookup table of two-character decimal representations: _Data[N] holds the tens digit and the
// ones digit of N (as _Elem characters) for every N in [0, 99].
template <class _Elem>
struct _Digit_pair_table {
    _Elem _Data[100][2];

    // Fills the table; usable in constant expressions.
    constexpr explicit _Digit_pair_table() : _Data{} {
        for (int _Tens = 0; _Tens != 10; ++_Tens) {
            for (int _Ones = 0; _Ones != 10; ++_Ones) {
                const int _Row = _Tens * 10 + _Ones;
                _Data[_Row][0] = static_cast<_Elem>('0' + _Tens);
                _Data[_Row][1] = static_cast<_Elem>('0' + _Ones);
            }
        }
    }

    // Not copyable: the table is only meant to be accessed through _Digit_pairs.
    _Digit_pair_table(const _Digit_pair_table&)            = delete;
    _Digit_pair_table& operator=(const _Digit_pair_table&) = delete;
};

// One constant table instance per character type.
template <class _Elem>
constexpr _Digit_pair_table<_Elem> _Digit_pairs{};

template <class _Elem, class _UTy>
_NODISCARD _Elem* _UIntegral_to_buff(_Elem* _RNext, _UTy _UVal) { // used by both to_string and thread::id output
// format _UVal into buffer *ending at* _RNext
Expand Down Expand Up @@ -2830,7 +2812,7 @@ _NODISCARD _Elem* _UIntegral_to_buff(_Elem* _RNext, _UTy _UVal) { // used by bot

// Print one or more pairs of digits.
do {
const unsigned long _UVal_trunc_part = _UVal_trunc % 100;
const auto _UVal_trunc_part = static_cast<unsigned long>(_UVal_trunc % 100);
_UVal_trunc /= 100;
_RNext -= 2;
_CSTD memcpy(_RNext, _Digit_pairs<_Elem>._Data[_UVal_trunc_part], 2 * sizeof(_Elem));
Expand Down
18 changes: 18 additions & 0 deletions stl/inc/xutility
Original file line number Diff line number Diff line change
Expand Up @@ -7971,6 +7971,24 @@ constexpr bool _Equivalence_is_equality =
_Is_any_of_v<_Compare, _RANGES less, _RANGES greater>
|| (_Is_any_of_v<_Compare, less<>, less<_Key>, greater<>, greater<_Key>> && _Equivalence_is_equality_impl<_Key>);
#endif // _HAS_CXX23

// Lookup table of two-character decimal representations: _Data[N] holds the tens digit and the
// ones digit of N (as _Elem characters) for every N in [0, 99].
template <class _Elem>
struct _Digit_pair_table {
    _Elem _Data[100][2];

    // Fills the table; usable in constant expressions.
    constexpr explicit _Digit_pair_table() : _Data{} {
        for (int _Tens = 0; _Tens != 10; ++_Tens) {
            for (int _Ones = 0; _Ones != 10; ++_Ones) {
                const int _Row = _Tens * 10 + _Ones;
                _Data[_Row][0] = static_cast<_Elem>('0' + _Tens);
                _Data[_Row][1] = static_cast<_Elem>('0' + _Ones);
            }
        }
    }

    // Not copyable: the table is only meant to be accessed through _Digit_pairs.
    _Digit_pair_table(const _Digit_pair_table&)            = delete;
    _Digit_pair_table& operator=(const _Digit_pair_table&) = delete;
};

// One constant table instance per character type.
template <class _Elem>
constexpr _Digit_pair_table<_Elem> _Digit_pairs{};
_STD_END

// TRANSITION, non-_Ugly attribute tokens
Expand Down