Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ add_benchmark(shuffle src/shuffle.cpp)
add_benchmark(std_copy src/std_copy.cpp)
add_benchmark(sv_equal src/sv_equal.cpp)
add_benchmark(swap_ranges src/swap_ranges.cpp)
add_benchmark(uninitialized_copy src/uninitialized_copy.cpp)
add_benchmark(unique src/unique.cpp)
add_benchmark(vector_bool_copy src/vector_bool_copy.cpp)
add_benchmark(vector_bool_copy_n src/vector_bool_copy_n.cpp)
Expand Down
56 changes: 56 additions & 0 deletions benchmarks/src/uninitialized_copy.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include <benchmark/benchmark.h>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <memory>

#include "skewed_allocator.hpp"

using namespace std;

// Benchmarks uninitialized_copy for N objects of type T.
//
// Template parameters:
//   N      - number of T objects copied per iteration.
//   T      - element type; both buffers are filled with memset, so T is treated as raw bytes here.
//   Padder - wrapper template instantiated as Padder<T[N]>; its `value` member is the array
//            used as the source / destination buffer.
//            NOTE(review): Padder presumably controls the alignment of `value`
//            (see skewed_allocator.hpp) -- confirm there.
template <size_t N, class T, template <class> class Padder>
void bm_uninitialized_copy(benchmark::State& state) {
    // Source buffer, filled with a recognizable byte pattern.
    Padder<T[N]> padded_a;
    auto a = &padded_a.value[0];
    memset(a, 'a', sizeof(T) * N);

    // Destination buffer, pre-filled with a different byte pattern.
    Padder<T[N]> padded_b;
    auto b = &padded_b.value[0];
    memset(b, 'b', sizeof(T) * N);

    for (auto _ : state) {
        benchmark::DoNotOptimize(a); // keep the source pointer opaque to the optimizer
        uninitialized_copy(a, a + N, b);
        benchmark::DoNotOptimize(b); // make the destination observable so the copy is treated as used
    }
}

// Register bm_uninitialized_copy for uint8_t elements over a spread of element counts
// (1 through 9000), using the highly_aligned padder.
// NOTE(review): highly_aligned / not_highly_aligned are not defined in this file; presumably
// they come from skewed_allocator.hpp -- confirm their exact alignment guarantees there.
BENCHMARK(bm_uninitialized_copy<1, uint8_t, highly_aligned>);
BENCHMARK(bm_uninitialized_copy<5, uint8_t, highly_aligned>);
BENCHMARK(bm_uninitialized_copy<15, uint8_t, highly_aligned>);
BENCHMARK(bm_uninitialized_copy<26, uint8_t, highly_aligned>);
BENCHMARK(bm_uninitialized_copy<32, uint8_t, highly_aligned>);
BENCHMARK(bm_uninitialized_copy<38, uint8_t, highly_aligned>);
BENCHMARK(bm_uninitialized_copy<60, uint8_t, highly_aligned>);
BENCHMARK(bm_uninitialized_copy<64, uint8_t, highly_aligned>);
BENCHMARK(bm_uninitialized_copy<125, uint8_t, highly_aligned>);
BENCHMARK(bm_uninitialized_copy<800, uint8_t, highly_aligned>);
BENCHMARK(bm_uninitialized_copy<3000, uint8_t, highly_aligned>);
BENCHMARK(bm_uninitialized_copy<9000, uint8_t, highly_aligned>);

// Same element counts again, this time with the not_highly_aligned padder, so the two
// sets of results can be compared size-for-size.
BENCHMARK(bm_uninitialized_copy<1, uint8_t, not_highly_aligned>);
BENCHMARK(bm_uninitialized_copy<5, uint8_t, not_highly_aligned>);
BENCHMARK(bm_uninitialized_copy<15, uint8_t, not_highly_aligned>);
BENCHMARK(bm_uninitialized_copy<26, uint8_t, not_highly_aligned>);
BENCHMARK(bm_uninitialized_copy<32, uint8_t, not_highly_aligned>);
BENCHMARK(bm_uninitialized_copy<38, uint8_t, not_highly_aligned>);
BENCHMARK(bm_uninitialized_copy<60, uint8_t, not_highly_aligned>);
BENCHMARK(bm_uninitialized_copy<64, uint8_t, not_highly_aligned>);
BENCHMARK(bm_uninitialized_copy<125, uint8_t, not_highly_aligned>);
BENCHMARK(bm_uninitialized_copy<800, uint8_t, not_highly_aligned>);
BENCHMARK(bm_uninitialized_copy<3000, uint8_t, not_highly_aligned>);
BENCHMARK(bm_uninitialized_copy<9000, uint8_t, not_highly_aligned>);

// Google Benchmark macro that supplies the program entry point and runs the registered benchmarks.
BENCHMARK_MAIN();
8 changes: 4 additions & 4 deletions stl/inc/memory
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ _NoThrowFwdIt uninitialized_copy_n(const _InIt _First, const _Diff _Count_raw, _
auto _UFirst = _STD _Get_unwrapped_n(_First, _Count);
auto _UDest = _STD _Get_unwrapped_n(_Dest, _Count);
if constexpr (_Iter_copy_cat<decltype(_UFirst), decltype(_UDest)>::_Bitcopy_constructible) {
_UDest = _STD _Copy_memmove_n(_UFirst, static_cast<size_t>(_Count), _UDest);
_UDest = _STD _Copy_memcpy_n(_UFirst, static_cast<size_t>(_Count), _UDest);
} else {
_Uninitialized_backout<decltype(_UDest)> _Backout{_UDest};

Expand Down Expand Up @@ -295,7 +295,7 @@ pair<_InIt, _NoThrowFwdIt> uninitialized_move_n(_InIt _First, const _Diff _Count
auto _UFirst = _STD _Get_unwrapped_n(_First, _Count);
auto _UDest = _STD _Get_unwrapped_n(_Dest, _Count);
if constexpr (_Iter_move_cat<decltype(_UFirst), decltype(_UDest)>::_Bitcopy_constructible) {
_UDest = _STD _Copy_memmove_n(_UFirst, static_cast<size_t>(_Count), _UDest);
_UDest = _STD _Copy_memcpy_n(_UFirst, static_cast<size_t>(_Count), _UDest);
_UFirst += _Count;
} else {
_Uninitialized_backout<decltype(_UDest)> _Backout{_UDest};
Expand Down Expand Up @@ -2286,7 +2286,7 @@ template <class _Ty, size_t _Size>
void _Uninitialized_copy_multidimensional(const _Ty (&_In)[_Size], _Ty (&_Out)[_Size]) {
using _Item = remove_all_extents_t<_Ty>;
if constexpr (conjunction_v<is_trivially_copy_constructible<_Item>, is_trivially_destructible<_Item>>) {
_STD _Copy_memmove_n(_In, _Size, _Out);
_STD _Copy_memcpy_n(_In, _Size, _Out);
} else if constexpr (is_array_v<_Ty>) {
_Reverse_destroy_multidimensional_n_guard<_Ty> _Guard{_Out, 0};
for (size_t& _Idx = _Guard._Index; _Idx < _Size; ++_Idx) {
Expand Down Expand Up @@ -2651,7 +2651,7 @@ void _Uninitialized_copy_multidimensional_al(const _Ty (&_In)[_Size], _Ty (&_Out
using _Item = remove_all_extents_t<_Ty>;
if constexpr (conjunction_v<is_trivially_copy_constructible<_Item>, is_trivially_destructible<_Item>,
_Uses_default_construct<_Alloc, _Item*, const _Item&>>) {
_STD _Copy_memmove_n(_In, _Size, _Out);
_STD _Copy_memcpy_n(_In, _Size, _Out);
} else if constexpr (is_array_v<_Ty>) {
_Reverse_destroy_multidimensional_n_al_guard<_Ty, _Alloc> _Guard{_Out, 0, _Al};
for (size_t& _Idx = _Guard._Index; _Idx < _Size; ++_Idx) {
Expand Down
22 changes: 11 additions & 11 deletions stl/inc/xmemory
Original file line number Diff line number Diff line change
Expand Up @@ -1688,7 +1688,7 @@ _NoThrowFwdIt _Uninitialized_move_unchecked(_InIt _First, const _InIt _Last, _No
if (!_STD is_constant_evaluated())
#endif // _HAS_CXX26
{
return _STD _Copy_memmove(_First, _Last, _Dest);
return _STD _Copy_memcpy(_First, _Last, _Dest);
}
}
_Uninitialized_backout<_NoThrowFwdIt> _Backout{_Dest};
Expand Down Expand Up @@ -1901,20 +1901,20 @@ _CONSTEXPR20 _Alloc_ptr_t<_Alloc> _Uninitialized_copy(
auto _ULast = _STD _Get_unwrapped(_STD move(_Last));
#endif // ^^^ !_HAS_CXX20 ^^^

constexpr bool _Can_memmove = _Sent_copy_cat<decltype(_UFirst), decltype(_ULast), _Ptrval>::_Bitcopy_constructible
&& _Uses_default_construct<_Alloc, _Ptrval, decltype(*_UFirst)>::value;
constexpr bool _Can_memcpy = _Sent_copy_cat<decltype(_UFirst), decltype(_ULast), _Ptrval>::_Bitcopy_constructible
&& _Uses_default_construct<_Alloc, _Ptrval, decltype(*_UFirst)>::value;

if constexpr (_Can_memmove) {
if constexpr (_Can_memcpy) {
#if _HAS_CXX20
if (!_STD is_constant_evaluated())
#endif // _HAS_CXX20
{
if constexpr (is_same_v<decltype(_UFirst), decltype(_ULast)>) {
_STD _Copy_memmove(_STD _To_address(_UFirst), _STD _To_address(_ULast), _STD _Unfancy(_Dest));
_STD _Copy_memcpy(_STD _To_address(_UFirst), _STD _To_address(_ULast), _STD _Unfancy(_Dest));
_Dest += _ULast - _UFirst;
} else {
const auto _Count = static_cast<size_t>(_STD _Contiguous_iter_distance(_UFirst, _ULast));
_STD _Copy_memmove_n(_STD _To_address(_UFirst), _Count, _STD _Unfancy(_Dest));
_STD _Copy_memcpy_n(_STD _To_address(_UFirst), _Count, _STD _Unfancy(_Dest));
_Dest += _Count;
}
return _Dest;
Expand Down Expand Up @@ -1942,16 +1942,16 @@ _CONSTEXPR20 _Alloc_ptr_t<_Alloc> _Uninitialized_copy_n(
auto _UFirst = _STD _Get_unwrapped(_STD move(_First));
#endif // ^^^ No checking ^^^

constexpr bool _Can_memmove =
constexpr bool _Can_memcpy =
conjunction_v<bool_constant<_Iter_copy_cat<decltype(_UFirst), _Ptrval>::_Bitcopy_constructible>,
_Uses_default_construct<_Alloc, _Ptrval, decltype(*_UFirst)>>;

if constexpr (_Can_memmove) {
if constexpr (_Can_memcpy) {
#if _HAS_CXX20
if (!_STD is_constant_evaluated())
#endif // _HAS_CXX20
{
_STD _Copy_memmove_n(_UFirst, _Count, _STD _Unfancy(_Dest));
_STD _Copy_memcpy_n(_UFirst, _Count, _STD _Unfancy(_Dest));
_Dest += _Count;
return _Dest;
}
Expand All @@ -1973,7 +1973,7 @@ _NoThrowFwdIt _Uninitialized_copy_unchecked(_InIt _First, const _InIt _Last, _No
if (!_STD is_constant_evaluated())
#endif // _HAS_CXX26
{
return _STD _Copy_memmove(_First, _Last, _Dest);
return _STD _Copy_memcpy(_First, _Last, _Dest);
}
}

Expand Down Expand Up @@ -2013,7 +2013,7 @@ _CONSTEXPR20 _Alloc_ptr_t<_Alloc> _Uninitialized_move(
if (!_STD is_constant_evaluated())
#endif // _HAS_CXX20
{
_STD _Copy_memmove(_UFirst, _ULast, _STD _Unfancy(_Dest));
_STD _Copy_memcpy(_UFirst, _ULast, _STD _Unfancy(_Dest));
return _Dest + (_ULast - _UFirst);
}
}
Expand Down
44 changes: 35 additions & 9 deletions stl/inc/xutility
Original file line number Diff line number Diff line change
Expand Up @@ -4937,14 +4937,20 @@ _CONSTEXPR20 void _Verify_ranges_do_not_overlap(const _Iter1& _First1, const _Se
#endif // _ITERATOR_DEBUG_LEVEL != 2 ^^^
}

template <class _OutCtgIt>
_OutCtgIt _Copy_memmove_tail(
template <bool _Use_memcpy, class _OutCtgIt>
_OutCtgIt _Impl_copy_memmeow_tail(
const char* const _First_ch, const _OutCtgIt _Dest, const size_t _Byte_count, const size_t _Object_count) {
_STL_INTERNAL_CHECK(_Byte_count == _Object_count * sizeof(*_Dest));
// (pre-verified contiguous iterator)
const auto _Dest_ptr = _STD _To_address(_Dest);
const auto _Dest_ch = const_cast<char*>(reinterpret_cast<const volatile char*>(_Dest_ptr));
_CSTD memmove(_Dest_ch, _First_ch, _Byte_count);

if constexpr (_Use_memcpy) {
_CSTD memcpy(_Dest_ch, _First_ch, _Byte_count);
} else {
_CSTD memmove(_Dest_ch, _First_ch, _Byte_count);
}

if constexpr (is_pointer_v<_OutCtgIt>) {
(void) _Object_count;
// CodeQL [SM02986] This cast is correct: we're bypassing pointer arithmetic for performance.
Expand All @@ -4954,8 +4960,8 @@ _OutCtgIt _Copy_memmove_tail(
}
}

template <class _CtgIt, class _OutCtgIt>
_OutCtgIt _Copy_memmove(_CtgIt _First, _CtgIt _Last, _OutCtgIt _Dest) {
template <bool _Use_memcpy, class _CtgIt, class _OutCtgIt>
_OutCtgIt _Impl_copy_memmeow(_CtgIt _First, _CtgIt _Last, _OutCtgIt _Dest) {
_STL_INTERNAL_CHECK(_First <= _Last);
const auto _First_ptr = _STD _To_address(_First);
const auto _Last_ptr = _STD _To_address(_Last);
Expand All @@ -4965,17 +4971,37 @@ _OutCtgIt _Copy_memmove(_CtgIt _First, _CtgIt _Last, _OutCtgIt _Dest) {
const auto _Last_ch = const_cast<const char*>(reinterpret_cast<const volatile char*>(_Last_ptr));
const auto _Byte_count = static_cast<size_t>(_Last_ch - _First_ch);
_STD _Contiguous_iter_verify(_Dest, static_cast<_Iter_diff_t<_OutCtgIt>>(_Ptr_diff));
return _STD _Copy_memmove_tail(_First_ch, _STD move(_Dest), _Byte_count, _Object_count);
return _STD _Impl_copy_memmeow_tail<_Use_memcpy>(_First_ch, _STD move(_Dest), _Byte_count, _Object_count);
}

template <class _CtgIt, class _OutCtgIt>
_OutCtgIt _Copy_memmove_n(_CtgIt _First, const size_t _Object_count, _OutCtgIt _Dest) {
template <bool _Use_memcpy, class _CtgIt, class _OutCtgIt>
_OutCtgIt _Impl_copy_memmeow_n(_CtgIt _First, const size_t _Object_count, _OutCtgIt _Dest) {
_STD _Contiguous_iter_verify(_First, static_cast<_Iter_diff_t<_CtgIt>>(_Object_count));
_STD _Contiguous_iter_verify(_Dest, static_cast<_Iter_diff_t<_OutCtgIt>>(_Object_count));
const auto _First_ptr = _STD _To_address(_First);
const auto _First_ch = const_cast<const char*>(reinterpret_cast<const volatile char*>(_First_ptr));
const auto _Byte_count = _Object_count * sizeof(*_First_ptr);
return _STD _Copy_memmove_tail(_First_ch, _STD move(_Dest), _Byte_count, _Object_count);
return _STD _Impl_copy_memmeow_tail<_Use_memcpy>(_First_ch, _STD move(_Dest), _Byte_count, _Object_count);
}

// Byte-copies [_First, _Last) to _Dest by selecting the memcpy flavor of _Impl_copy_memmeow.
// Because memcpy is used, the source and destination ranges must not overlap.
// Returns the result of _Impl_copy_memmeow.
template <class _CtgIt, class _OutCtgIt>
_OutCtgIt _Copy_memcpy(_CtgIt _First, _CtgIt _Last, _OutCtgIt _Dest) {
    constexpr bool _Use_memcpy = true;
    return _STD _Impl_copy_memmeow<_Use_memcpy>(_First, _Last, _Dest);
}

// Byte-copies _Object_count objects starting at _First to _Dest by selecting the memcpy flavor
// of _Impl_copy_memmeow_n. Because memcpy is used, the ranges must not overlap.
// Returns the result of _Impl_copy_memmeow_n.
template <class _CtgIt, class _OutCtgIt>
_OutCtgIt _Copy_memcpy_n(_CtgIt _First, const size_t _Object_count, _OutCtgIt _Dest) {
    constexpr bool _Use_memcpy = true;
    return _STD _Impl_copy_memmeow_n<_Use_memcpy>(_First, _Object_count, _Dest);
}

// Byte-copies [_First, _Last) to _Dest by selecting the memmove flavor of _Impl_copy_memmeow,
// so overlapping source and destination ranges are tolerated.
// Returns the result of _Impl_copy_memmeow.
template <class _CtgIt, class _OutCtgIt>
_OutCtgIt _Copy_memmove(_CtgIt _First, _CtgIt _Last, _OutCtgIt _Dest) {
    constexpr bool _Use_memcpy = false;
    return _STD _Impl_copy_memmeow<_Use_memcpy>(_First, _Last, _Dest);
}

// Byte-copies _Object_count objects starting at _First to _Dest by selecting the memmove flavor
// of _Impl_copy_memmeow_n, so overlapping source and destination ranges are tolerated.
// Returns the result of _Impl_copy_memmeow_n.
template <class _CtgIt, class _OutCtgIt>
_OutCtgIt _Copy_memmove_n(_CtgIt _First, const size_t _Object_count, _OutCtgIt _Dest) {
    constexpr bool _Use_memcpy = false;
    return _STD _Impl_copy_memmeow_n<_Use_memcpy>(_First, _Object_count, _Dest);
}

template <class _Fn>
Expand Down