Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 20 additions & 31 deletions src/boxed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,15 @@ use alloc::{alloc::Layout, string::String};
use core::{
mem::align_of,
ops::{Deref, DerefMut},
ptr::NonNull,
};

use crate::{ops::GenericString, MAX_INLINE};
use crate::TaggedPtr;

#[cfg(target_endian = "little")]
#[repr(C)]
pub(crate) struct BoxedString {
ptr: NonNull<u8>,
ptr: TaggedPtr,
cap: usize,
len: usize,
}
Expand All @@ -24,16 +24,7 @@ pub(crate) struct BoxedString {
pub(crate) struct BoxedString {
len: usize,
cap: usize,
ptr: NonNull<u8>,
}

/// Checks if a pointer is aligned to an even address (good)
/// or an odd address (either actually an InlineString or very, very bad).
///
/// Returns `true` if aligned to an odd address, `false` if even. The sense of
/// the boolean is "does this look like an InlineString? true/false"
fn check_alignment(ptr: *const u8) -> bool {
ptr.align_offset(2) > 0
ptr: TaggedPtr,
}

impl GenericString for BoxedString {
Expand All @@ -45,23 +36,19 @@ impl GenericString for BoxedString {
fn as_mut_capacity_slice(&mut self) -> &mut [u8] {
#[allow(unsafe_code)]
unsafe {
core::slice::from_raw_parts_mut(self.ptr.as_ptr(), self.capacity())
core::slice::from_raw_parts_mut(self.ptr.as_non_null().as_ptr(), self.capacity())
}
}
}

impl BoxedString {
const MINIMAL_CAPACITY: usize = MAX_INLINE * 2;

pub(crate) fn check_alignment(this: &Self) -> bool {
check_alignment(this.ptr.as_ptr())
}

fn layout_for(cap: usize) -> Layout {
// Always request memory that is specifically aligned to at least 2, so
// the least significant bit is guaranteed to be 0.
// Always request memory that is specifically aligned to at least 4, so
// the least significant two bits are guaranteed to be 0.
let layout = Layout::array::<u8>(cap)
.and_then(|layout| layout.align_to(align_of::<u16>()))
.and_then(|layout| layout.align_to(align_of::<u32>()))
.unwrap();
assert!(
layout.size() <= isize::MAX as usize,
Expand All @@ -70,29 +57,29 @@ impl BoxedString {
layout
}

fn alloc(cap: usize) -> NonNull<u8> {
fn alloc(cap: usize) -> TaggedPtr {
let layout = Self::layout_for(cap);
#[allow(unsafe_code)]
let ptr = match NonNull::new(unsafe { alloc::alloc::alloc(layout) }) {
let ptr = match TaggedPtr::new(unsafe { alloc::alloc::alloc(layout) }) {
Some(ptr) => ptr,
None => alloc::alloc::handle_alloc_error(layout),
};
debug_assert!(ptr.as_ptr().align_offset(2) == 0);
debug_assert!(ptr.as_non_null().as_ptr().align_offset(4) == 0);
ptr
}

fn realloc(&mut self, cap: usize) {
let layout = Self::layout_for(cap);
let old_layout = Self::layout_for(self.cap);
let old_ptr = self.ptr.as_ptr();
let old_ptr = self.ptr.as_non_null().as_ptr();
#[allow(unsafe_code)]
let ptr = unsafe { alloc::alloc::realloc(old_ptr, old_layout, layout.size()) };
self.ptr = match NonNull::new(ptr) {
self.ptr = match TaggedPtr::new(ptr) {
Some(ptr) => ptr,
None => alloc::alloc::handle_alloc_error(layout),
};
self.cap = cap;
debug_assert!(self.ptr.as_ptr().align_offset(2) == 0);
debug_assert!(self.ptr.as_non_null().as_ptr().align_offset(4) == 0);
}

pub(crate) fn ensure_capacity(&mut self, target_cap: usize) {
Expand Down Expand Up @@ -132,7 +119,7 @@ impl Drop for BoxedString {
fn drop(&mut self) {
#[allow(unsafe_code)]
unsafe {
alloc::alloc::dealloc(self.ptr.as_ptr(), Self::layout_for(self.cap))
alloc::alloc::dealloc(self.ptr.as_non_null().as_ptr(), Self::layout_for(self.cap))
}
}
}
Expand All @@ -149,7 +136,7 @@ impl Deref for BoxedString {
fn deref(&self) -> &Self::Target {
#[allow(unsafe_code)]
unsafe {
core::str::from_utf8_unchecked(core::slice::from_raw_parts(self.ptr.as_ptr(), self.len))
core::str::from_utf8_unchecked(core::slice::from_raw_parts(self.ptr.as_non_null().as_ptr(), self.len))
}
}
}
Expand All @@ -159,7 +146,7 @@ impl DerefMut for BoxedString {
#[allow(unsafe_code)]
unsafe {
core::str::from_utf8_unchecked_mut(core::slice::from_raw_parts_mut(
self.ptr.as_ptr(),
self.ptr.as_non_null().as_ptr(),
self.len,
))
}
Expand All @@ -174,6 +161,8 @@ impl From<String> for BoxedString {
} else {
#[cfg(has_allocator)]
{
use core::ptr::NonNull;

// TODO: Use String::into_raw_parts when stabilised, meanwhile let's get unsafe
let len = s.len();
let cap = s.capacity();
Expand All @@ -190,7 +179,7 @@ impl From<String> for BoxedString {
Self {
cap,
len,
ptr: aligned_ptr.cast(),
ptr: TaggedPtr::new(aligned_ptr.as_ptr() as *mut _).unwrap(),
}
} else {
Self::from_str(cap, &s)
Expand All @@ -215,7 +204,7 @@ impl From<BoxedString> for String {
use alloc::alloc::Allocator;
let allocator = alloc::alloc::Global;
if let Ok(aligned_ptr) =
unsafe { allocator.grow(ptr, BoxedString::layout_for(cap), new_layout) }
unsafe { allocator.grow(ptr.as_non_null(), BoxedString::layout_for(cap), new_layout) }
{
core::mem::forget(s);
unsafe { String::from_raw_parts(aligned_ptr.as_ptr().cast(), len, cap) }
Expand Down
45 changes: 21 additions & 24 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ use core::{
hash::{Hash, Hasher},
iter::FromIterator,
marker::PhantomData,
mem::{forget, MaybeUninit},
mem::forget,
ops::{
Add, Deref, DerefMut, Index, IndexMut, Range, RangeBounds, RangeFrom, RangeFull,
RangeInclusive, RangeTo, RangeToInclusive,
Expand All @@ -126,15 +126,17 @@ use core::{
str::FromStr,
};

#[cfg(feature = "std")]
use std::borrow::Cow;
use alloc::borrow::Cow;

mod config;
pub use config::{Compact, LazyCompact, SmartStringMode, MAX_INLINE};

mod marker_byte;
use marker_byte::Discriminant;

mod tagged_ptr;
use tagged_ptr::TaggedPtr;

mod inline;
use inline::InlineString;

Expand Down Expand Up @@ -189,7 +191,7 @@ pub mod alias {
/// one - not without also storing that state in the inline representation, which
/// would waste precious bytes for inline string data.
pub struct SmartString<Mode: SmartStringMode> {
data: MaybeUninit<InlineString>,
data: InlineString,
mode: PhantomData<Mode>,
}

Expand Down Expand Up @@ -248,7 +250,7 @@ impl SmartString<LazyCompact> {
/// once this happens.
pub const fn new_const() -> Self {
Self {
data: MaybeUninit::new(InlineString::new()),
data: InlineString::new(),
mode: PhantomData,
}
}
Expand All @@ -263,7 +265,7 @@ impl SmartString<Compact> {
/// once this happens.
pub const fn new_const() -> Self {
Self {
data: MaybeUninit::new(InlineString::new()),
data: InlineString::new(),
mode: PhantomData,
}
}
Expand All @@ -278,10 +280,10 @@ impl<Mode: SmartStringMode> SmartString<Mode> {

fn from_boxed(boxed: BoxedString) -> Self {
let mut out = Self {
data: MaybeUninit::uninit(),
data: InlineString::new(),
mode: PhantomData,
};
let data_ptr: *mut BoxedString = out.data.as_mut_ptr().cast();
let data_ptr: *mut BoxedString = &mut out.data as *mut _ as *mut BoxedString;
#[allow(unsafe_code)]
unsafe {
data_ptr.write(boxed)
Expand All @@ -291,43 +293,39 @@ impl<Mode: SmartStringMode> SmartString<Mode> {

fn from_inline(inline: InlineString) -> Self {
Self {
data: MaybeUninit::new(inline),
data: inline,
mode: PhantomData,
}
}

fn discriminant(&self) -> Discriminant {
// unsafe { self.data.assume_init() }.marker.discriminant()
let str_ptr: *const BoxedString =
self.data.as_ptr().cast() as *const _ as *const BoxedString;
#[allow(unsafe_code)]
Discriminant::from_bit(BoxedString::check_alignment(unsafe { &*str_ptr }))
self.data.marker.discriminant()
}

fn cast(&self) -> StringCast<'_> {
#[allow(unsafe_code)]
match self.discriminant() {
Discriminant::Inline => StringCast::Inline(unsafe { &*self.data.as_ptr() }),
Discriminant::Boxed => StringCast::Boxed(unsafe { &*self.data.as_ptr().cast() }),
Discriminant::Inline => StringCast::Inline(&self.data),
Discriminant::Boxed => StringCast::Boxed(unsafe { &*(&self.data as *const _ as *const BoxedString) }),
}
}

fn cast_mut(&mut self) -> StringCastMut<'_> {
#[allow(unsafe_code)]
match self.discriminant() {
Discriminant::Inline => StringCastMut::Inline(unsafe { &mut *self.data.as_mut_ptr() }),
Discriminant::Inline => StringCastMut::Inline(&mut self.data),
Discriminant::Boxed => {
StringCastMut::Boxed(unsafe { &mut *self.data.as_mut_ptr().cast() })
StringCastMut::Boxed(unsafe { &mut *(&mut self.data as *mut _ as *mut BoxedString) })
}
}
}

fn cast_into(mut self) -> StringCastInto {
#[allow(unsafe_code)]
match self.discriminant() {
Discriminant::Inline => StringCastInto::Inline(unsafe { self.data.assume_init() }),
Discriminant::Inline => StringCastInto::Inline(self.data),
Discriminant::Boxed => StringCastInto::Boxed(unsafe {
let boxed_ptr: *mut BoxedString = self.data.as_mut_ptr().cast();
let boxed_ptr: *mut BoxedString = &mut self.data as *mut _ as *mut BoxedString;
let string = boxed_ptr.read();
forget(self);
string
Expand All @@ -337,7 +335,7 @@ impl<Mode: SmartStringMode> SmartString<Mode> {

fn promote_from(&mut self, string: BoxedString) {
debug_assert!(self.discriminant() == Discriminant::Inline);
let data: *mut BoxedString = self.data.as_mut_ptr().cast();
let data: *mut BoxedString = &mut self.data as *mut _ as *mut BoxedString;
#[allow(unsafe_code)]
unsafe {
data.write(string)
Expand All @@ -362,11 +360,11 @@ impl<Mode: SmartStringMode> SmartString<Mode> {
false
} else {
let s: &str = string.deref();
let inlined = s.into();
let inlined: InlineString = s.into();
#[allow(unsafe_code)]
unsafe {
drop_in_place(string);
self.data.as_mut_ptr().write(inlined);
core::ptr::write(&mut self.data, inlined);
}
true
}
Expand Down Expand Up @@ -693,7 +691,6 @@ impl<Mode: SmartStringMode> From<Box<str>> for SmartString<Mode> {
}
}

#[cfg(feature = "std")]
impl<Mode: SmartStringMode> From<Cow<'_, str>> for SmartString<Mode> {
fn from(string: Cow<'_, str>) -> Self {
if string.len() > MAX_INLINE {
Expand Down
28 changes: 21 additions & 7 deletions src/marker_byte.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.

use core::num::NonZeroU8;

#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub(crate) enum Discriminant {
Boxed,
Expand All @@ -27,13 +29,27 @@ impl Discriminant {
}
}

/// This type facilitates `Option` size optimization: it wraps a `NonZeroU8`, so the all-zero
/// byte is never a valid marker and remains available to represent `None`.
/// The low two bits of a valid marker determine the discriminant.
///
/// 00000000 - None
/// xxxxxx01 - unused
/// xxxxxx10 - BoxedString
/// xxxxxx11 - InlineString
///
/// BoxedString uses TaggedPtr to ensure the low two bits form the 10 pattern.
/// This guarantees the in-memory NonZeroU8 value is always in a valid (non-zero) state and that
/// it matches the tagging convention of Marker.
#[derive(Clone, Copy, Debug)]
pub(crate) struct Marker(u8);
pub(crate) struct Marker(NonZeroU8);

impl Marker {
#[inline(always)]
const fn assemble(discriminant: Discriminant, data: u8) -> u8 {
data << 1 | discriminant.bit()
const fn assemble(discriminant: Discriminant, data: u8) -> NonZeroU8 {
debug_assert!(data < 0x40);

#[allow(unsafe_code)]
unsafe { NonZeroU8::new_unchecked((data << 2) | 2 | discriminant.bit()) } // SAFETY: (2 | x) != 0 is guaranteed for all x
}

#[inline(always)]
Expand All @@ -43,23 +59,21 @@ impl Marker {

#[inline(always)]
pub(crate) const fn new_inline(data: u8) -> Self {
debug_assert!(data < 0x80);
Self(Self::assemble(Discriminant::Inline, data))
}

#[inline(always)]
pub(crate) const fn discriminant(self) -> Discriminant {
Discriminant::from_bit(self.0 & 0x01 != 0)
Discriminant::from_bit(self.0.get() & 0x01 != 0)
}

#[inline(always)]
pub(crate) const fn data(self) -> u8 {
self.0 >> 1
self.0.get() >> 2
}

#[inline(always)]
pub(crate) fn set_data(&mut self, byte: u8) {
debug_assert!(byte < 0x80);
self.0 = Self::assemble(self.discriminant(), byte);
}
}
Loading