Unnamed repository; edit this file 'description' to name the repository.
Diffstat (limited to 'lib/smol_str/src/lib.rs')
| -rw-r--r-- | lib/smol_str/src/lib.rs | 970 |
1 files changed, 970 insertions, 0 deletions
diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs new file mode 100644 index 0000000000..31695b8117 --- /dev/null +++ b/lib/smol_str/src/lib.rs @@ -0,0 +1,970 @@ +#![cfg_attr(not(feature = "std"), no_std)] +#![cfg_attr(docsrs, feature(doc_cfg))] + +extern crate alloc; + +use alloc::{borrow::Cow, boxed::Box, string::String, sync::Arc}; +use core::{ + borrow::Borrow, + cmp::{self, Ordering}, + convert::Infallible, + fmt, hash, iter, mem, ops, + str::FromStr, +}; + +/// A `SmolStr` is a string type that has the following properties: +/// +/// * `size_of::<SmolStr>() == 24` (therefor `== size_of::<String>()` on 64 bit platforms) +/// * `Clone` is `O(1)` +/// * Strings are stack-allocated if they are: +/// * Up to 23 bytes long +/// * Longer than 23 bytes, but substrings of `WS` (see below). Such strings consist +/// solely of consecutive newlines, followed by consecutive spaces +/// * If a string does not satisfy the aforementioned conditions, it is heap-allocated +/// * Additionally, a `SmolStr` can be explicitly created from a `&'static str` without allocation +/// +/// Unlike `String`, however, `SmolStr` is immutable. The primary use case for +/// `SmolStr` is a good enough default storage for tokens of typical programming +/// languages. Strings consisting of a series of newlines, followed by a series of +/// whitespace are a typical pattern in computer programs because of indentation. +/// Note that a specialized interner might be a better solution for some use cases. +/// +/// `WS`: A string of 32 newlines followed by 128 spaces. +pub struct SmolStr(Repr); + +impl SmolStr { + /// Constructs an inline variant of `SmolStr`. + /// + /// This never allocates. + /// + /// # Panics + /// + /// Panics if `text.len() > 23`. + #[inline] + pub const fn new_inline(text: &str) -> SmolStr { + assert!(text.len() <= INLINE_CAP); // avoids bounds checks in loop + + let text = text.as_bytes(); + let mut buf = [0; INLINE_CAP]; + let mut i = 0; + while i < text.len() { + buf[i] = text[i]; + i += 1 + } + SmolStr(Repr::Inline { + // SAFETY: We know that `len` is less than or equal to the maximum value of `InlineSize` + // as we asserted it. + len: unsafe { InlineSize::transmute_from_u8(text.len() as u8) }, + buf, + }) + } + + /// Constructs a `SmolStr` from a statically allocated string. + /// + /// This never allocates. + #[inline(always)] + pub const fn new_static(text: &'static str) -> SmolStr { + // NOTE: this never uses the inline storage; if a canonical + // representation is needed, we could check for `len() < INLINE_CAP` + // and call `new_inline`, but this would mean an extra branch. + SmolStr(Repr::Static(text)) + } + + /// Constructs a `SmolStr` from a `str`, heap-allocating if necessary. + #[inline(always)] + pub fn new(text: impl AsRef<str>) -> SmolStr { + SmolStr(Repr::new(text.as_ref())) + } + + /// Returns a `&str` slice of this `SmolStr`. + #[inline(always)] + pub fn as_str(&self) -> &str { + self.0.as_str() + } + + /// Returns the length of `self` in bytes. + #[inline(always)] + pub fn len(&self) -> usize { + self.0.len() + } + + /// Returns `true` if `self` has a length of zero bytes. + #[inline(always)] + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + /// Returns `true` if `self` is heap-allocated. + #[inline(always)] + pub const fn is_heap_allocated(&self) -> bool { + matches!(self.0, Repr::Heap(..)) + } +} + +impl Clone for SmolStr { + #[inline] + fn clone(&self) -> Self { + if !self.is_heap_allocated() { + // SAFETY: We verified that the payload of `Repr` is a POD + return unsafe { core::ptr::read(self as *const SmolStr) }; + } + Self(self.0.clone()) + } +} + +impl Default for SmolStr { + #[inline(always)] + fn default() -> SmolStr { + SmolStr(Repr::Inline { len: InlineSize::_V0, buf: [0; INLINE_CAP] }) + } +} + +impl ops::Deref for SmolStr { + type Target = str; + + #[inline(always)] + fn deref(&self) -> &str { + self.as_str() + } +} + +// region: PartialEq implementations + +impl Eq for SmolStr {} +impl PartialEq<SmolStr> for SmolStr { + fn eq(&self, other: &SmolStr) -> bool { + self.0.ptr_eq(&other.0) || self.as_str() == other.as_str() + } +} + +impl PartialEq<str> for SmolStr { + #[inline(always)] + fn eq(&self, other: &str) -> bool { + self.as_str() == other + } +} + +impl PartialEq<SmolStr> for str { + #[inline(always)] + fn eq(&self, other: &SmolStr) -> bool { + other == self + } +} + +impl<'a> PartialEq<&'a str> for SmolStr { + #[inline(always)] + fn eq(&self, other: &&'a str) -> bool { + self == *other + } +} + +impl PartialEq<SmolStr> for &str { + #[inline(always)] + fn eq(&self, other: &SmolStr) -> bool { + *self == other + } +} + +impl PartialEq<String> for SmolStr { + #[inline(always)] + fn eq(&self, other: &String) -> bool { + self.as_str() == other + } +} + +impl PartialEq<SmolStr> for String { + #[inline(always)] + fn eq(&self, other: &SmolStr) -> bool { + other == self + } +} + +impl<'a> PartialEq<&'a String> for SmolStr { + #[inline(always)] + fn eq(&self, other: &&'a String) -> bool { + self == *other + } +} + +impl PartialEq<SmolStr> for &String { + #[inline(always)] + fn eq(&self, other: &SmolStr) -> bool { + *self == other + } +} +// endregion: PartialEq implementations + +impl Ord for SmolStr { + fn cmp(&self, other: &SmolStr) -> Ordering { + self.as_str().cmp(other.as_str()) + } +} + +impl PartialOrd for SmolStr { + fn partial_cmp(&self, other: &SmolStr) -> Option<Ordering> { + Some(self.cmp(other)) + } +} + +impl hash::Hash for SmolStr { + fn hash<H: hash::Hasher>(&self, hasher: &mut H) { + self.as_str().hash(hasher); + } +} + +impl fmt::Debug for SmolStr { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(self.as_str(), f) + } +} + +impl fmt::Display for SmolStr { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(self.as_str(), f) + } +} + +impl iter::FromIterator<char> for SmolStr { + fn from_iter<I: iter::IntoIterator<Item = char>>(iter: I) -> SmolStr { + from_char_iter(iter.into_iter()) + } +} + +#[inline] +fn from_char_iter(iter: impl Iterator<Item = char>) -> SmolStr { + from_buf_and_chars([0; _], 0, iter) +} + +fn from_buf_and_chars( + mut buf: [u8; INLINE_CAP], + buf_len: usize, + mut iter: impl Iterator<Item = char>, +) -> SmolStr { + let min_size = iter.size_hint().0 + buf_len; + if min_size > INLINE_CAP { + let heap: String = + core::str::from_utf8(&buf[..buf_len]).unwrap().chars().chain(iter).collect(); + if heap.len() <= INLINE_CAP { + // size hint lied + return SmolStr::new_inline(&heap); + } + return SmolStr(Repr::Heap(heap.into_boxed_str().into())); + } + let mut len = buf_len; + while let Some(ch) = iter.next() { + let size = ch.len_utf8(); + if size + len > INLINE_CAP { + let (min_remaining, _) = iter.size_hint(); + let mut heap = String::with_capacity(size + len + min_remaining); + heap.push_str(core::str::from_utf8(&buf[..len]).unwrap()); + heap.push(ch); + heap.extend(iter); + return SmolStr(Repr::Heap(heap.into_boxed_str().into())); + } + ch.encode_utf8(&mut buf[len..]); + len += size; + } + SmolStr(Repr::Inline { + // SAFETY: We know that `len` is less than or equal to the maximum value of `InlineSize` + // as we otherwise return early. + len: unsafe { InlineSize::transmute_from_u8(len as u8) }, + buf, + }) +} + +fn build_from_str_iter<T>(mut iter: impl Iterator<Item = T>) -> SmolStr +where + T: AsRef<str>, + String: iter::Extend<T>, +{ + let mut len = 0; + let mut buf = [0u8; INLINE_CAP]; + while let Some(slice) = iter.next() { + let slice = slice.as_ref(); + let size = slice.len(); + if size + len > INLINE_CAP { + let mut heap = String::with_capacity(size + len); + heap.push_str(core::str::from_utf8(&buf[..len]).unwrap()); + heap.push_str(slice); + heap.extend(iter); + return SmolStr(Repr::Heap(heap.into_boxed_str().into())); + } + buf[len..][..size].copy_from_slice(slice.as_bytes()); + len += size; + } + SmolStr(Repr::Inline { + // SAFETY: We know that `len` is less than or equal to the maximum value of `InlineSize` + // as we otherwise return early. + len: unsafe { InlineSize::transmute_from_u8(len as u8) }, + buf, + }) +} + +impl iter::FromIterator<String> for SmolStr { + fn from_iter<I: iter::IntoIterator<Item = String>>(iter: I) -> SmolStr { + build_from_str_iter(iter.into_iter()) + } +} + +impl<'a> iter::FromIterator<&'a String> for SmolStr { + fn from_iter<I: iter::IntoIterator<Item = &'a String>>(iter: I) -> SmolStr { + SmolStr::from_iter(iter.into_iter().map(|x| x.as_str())) + } +} + +impl<'a> iter::FromIterator<&'a str> for SmolStr { + fn from_iter<I: iter::IntoIterator<Item = &'a str>>(iter: I) -> SmolStr { + build_from_str_iter(iter.into_iter()) + } +} + +impl AsRef<str> for SmolStr { + #[inline(always)] + fn as_ref(&self) -> &str { + self.as_str() + } +} + +impl AsRef<[u8]> for SmolStr { + #[inline(always)] + fn as_ref(&self) -> &[u8] { + self.as_str().as_bytes() + } +} + +#[cfg(feature = "std")] +#[cfg_attr(docsrs, doc(cfg(feature = "std")))] +impl AsRef<std::ffi::OsStr> for SmolStr { + #[inline(always)] + fn as_ref(&self) -> &std::ffi::OsStr { + AsRef::<std::ffi::OsStr>::as_ref(self.as_str()) + } +} + +#[cfg(feature = "std")] +#[cfg_attr(docsrs, doc(cfg(feature = "std")))] +impl AsRef<std::path::Path> for SmolStr { + #[inline(always)] + fn as_ref(&self) -> &std::path::Path { + AsRef::<std::path::Path>::as_ref(self.as_str()) + } +} + +impl From<&str> for SmolStr { + #[inline] + fn from(s: &str) -> SmolStr { + SmolStr::new(s) + } +} + +impl From<&mut str> for SmolStr { + #[inline] + fn from(s: &mut str) -> SmolStr { + SmolStr::new(s) + } +} + +impl From<&String> for SmolStr { + #[inline] + fn from(s: &String) -> SmolStr { + SmolStr::new(s) + } +} + +impl From<String> for SmolStr { + #[inline(always)] + fn from(text: String) -> Self { + Self::new(text) + } +} + +impl From<Box<str>> for SmolStr { + #[inline] + fn from(s: Box<str>) -> SmolStr { + SmolStr::new(s) + } +} + +impl From<Arc<str>> for SmolStr { + #[inline] + fn from(s: Arc<str>) -> SmolStr { + let repr = Repr::new_on_stack(s.as_ref()).unwrap_or(Repr::Heap(s)); + Self(repr) + } +} + +impl<'a> From<Cow<'a, str>> for SmolStr { + #[inline] + fn from(s: Cow<'a, str>) -> SmolStr { + SmolStr::new(s) + } +} + +impl From<SmolStr> for Arc<str> { + #[inline(always)] + fn from(text: SmolStr) -> Self { + match text.0 { + Repr::Heap(data) => data, + _ => text.as_str().into(), + } + } +} + +impl From<SmolStr> for String { + #[inline(always)] + fn from(text: SmolStr) -> Self { + text.as_str().into() + } +} + +impl Borrow<str> for SmolStr { + #[inline(always)] + fn borrow(&self) -> &str { + self.as_str() + } +} + +impl FromStr for SmolStr { + type Err = Infallible; + + #[inline] + fn from_str(s: &str) -> Result<SmolStr, Self::Err> { + Ok(SmolStr::from(s)) + } +} + +const INLINE_CAP: usize = InlineSize::_V23 as usize; +const N_NEWLINES: usize = 32; +const N_SPACES: usize = 128; +const WS: &str = "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n "; +const _: () = { + assert!(WS.len() == N_NEWLINES + N_SPACES); + assert!(WS.as_bytes()[N_NEWLINES - 1] == b'\n'); + assert!(WS.as_bytes()[N_NEWLINES] == b' '); +}; + +/// A [`u8`] with a bunch of niches. +#[derive(Clone, Copy, Debug, PartialEq)] +#[repr(u8)] +enum InlineSize { + _V0 = 0, + _V1, + _V2, + _V3, + _V4, + _V5, + _V6, + _V7, + _V8, + _V9, + _V10, + _V11, + _V12, + _V13, + _V14, + _V15, + _V16, + _V17, + _V18, + _V19, + _V20, + _V21, + _V22, + _V23, +} + +impl InlineSize { + /// SAFETY: `value` must be less than or equal to [`INLINE_CAP`] + #[inline(always)] + const unsafe fn transmute_from_u8(value: u8) -> Self { + debug_assert!(value <= InlineSize::_V23 as u8); + // SAFETY: The caller is responsible to uphold this invariant + unsafe { mem::transmute::<u8, Self>(value) } + } +} + +#[derive(Clone, Debug)] +enum Repr { + Inline { len: InlineSize, buf: [u8; INLINE_CAP] }, + Static(&'static str), + Heap(Arc<str>), +} + +impl Repr { + /// This function tries to create a new Repr::Inline or Repr::Static + /// If it isn't possible, this function returns None + fn new_on_stack<T>(text: T) -> Option<Self> + where + T: AsRef<str>, + { + let text = text.as_ref(); + + let len = text.len(); + if len <= INLINE_CAP { + let mut buf = [0; INLINE_CAP]; + buf[..len].copy_from_slice(text.as_bytes()); + return Some(Repr::Inline { + // SAFETY: We know that `len` is less than or equal to the maximum value of `InlineSize` + len: unsafe { InlineSize::transmute_from_u8(len as u8) }, + buf, + }); + } + + if len <= N_NEWLINES + N_SPACES { + let bytes = text.as_bytes(); + let possible_newline_count = cmp::min(len, N_NEWLINES); + let newlines = + bytes[..possible_newline_count].iter().take_while(|&&b| b == b'\n').count(); + let possible_space_count = len - newlines; + if possible_space_count <= N_SPACES && bytes[newlines..].iter().all(|&b| b == b' ') { + let spaces = possible_space_count; + let substring = &WS[N_NEWLINES - newlines..N_NEWLINES + spaces]; + return Some(Repr::Static(substring)); + } + } + None + } + + fn new(text: &str) -> Self { + Self::new_on_stack(text).unwrap_or_else(|| Repr::Heap(Arc::from(text))) + } + + #[inline(always)] + fn len(&self) -> usize { + match self { + Repr::Heap(data) => data.len(), + Repr::Static(data) => data.len(), + Repr::Inline { len, .. } => *len as usize, + } + } + + #[inline(always)] + fn is_empty(&self) -> bool { + match self { + Repr::Heap(data) => data.is_empty(), + Repr::Static(data) => data.is_empty(), + &Repr::Inline { len, .. } => len as u8 == 0, + } + } + + #[inline] + fn as_str(&self) -> &str { + match self { + Repr::Heap(data) => data, + Repr::Static(data) => data, + Repr::Inline { len, buf } => { + let len = *len as usize; + // SAFETY: len is guaranteed to be <= INLINE_CAP + let buf = unsafe { buf.get_unchecked(..len) }; + // SAFETY: buf is guaranteed to be valid utf8 for ..len bytes + unsafe { ::core::str::from_utf8_unchecked(buf) } + } + } + } + + fn ptr_eq(&self, other: &Self) -> bool { + match (self, other) { + (Self::Heap(l0), Self::Heap(r0)) => Arc::ptr_eq(l0, r0), + (Self::Static(l0), Self::Static(r0)) => core::ptr::eq(l0, r0), + (Self::Inline { len: l_len, buf: l_buf }, Self::Inline { len: r_len, buf: r_buf }) => { + l_len == r_len && l_buf == r_buf + } + _ => false, + } + } +} + +/// Convert value to [`SmolStr`] using [`fmt::Display`], potentially without allocating. +/// +/// Almost identical to [`ToString`], but converts to `SmolStr` instead. +pub trait ToSmolStr { + fn to_smolstr(&self) -> SmolStr; +} + +/// [`str`] methods producing [`SmolStr`]s. +pub trait StrExt: private::Sealed { + /// Returns the lowercase equivalent of this string slice as a new [`SmolStr`], + /// potentially without allocating. + /// + /// See [`str::to_lowercase`]. + #[must_use = "this returns a new SmolStr without modifying the original"] + fn to_lowercase_smolstr(&self) -> SmolStr; + + /// Returns the uppercase equivalent of this string slice as a new [`SmolStr`], + /// potentially without allocating. + /// + /// See [`str::to_uppercase`]. + #[must_use = "this returns a new SmolStr without modifying the original"] + fn to_uppercase_smolstr(&self) -> SmolStr; + + /// Returns the ASCII lowercase equivalent of this string slice as a new [`SmolStr`], + /// potentially without allocating. + /// + /// See [`str::to_ascii_lowercase`]. + #[must_use = "this returns a new SmolStr without modifying the original"] + fn to_ascii_lowercase_smolstr(&self) -> SmolStr; + + /// Returns the ASCII uppercase equivalent of this string slice as a new [`SmolStr`], + /// potentially without allocating. + /// + /// See [`str::to_ascii_uppercase`]. + #[must_use = "this returns a new SmolStr without modifying the original"] + fn to_ascii_uppercase_smolstr(&self) -> SmolStr; + + /// Replaces all matches of a &str with another &str returning a new [`SmolStr`], + /// potentially without allocating. + /// + /// See [`str::replace`]. + #[must_use = "this returns a new SmolStr without modifying the original"] + fn replace_smolstr(&self, from: &str, to: &str) -> SmolStr; + + /// Replaces first N matches of a &str with another &str returning a new [`SmolStr`], + /// potentially without allocating. + /// + /// See [`str::replacen`]. + #[must_use = "this returns a new SmolStr without modifying the original"] + fn replacen_smolstr(&self, from: &str, to: &str, count: usize) -> SmolStr; +} + +impl StrExt for str { + #[inline] + fn to_lowercase_smolstr(&self) -> SmolStr { + let len = self.len(); + if len <= INLINE_CAP { + let (buf, rest) = inline_convert_while_ascii(self, u8::to_ascii_lowercase); + from_buf_and_chars(buf, len - rest.len(), rest.chars().flat_map(|c| c.to_lowercase())) + } else { + self.to_lowercase().into() + } + } + + #[inline] + fn to_uppercase_smolstr(&self) -> SmolStr { + let len = self.len(); + if len <= INLINE_CAP { + let (buf, rest) = inline_convert_while_ascii(self, u8::to_ascii_uppercase); + from_buf_and_chars(buf, len - rest.len(), rest.chars().flat_map(|c| c.to_uppercase())) + } else { + self.to_uppercase().into() + } + } + + #[inline] + fn to_ascii_lowercase_smolstr(&self) -> SmolStr { + let len = self.len(); + if len <= INLINE_CAP { + let mut buf = [0u8; INLINE_CAP]; + buf[..len].copy_from_slice(self.as_bytes()); + buf[..len].make_ascii_lowercase(); + SmolStr(Repr::Inline { + // SAFETY: `len` is in bounds + len: unsafe { InlineSize::transmute_from_u8(len as u8) }, + buf, + }) + } else { + self.to_ascii_lowercase().into() + } + } + + #[inline] + fn to_ascii_uppercase_smolstr(&self) -> SmolStr { + let len = self.len(); + if len <= INLINE_CAP { + let mut buf = [0u8; INLINE_CAP]; + buf[..len].copy_from_slice(self.as_bytes()); + buf[..len].make_ascii_uppercase(); + SmolStr(Repr::Inline { + // SAFETY: `len` is in bounds + len: unsafe { InlineSize::transmute_from_u8(len as u8) }, + buf, + }) + } else { + self.to_ascii_uppercase().into() + } + } + + #[inline] + fn replace_smolstr(&self, from: &str, to: &str) -> SmolStr { + self.replacen_smolstr(from, to, usize::MAX) + } + + #[inline] + fn replacen_smolstr(&self, from: &str, to: &str, mut count: usize) -> SmolStr { + // Fast path for replacing a single ASCII character with another inline. + if let [from_u8] = from.as_bytes() + && let [to_u8] = to.as_bytes() + { + return if self.len() <= count { + // SAFETY: `from_u8` & `to_u8` are ascii + unsafe { replacen_1_ascii(self, |b| if b == from_u8 { *to_u8 } else { *b }) } + } else { + unsafe { + replacen_1_ascii(self, |b| { + if b == from_u8 && count != 0 { + count -= 1; + *to_u8 + } else { + *b + } + }) + } + }; + } + + let mut result = SmolStrBuilder::new(); + let mut last_end = 0; + for (start, part) in self.match_indices(from).take(count) { + // SAFETY: `start` is guaranteed to be within the bounds of `self` as per + // `match_indices` and last_end is always less than or equal to `start` + result.push_str(unsafe { self.get_unchecked(last_end..start) }); + result.push_str(to); + last_end = start + part.len(); + } + // SAFETY: `self.len()` is guaranteed to be within the bounds of `self` and last_end is + // always less than or equal to `self.len()` + result.push_str(unsafe { self.get_unchecked(last_end..self.len()) }); + SmolStr::from(result) + } +} + +/// SAFETY: `map` fn must only replace ascii with ascii or return unchanged bytes. +#[inline] +unsafe fn replacen_1_ascii(src: &str, mut map: impl FnMut(&u8) -> u8) -> SmolStr { + if src.len() <= INLINE_CAP { + let mut buf = [0u8; INLINE_CAP]; + for (idx, b) in src.as_bytes().iter().enumerate() { + buf[idx] = map(b); + } + SmolStr(Repr::Inline { + // SAFETY: `len` is in bounds + len: unsafe { InlineSize::transmute_from_u8(src.len() as u8) }, + buf, + }) + } else { + let out = src.as_bytes().iter().map(map).collect(); + // SAFETY: We replaced ascii with ascii on valid utf8 strings. + unsafe { String::from_utf8_unchecked(out).into() } + } +} + +/// Inline version of std fn `convert_while_ascii`. `s` must have len <= 23. +#[inline] +fn inline_convert_while_ascii(s: &str, convert: fn(&u8) -> u8) -> ([u8; INLINE_CAP], &str) { + // Process the input in chunks of 16 bytes to enable auto-vectorization. + // Previously the chunk size depended on the size of `usize`, + // but on 32-bit platforms with sse or neon is also the better choice. + // The only downside on other platforms would be a bit more loop-unrolling. + const N: usize = 16; + + debug_assert!(s.len() <= INLINE_CAP, "only for inline-able strings"); + + let mut slice = s.as_bytes(); + let mut out = [0u8; INLINE_CAP]; + let mut out_slice = &mut out[..slice.len()]; + let mut is_ascii = [false; N]; + + while slice.len() >= N { + // SAFETY: checked in loop condition + let chunk = unsafe { slice.get_unchecked(..N) }; + // SAFETY: out_slice has at least same length as input slice and gets sliced with the same offsets + let out_chunk = unsafe { out_slice.get_unchecked_mut(..N) }; + + for j in 0..N { + is_ascii[j] = chunk[j] <= 127; + } + + // Auto-vectorization for this check is a bit fragile, sum and comparing against the chunk + // size gives the best result, specifically a pmovmsk instruction on x86. + // See https://github.com/llvm/llvm-project/issues/96395 for why llvm currently does not + // currently recognize other similar idioms. + if is_ascii.iter().map(|x| *x as u8).sum::<u8>() as usize != N { + break; + } + + for j in 0..N { + out_chunk[j] = convert(&chunk[j]); + } + + slice = unsafe { slice.get_unchecked(N..) }; + out_slice = unsafe { out_slice.get_unchecked_mut(N..) }; + } + + // handle the remainder as individual bytes + while !slice.is_empty() { + let byte = slice[0]; + if byte > 127 { + break; + } + // SAFETY: out_slice has at least same length as input slice + unsafe { + *out_slice.get_unchecked_mut(0) = convert(&byte); + } + slice = unsafe { slice.get_unchecked(1..) }; + out_slice = unsafe { out_slice.get_unchecked_mut(1..) }; + } + + unsafe { + // SAFETY: we know this is a valid char boundary + // since we only skipped over leading ascii bytes + let rest = core::str::from_utf8_unchecked(slice); + (out, rest) + } +} + +impl<T> ToSmolStr for T +where + T: fmt::Display + ?Sized, +{ + fn to_smolstr(&self) -> SmolStr { + format_smolstr!("{}", self) + } +} + +mod private { + /// No downstream impls allowed. + pub trait Sealed {} + impl Sealed for str {} +} + +/// Formats arguments to a [`SmolStr`], potentially without allocating. +/// +/// See [`alloc::format!`] or [`format_args!`] for syntax documentation. +#[macro_export] +macro_rules! format_smolstr { + ($($tt:tt)*) => {{ + let mut w = $crate::SmolStrBuilder::new(); + ::core::fmt::Write::write_fmt(&mut w, format_args!($($tt)*)).expect("a formatting trait implementation returned an error"); + w.finish() + }}; +} + +/// A builder that can be used to efficiently build a [`SmolStr`]. +/// +/// This won't allocate if the final string fits into the inline buffer. +#[derive(Clone, Default, Debug, PartialEq, Eq)] +pub struct SmolStrBuilder(SmolStrBuilderRepr); + +#[derive(Clone, Debug, PartialEq, Eq)] +enum SmolStrBuilderRepr { + Inline { len: usize, buf: [u8; INLINE_CAP] }, + Heap(String), +} + +impl Default for SmolStrBuilderRepr { + #[inline] + fn default() -> Self { + SmolStrBuilderRepr::Inline { buf: [0; INLINE_CAP], len: 0 } + } +} + +impl SmolStrBuilder { + /// Creates a new empty [`SmolStrBuilder`]. + #[must_use] + pub const fn new() -> Self { + Self(SmolStrBuilderRepr::Inline { buf: [0; INLINE_CAP], len: 0 }) + } + + /// Builds a [`SmolStr`] from `self`. + #[must_use] + pub fn finish(&self) -> SmolStr { + SmolStr(match &self.0 { + &SmolStrBuilderRepr::Inline { len, buf } => { + debug_assert!(len <= INLINE_CAP); + Repr::Inline { + // SAFETY: We know that `value.len` is less than or equal to the maximum value of `InlineSize` + len: unsafe { InlineSize::transmute_from_u8(len as u8) }, + buf, + } + } + SmolStrBuilderRepr::Heap(heap) => Repr::new(heap), + }) + } + + /// Appends the given [`char`] to the end of `self`'s buffer. + pub fn push(&mut self, c: char) { + match &mut self.0 { + SmolStrBuilderRepr::Inline { len, buf } => { + let char_len = c.len_utf8(); + let new_len = *len + char_len; + if new_len <= INLINE_CAP { + c.encode_utf8(&mut buf[*len..]); + *len += char_len; + } else { + let mut heap = String::with_capacity(new_len); + // copy existing inline bytes over to the heap + // SAFETY: inline data is guaranteed to be valid utf8 for `old_len` bytes + unsafe { heap.as_mut_vec().extend_from_slice(&buf[..*len]) }; + heap.push(c); + self.0 = SmolStrBuilderRepr::Heap(heap); + } + } + SmolStrBuilderRepr::Heap(h) => h.push(c), + } + } + + /// Appends a given string slice onto the end of `self`'s buffer. + pub fn push_str(&mut self, s: &str) { + match &mut self.0 { + SmolStrBuilderRepr::Inline { len, buf } => { + let old_len = *len; + *len += s.len(); + + // if the new length will fit on the stack (even if it fills it entirely) + if *len <= INLINE_CAP { + buf[old_len..*len].copy_from_slice(s.as_bytes()); + return; // skip the heap push below + } + + let mut heap = String::with_capacity(*len); + + // copy existing inline bytes over to the heap + // SAFETY: inline data is guaranteed to be valid utf8 for `old_len` bytes + unsafe { heap.as_mut_vec().extend_from_slice(&buf[..old_len]) }; + heap.push_str(s); + self.0 = SmolStrBuilderRepr::Heap(heap); + } + SmolStrBuilderRepr::Heap(heap) => heap.push_str(s), + } + } +} + +impl fmt::Write for SmolStrBuilder { + #[inline] + fn write_str(&mut self, s: &str) -> fmt::Result { + self.push_str(s); + Ok(()) + } +} + +impl From<SmolStrBuilder> for SmolStr { + fn from(value: SmolStrBuilder) -> Self { + value.finish() + } +} + +#[cfg(feature = "arbitrary")] +#[cfg_attr(docsrs, doc(cfg(feature = "arbitrary")))] +impl<'a> arbitrary::Arbitrary<'a> for SmolStr { + fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> Result<Self, arbitrary::Error> { + let s = <&str>::arbitrary(u)?; + Ok(SmolStr::new(s)) + } +} + +#[cfg(feature = "borsh")] +#[cfg_attr(docsrs, doc(cfg(feature = "borsh")))] +mod borsh; +#[cfg(feature = "serde")] +#[cfg_attr(docsrs, doc(cfg(feature = "serde")))] +mod serde; + +#[test] +fn from_buf_and_chars_size_hinted_heap() { + let str = from_buf_and_chars( + *b"abcdefghijklmnopqr00000", + 18, + "_0x1x2x3x4x5x6x7x8x9x10x11x12x13".chars(), + ); + + assert_eq!(str, "abcdefghijklmnopqr_0x1x2x3x4x5x6x7x8x9x10x11x12x13"); +} |