//! Interning of slices, potentially with a header. //! //! See [`crate::intern`] for an explanation of interning modes. Note that slice interning is currently //! available only in GC mode (there is no other need). //! //! [`InternedSlice`] and [`InternedSliceRef`] are essentially [`Interned<(Header, Box<[SliceType]>)>`][crate::Interned] //! and [`InternedRef`][crate::InternedRef] with the same types, but more optimized. There is only one //! allocation and the pointer is thin. use std::{ ffi::c_void, fmt::{self, Debug}, hash::{BuildHasher, Hash, Hasher}, marker::PhantomData, mem::ManuallyDrop, ops::Deref, ptr::{self, NonNull}, sync::OnceLock, }; use dashmap::{DashMap, SharedValue}; use hashbrown::raw::RawTable; use rustc_hash::FxBuildHasher; use triomphe::{HeaderSlice, HeaderWithLength, ThinArc}; type InternMap = DashMap< ThinArc<::Header, ::SliceType>, (), FxBuildHasher, >; type Guard = dashmap::RwLockWriteGuard< 'static, RawTable<( ThinArc<::Header, ::SliceType>, SharedValue<()>, )>, >; type Pointee = HeaderSlice< HeaderWithLength<::Header>, [::SliceType], >; pub struct InternedSlice { arc: ThinArc, } impl InternedSlice { #[inline] pub fn from_header_and_slice<'a>( header: T::Header, slice: &[T::SliceType], ) -> InternedSliceRef<'a, T> { const { assert!(T::USE_GC) }; let storage = T::storage().get(); let (mut shard, hash) = Self::select(storage, &header, slice); // Atomically, // - check if `obj` is already in the map // - if so, clone its `Arc` and return it // - if not, box it up, insert it, and return a clone // This needs to be atomic (locking the shard) to avoid races with other thread, which could // insert the same object between us looking it up and inserting it. let bucket = match shard.find_or_find_insert_slot( hash, |(other, _)| other.header.header == header && other.slice == *slice, |(x, _)| storage.hasher().hash_one(x), ) { Ok(bucket) => bucket, // SAFETY: The slot came from `find_or_find_insert_slot()`, and the table wasn't modified since then. Err(insert_slot) => unsafe { shard.insert_in_slot( hash, insert_slot, (ThinArc::from_header_and_slice(header, slice), SharedValue::new(())), ) }, }; // SAFETY: We just retrieved/inserted this bucket. // `NonNull::new_unchecked()` is safe because the pointer originates from a `ThinArc`. unsafe { InternedSliceRef { // INVARIANT: We create it from a `ThinArc`. ptr: NonNull::new_unchecked(ThinArc::as_ptr(&bucket.as_ref().0).cast_mut()), _marker: PhantomData, } } } #[inline] fn select( storage: &'static InternMap, header: &T::Header, slice: &[T::SliceType], ) -> (Guard, u64) { let hash = Self::hash(storage, header, slice); let shard_idx = storage.determine_shard(hash as usize); let shard = &storage.shards()[shard_idx]; (shard.write(), hash) } #[inline] fn hash(storage: &'static InternMap, header: &T::Header, slice: &[T::SliceType]) -> u64 { storage.hasher().hash_one(HeaderSlice { header: HeaderWithLength { header, length: slice.len() }, slice, }) } #[inline(always)] fn ptr(&self) -> *const c_void { self.arc.as_ptr() } #[inline] pub fn as_ref(&self) -> InternedSliceRef<'_, T> { InternedSliceRef { // SAFETY: `self.ptr` comes from a valid `ThinArc`, so non null. // INVARIANT: We create it from a `ThinArc`. ptr: unsafe { NonNull::new_unchecked(self.ptr().cast_mut()) }, _marker: PhantomData, } } } /// Compares interned `Ref`s using pointer equality. impl PartialEq for InternedSlice { // NOTE: No `?Sized` because `ptr_eq` doesn't work right with trait objects. #[inline] fn eq(&self, other: &Self) -> bool { self.arc.as_ptr() == other.arc.as_ptr() } } impl Eq for InternedSlice {} impl Hash for InternedSlice { #[inline] fn hash(&self, state: &mut H) { state.write_usize(self.ptr().addr()) } } impl Deref for InternedSlice { type Target = Pointee; #[inline] fn deref(&self) -> &Self::Target { &self.arc } } impl Clone for InternedSlice { #[inline] fn clone(&self) -> Self { Self { arc: self.arc.clone() } } } impl Debug for InternedSlice where T: SliceInternable, T::SliceType: Debug, T::Header: Debug, { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { (*self.arc).fmt(f) } } #[repr(transparent)] pub struct InternedSliceRef<'a, T> { /// # Invariant /// /// There is no `ThinArcBorrow` unfortunately, so this is basically a `ManuallyDrop`, /// except that can't be `Copy`, so we store a raw pointer instead. ptr: NonNull, _marker: PhantomData<&'a T>, } // SAFETY: This is essentially a `ThinArc`, implemented as a raw pointer because there is no `ThinArcBorrowed`. unsafe impl Send for InternedSliceRef<'_, T> {} unsafe impl Sync for InternedSliceRef<'_, T> {} impl<'a, T: SliceInternable> InternedSliceRef<'a, T> { #[inline(always)] fn arc(self) -> ManuallyDrop> { // SAFETY: `self.ptr`'s invariant. unsafe { ManuallyDrop::new(ThinArc::from_raw(self.ptr.as_ptr())) } } #[inline] pub fn to_owned(self) -> InternedSlice { InternedSlice { arc: (*self.arc()).clone() } } #[inline] pub fn get(self) -> &'a Pointee { // SAFETY: This is a lifetime extension, valid because we live for `'a`. unsafe { &*ptr::from_ref::>(&*self.arc()) } } /// # Safety /// /// You have to make sure the data is not referenced after the refcount reaches zero; beware the interning /// map also keeps a reference to the value. #[inline] pub unsafe fn decrement_refcount(self) { drop(ManuallyDrop::into_inner(self.arc())); } #[inline] pub(crate) fn strong_count(self) -> usize { ThinArc::strong_count(&self.arc()) } #[inline] pub(crate) fn as_raw(self) -> *const c_void { self.arc().as_ptr() } /// **Available only on GC mode**. /// /// Changes the attached lifetime, as in GC mode, the lifetime is more kind of a lint to prevent misuse /// than actual soundness check. #[inline] pub fn change_lifetime<'b>(self) -> InternedSliceRef<'b, T> { const { assert!(T::USE_GC) }; // SAFETY: The lifetime on `InternedSliceRef` is essentially advisory only for GCed types. unsafe { std::mem::transmute::, InternedSliceRef<'b, T>>(self) } } } impl Clone for InternedSliceRef<'_, T> { #[inline] fn clone(&self) -> Self { *self } } impl Copy for InternedSliceRef<'_, T> {} impl Hash for InternedSliceRef<'_, T> { #[inline] fn hash(&self, state: &mut H) { state.write_usize(self.ptr.as_ptr().addr()); } } impl PartialEq for InternedSliceRef<'_, T> { #[inline] fn eq(&self, other: &Self) -> bool { self.ptr == other.ptr } } impl Eq for InternedSliceRef<'_, T> {} impl Deref for InternedSliceRef<'_, T> { type Target = Pointee; #[inline] fn deref(&self) -> &Self::Target { self.get() } } impl Debug for InternedSliceRef<'_, T> where T: SliceInternable, T::SliceType: Debug, T::Header: Debug, { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { (**self).fmt(f) } } pub struct InternSliceStorage { map: OnceLock>, } #[allow( clippy::new_without_default, reason = "this a const fn, so it can't be default yet. See " )] impl InternSliceStorage { pub const fn new() -> Self { Self { map: OnceLock::new() } } } impl InternSliceStorage { pub(crate) fn get(&self) -> &InternMap { self.map.get_or_init(|| { DashMap::with_capacity_and_hasher( (64 * 1024) / std::mem::size_of::(), Default::default(), ) }) } } pub trait SliceInternable: Sized + 'static { const USE_GC: bool; type Header: Eq + Hash + Send + Sync; type SliceType: Eq + Hash + Send + Sync + Copy + 'static; fn storage() -> &'static InternSliceStorage; } /// Implements `SliceInternable` for a given list of types, making them usable with `InternedSlice`. #[macro_export] #[doc(hidden)] macro_rules! _impl_slice_internable { ( gc; $tag:ident, $h:ty, $t:ty $(,)? ) => { #[allow(unreachable_pub)] pub struct $tag; impl $crate::SliceInternable for $tag { const USE_GC: bool = true; type Header = $h; type SliceType = $t; fn storage() -> &'static $crate::InternSliceStorage { static STORAGE: $crate::InternSliceStorage<$tag> = $crate::InternSliceStorage::new(); &STORAGE } } }; } pub use crate::_impl_slice_internable as impl_slice_internable;