Unnamed repository; edit this file 'description' to name the repository.
Diffstat (limited to 'crates/intern/src/intern.rs')
| -rw-r--r-- | crates/intern/src/intern.rs | 372 |
1 files changed, 372 insertions, 0 deletions
diff --git a/crates/intern/src/intern.rs b/crates/intern/src/intern.rs new file mode 100644 index 0000000000..b7acd6624b --- /dev/null +++ b/crates/intern/src/intern.rs @@ -0,0 +1,372 @@ +//! Interning of single values. +//! +//! Interning supports two modes: GC and non-GC. +//! +//! In non-GC mode, you create [`Interned`]s, and can create `Copy` handles to them +//! that can still be upgraded back to [`Interned`] ([`InternedRef`]) via [`Interned::as_ref`]. +//! Generally, letting the [`InternedRef`] to outlive the [`Interned`] is a soundness bug and can +//! lead to UB. When all [`Interned`]s of some value are dropped, the value is freed (newer interns +//! may re-create it, not necessarily in the same place). +//! +//! In GC mode, you generally operate on [`InternedRef`]s. They are `Copy` and comfortable. To intern +//! a value you call [`Interned::new_gc`], which returns an [`InternedRef`]. Having all [`Interned`]s +//! of some value be dropped will *not* immediately free the value. Instead, a mark-and-sweep GC can +//! be initiated, which will free all values which have no live [`Interned`]s. +//! +//! Generally, in GC mode, you operate on [`InternedRef`], but when you need to store some long-term +//! value (e.g. a Salsa query output), you convert it to an [`Interned`]. This ensures that an eventual +//! GC will not free it as long as it is alive. +//! +//! Making mistakes is hard due to GC [`InternedRef`] wrappers not implementing `salsa::Update`, meaning +//! Salsa will ensure you do not store them in queries or Salsa-interneds. However it's still *possible* +//! without unsafe code (for example, by storing them in a `static`), which is why triggering GC is unsafe. +//! +//! For more information about GC see [`crate::gc`]. + +use std::{ + fmt::{self, Debug, Display}, + hash::{BuildHasher, Hash, Hasher}, + ops::Deref, + ptr, + sync::OnceLock, +}; + +use dashmap::{DashMap, SharedValue}; +use hashbrown::raw::RawTable; +use rustc_hash::FxBuildHasher; +use triomphe::{Arc, ArcBorrow}; + +type InternMap<T> = DashMap<Arc<T>, (), FxBuildHasher>; +type Guard<T> = dashmap::RwLockWriteGuard<'static, RawTable<(Arc<T>, SharedValue<()>)>>; + +pub struct Interned<T: Internable> { + arc: Arc<T>, +} + +impl<T: Internable> Interned<T> { + #[inline] + pub fn new(obj: T) -> Self { + const { assert!(!T::USE_GC) }; + + let storage = T::storage().get(); + let (mut shard, hash) = Self::select(storage, &obj); + // Atomically, + // - check if `obj` is already in the map + // - if so, clone its `Arc` and return it + // - if not, box it up, insert it, and return a clone + // This needs to be atomic (locking the shard) to avoid races with other thread, which could + // insert the same object between us looking it up and inserting it. + let bucket = match shard.find_or_find_insert_slot( + hash, + |(other, _)| **other == obj, + |(x, _)| Self::hash(storage, x), + ) { + Ok(bucket) => bucket, + // SAFETY: The slot came from `find_or_find_insert_slot()`, and the table wasn't modified since then. + Err(insert_slot) => unsafe { + shard.insert_in_slot(hash, insert_slot, (Arc::new(obj), SharedValue::new(()))) + }, + }; + // SAFETY: We just retrieved/inserted this bucket. + unsafe { Self { arc: bucket.as_ref().0.clone() } } + } + + #[inline] + pub fn new_gc<'a>(obj: T) -> InternedRef<'a, T> { + const { assert!(T::USE_GC) }; + + let storage = T::storage().get(); + let (mut shard, hash) = Self::select(storage, &obj); + // Atomically, + // - check if `obj` is already in the map + // - if so, clone its `Arc` and return it + // - if not, box it up, insert it, and return a clone + // This needs to be atomic (locking the shard) to avoid races with other thread, which could + // insert the same object between us looking it up and inserting it. + let bucket = match shard.find_or_find_insert_slot( + hash, + |(other, _)| **other == obj, + |(x, _)| Self::hash(storage, x), + ) { + Ok(bucket) => bucket, + // SAFETY: The slot came from `find_or_find_insert_slot()`, and the table wasn't modified since then. + Err(insert_slot) => unsafe { + shard.insert_in_slot(hash, insert_slot, (Arc::new(obj), SharedValue::new(()))) + }, + }; + // SAFETY: We just retrieved/inserted this bucket. + unsafe { InternedRef { arc: Arc::borrow_arc(&bucket.as_ref().0) } } + } + + #[inline] + fn select(storage: &'static InternMap<T>, obj: &T) -> (Guard<T>, u64) { + let hash = Self::hash(storage, obj); + let shard_idx = storage.determine_shard(hash as usize); + let shard = &storage.shards()[shard_idx]; + (shard.write(), hash) + } + + #[inline] + fn hash(storage: &'static InternMap<T>, obj: &T) -> u64 { + storage.hasher().hash_one(obj) + } + + /// # Safety + /// + /// The pointer should originate from an `Interned` or an `InternedRef`. + #[inline] + pub unsafe fn from_raw(ptr: *const T) -> Self { + // SAFETY: Our precondition. + Self { arc: unsafe { Arc::from_raw(ptr) } } + } + + #[inline] + pub fn as_ref(&self) -> InternedRef<'_, T> { + InternedRef { arc: self.arc.borrow_arc() } + } +} + +impl<T: Internable> Drop for Interned<T> { + #[inline] + fn drop(&mut self) { + // When the last `Ref` is dropped, remove the object from the global map. + if !T::USE_GC && Arc::count(&self.arc) == 2 { + // Only `self` and the global map point to the object. + + self.drop_slow(); + } + } +} + +impl<T: Internable> Interned<T> { + #[cold] + fn drop_slow(&mut self) { + let storage = T::storage().get(); + let (mut shard, hash) = Self::select(storage, &self.arc); + + if Arc::count(&self.arc) != 2 { + // Another thread has interned another copy + return; + } + + shard.remove_entry(hash, |(other, _)| **other == **self); + + // Shrink the backing storage if the shard is less than 50% occupied. + if shard.len() * 2 < shard.capacity() { + let len = shard.len(); + shard.shrink_to(len, |(x, _)| Self::hash(storage, x)); + } + } +} + +/// Compares interned `Ref`s using pointer equality. +impl<T: Internable> PartialEq for Interned<T> { + // NOTE: No `?Sized` because `ptr_eq` doesn't work right with trait objects. + + #[inline] + fn eq(&self, other: &Self) -> bool { + Arc::ptr_eq(&self.arc, &other.arc) + } +} + +impl<T: Internable> Eq for Interned<T> {} + +impl<T: Internable> Hash for Interned<T> { + #[inline] + fn hash<H: Hasher>(&self, state: &mut H) { + state.write_usize(self.arc.as_ptr().addr()) + } +} + +impl<T: Internable> AsRef<T> for Interned<T> { + #[inline] + fn as_ref(&self) -> &T { + self + } +} + +impl<T: Internable> Deref for Interned<T> { + type Target = T; + + #[inline] + fn deref(&self) -> &Self::Target { + &self.arc + } +} + +impl<T: Internable> Clone for Interned<T> { + #[inline] + fn clone(&self) -> Self { + Self { arc: self.arc.clone() } + } +} + +impl<T: Debug + Internable> Debug for Interned<T> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + <T as Debug>::fmt(&**self, f) + } +} + +impl<T: Display + Internable> Display for Interned<T> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + <T as Display>::fmt(&**self, f) + } +} + +#[repr(transparent)] +pub struct InternedRef<'a, T> { + arc: ArcBorrow<'a, T>, +} + +impl<'a, T: Internable> InternedRef<'a, T> { + #[inline] + pub fn as_raw(self) -> *const T { + // Not `ptr::from_ref(&*self.arc)`, because we need to keep the provenance. + self.arc.with_arc(|arc| Arc::as_ptr(arc)) + } + + /// # Safety + /// + /// The pointer needs to originate from `Interned` or `InternedRef`. + #[inline] + pub unsafe fn from_raw(ptr: *const T) -> Self { + // SAFETY: Our precondition. + Self { arc: unsafe { ArcBorrow::from_ptr(ptr) } } + } + + #[inline] + pub fn to_owned(self) -> Interned<T> { + Interned { arc: self.arc.clone_arc() } + } + + #[inline] + pub fn get(self) -> &'a T { + self.arc.get() + } + + /// # Safety + /// + /// You have to make sure the data is not referenced after the refcount reaches zero; beware the interning + /// map also keeps a reference to the value. + #[inline] + pub unsafe fn decrement_refcount(self) { + // SAFETY: Our precondition. + unsafe { drop(Arc::from_raw(self.as_raw())) } + } + + #[inline] + pub(crate) fn strong_count(self) -> usize { + ArcBorrow::strong_count(&self.arc) + } + + /// **Available only on GC mode**. + /// + /// Changes the attached lifetime, as in GC mode, the lifetime is more kind of a lint to prevent misuse + /// than actual soundness check. + #[inline] + pub fn change_lifetime<'b>(self) -> InternedRef<'b, T> { + const { assert!(T::USE_GC) }; + // SAFETY: The lifetime on `InternedRef` is essentially advisory only for GCed types. + unsafe { std::mem::transmute::<InternedRef<'a, T>, InternedRef<'b, T>>(self) } + } +} + +impl<T> Clone for InternedRef<'_, T> { + #[inline] + fn clone(&self) -> Self { + *self + } +} + +impl<T> Copy for InternedRef<'_, T> {} + +impl<T: Hash> Hash for InternedRef<'_, T> { + #[inline] + fn hash<H: Hasher>(&self, state: &mut H) { + let ptr = ptr::from_ref::<T>(&*self.arc); + state.write_usize(ptr.addr()); + } +} + +impl<T: PartialEq> PartialEq for InternedRef<'_, T> { + #[inline] + fn eq(&self, other: &Self) -> bool { + ArcBorrow::ptr_eq(&self.arc, &other.arc) + } +} + +impl<T: Eq> Eq for InternedRef<'_, T> {} + +impl<T> Deref for InternedRef<'_, T> { + type Target = T; + + #[inline] + fn deref(&self) -> &Self::Target { + &self.arc + } +} + +impl<T: Debug> Debug for InternedRef<'_, T> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + (*self.arc).fmt(f) + } +} + +impl<T: Display> Display for InternedRef<'_, T> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + (*self.arc).fmt(f) + } +} + +pub struct InternStorage<T: ?Sized> { + map: OnceLock<InternMap<T>>, +} + +#[allow( + clippy::new_without_default, + reason = "this a const fn, so it can't be default yet. See <https://github.com/rust-lang/rust/issues/63065>" +)] +impl<T: ?Sized> InternStorage<T> { + pub const fn new() -> Self { + Self { map: OnceLock::new() } + } +} + +impl<T: Internable + ?Sized> InternStorage<T> { + pub(crate) fn get(&self) -> &InternMap<T> { + self.map.get_or_init(DashMap::default) + } +} + +pub trait Internable: Hash + Eq + Send + Sync + 'static { + const USE_GC: bool; + + fn storage() -> &'static InternStorage<Self>; +} + +/// Implements `Internable` for a given list of types, making them usable with `Interned`. +#[macro_export] +#[doc(hidden)] +macro_rules! _impl_internable { + ( gc; $($t:ty),+ $(,)? ) => { $( + impl $crate::Internable for $t { + const USE_GC: bool = true; + + fn storage() -> &'static $crate::InternStorage<Self> { + static STORAGE: $crate::InternStorage<$t> = $crate::InternStorage::new(); + &STORAGE + } + } + )+ }; + ( $($t:ty),+ $(,)? ) => { $( + impl $crate::Internable for $t { + const USE_GC: bool = false; + + fn storage() -> &'static $crate::InternStorage<Self> { + static STORAGE: $crate::InternStorage<$t> = $crate::InternStorage::new(); + &STORAGE + } + } + )+ }; +} +pub use crate::_impl_internable as impl_internable; |