Unnamed repository; edit this file 'description' to name the repository.
Diffstat (limited to 'crates/intern/src/gc.rs')
| -rw-r--r-- | crates/intern/src/gc.rs | 330 |
1 files changed, 330 insertions, 0 deletions
diff --git a/crates/intern/src/gc.rs b/crates/intern/src/gc.rs new file mode 100644 index 0000000000..0d500a9714 --- /dev/null +++ b/crates/intern/src/gc.rs @@ -0,0 +1,330 @@ +//! Garbage collection of interned values. +//! +//! The GC is a simple mark-and-sweep GC: you first mark all storages, then the +//! GC visits them, and each live value they refer, recursively, then removes +//! those not marked. The sweep phase is done in parallel. + +use std::{hash::Hash, marker::PhantomData, ops::ControlFlow}; + +use dashmap::DashMap; +use hashbrown::raw::RawTable; +use rayon::iter::{IntoParallelRefIterator, ParallelIterator}; +use rustc_hash::{FxBuildHasher, FxHashSet}; +use triomphe::{Arc, ThinArc}; + +use crate::{Internable, InternedRef, InternedSliceRef, SliceInternable}; + +trait Storage { + fn len(&self) -> usize; + + fn mark(&self, gc: &mut GarbageCollector); + + fn sweep(&self, gc: &GarbageCollector); +} + +struct InternedStorage<T>(PhantomData<fn() -> T>); + +impl<T: Internable + GcInternedVisit> Storage for InternedStorage<T> { + fn len(&self) -> usize { + T::storage().get().len() + } + + fn mark(&self, gc: &mut GarbageCollector) { + let storage = T::storage().get(); + for item in storage { + let item = item.key(); + let addr = Arc::as_ptr(item).addr(); + if Arc::strong_count(item) > 1 { + // The item is referenced from the outside. + gc.alive.insert(addr); + item.visit_with(gc); + } + } + } + + fn sweep(&self, gc: &GarbageCollector) { + let storage = T::storage().get(); + gc.sweep_storage(storage, |item| item.as_ptr().addr()); + } +} + +struct InternedSliceStorage<T>(PhantomData<fn() -> T>); + +impl<T: SliceInternable + GcInternedSliceVisit> Storage for InternedSliceStorage<T> { + fn len(&self) -> usize { + T::storage().get().len() + } + + fn mark(&self, gc: &mut GarbageCollector) { + let storage = T::storage().get(); + for item in storage { + let item = item.key(); + let addr = ThinArc::as_ptr(item).addr(); + if ThinArc::strong_count(item) > 1 { + // The item is referenced from the outside. + gc.alive.insert(addr); + T::visit_header(&item.header.header, gc); + T::visit_slice(&item.slice, gc); + } + } + } + + fn sweep(&self, gc: &GarbageCollector) { + let storage = T::storage().get(); + gc.sweep_storage(storage, |item| item.as_ptr().addr()); + } +} + +pub trait GcInternedVisit { + fn visit_with(&self, gc: &mut GarbageCollector); +} + +pub trait GcInternedSliceVisit: SliceInternable { + fn visit_header(header: &Self::Header, gc: &mut GarbageCollector); + fn visit_slice(header: &[Self::SliceType], gc: &mut GarbageCollector); +} + +#[derive(Default)] +pub struct GarbageCollector { + alive: FxHashSet<usize>, + storages: Vec<Box<dyn Storage + Send + Sync>>, +} + +impl GarbageCollector { + pub fn add_storage<T: Internable + GcInternedVisit>(&mut self) { + const { assert!(T::USE_GC) }; + + self.storages.push(Box::new(InternedStorage::<T>(PhantomData))); + } + + pub fn add_slice_storage<T: SliceInternable + GcInternedSliceVisit>(&mut self) { + const { assert!(T::USE_GC) }; + + self.storages.push(Box::new(InternedSliceStorage::<T>(PhantomData))); + } + + /// # Safety + /// + /// - This cannot be called if there are some not-yet-recorded type values. + /// - All relevant storages must have been added; that is, within the full graph of values, + /// the added storages must form a DAG. + /// - [`GcInternedVisit`] and [`GcInternedSliceVisit`] must mark all values reachable from the node. + pub unsafe fn collect(mut self) { + let total_nodes = self.storages.iter().map(|storage| storage.len()).sum(); + self.alive = FxHashSet::with_capacity_and_hasher(total_nodes, FxBuildHasher); + + let storages = std::mem::take(&mut self.storages); + + for storage in &storages { + storage.mark(&mut self); + } + + // Miri doesn't support rayon. + if cfg!(miri) { + storages.iter().for_each(|storage| storage.sweep(&self)); + } else { + storages.par_iter().for_each(|storage| storage.sweep(&self)); + } + } + + pub fn mark_interned_alive<T: Internable>( + &mut self, + interned: InternedRef<'_, T>, + ) -> ControlFlow<()> { + if interned.strong_count() > 1 { + // It will be visited anyway, so short-circuit + return ControlFlow::Break(()); + } + let addr = interned.as_raw().addr(); + if !self.alive.insert(addr) { ControlFlow::Break(()) } else { ControlFlow::Continue(()) } + } + + pub fn mark_interned_slice_alive<T: SliceInternable>( + &mut self, + interned: InternedSliceRef<'_, T>, + ) -> ControlFlow<()> { + if interned.strong_count() > 1 { + // It will be visited anyway, so short-circuit + return ControlFlow::Break(()); + } + let addr = interned.as_raw().addr(); + if !self.alive.insert(addr) { ControlFlow::Break(()) } else { ControlFlow::Continue(()) } + } + + fn sweep_storage<T: Hash + Eq + Send + Sync>( + &self, + storage: &DashMap<T, (), FxBuildHasher>, + get_addr: impl Fn(&T) -> usize + Send + Sync, + ) { + // Miri doesn't support rayon. + if cfg!(miri) { + storage.shards().iter().for_each(|shard| { + self.retain_only_alive(&mut *shard.write(), |item| get_addr(&item.0)) + }); + } else { + storage.shards().par_iter().for_each(|shard| { + self.retain_only_alive(&mut *shard.write(), |item| get_addr(&item.0)) + }); + } + } + + #[inline] + fn retain_only_alive<T>(&self, map: &mut RawTable<T>, mut get_addr: impl FnMut(&T) -> usize) { + // This code was copied from DashMap's retain() - which we can't use because we want to run in parallel. + unsafe { + // Here we only use `iter` as a temporary, preventing use-after-free + for bucket in map.iter() { + let item = bucket.as_mut(); + let addr = get_addr(item); + if !self.alive.contains(&addr) { + map.erase(bucket); + } + } + } + } +} + +#[cfg(test)] +mod tests { + use crate::{ + GarbageCollector, GcInternedSliceVisit, GcInternedVisit, Interned, InternedSliceRef, + }; + + crate::impl_internable!(String); + + #[test] + fn simple_interned() { + let a = Interned::new("abc".to_owned()); + let b = Interned::new("abc".to_owned()); + assert_eq!(a, b); + assert_eq!(a.as_ref(), b.as_ref()); + assert_eq!(a.as_ref(), a.as_ref()); + assert_eq!(a, a.clone()); + assert_eq!(a, a.clone().clone()); + assert_eq!(b.clone(), a.clone().clone()); + assert_eq!(*a, "abc"); + assert_eq!(*b, "abc"); + assert_eq!(b.as_ref().to_owned(), a); + let c = Interned::new("def".to_owned()); + assert_ne!(a, c); + assert_ne!(b, c); + assert_ne!(b.as_ref(), c.as_ref()); + assert_eq!(*c.as_ref(), "def"); + drop(c); + assert_eq!(*a, "abc"); + assert_eq!(*b, "abc"); + drop(a); + assert_eq!(*b, "abc"); + drop(b); + } + + #[test] + fn simple_gc() { + #[derive(Debug, PartialEq, Eq, Hash)] + struct GcString(String); + + crate::impl_internable!(gc; GcString); + + impl GcInternedVisit for GcString { + fn visit_with(&self, _gc: &mut GarbageCollector) {} + } + + crate::impl_slice_internable!(gc; StringSlice, String, u32); + type InternedSlice = crate::InternedSlice<StringSlice>; + + impl GcInternedSliceVisit for StringSlice { + fn visit_header(_header: &Self::Header, _gc: &mut GarbageCollector) {} + + fn visit_slice(_header: &[Self::SliceType], _gc: &mut GarbageCollector) {} + } + + let (a, d) = { + let a = Interned::new_gc(GcString("abc".to_owned())).to_owned(); + let b = Interned::new_gc(GcString("abc".to_owned())).to_owned(); + assert_eq!(a, b); + assert_eq!(a.as_ref(), b.as_ref()); + assert_eq!(a.as_ref(), a.as_ref()); + assert_eq!(a, a.clone()); + assert_eq!(a, a.clone().clone()); + assert_eq!(b.clone(), a.clone().clone()); + assert_eq!(a.0, "abc"); + assert_eq!(b.0, "abc"); + assert_eq!(b.as_ref().to_owned(), a); + let c = Interned::new_gc(GcString("def".to_owned())).to_owned(); + assert_ne!(a, c); + assert_ne!(b, c); + assert_ne!(b.as_ref(), c.as_ref()); + assert_eq!(c.as_ref().0, "def"); + + let d = InternedSlice::from_header_and_slice("abc".to_owned(), &[123, 456]); + let e = InternedSlice::from_header_and_slice("abc".to_owned(), &[123, 456]); + assert_eq!(d, e); + assert_eq!(d.to_owned(), e.to_owned()); + assert_eq!(d.header.length, 2); + assert_eq!(d.header.header, "abc"); + assert_eq!(d.slice, [123, 456]); + (a, d.to_owned()) + }; + + let mut gc = GarbageCollector::default(); + gc.add_slice_storage::<StringSlice>(); + gc.add_storage::<GcString>(); + unsafe { gc.collect() }; + + assert_eq!(a.0, "abc"); + assert_eq!(d.header.length, 2); + assert_eq!(d.header.header, "abc"); + assert_eq!(d.slice, [123, 456]); + + drop(a); + drop(d); + + let mut gc = GarbageCollector::default(); + gc.add_slice_storage::<StringSlice>(); + gc.add_storage::<GcString>(); + unsafe { gc.collect() }; + } + + #[test] + fn gc_visit() { + #[derive(PartialEq, Eq, Hash)] + struct GcInterned(InternedSliceRef<'static, StringSlice>); + + crate::impl_internable!(gc; GcInterned); + + impl GcInternedVisit for GcInterned { + fn visit_with(&self, gc: &mut GarbageCollector) { + _ = gc.mark_interned_slice_alive(self.0); + } + } + + crate::impl_slice_internable!(gc; StringSlice, String, i32); + type InternedSlice = crate::InternedSlice<StringSlice>; + + impl GcInternedSliceVisit for StringSlice { + fn visit_header(_header: &Self::Header, _gc: &mut GarbageCollector) {} + + fn visit_slice(_header: &[Self::SliceType], _gc: &mut GarbageCollector) {} + } + + let outer = { + let inner = InternedSlice::from_header_and_slice("abc".to_owned(), &[123, 456, 789]); + Interned::new_gc(GcInterned(inner)).to_owned() + }; + + let mut gc = GarbageCollector::default(); + gc.add_slice_storage::<StringSlice>(); + gc.add_storage::<GcInterned>(); + unsafe { gc.collect() }; + + assert_eq!(outer.0.header.header, "abc"); + assert_eq!(outer.0.slice, [123, 456, 789]); + + drop(outer); + + let mut gc = GarbageCollector::default(); + gc.add_slice_storage::<StringSlice>(); + gc.add_storage::<GcInterned>(); + unsafe { gc.collect() }; + } +} |