//! Garbage collection of interned values. //! //! The GC is a simple mark-and-sweep GC: you first mark all storages, then the //! GC visits them, and each live value they refer, recursively, then removes //! those not marked. The sweep phase is done in parallel. use std::{hash::Hash, marker::PhantomData, ops::ControlFlow}; use dashmap::DashMap; use hashbrown::raw::RawTable; use rayon::iter::{IntoParallelRefIterator, ParallelIterator}; use rustc_hash::{FxBuildHasher, FxHashSet}; use triomphe::{Arc, ThinArc}; use crate::{Internable, InternedRef, InternedSliceRef, SliceInternable}; trait Storage { fn len(&self) -> usize; fn mark(&self, gc: &mut GarbageCollector); fn sweep(&self, gc: &GarbageCollector); } struct InternedStorage(PhantomData T>); impl Storage for InternedStorage { fn len(&self) -> usize { T::storage().get().len() } fn mark(&self, gc: &mut GarbageCollector) { let storage = T::storage().get(); for item in storage { let item = item.key(); let addr = Arc::as_ptr(item).addr(); if Arc::strong_count(item) > 1 { // The item is referenced from the outside. gc.alive.insert(addr); item.visit_with(gc); } } } fn sweep(&self, gc: &GarbageCollector) { let storage = T::storage().get(); gc.sweep_storage(storage, |item| item.as_ptr().addr()); } } struct InternedSliceStorage(PhantomData T>); impl Storage for InternedSliceStorage { fn len(&self) -> usize { T::storage().get().len() } fn mark(&self, gc: &mut GarbageCollector) { let storage = T::storage().get(); for item in storage { let item = item.key(); let addr = ThinArc::as_ptr(item).addr(); if ThinArc::strong_count(item) > 1 { // The item is referenced from the outside. gc.alive.insert(addr); T::visit_header(&item.header.header, gc); T::visit_slice(&item.slice, gc); } } } fn sweep(&self, gc: &GarbageCollector) { let storage = T::storage().get(); gc.sweep_storage(storage, |item| item.as_ptr().addr()); } } pub trait GcInternedVisit { fn visit_with(&self, gc: &mut GarbageCollector); } pub trait GcInternedSliceVisit: SliceInternable { fn visit_header(header: &Self::Header, gc: &mut GarbageCollector); fn visit_slice(header: &[Self::SliceType], gc: &mut GarbageCollector); } #[derive(Default)] pub struct GarbageCollector { alive: FxHashSet, storages: Vec<&'static (dyn Storage + Send + Sync)>, } impl GarbageCollector { pub fn add_storage(&mut self) { const { assert!(T::USE_GC) }; self.storages.push(&InternedStorage::(PhantomData)); } pub fn add_slice_storage(&mut self) { const { assert!(T::USE_GC) }; self.storages.push(&InternedSliceStorage::(PhantomData)); } /// # Safety /// /// - This cannot be called if there are some not-yet-recorded type values. /// - All relevant storages must have been added; that is, within the full graph of values, /// the added storages must form a DAG. /// - [`GcInternedVisit`] and [`GcInternedSliceVisit`] must mark all values reachable from the node. pub unsafe fn collect(mut self) { let total_nodes = self.storages.iter().map(|storage| storage.len()).sum(); self.alive.clear(); self.alive.reserve(total_nodes); let storages = std::mem::take(&mut self.storages); for &storage in &storages { storage.mark(&mut self); } // Miri doesn't support rayon. if cfg!(miri) { storages.iter().for_each(|storage| storage.sweep(&self)); } else { storages.par_iter().for_each(|storage| storage.sweep(&self)); } } pub fn mark_interned_alive( &mut self, interned: InternedRef<'_, T>, ) -> ControlFlow<()> { if interned.strong_count() > 1 { // It will be visited anyway, so short-circuit return ControlFlow::Break(()); } let addr = interned.as_raw().addr(); if !self.alive.insert(addr) { ControlFlow::Break(()) } else { ControlFlow::Continue(()) } } pub fn mark_interned_slice_alive( &mut self, interned: InternedSliceRef<'_, T>, ) -> ControlFlow<()> { if interned.strong_count() > 1 { // It will be visited anyway, so short-circuit return ControlFlow::Break(()); } let addr = interned.as_raw().addr(); if !self.alive.insert(addr) { ControlFlow::Break(()) } else { ControlFlow::Continue(()) } } fn sweep_storage( &self, storage: &DashMap, get_addr: impl Fn(&T) -> usize + Send + Sync, ) { // Miri doesn't support rayon. if cfg!(miri) { storage.shards().iter().for_each(|shard| { self.retain_only_alive(&mut *shard.write(), |item| get_addr(&item.0)) }); } else { storage.shards().par_iter().for_each(|shard| { self.retain_only_alive(&mut *shard.write(), |item| get_addr(&item.0)) }); } } #[inline] fn retain_only_alive(&self, map: &mut RawTable, mut get_addr: impl FnMut(&T) -> usize) { // This code was copied from DashMap's retain() - which we can't use because we want to run in parallel. unsafe { // Here we only use `iter` as a temporary, preventing use-after-free for bucket in map.iter() { let item = bucket.as_mut(); let addr = get_addr(item); if !self.alive.contains(&addr) { map.erase(bucket); } } } } } #[cfg(test)] mod tests { use crate::{ GarbageCollector, GcInternedSliceVisit, GcInternedVisit, Interned, InternedSliceRef, }; crate::impl_internable!(String); #[test] fn simple_interned() { let a = Interned::new("abc".to_owned()); let b = Interned::new("abc".to_owned()); assert_eq!(a, b); assert_eq!(a.as_ref(), b.as_ref()); assert_eq!(a.as_ref(), a.as_ref()); assert_eq!(a, a.clone()); assert_eq!(a, a.clone().clone()); assert_eq!(b.clone(), a.clone().clone()); assert_eq!(*a, "abc"); assert_eq!(*b, "abc"); assert_eq!(b.as_ref().to_owned(), a); let c = Interned::new("def".to_owned()); assert_ne!(a, c); assert_ne!(b, c); assert_ne!(b.as_ref(), c.as_ref()); assert_eq!(*c.as_ref(), "def"); drop(c); assert_eq!(*a, "abc"); assert_eq!(*b, "abc"); drop(a); assert_eq!(*b, "abc"); drop(b); } #[test] fn simple_gc() { #[derive(Debug, PartialEq, Eq, Hash)] struct GcString(String); crate::impl_internable!(gc; GcString); impl GcInternedVisit for GcString { fn visit_with(&self, _gc: &mut GarbageCollector) {} } crate::impl_slice_internable!(gc; StringSlice, String, u32); type InternedSlice = crate::InternedSlice; impl GcInternedSliceVisit for StringSlice { fn visit_header(_header: &Self::Header, _gc: &mut GarbageCollector) {} fn visit_slice(_header: &[Self::SliceType], _gc: &mut GarbageCollector) {} } let (a, d) = { let a = Interned::new_gc(GcString("abc".to_owned())).to_owned(); let b = Interned::new_gc(GcString("abc".to_owned())).to_owned(); assert_eq!(a, b); assert_eq!(a.as_ref(), b.as_ref()); assert_eq!(a.as_ref(), a.as_ref()); assert_eq!(a, a.clone()); assert_eq!(a, a.clone().clone()); assert_eq!(b.clone(), a.clone().clone()); assert_eq!(a.0, "abc"); assert_eq!(b.0, "abc"); assert_eq!(b.as_ref().to_owned(), a); let c = Interned::new_gc(GcString("def".to_owned())).to_owned(); assert_ne!(a, c); assert_ne!(b, c); assert_ne!(b.as_ref(), c.as_ref()); assert_eq!(c.as_ref().0, "def"); let d = InternedSlice::from_header_and_slice("abc".to_owned(), &[123, 456]); let e = InternedSlice::from_header_and_slice("abc".to_owned(), &[123, 456]); assert_eq!(d, e); assert_eq!(d.to_owned(), e.to_owned()); assert_eq!(d.header.length, 2); assert_eq!(d.header.header, "abc"); assert_eq!(d.slice, [123, 456]); (a, d.to_owned()) }; let mut gc = GarbageCollector::default(); gc.add_slice_storage::(); gc.add_storage::(); unsafe { gc.collect() }; assert_eq!(a.0, "abc"); assert_eq!(d.header.length, 2); assert_eq!(d.header.header, "abc"); assert_eq!(d.slice, [123, 456]); drop(a); drop(d); let mut gc = GarbageCollector::default(); gc.add_slice_storage::(); gc.add_storage::(); unsafe { gc.collect() }; } #[test] fn gc_visit() { #[derive(PartialEq, Eq, Hash)] struct GcInterned(InternedSliceRef<'static, StringSlice>); crate::impl_internable!(gc; GcInterned); impl GcInternedVisit for GcInterned { fn visit_with(&self, gc: &mut GarbageCollector) { _ = gc.mark_interned_slice_alive(self.0); } } crate::impl_slice_internable!(gc; StringSlice, String, i32); type InternedSlice = crate::InternedSlice; impl GcInternedSliceVisit for StringSlice { fn visit_header(_header: &Self::Header, _gc: &mut GarbageCollector) {} fn visit_slice(_header: &[Self::SliceType], _gc: &mut GarbageCollector) {} } let outer = { let inner = InternedSlice::from_header_and_slice("abc".to_owned(), &[123, 456, 789]); Interned::new_gc(GcInterned(inner)).to_owned() }; let mut gc = GarbageCollector::default(); gc.add_slice_storage::(); gc.add_storage::(); unsafe { gc.collect() }; assert_eq!(outer.0.header.header, "abc"); assert_eq!(outer.0.slice, [123, 456, 789]); drop(outer); let mut gc = GarbageCollector::default(); gc.add_slice_storage::(); gc.add_storage::(); unsafe { gc.collect() }; } }