e
Diffstat (limited to 'src/lib.rs')
| -rwxr-xr-x | src/lib.rs | 205 |
1 files changed, 205 insertions, 0 deletions
diff --git a/src/lib.rs b/src/lib.rs new file mode 100755 index 0000000..6cf1b17 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,205 @@ +//! A fast, iterative, correct approach to Stackblur, resulting in a very smooth +//! and high-quality output, with no edge bleeding. +//! +//! This crate implements a tweaked version of the Stackblur algorithm requiring +//! `radius * 2 + 2` elements of space rather than `radius * 2 + 1`, which is a +//! small tradeoff for much-increased visual quality. +//! +//! The algorithm is exposed as an iterator ([`StackBlur`]) that can wrap any +//! other iterator that yields elements of [`StackBlurrable`]. The [`StackBlur`] +//! will then yield elements blurred by the specified radius. +//! +//! ## Benefits of this crate +//! +//! Stackblur is essentially constant-time. Regardless of the radius, it always +//! performs only 1 scan over the input iterator and outputs exactly the same +//! amount of elements. +//! +//! Additionally, it produces results that are comparable to slow and expensive +//! Gaussian blurs. As opposed to box blur which uses a basic rolling average, +//! Stackblur uses a weighted average where each output pixel is affected more +//! strongly by the inputs that were closest to it. +//! +//! Despite that, Stackblur does not perform much worse compared to naive box +//! blurs, and is quite cheap compared to full Gaussian blurs, at least for the +//! CPU. The implementation in this crate will most likely beat most unoptimized +//! blurs you can find on crates.io, as well as some optimized ones, and it is +//! extremely flexible and generic. +//! +//! For a full explanation of the improvements made to the Stackblur algorithm, +//! see the [`iter`] module. +//! +//! ## Comparison to the `stackblur` crate +//! +//! `stackblur` suffers from edge bleeding and flexibility problems. For +//! example, it can only operate on buffers of 32-bit integers, and expects them +//! to be packed linear ARGB pixels. Additionally, it cannot operate on a 2D +//! subslice of a buffer (like `imgref` allows for this crate), and it does not +//! offer any streaming iterators or documentation. And it also only supports +//! a blur radius of up to 255. +//! +//! ## Usage +//! +//! Aside from [`StackBlurrable`] and [`StackBlur`] which host their own +//! documentation, there are helper functions like [`blur`] and [`blur_argb`] +//! that can be used to interact with 2D image buffers, due to the fact that +//! doing so manually involves unsafe code (if you want no-copy). + +#![feature(portable_simd, stmt_expr_attributes)] +#![cfg_attr(test, feature(test))] + +use std::collections::VecDeque; +use std::simd::{LaneCount, SupportedLaneCount}; + +pub extern crate imgref; + +use imgref::ImgRefMut; + +#[cfg(test)] +mod test; + +pub mod color; +pub mod iter; +pub mod traits; + +use color::Argb; +use iter::StackBlur; +use traits::StackBlurrable; + +/// Blurs a buffer, assuming one element per pixel. +/// +/// The provided closures are used to convert from the buffer's native pixel +/// format to [`StackBlurrable`] values that can be consumed by [`StackBlur`]. +pub fn blur<T, B: StackBlurrable>( + buffer: &mut ImgRefMut<T>, + radius: usize, + mut to_blurrable: impl FnMut(&T) -> B, + mut to_pixel: impl FnMut(B) -> T, +) { + use imgref_iter::iter::{IterWindows, IterWindowsPtrMut}; + use imgref_iter::traits::{ImgIter, ImgIterMut, ImgIterPtrMut}; + + let mut ops = VecDeque::new(); + + // This is needed to avoid Undefined Behavior. Writing to the rows of the + // must be done before constructing the columns iterators, because otherwise + // the writes would invalidate their borrows. However I don't want to + // duplicate this loop, so make it a closure. + let mut blur_windows = |writer: IterWindowsPtrMut<T>, reader: IterWindows<T>| { + for (write, read) in writer.zip(reader) { + let mut blur = StackBlur::new(read.map(&mut to_blurrable), radius, &mut ops); + write.for_each(|place| unsafe { *place = to_pixel(blur.next().unwrap()) }); + } + }; + + let buffer_ptr = buffer.as_mut_ptr(); + blur_windows( + unsafe { buffer_ptr.iter_rows_ptr_mut() }, + buffer.iter_rows(), + ); + blur_windows( + unsafe { buffer_ptr.iter_cols_ptr_mut() }, + buffer.iter_cols(), + ); +} + +/// Blurs a buffer with SIMD, assuming one element per pixel. +/// +/// The provided closures are used to convert from the buffer's native pixel +/// format to [`StackBlurrable`] values that can be consumed by [`StackBlur`]. +pub fn simd_blur<T, Bsimd: StackBlurrable, Bsingle: StackBlurrable, const LANES: usize>( + buffer: &mut ImgRefMut<T>, + radius: usize, + mut to_blurrable_simd: impl FnMut([&T; LANES]) -> Bsimd, + mut to_pixel_simd: impl FnMut(Bsimd) -> [T; LANES], + mut to_blurrable_single: impl FnMut(&T) -> Bsingle, + mut to_pixel_single: impl FnMut(Bsingle) -> T, +) where + LaneCount<LANES>: SupportedLaneCount, +{ + #[cfg(not(doc))] + use imgref_iter::iter::{ + SimdIterWindow, SimdIterWindowPtrMut, SimdIterWindows, SimdIterWindowsPtrMut, + }; + #[cfg(not(doc))] + use imgref_iter::traits::{ImgIterMut, ImgSimdIter, ImgSimdIterPtrMut}; + + let mut ops_simd = VecDeque::new(); + let mut ops_single = VecDeque::new(); + + let mut simd_blur_windows = + |writer: SimdIterWindowsPtrMut<T, LANES>, + reader: SimdIterWindows<T, LANES>, + mut ops_simd: VecDeque<Bsimd>, + mut ops_single: VecDeque<Bsingle>| { + for (write, read) in writer.zip(reader) { + match (write, read) { + (SimdIterWindowPtrMut::Simd(write), SimdIterWindow::Simd(read)) => { + let mut blur = + StackBlur::new(read.map(&mut to_blurrable_simd), radius, &mut ops_simd); + write.for_each(|place| { + place + .into_iter() + .zip(to_pixel_simd(blur.next().unwrap())) + .for_each(|(place, pixel)| unsafe { *place = pixel }); + }); + } + + (SimdIterWindowPtrMut::Single(write), SimdIterWindow::Single(read)) => { + let mut blur = StackBlur::new( + read.map(&mut to_blurrable_single), + radius, + &mut ops_single, + ); + write.for_each(|place| unsafe { + *place = to_pixel_single(blur.next().unwrap()); + }); + } + + _ => unreachable!(), + } + } + + (ops_simd, ops_single) + }; + + let buffer_ptr = buffer.as_mut_ptr(); + (ops_simd, ops_single) = simd_blur_windows( + unsafe { buffer_ptr.simd_iter_rows_ptr_mut::<LANES>() }, + buffer.simd_iter_rows::<LANES>(), + ops_simd, + ops_single, + ); + simd_blur_windows( + unsafe { buffer_ptr.simd_iter_cols_ptr_mut::<LANES>() }, + buffer.simd_iter_cols::<LANES>(), + ops_simd, + ops_single, + ); +} + +/// Blurs a buffer of 32-bit packed ARGB pixels (0xAARRGGBB). +/// +/// This is a version of [`blur`] with pre-filled conversion routines that +/// provide good results for blur radii <= 4096. Larger radii may overflow. +pub fn blur_argb(buffer: &mut ImgRefMut<u32>, radius: usize) { + blur(buffer, radius, |i| Argb::from(*i), Argb::into); +} + +/// Blurs a buffer of 32-bit packed ARGB pixels (0xAARRGGBB) with SIMD. +/// +/// This is a version of [`simd_blur`] with pre-filled conversion routines that +/// provide good results for blur radii <= 4096. Larger radii may overflow. +pub fn simd_blur_argb<const LANES: usize>(buffer: &mut ImgRefMut<u32>, radius: usize) +where + LaneCount<LANES>: SupportedLaneCount, +{ + simd_blur( + buffer, + radius, + |i: [&u32; LANES]| Argb::from(i.map(u32::clone)), + Argb::into, + |i| Argb::from(*i), + Argb::into, + ); +} |