//! A fast, iterative, correct approach to Stackblur, resulting in a very smooth
//! and high-quality output, with no edge bleeding.
//!
//! This crate implements a tweaked version of the Stackblur algorithm requiring
//! `radius * 2 + 2` elements of space rather than `radius * 2 + 1`, which is a
//! small tradeoff for much-increased visual quality.
//!
//! The algorithm is exposed as an iterator ([`StackBlur`]) that can wrap any
//! other iterator that yields elements of [`StackBlurrable`]. The [`StackBlur`]
//! will then yield elements blurred by the specified radius.
//!
//! ## Benefits of this crate
//!
//! Stackblur is essentially constant-time with respect to the blur radius.
//! Regardless of the radius, it always performs exactly one scan over the
//! input iterator and outputs exactly the same number of elements.
//!
//! Additionally, it produces results that are comparable to slow and expensive
//! Gaussian blurs. As opposed to box blur which uses a basic rolling average,
//! Stackblur uses a weighted average where each output pixel is affected more
//! strongly by the inputs that were closest to it.
//!
//! Despite that, Stackblur does not perform much worse compared to naive box
//! blurs, and is quite cheap compared to full Gaussian blurs, at least for the
//! CPU. The implementation in this crate will most likely beat most unoptimized
//! blurs you can find on crates.io, as well as some optimized ones, and it is
//! extremely flexible and generic.
//!
//! For a full explanation of the improvements made to the Stackblur algorithm,
//! see the [`iter`] module.
//!
//! ## Comparison to the `stackblur` crate
//!
//! `stackblur` suffers from edge bleeding and flexibility problems. For
//! example, it can only operate on buffers of 32-bit integers, and expects them
//! to be packed linear ARGB pixels. Additionally, it cannot operate on a 2D
//! subslice of a buffer (like `imgref` allows for this crate), and it does not
//! offer any streaming iterators or documentation. It also only supports blur
//! radii of up to 255.
//!
//! ## Usage
//!
//! Aside from [`StackBlurrable`] and [`StackBlur`], which host their own
//! documentation, there are helper functions like [`blur`] and [`blur_argb`]
//! for working with 2D image buffers, because doing so manually requires
//! unsafe code if you want to avoid copying.
#![feature(portable_simd, stmt_expr_attributes)]
#![cfg_attr(test, feature(test))]
use std::collections::VecDeque;
pub extern crate imgref;
use imgref::ImgRefMut;
#[cfg(test)]
mod test;
pub mod color;
pub mod iter;
pub mod traits;
use color::Argb;
use iter::StackBlur;
use traits::StackBlurrable;
/// Applies a Stackblur of the given `radius` to a 2D buffer in place, treating
/// every element of the buffer as one pixel.
///
/// The blur runs as two passes: first along every row, then along every
/// column of the row-blurred result.
///
/// # Parameters
///
/// - `buffer`: the image view to blur; it is overwritten with the result.
/// - `radius`: the blur radius handed to each [`StackBlur`].
/// - `to_blurrable`: converts a pixel in the buffer's native format into a
///   [`StackBlurrable`] value that [`StackBlur`] can consume.
/// - `to_pixel`: converts a blurred value back into the buffer's native pixel
///   format.
///
/// # Panics
///
/// Panics if a [`StackBlur`] yields fewer elements than the window being
/// written has slots.
pub fn blur<T, B: StackBlurrable>(
    buffer: &mut ImgRefMut<T>,
    radius: usize,
    mut to_blurrable: impl FnMut(&T) -> B,
    mut to_pixel: impl FnMut(B) -> T,
) {
    use imgref_iter::iter::{IterWindows, IterWindowsPtrMut};
    use imgref_iter::traits::{ImgIter, ImgIterMut, ImgIterPtrMut};

    // A single scratch queue is lent to every `StackBlur` built below, so it
    // is shared by all windows of both passes.
    let mut scratch: VecDeque<B> = VecDeque::new();

    // One blur pass over paired windows: `src_windows` supplies the pixels to
    // read and `dst_windows` supplies raw pointers to the slots to overwrite.
    //
    // This is a closure rather than two copies of the loop because the row
    // pass must write to the buffer *before* the column iterators are
    // constructed (otherwise the writes would invalidate their borrows, which
    // would be Undefined Behavior), so the two passes cannot simply be
    // chained into one iterator up front.
    let mut run_pass = |dst_windows: IterWindowsPtrMut<T>, src_windows: IterWindows<T>| {
        dst_windows.zip(src_windows).for_each(|(dst, src)| {
            let mut blurred = StackBlur::new(src.map(&mut to_blurrable), radius, &mut scratch);
            for slot in dst {
                let pixel = to_pixel(blurred.next().unwrap());
                // SAFETY (assumed, not verified here): `slot` comes from a
                // pointer iterator created from `buffer.as_mut_ptr()`, and
                // `buffer` is exclusively borrowed for this whole call. This
                // relies on `imgref_iter` yielding only valid, in-bounds
                // element pointers.
                unsafe { *slot = pixel };
            }
        });
    };

    let raw = buffer.as_mut_ptr();

    // Row pass; it runs to completion before anything column-related exists.
    let row_slots = unsafe { raw.iter_rows_ptr_mut() };
    let row_pixels = buffer.iter_rows();
    run_pass(row_slots, row_pixels);

    // Column pass over the row-blurred buffer.
    let col_slots = unsafe { raw.iter_cols_ptr_mut() };
    let col_pixels = buffer.iter_cols();
    run_pass(col_slots, col_pixels);
}
/// Applies a Stackblur of the given `radius` to a 2D buffer in place using
/// SIMD, treating every element of the buffer as one pixel.
///
/// Rows are blurred first, then columns of the row-blurred result. Each pass
/// walks the buffer's SIMD windows: windows that arrive as `Simd` are handled
/// `LANES` pixels at a time through the `*_simd` closures, while windows that
/// arrive as `Single` are handled one pixel at a time through the `*_single`
/// closures.
///
/// # Parameters
///
/// - `buffer`: the image view to blur; it is overwritten with the result.
/// - `radius`: the blur radius handed to each [`StackBlur`].
/// - `to_blurrable_simd`: converts `LANES` native pixels into one
///   [`StackBlurrable`] value.
/// - `to_pixel_simd`: converts one blurred SIMD value back into `LANES`
///   native pixels.
/// - `to_blurrable_single`: converts one native pixel into a
///   [`StackBlurrable`] value.
/// - `to_pixel_single`: converts one blurred value back into a native pixel.
///
/// # Panics
///
/// Panics if a [`StackBlur`] yields fewer elements than the window being
/// written has slots, or if the write and read iterators ever disagree on
/// whether a window is `Simd` or `Single`.
pub fn simd_blur<T, Bsimd: StackBlurrable, Bsingle: StackBlurrable, const LANES: usize>(
    buffer: &mut ImgRefMut<T>,
    radius: usize,
    mut to_blurrable_simd: impl FnMut([&T; LANES]) -> Bsimd,
    mut to_pixel_simd: impl FnMut(Bsimd) -> [T; LANES],
    mut to_blurrable_single: impl FnMut(&T) -> Bsingle,
    mut to_pixel_single: impl FnMut(Bsingle) -> T,
) {
    // Both imports are compiled out when building documentation.
    #[cfg(not(doc))]
    use imgref_iter::iter::{
        SimdIterWindow, SimdIterWindowPtrMut, SimdIterWindows, SimdIterWindowsPtrMut,
    };
    #[cfg(not(doc))]
    use imgref_iter::traits::{ImgIterMut, ImgSimdIter, ImgSimdIterPtrMut};

    // One blur pass over paired windows. The two scratch queues are moved in
    // and handed back at the end so that the next pass can reuse them.
    let mut run_pass = |dst_windows: SimdIterWindowsPtrMut<T, LANES>,
                        src_windows: SimdIterWindows<T, LANES>,
                        mut simd_scratch: VecDeque<Bsimd>,
                        mut single_scratch: VecDeque<Bsingle>| {
        for window in dst_windows.zip(src_windows) {
            match window {
                // `LANES` pixels per step.
                (SimdIterWindowPtrMut::Simd(dst), SimdIterWindow::Simd(src)) => {
                    let lanes = src.map(&mut to_blurrable_simd);
                    let mut blurred = StackBlur::new(lanes, radius, &mut simd_scratch);
                    for group in dst {
                        let slots = group.into_iter();
                        let pixels = to_pixel_simd(blurred.next().unwrap());
                        for (slot, pixel) in slots.zip(pixels) {
                            // SAFETY (assumed, not verified here): `slot`
                            // comes from a pointer iterator created from
                            // `buffer.as_mut_ptr()`, and `buffer` is
                            // exclusively borrowed for this whole call. This
                            // relies on `imgref_iter` yielding only valid,
                            // in-bounds element pointers.
                            unsafe { *slot = pixel };
                        }
                    }
                }
                // One pixel per step.
                (SimdIterWindowPtrMut::Single(dst), SimdIterWindow::Single(src)) => {
                    let singles = src.map(&mut to_blurrable_single);
                    let mut blurred = StackBlur::new(singles, radius, &mut single_scratch);
                    for slot in dst {
                        let pixel = to_pixel_single(blurred.next().unwrap());
                        // SAFETY (assumed, not verified here): same reasoning
                        // as for the SIMD arm above.
                        unsafe { *slot = pixel };
                    }
                }
                // The writer and the reader are built from the same buffer
                // with the same `LANES`, so they are expected to agree on the
                // kind of every window.
                _ => unreachable!(),
            }
        }

        (simd_scratch, single_scratch)
    };

    let raw = buffer.as_mut_ptr();

    // Row pass, starting from fresh (empty) scratch queues.
    let (simd_scratch, single_scratch) = run_pass(
        unsafe { raw.simd_iter_rows_ptr_mut::<LANES>() },
        buffer.simd_iter_rows::<LANES>(),
        VecDeque::new(),
        VecDeque::new(),
    );

    // Column pass over the row-blurred buffer, reusing the scratch queues
    // returned by the row pass.
    run_pass(
        unsafe { raw.simd_iter_cols_ptr_mut::<LANES>() },
        buffer.simd_iter_cols::<LANES>(),
        simd_scratch,
        single_scratch,
    );
}
/// Blurs, in place, a buffer whose pixels are 32-bit packed ARGB values
/// (0xAARRGGBB).
///
/// This is [`blur`] with the conversion closures already filled in: each
/// `u32` is converted to an [`Argb`] before blurring and converted back
/// afterwards. These conversions provide good results for blur radii <= 4096;
/// larger radii may overflow.
pub fn blur_argb(buffer: &mut ImgRefMut<u32>, radius: usize) {
    blur(
        buffer,
        radius,
        // Packed pixel -> blurrable colour.
        |&packed| Argb::from(packed),
        // Blurred colour -> packed pixel.
        Argb::into,
    );
}
/// Blurs, in place and with SIMD, a buffer whose pixels are 32-bit packed ARGB
/// values (0xAARRGGBB).
///
/// This is [`simd_blur`] with the conversion closures already filled in, both
/// for groups of `LANES` pixels and for single pixels. These conversions
/// provide good results for blur radii <= 4096; larger radii may overflow.
pub fn simd_blur_argb<const LANES: usize>(buffer: &mut ImgRefMut<u32>, radius: usize) {
    simd_blur(
        buffer,
        radius,
        // `LANES` packed pixels -> one blurrable SIMD colour. The references
        // are dereferenced into an owned `[u32; LANES]` first.
        |lanes: [&u32; LANES]| Argb::from(lanes.map(|lane| *lane)),
        // Blurred SIMD colour -> `LANES` packed pixels.
        Argb::into,
        // Single packed pixel -> blurrable colour.
        |&packed| Argb::from(packed),
        // Blurred colour -> single packed pixel.
        Argb::into,
    );
}