e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
//! A fast, iterative, correct approach to Stackblur, resulting in a very smooth
//! and high-quality output, with no edge bleeding.
//!
//! This crate implements a tweaked version of the Stackblur algorithm requiring
//! `radius * 2 + 2` elements of space rather than `radius * 2 + 1`, which is a
//! small tradeoff for much-increased visual quality.
//!
//! The algorithm is exposed as an iterator ([`StackBlur`]) that can wrap any
//! other iterator that yields elements of [`StackBlurrable`]. The [`StackBlur`]
//! will then yield elements blurred by the specified radius.
//!
//! ## Benefits of this crate
//!
//! Stackblur is essentially constant-time with respect to the radius.
//! Regardless of the radius, it always performs only 1 scan over the input
//! iterator and outputs exactly the same number of elements.
//!
//! Additionally, it produces results that are comparable to slow and expensive
//! Gaussian blurs. As opposed to box blur which uses a basic rolling average,
//! Stackblur uses a weighted average where each output pixel is affected more
//! strongly by the inputs that were closest to it.
//!
//! Despite that, Stackblur does not perform much worse compared to naive box
//! blurs, and is quite cheap compared to full Gaussian blurs, at least for the
//! CPU. The implementation in this crate will most likely beat most unoptimized
//! blurs you can find on crates.io, as well as some optimized ones, and it is
//! extremely flexible and generic.
//!
//! For a full explanation of the improvements made to the Stackblur algorithm,
//! see the [`iter`] module.
//!
//! ## Comparison to the `stackblur` crate
//!
//! `stackblur` suffers from edge bleeding and flexibility problems. For
//! example, it can only operate on buffers of 32-bit integers, and expects them
//! to be packed linear ARGB pixels. Additionally, it cannot operate on a 2D
//! subslice of a buffer (like `imgref` allows for this crate), and it does not
//! offer any streaming iterators or documentation. It also only supports a
//! blur radius of up to 255.
//!
//! ## Usage
//!
//! Aside from [`StackBlurrable`] and [`StackBlur`] which host their own
//! documentation, there are helper functions like [`blur`] and [`blur_argb`]
//! that can be used to interact with 2D image buffers, due to the fact that
//! doing so manually involves unsafe code (if you want no-copy).

#![feature(portable_simd, stmt_expr_attributes)]
#![cfg_attr(test, feature(test))]

use std::collections::VecDeque;

pub extern crate imgref;

use imgref::ImgRefMut;

#[cfg(test)]
mod test;

pub mod color;
pub mod iter;
pub mod traits;

use color::Argb;
use iter::StackBlur;
use traits::StackBlurrable;

/// Blurs a 2D buffer in place, treating every element as one pixel.
///
/// All rows are blurred first, then all columns. `to_blurrable` turns a pixel
/// of the buffer into a [`StackBlurrable`] value for [`StackBlur`] to consume,
/// and `to_pixel` turns each blurred value back into the buffer's native pixel
/// format before it is written to the same position.
///
/// # Panics
///
/// Panics if a [`StackBlur`] stops yielding values before its row or column
/// has been completely written.
pub fn blur<T, B: StackBlurrable>(
    buffer: &mut ImgRefMut<T>,
    radius: usize,
    mut to_blurrable: impl FnMut(&T) -> B,
    mut to_pixel: impl FnMut(B) -> T,
) {
    use imgref_iter::iter::{IterWindows, IterWindowsPtrMut};
    use imgref_iter::traits::{ImgIter, ImgIterMut, ImgIterPtrMut};

    // Queue handed to every `StackBlur`; one instance is reused for all windows.
    let mut scratch = VecDeque::new();

    // A single pass over a set of windows (all rows, or all columns). It is a
    // closure rather than a duplicated loop because the two passes must stay
    // separate: all writes to the rows of the buffer have to happen before the
    // column iterators are constructed, otherwise those writes would invalidate
    // the column borrows, which is Undefined Behavior.
    let mut run_pass = |dst_windows: IterWindowsPtrMut<T>, src_windows: IterWindows<T>| {
        for (dst, src) in dst_windows.zip(src_windows) {
            let mut blurred = StackBlur::new(src.map(&mut to_blurrable), radius, &mut scratch);

            for slot in dst {
                let value = blurred.next().unwrap();
                // NOTE(review): soundness relies on `slot` pointing at a live
                // element of `buffer` — confirm against the `imgref_iter`
                // pointer-iterator contract.
                unsafe { *slot = to_pixel(value) };
            }
        }
    };

    let raw = buffer.as_mut_ptr();

    // Horizontal pass.
    run_pass(unsafe { raw.iter_rows_ptr_mut() }, buffer.iter_rows());

    // Vertical pass; its iterators are only created once the row pass is done.
    run_pass(unsafe { raw.iter_cols_ptr_mut() }, buffer.iter_cols());
}

/// Blurs a 2D buffer in place using SIMD, treating every element as one pixel.
///
/// All rows are blurred first, then all columns. The window iterators yield
/// either SIMD windows, which carry `LANES` pixels per step, or single
/// windows, which carry one pixel per step:
///
/// - SIMD windows are converted with `to_blurrable_simd` and written back with
///   `to_pixel_simd`.
/// - Single windows are converted with `to_blurrable_single` and written back
///   with `to_pixel_single`.
///
/// Each conversion pair maps between the buffer's native pixel format and the
/// [`StackBlurrable`] values consumed by [`StackBlur`].
///
/// # Panics
///
/// Panics if a [`StackBlur`] stops yielding values before its window has been
/// completely written, or if the writing and reading iterators disagree on
/// whether a window is SIMD or single.
pub fn simd_blur<T, Bsimd: StackBlurrable, Bsingle: StackBlurrable, const LANES: usize>(
    buffer: &mut ImgRefMut<T>,
    radius: usize,
    mut to_blurrable_simd: impl FnMut([&T; LANES]) -> Bsimd,
    mut to_pixel_simd: impl FnMut(Bsimd) -> [T; LANES],
    mut to_blurrable_single: impl FnMut(&T) -> Bsingle,
    mut to_pixel_single: impl FnMut(Bsingle) -> T,
) {
    #[cfg(not(doc))]
    use imgref_iter::iter::{
        SimdIterWindow, SimdIterWindowPtrMut, SimdIterWindows, SimdIterWindowsPtrMut,
    };
    #[cfg(not(doc))]
    use imgref_iter::traits::{ImgIterMut, ImgSimdIter, ImgSimdIterPtrMut};

    // A single pass over a set of windows (all rows, or all columns). The two
    // queues are taken by value and handed back at the end so that the same
    // allocations can be reused by the next pass.
    let mut run_pass = |dst_windows: SimdIterWindowsPtrMut<T, LANES>,
                        src_windows: SimdIterWindows<T, LANES>,
                        mut wide_ops: VecDeque<Bsimd>,
                        mut narrow_ops: VecDeque<Bsingle>| {
        for pair in dst_windows.zip(src_windows) {
            match pair {
                // `LANES` pixels per step.
                (SimdIterWindowPtrMut::Simd(dst), SimdIterWindow::Simd(src)) => {
                    let mut blurred =
                        StackBlur::new(src.map(&mut to_blurrable_simd), radius, &mut wide_ops);

                    for slots in dst {
                        let pixels = to_pixel_simd(blurred.next().unwrap());
                        for (slot, pixel) in slots.into_iter().zip(pixels) {
                            // NOTE(review): relies on every `slot` pointing at
                            // a live element of `buffer` — confirm against the
                            // `imgref_iter` pointer-iterator contract.
                            unsafe { *slot = pixel };
                        }
                    }
                }

                // One pixel per step.
                (SimdIterWindowPtrMut::Single(dst), SimdIterWindow::Single(src)) => {
                    let mut blurred =
                        StackBlur::new(src.map(&mut to_blurrable_single), radius, &mut narrow_ops);

                    for slot in dst {
                        let value = blurred.next().unwrap();
                        // NOTE(review): same pointer-validity assumption as in
                        // the SIMD arm above.
                        unsafe { *slot = to_pixel_single(value) };
                    }
                }

                // The writer and the reader are expected to produce the same
                // kind of window at every step.
                _ => unreachable!(),
            }
        }

        (wide_ops, narrow_ops)
    };

    let raw = buffer.as_mut_ptr();

    // Horizontal pass; the queues it returns are fed into the vertical pass.
    let (wide_ops, narrow_ops) = run_pass(
        unsafe { raw.simd_iter_rows_ptr_mut::<LANES>() },
        buffer.simd_iter_rows::<LANES>(),
        VecDeque::new(),
        VecDeque::new(),
    );

    // Vertical pass; its iterators are only created once the row pass is done.
    run_pass(
        unsafe { raw.simd_iter_cols_ptr_mut::<LANES>() },
        buffer.simd_iter_cols::<LANES>(),
        wide_ops,
        narrow_ops,
    );
}

/// Blurs a buffer whose pixels are 32-bit packed ARGB values (0xAARRGGBB).
///
/// This simply calls [`blur`] with [`Argb`] conversions filled in: each pixel
/// is converted with `Argb::from` and converted back with `Argb::into`. The
/// results are good for blur radii <= 4096; larger radii may overflow.
pub fn blur_argb(buffer: &mut ImgRefMut<u32>, radius: usize) {
    blur(buffer, radius, |&px| Argb::from(px), Argb::into);
}

/// Blurs a buffer whose pixels are 32-bit packed ARGB values (0xAARRGGBB),
/// using SIMD.
///
/// This simply calls [`simd_blur`] with [`Argb`] conversions filled in for
/// both the `LANES`-wide path and the single-pixel path. The results are good
/// for blur radii <= 4096; larger radii may overflow.
pub fn simd_blur_argb<const LANES: usize>(buffer: &mut ImgRefMut<u32>, radius: usize) {
    simd_blur(
        buffer,
        radius,
        // Copy the referenced pixels into an owned `[u32; LANES]` first.
        |lanes: [&u32; LANES]| Argb::from(lanes.map(|&px| px)),
        Argb::into,
        |&px| Argb::from(px),
        Argb::into,
    );
}