fast image operations
optimize transposition for powers of two
| -rw-r--r-- | Cargo.toml | 2 | ||||
| -rw-r--r-- | benches/4_128x128.imgbuf | bin | 0 -> 65536 bytes | |||
| -rw-r--r-- | benches/4_160x160.imgbuf | bin | 102400 -> 0 bytes | |||
| -rw-r--r-- | benches/affine_transformations.rs | 2 | ||||
| -rw-r--r-- | src/affine.rs | 140 | ||||
| -rw-r--r-- | src/lib.rs | 15 | ||||
| -rw-r--r-- | src/small_cat.png | bin | 91214 -> 75106 bytes |
7 files changed, 157 insertions, 2 deletions
@@ -1,6 +1,6 @@ [package] name = "fimg" -version = "0.4.1" +version = "0.4.2" authors = ["bend-n <[email protected]>"] license = "MIT" edition = "2021" diff --git a/benches/4_128x128.imgbuf b/benches/4_128x128.imgbuf Binary files differnew file mode 100644 index 0000000..07fffe9 --- /dev/null +++ b/benches/4_128x128.imgbuf diff --git a/benches/4_160x160.imgbuf b/benches/4_160x160.imgbuf Binary files differdeleted file mode 100644 index 58aabd0..0000000 --- a/benches/4_160x160.imgbuf +++ /dev/null diff --git a/benches/affine_transformations.rs b/benches/affine_transformations.rs index b74d9ba..74742e1 100644 --- a/benches/affine_transformations.rs +++ b/benches/affine_transformations.rs @@ -4,7 +4,7 @@ macro_rules! bench { (fn $name: ident() { run $fn: ident() }) => { fn $name() { let mut img: Image<_, 4> = - Image::build(160, 160).buf(include_bytes!("4_160x160.imgbuf").to_vec()); + Image::build(128, 128).buf(include_bytes!("4_128x128.imgbuf").to_vec()); for _ in 0..256 { #[allow(unused_unsafe)] unsafe { diff --git a/src/affine.rs b/src/affine.rs index 362a746..5ec795a 100644 --- a/src/affine.rs +++ b/src/affine.rs @@ -139,6 +139,22 @@ impl<const CHANNELS: usize> Image<&mut [u8], CHANNELS> { /// UB if supplied image rectangular unsafe fn transpose<const CHANNELS: usize>(img: &mut Image<&mut [u8], CHANNELS>) { debug_assert_eq!(img.width(), img.height()); + if img.width().is_power_of_two() { + // SAFETY: caller gurantees + unsafe { transpose_diag(img, 0, img.width() as usize) }; + } else { + // SAFETY: caller gurantees + unsafe { transpose_non_power_of_two(img) }; + } +} + +/// Transpose a square (non power of two) image. +/// +/// # Safety +/// +/// UB if image not square +unsafe fn transpose_non_power_of_two<const CHANNELS: usize>(img: &mut Image<&mut [u8], CHANNELS>) { + debug_assert_eq!(img.width(), img.height()); let size = img.width() as usize; // SAFETY: no half pixels let b = unsafe { img.buffer.as_chunks_unchecked_mut::<CHANNELS>() }; @@ -152,12 +168,136 @@ unsafe fn transpose<const CHANNELS: usize>(img: &mut Image<&mut [u8], CHANNELS>) } } +/// break it down until +const TILE: usize = 4; +/// # Safety +/// +/// be careful +unsafe fn transpose_tile<const CHANNELS: usize>( + img: &mut Image<&mut [u8], CHANNELS>, + row: usize, + col: usize, + size: usize, +) { + if size > TILE { + #[allow( + clippy::multiple_unsafe_ops_per_block, + clippy::undocumented_unsafe_blocks + )] + unsafe { + // top left + transpose_tile(img, row, col, size / 2); + // top right + transpose_tile(img, row, col + size / 2, size / 2); + // bottom left + transpose_tile(img, row + size / 2, col, size / 2); + // bottom right + transpose_tile(img, row + size / 2, col + size / 2, size / 2); + } + } else { + let s = img.width() as usize; + let b = img.flatten_mut(); + for i in 0..size { + for j in 0..size { + // SAFETY: this should be okay if we careful + unsafe { b.swap_unchecked((row + i) * s + (col + j), (col + j) * s + (row + i)) }; + } + } + } +} + +/// # Safety +/// +/// be careful +unsafe fn transpose_diag<const CHANNELS: usize>( + img: &mut Image<&mut [u8], CHANNELS>, + pos: usize, + size: usize, +) { + if size > TILE { + #[allow( + clippy::multiple_unsafe_ops_per_block, + clippy::undocumented_unsafe_blocks + )] + unsafe { + transpose_diag(img, pos, size / 2); + transpose_tile(img, pos, pos + size / 2, size / 2); + transpose_diag(img, pos + size / 2, size / 2); + } + } else { + let s = img.width() as usize; + let b = img.flatten_mut(); + for i in 1..size { + for j in 0..i { + // SAFETY: this is fine unless pos / size is out of bounds, which it cant be + unsafe { b.swap_unchecked((pos + i) * s + (pos + j), (pos + j) * s + (pos + i)) }; + } + } + } +} + #[cfg(test)] mod tests { use super::*; use crate::img; #[test] + fn transp() { + #[rustfmt::skip] + let mut i = Image::<_, 1>::build(8, 8).buf(vec![ + 0, 0, 1, 1, 0, 0, 1, 1, + 0, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 0, 1, 1, + 0, 1, 1, 1, 0, 0, 1, 1, + 0, 1, 1, 1, 1, 0, 1, 1, + 0, 1, 1, 1, 1, 0, 0, 1, + 0, 1, 1, 1, 1, 0, 1, 0, + 0, 0, 1, 1, 1, 0, 1, 1, + ]); + unsafe { transpose(&mut i.as_mut()) }; + #[rustfmt::skip] + assert_eq!(i.take_buffer(), vec![ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 1, 1, 1, 1, 1, 0, + 1, 0, 1, 1, 1, 1, 1, 1, + 1, 1, 0, 1, 1, 1, 1, 1, + 0, 1, 1, 0, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 0, 1, 1, + 1, 1, 1, 1, 1, 1, 0, 1 + ]); + } + + #[test] + fn transp9() { + #[rustfmt::skip] + let mut i = Image::<_, 1>::build(9, 9).buf(vec![ + 0, 0, 1, 1, 0, 0, 1, 1, 0, + 0, 1, 0, 1, 1, 0, 1, 1, 1, + 0, 1, 1, 0, 1, 0, 1, 1, 0, + 0, 1, 1, 1, 0, 0, 1, 1, 0, + 0, 1, 1, 1, 1, 0, 1, 1, 1, + 0, 1, 1, 1, 1, 0, 0, 1, 1, + 0, 1, 1, 1, 1, 0, 1, 0, 1, + 0, 0, 1, 1, 1, 0, 1, 1, 0, + 1, 1, 1, 0, 1, 1, 0, 1, 0, + ]); + unsafe { transpose(&mut i.as_mut()) }; + #[rustfmt::skip] + assert_eq!(i.take_buffer(), vec![ + 0, 0, 0, 0, 0, 0, 0, 0, 1, + 0, 1, 1, 1, 1, 1, 1, 0, 1, + 1, 0, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 0, 1, 1, 1, 1, 1, 0, + 0, 1, 1, 0, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 1, + 1, 1, 1, 1, 1, 0, 1, 1, 0, + 1, 1, 1, 1, 1, 1, 0, 1, 1, + 0, 1, 0, 0, 1, 1, 1, 0, 0 + ]); + } + + #[test] fn rotate_90() { let mut from = img![ [00, 01] @@ -3,6 +3,7 @@ //! Provides fast image operations, such as rotation, flipping, and overlaying. #![feature( slice_swap_unchecked, + stmt_expr_attributes, generic_const_exprs, slice_as_chunks, unchecked_math, @@ -218,6 +219,13 @@ impl<T: std::ops::Deref<Target = [u8]>, const CHANNELS: usize> Image<T, CHANNELS self.buffer.array_chunks::<CHANNELS>() } + #[inline] + /// Flatten the chunks of this image into a slice of slices. + pub fn flatten(&mut self) -> &[[u8; CHANNELS]] { + // SAFETY: buffer cannot have half pixels + unsafe { self.buffer.as_chunks_unchecked::<CHANNELS>() } + } + /// Return a pixel at (x, y). /// # Safety /// @@ -258,6 +266,13 @@ impl<T: std::ops::DerefMut<Target = [u8]>, const CHANNELS: usize> Image<T, CHANN self.buffer.array_chunks_mut::<CHANNELS>() } + #[inline] + /// Flatten the chunks of this image into a mutable slice of slices. + pub fn flatten_mut(&mut self) -> &mut [[u8; CHANNELS]] { + // SAFETY: buffer cannot have half pixels + unsafe { self.buffer.as_chunks_unchecked_mut::<CHANNELS>() } + } + /// Set the pixel at x, y /// /// # Safety diff --git a/src/small_cat.png b/src/small_cat.png Binary files differindex 3b30807..d39b082 100644 --- a/src/small_cat.png +++ b/src/small_cat.png |