fast image operations
optimize transposition for powers of two
bendn 2023-09-22
parent 05eb1c8 · commit f74d3dc
-rw-r--r--Cargo.toml2
-rw-r--r--benches/4_128x128.imgbufbin0 -> 65536 bytes
-rw-r--r--benches/4_160x160.imgbufbin102400 -> 0 bytes
-rw-r--r--benches/affine_transformations.rs2
-rw-r--r--src/affine.rs140
-rw-r--r--src/lib.rs15
-rw-r--r--src/small_cat.pngbin91214 -> 75106 bytes
7 files changed, 157 insertions, 2 deletions
diff --git a/Cargo.toml b/Cargo.toml
index d1554b0..245304f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "fimg"
-version = "0.4.1"
+version = "0.4.2"
authors = ["bend-n <[email protected]>"]
license = "MIT"
edition = "2021"
diff --git a/benches/4_128x128.imgbuf b/benches/4_128x128.imgbuf
new file mode 100644
index 0000000..07fffe9
--- /dev/null
+++ b/benches/4_128x128.imgbuf
Binary files differ
diff --git a/benches/4_160x160.imgbuf b/benches/4_160x160.imgbuf
deleted file mode 100644
index 58aabd0..0000000
--- a/benches/4_160x160.imgbuf
+++ /dev/null
Binary files differ
diff --git a/benches/affine_transformations.rs b/benches/affine_transformations.rs
index b74d9ba..74742e1 100644
--- a/benches/affine_transformations.rs
+++ b/benches/affine_transformations.rs
@@ -4,7 +4,7 @@ macro_rules! bench {
(fn $name: ident() { run $fn: ident() }) => {
fn $name() {
let mut img: Image<_, 4> =
- Image::build(160, 160).buf(include_bytes!("4_160x160.imgbuf").to_vec());
+ Image::build(128, 128).buf(include_bytes!("4_128x128.imgbuf").to_vec());
for _ in 0..256 {
#[allow(unused_unsafe)]
unsafe {
diff --git a/src/affine.rs b/src/affine.rs
index 362a746..5ec795a 100644
--- a/src/affine.rs
+++ b/src/affine.rs
@@ -139,6 +139,22 @@ impl<const CHANNELS: usize> Image<&mut [u8], CHANNELS> {
/// UB if supplied image rectangular
unsafe fn transpose<const CHANNELS: usize>(img: &mut Image<&mut [u8], CHANNELS>) {
debug_assert_eq!(img.width(), img.height());
+ if img.width().is_power_of_two() {
+ // SAFETY: caller gurantees
+ unsafe { transpose_diag(img, 0, img.width() as usize) };
+ } else {
+ // SAFETY: caller gurantees
+ unsafe { transpose_non_power_of_two(img) };
+ }
+}
+
+/// Transpose a square (non power of two) image.
+///
+/// # Safety
+///
+/// UB if image not square
+unsafe fn transpose_non_power_of_two<const CHANNELS: usize>(img: &mut Image<&mut [u8], CHANNELS>) {
+ debug_assert_eq!(img.width(), img.height());
let size = img.width() as usize;
// SAFETY: no half pixels
let b = unsafe { img.buffer.as_chunks_unchecked_mut::<CHANNELS>() };
@@ -152,12 +168,136 @@ unsafe fn transpose<const CHANNELS: usize>(img: &mut Image<&mut [u8], CHANNELS>)
}
}
+/// break it down until
+const TILE: usize = 4;
+/// # Safety
+///
+/// be careful
+unsafe fn transpose_tile<const CHANNELS: usize>(
+ img: &mut Image<&mut [u8], CHANNELS>,
+ row: usize,
+ col: usize,
+ size: usize,
+) {
+ if size > TILE {
+ #[allow(
+ clippy::multiple_unsafe_ops_per_block,
+ clippy::undocumented_unsafe_blocks
+ )]
+ unsafe {
+ // top left
+ transpose_tile(img, row, col, size / 2);
+ // top right
+ transpose_tile(img, row, col + size / 2, size / 2);
+ // bottom left
+ transpose_tile(img, row + size / 2, col, size / 2);
+ // bottom right
+ transpose_tile(img, row + size / 2, col + size / 2, size / 2);
+ }
+ } else {
+ let s = img.width() as usize;
+ let b = img.flatten_mut();
+ for i in 0..size {
+ for j in 0..size {
+ // SAFETY: this should be okay if we careful
+ unsafe { b.swap_unchecked((row + i) * s + (col + j), (col + j) * s + (row + i)) };
+ }
+ }
+ }
+}
+
+/// # Safety
+///
+/// be careful
+unsafe fn transpose_diag<const CHANNELS: usize>(
+ img: &mut Image<&mut [u8], CHANNELS>,
+ pos: usize,
+ size: usize,
+) {
+ if size > TILE {
+ #[allow(
+ clippy::multiple_unsafe_ops_per_block,
+ clippy::undocumented_unsafe_blocks
+ )]
+ unsafe {
+ transpose_diag(img, pos, size / 2);
+ transpose_tile(img, pos, pos + size / 2, size / 2);
+ transpose_diag(img, pos + size / 2, size / 2);
+ }
+ } else {
+ let s = img.width() as usize;
+ let b = img.flatten_mut();
+ for i in 1..size {
+ for j in 0..i {
+ // SAFETY: this is fine unless pos / size is out of bounds, which it cant be
+ unsafe { b.swap_unchecked((pos + i) * s + (pos + j), (pos + j) * s + (pos + i)) };
+ }
+ }
+ }
+}
+
#[cfg(test)]
mod tests {
use super::*;
use crate::img;
#[test]
+ fn transp() {
+ #[rustfmt::skip]
+ let mut i = Image::<_, 1>::build(8, 8).buf(vec![
+ 0, 0, 1, 1, 0, 0, 1, 1,
+ 0, 1, 0, 1, 1, 0, 1, 1,
+ 0, 1, 1, 0, 1, 0, 1, 1,
+ 0, 1, 1, 1, 0, 0, 1, 1,
+ 0, 1, 1, 1, 1, 0, 1, 1,
+ 0, 1, 1, 1, 1, 0, 0, 1,
+ 0, 1, 1, 1, 1, 0, 1, 0,
+ 0, 0, 1, 1, 1, 0, 1, 1,
+ ]);
+ unsafe { transpose(&mut i.as_mut()) };
+ #[rustfmt::skip]
+ assert_eq!(i.take_buffer(), vec![
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 1, 1, 1, 1, 1, 1, 0,
+ 1, 0, 1, 1, 1, 1, 1, 1,
+ 1, 1, 0, 1, 1, 1, 1, 1,
+ 0, 1, 1, 0, 1, 1, 1, 1,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 1, 1, 1, 1, 0, 1, 1,
+ 1, 1, 1, 1, 1, 1, 0, 1
+ ]);
+ }
+
+ #[test]
+ fn transp9() {
+ #[rustfmt::skip]
+ let mut i = Image::<_, 1>::build(9, 9).buf(vec![
+ 0, 0, 1, 1, 0, 0, 1, 1, 0,
+ 0, 1, 0, 1, 1, 0, 1, 1, 1,
+ 0, 1, 1, 0, 1, 0, 1, 1, 0,
+ 0, 1, 1, 1, 0, 0, 1, 1, 0,
+ 0, 1, 1, 1, 1, 0, 1, 1, 1,
+ 0, 1, 1, 1, 1, 0, 0, 1, 1,
+ 0, 1, 1, 1, 1, 0, 1, 0, 1,
+ 0, 0, 1, 1, 1, 0, 1, 1, 0,
+ 1, 1, 1, 0, 1, 1, 0, 1, 0,
+ ]);
+ unsafe { transpose(&mut i.as_mut()) };
+ #[rustfmt::skip]
+ assert_eq!(i.take_buffer(), vec![
+ 0, 0, 0, 0, 0, 0, 0, 0, 1,
+ 0, 1, 1, 1, 1, 1, 1, 0, 1,
+ 1, 0, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 0, 1, 1, 1, 1, 1, 0,
+ 0, 1, 1, 0, 1, 1, 1, 1, 1,
+ 0, 0, 0, 0, 0, 0, 0, 0, 1,
+ 1, 1, 1, 1, 1, 0, 1, 1, 0,
+ 1, 1, 1, 1, 1, 1, 0, 1, 1,
+ 0, 1, 0, 0, 1, 1, 1, 0, 0
+ ]);
+ }
+
+ #[test]
fn rotate_90() {
let mut from = img![
[00, 01]
diff --git a/src/lib.rs b/src/lib.rs
index 9de0643..9388a3f 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -3,6 +3,7 @@
//! Provides fast image operations, such as rotation, flipping, and overlaying.
#![feature(
slice_swap_unchecked,
+ stmt_expr_attributes,
generic_const_exprs,
slice_as_chunks,
unchecked_math,
@@ -218,6 +219,13 @@ impl<T: std::ops::Deref<Target = [u8]>, const CHANNELS: usize> Image<T, CHANNELS
self.buffer.array_chunks::<CHANNELS>()
}
+ #[inline]
+ /// Flatten the chunks of this image into a slice of slices.
+ pub fn flatten(&mut self) -> &[[u8; CHANNELS]] {
+ // SAFETY: buffer cannot have half pixels
+ unsafe { self.buffer.as_chunks_unchecked::<CHANNELS>() }
+ }
+
/// Return a pixel at (x, y).
/// # Safety
///
@@ -258,6 +266,13 @@ impl<T: std::ops::DerefMut<Target = [u8]>, const CHANNELS: usize> Image<T, CHANN
self.buffer.array_chunks_mut::<CHANNELS>()
}
+ #[inline]
+ /// Flatten the chunks of this image into a mutable slice of slices.
+ pub fn flatten_mut(&mut self) -> &mut [[u8; CHANNELS]] {
+ // SAFETY: buffer cannot have half pixels
+ unsafe { self.buffer.as_chunks_unchecked_mut::<CHANNELS>() }
+ }
+
/// Set the pixel at x, y
///
/// # Safety
diff --git a/src/small_cat.png b/src/small_cat.png
index 3b30807..d39b082 100644
--- a/src/small_cat.png
+++ b/src/small_cat.png
Binary files differ