| -rw-r--r-- | src/lib.rs | 82 |
1 files changed, 79 insertions, 3 deletions
@@ -507,8 +507,12 @@ fn into( color: [u8; 3], ) { type u8x24 = Simd<u8, 24>; + type u8x12 = Simd<u8, 12>; + type u8x6 = u8x6; use std::simd::prelude::*; let c = u8x24::from_array([color; 8].flatten()).cast::<u16>(); + let c2 = u8x12::from_array([color; 4].flatten()).cast::<u16>(); + let c3 = u8x6::from_array([color; 2].flatten()).cast::<u16>(); for y in 0..with.height() { let mut wx_ = 0; while with.width() - wx_ >= 8 { @@ -526,8 +530,8 @@ fn into( ) .flatten(); - let mask = - simd_swizzle!(first8, BGR_DISCARD_ALPHA).cast::<u16>(); + let mask: u8x24 = simd_swizzle!(first8, BGR_DISCARD_ALPHA); + let mask = mask.cast(); let to_b = i .pixels_mut((x_ + wx_..x_ + wx_ + 8, y + y_)) .as_flattened_mut(); @@ -541,8 +545,71 @@ fn into( wx_ += 8; }; } + while with.width() - wx_ >= 4 { + unsafe { + // 0..32 + let first8 = u8x16::from_array( + with.pixels((wx_..wx_ + 4, y)) + .as_array::<4>() + .unwrap_unchecked() + .flatten(), + ); + const BGR_DISCARD_ALPHA: [usize; 12] = car::map!( + range::<16>().chunked::<4>(), + |[r, g, b, _]| [b, g, r] + ) + .flatten(); + + let mask: u8x12 = simd_swizzle!(first8, BGR_DISCARD_ALPHA); + let mask = mask.cast(); + let to_b = i + .pixels_mut((x_ + wx_..x_ + wx_ + 4, y + y_)) + .as_flattened_mut(); + let to = u8x12::load_or_default(to_b).cast::<u16>(); + let result: u8x12 = ((c2 * mask + + (Simd::splat(255) - mask) * to.cast()) + / Simd::splat(255)) + .cast::<u8>(); + result.store_select(to_b, Mask::from_bitmask(!0)); + + wx_ += 4; + }; + } + while with.width() - wx_ >= 2 { + unsafe { + // 0..32 + let first8 = u8x8::from_array( + with.pixels((wx_..wx_ + 2, y)) + .as_array::<2>() + .unwrap_unchecked() + .flatten(), + ); + const BGR_DISCARD_ALPHA: [usize; 6] = car::map!( + range::<8>().chunked::<4>(), + |[r, g, b, _]| [b, g, r] + ) + .flatten(); + + let mask: u8x6 = simd_swizzle!(first8, BGR_DISCARD_ALPHA); + let mask = mask.cast(); + let to_b = i + .pixels_mut((x_ + wx_..x_ + wx_ + 4, y + y_)) + .as_flattened_mut(); + let to = u8x6::load_or_default(to_b).cast::<u16>(); + let result: u8x6 = ((c2 * mask + + (Simd::splat(255) - mask) * to.cast()) + / Simd::splat(255)) + .cast::<u8>(); + result.store_select(to_b, Mask::from_bitmask(!0)); + + wx_ += 2; + }; + } - for x in wx_..with.width() { + let n = with.width() - wx_; + assert!(n < 4); + for k in 0..n { + let x = k + wx_; let d = unsafe { with.pixel(x, y) }; let x = unsafe { i.pixel_mut(x.wrapping_add(x_), y.wrapping_add(y_)) @@ -550,6 +617,15 @@ fn into( let mask = d.init(); blend(mask, color, x); } + // assert!(with.width() - wx_ < 4); + // for x in wx_..with.width() { + // let d = unsafe { with.pixel(x, y) }; + // let x = unsafe { + // i.pixel_mut(x.wrapping_add(x_), y.wrapping_add(y_)) + // }; + // let mask = d.init(); + // blend(mask, color, x); + // } } } |