icing?
bendn 5 weeks ago
parent d36d9fe · commit 74049a9
-rw-r--r--src/lib.rs82
1 files changed, 79 insertions, 3 deletions
diff --git a/src/lib.rs b/src/lib.rs
index 173bbb2..f7bbac4 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -507,8 +507,12 @@ fn into(
color: [u8; 3],
) {
type u8x24 = Simd<u8, 24>;
+ type u8x12 = Simd<u8, 12>;
+ type u8x6 = Simd<u8, 6>; // 6 u8 lanes, sized for `[color; 2].flatten()`
use std::simd::prelude::*;
let c = u8x24::from_array([color; 8].flatten()).cast::<u16>();
+ let c2 = u8x12::from_array([color; 4].flatten()).cast::<u16>();
+ let c3 = u8x6::from_array([color; 2].flatten()).cast::<u16>();
for y in 0..with.height() {
let mut wx_ = 0;
while with.width() - wx_ >= 8 {
@@ -526,8 +530,8 @@ fn into(
)
.flatten();
- let mask =
- simd_swizzle!(first8, BGR_DISCARD_ALPHA).cast::<u16>();
+ let mask: u8x24 = simd_swizzle!(first8, BGR_DISCARD_ALPHA);
+ let mask = mask.cast();
let to_b = i
.pixels_mut((x_ + wx_..x_ + wx_ + 8, y + y_))
.as_flattened_mut();
@@ -541,8 +545,71 @@ fn into(
wx_ += 8;
};
}
+ while with.width() - wx_ >= 4 {
+ unsafe {
+ // Blend 4 pixels per iteration: `first8` holds bytes 0..16 (four 4-byte source pixels, flattened), not 0..32.
+ let first8 = u8x16::from_array(
+ with.pixels((wx_..wx_ + 4, y))
+ .as_array::<4>()
+ .unwrap_unchecked()
+ .flatten(),
+ );
+ const BGR_DISCARD_ALPHA: [usize; 12] = car::map!(
+ range::<16>().chunked::<4>(),
+ |[r, g, b, _]| [b, g, r]
+ )
+ .flatten();
+
+ let mask: u8x12 = simd_swizzle!(first8, BGR_DISCARD_ALPHA);
+ let mask = mask.cast();
+ let to_b = i
+ .pixels_mut((x_ + wx_..x_ + wx_ + 4, y + y_))
+ .as_flattened_mut();
+ let to = u8x12::load_or_default(to_b).cast::<u16>();
+ let result: u8x12 = ((c2 * mask
+ + (Simd::splat(255) - mask) * to.cast())
+ / Simd::splat(255))
+ .cast::<u8>();
+ result.store_select(to_b, Mask::from_bitmask(!0));
+
+ wx_ += 4;
+ };
+ }
+ while with.width() - wx_ >= 2 {
+ unsafe {
+ // Blend 2 pixels per iteration: bytes 0..8 (two 4-byte source pixels) against the 6-lane colour `c3`, writing exactly 2 destination pixels.
+ let first8 = u8x8::from_array(
+ with.pixels((wx_..wx_ + 2, y))
+ .as_array::<2>()
+ .unwrap_unchecked()
+ .flatten(),
+ );
+ const BGR_DISCARD_ALPHA: [usize; 6] = car::map!(
+ range::<8>().chunked::<4>(),
+ |[r, g, b, _]| [b, g, r]
+ )
+ .flatten();
+
+ let mask: u8x6 = simd_swizzle!(first8, BGR_DISCARD_ALPHA);
+ let mask = mask.cast();
+ let to_b = i
+ .pixels_mut((x_ + wx_..x_ + wx_ + 2, y + y_))
+ .as_flattened_mut();
+ let to = u8x6::load_or_default(to_b).cast::<u16>();
+ let result: u8x6 = ((c3 * mask
+ + (Simd::splat(255) - mask) * to.cast())
+ / Simd::splat(255))
+ .cast::<u8>();
+ result.store_select(to_b, Mask::from_bitmask(!0));
+
+ wx_ += 2;
+ };
+ }
- for x in wx_..with.width() {
+ let n = with.width() - wx_;
+ assert!(n < 4);
+ for k in 0..n {
+ let x = k + wx_;
let d = unsafe { with.pixel(x, y) };
let x = unsafe {
i.pixel_mut(x.wrapping_add(x_), y.wrapping_add(y_))
@@ -550,6 +617,15 @@ fn into(
let mask = d.init();
blend(mask, color, x);
}
+ // assert!(with.width() - wx_ < 4);
+ // for x in wx_..with.width() {
+ // let d = unsafe { with.pixel(x, y) };
+ // let x = unsafe {
+ // i.pixel_mut(x.wrapping_add(x_), y.wrapping_add(y_))
+ // };
+ // let mask = d.init();
+ // blend(mask, color, x);
+ // }
}
}