fast image operations
Diffstat (limited to 'src/term/b64.rs')
| -rw-r--r-- | src/term/b64.rs | 187 |
1 files changed, 0 insertions, 187 deletions
diff --git a/src/term/b64.rs b/src/term/b64.rs deleted file mode 100644 index b91cdfd..0000000 --- a/src/term/b64.rs +++ /dev/null @@ -1,187 +0,0 @@ -#![allow(clippy::undocumented_unsafe_blocks)] -use core::intrinsics::simd::simd_cast; -#[cfg(all(target_feature = "avx2", not(miri)))] -use std::arch::x86_64::*; -use std::{ - intrinsics::transmute_unchecked, - simd::{MaskElement, SimdElement, prelude::*}, -}; - -#[test] -fn b64() { - fn t(i: &'static str, o: &'static str) { - let mut x = Vec::with_capacity(size(i.as_bytes())); - unsafe { portable(i.as_bytes(), x.as_mut_ptr()) }; - unsafe { x.set_len(size(i.as_bytes())) }; - assert_eq!(x, o.as_bytes()); - } - - t("Hello World!", "SGVsbG8gV29ybGQh"); - t("Hello World", "SGVsbG8gV29ybGQ="); -} - -pub fn encode(i: &[u8]) -> String { - let mut x = Vec::with_capacity(size(i)); - unsafe { portable(i, x.as_mut_ptr()) }; - unsafe { x.set_len(size(i)) }; - unsafe { String::from_utf8_unchecked(x) } -} - -trait Cast<T, const N: usize> { - fn cas<U: SimdT>(self) -> U; -} -trait SimdT {} -impl<T: SimdElement, const N: usize> SimdT for Simd<T, N> {} -impl<T: MaskElement, const N: usize> SimdT for Mask<T, N> {} -impl<T: SimdElement, const N: usize> Cast<T, N> for Simd<T, N> { - fn cas<U>(self) -> U { - assert!(std::mem::size_of::<U>() == std::mem::size_of::<Self>()); - unsafe { transmute_unchecked(self) } - } -} - -impl<T: MaskElement, const N: usize> Cast<T, N> for Mask<T, N> { - fn cas<U>(self) -> U { - assert!(std::mem::size_of::<U>() == std::mem::size_of::<Self>()); - unsafe { transmute_unchecked(self) } - } -} - -#[allow(non_camel_case_types)] -type c = u8x32; -unsafe fn portable(mut input: &[u8], mut output: *mut u8) { - while input.len() >= 32 { - #[allow(unsafe_op_in_unsafe_fn)] - #[cfg(all(target_feature = "avx2", not(miri)))] - let indices = { - let lo = _mm_loadu_si128(input.as_ptr() as *const __m128i); - let hi = _mm_loadu_si128(input.as_ptr().add(12) as *const __m128i); - let i = _mm256_shuffle_epi8( - _mm256_set_m128i(hi, lo), - _mm256_set_epi8( - 10, 11, 9, 10, 7, 8, 6, 7, 4, 5, 3, 4, 1, 2, 0, 1, // - 10, 11, 9, 10, 7, 8, 6, 7, 4, 5, 3, 4, 1, 2, 0, 1, // - ), - ); - let t0 = _mm256_and_si256(i, _mm256_set1_epi32(0x0fc0fc00)); - let t1 = _mm256_mulhi_epu16(t0, _mm256_set1_epi32(0x04000040)); - let t2 = _mm256_and_si256(i, _mm256_set1_epi32(0x003f03f0)); - let t3 = _mm256_mullo_epi16(t2, _mm256_set1_epi32(0x01000010)); - - c::from(_mm256_or_si256(t1, t3)) - }; - #[cfg(not(all(target_feature = "avx2", not(miri))))] - let indices = { - let v = c::from_slice(input); - let i = simd_swizzle!( - v, - [ - 1, 0, 2, 1, 4, 3, 5, 4, 7, 6, 8, 7, 10, 9, 11, 10, 13, 12, // - 14, 13, 16, 15, 17, 16, 19, 18, 20, 19, 22, 21, 23, 22 - ] - ); - - // https://github.com/WojciechMula/base64simd - let t0 = i & u32x8::splat(0x0fc0fc00).cas::<c>(); - let t1 = Cast::cas::<c>(mulhi(t0.cas(), u32x8::splat(0x04000040).cas())); - let t2 = i & u32x8::splat(0x003f03f0).cas::<c>(); - let t3 = mullo(t2.cas(), u32x8::splat(0x01000010).cas()).cas::<c>(); - t1 | t3 - }; - lookup(indices).copy_to_slice(unsafe { std::slice::from_raw_parts_mut(output, 32) }); - output = unsafe { output.add(32) }; - - input = &input[24..]; - } - unsafe { encode_simple(input, output) }; -} -#[allow(dead_code)] -fn mulhi(x: u16x16, y: u16x16) -> u16x16 { - unsafe { - simd_cast::<_, u16x16>( - simd_cast::<_, u32x16>(x) * simd_cast::<_, u32x16>(y) >> u32x16::splat(16), - ) - } -} - -#[allow(dead_code)] -fn mullo(x: u16x16, y: u16x16) -> u16x16 { - x * y -} - -fn lookup(x: c) -> c { - let result = x.saturating_sub(c::splat(51)); - let less = cmpgt(c::splat(26), x); - let result = result | (less & c::splat(13)); - - #[rustfmt::skip] - const LUT: i8x32 = i8x32::from_array([ - b'a' as i8 - 26, b'0' as i8 - 52, b'0' as i8 - 52, b'0' as i8 - 52, b'0' as i8 - 52, b'0' as i8 - 52, - b'0' as i8 - 52, b'0' as i8 - 52, b'0' as i8 - 52, b'0' as i8 - 52, b'0' as i8 - 52, b'+' as i8 - 62, - b'/' as i8 - 63, b'A' as i8, 0, 0, - - b'a' as i8 - 26, b'0' as i8 - 52, b'0' as i8 - 52, b'0' as i8 - 52, b'0' as i8 - 52, b'0' as i8 - 52, - b'0' as i8 - 52, b'0' as i8 - 52, b'0' as i8 - 52, b'0' as i8 - 52, b'0' as i8 - 52, b'+' as i8 - 62, - b'/' as i8 - 63, b'A' as i8, 0, 0 - ]); - #[cfg(all(target_feature = "avx2", not(miri)))] - let result = unsafe { i8x32::from(_mm256_shuffle_epi8(LUT.into(), result.into())) }; - #[cfg(not(all(target_feature = "avx2", not(miri))))] - let result = (LUT.cas::<c>().swizzle_dyn(result)).cas::<i8x32>(); - - Cast::cas(result + x.cas::<i8x32>()) -} - -pub fn cmpgt(x: c, y: c) -> c { - x.cas::<i8x32>().simd_gt(y.cas::<i8x32>()).cas() -} - -trait P { - unsafe fn p<const N: usize>(&mut self, data: [u8; N]); -} - -impl P for *mut u8 { - unsafe fn p<const N: usize>(&mut self, data: [u8; N]) { - unsafe { self.copy_from(data.as_ptr(), N) }; - *self = unsafe { self.add(N) }; - } -} - -#[allow(unsafe_op_in_unsafe_fn)] -/// # Safety -/// ptr valid for [`size`]`(input)` writes. -pub unsafe fn encode_simple(mut input: &[u8], mut output: *mut u8) { - const Α: &[u8; 64] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; - while let [a, b, c, rest @ ..] = input { - let α = ((*a as usize) << 16) | ((*b as usize) << 8) | *c as usize; - output.p([ - Α[α >> 18], - Α[(α >> 12) & 0x3F], - Α[(α >> 6) & 0x3F], - Α[α & 0x3F], - ]); - input = rest; - } - if !input.is_empty() { - let mut α = (input[0] as usize) << 16; - if input.len() > 1 { - α |= (input[1] as usize) << 8; - } - output.p([Α[α >> 18], Α[α >> 12 & 0x3F]]); - if input.len() > 1 { - output.p([Α[α >> 6 & 0x3f]]); - } else { - output.p([b'=']); - } - output.p([b'=']); - } -} - -pub const fn size(of: &[u8]) -> usize { - let use_pad = of.len() % 3 != 0; - if use_pad { - 4 * (of.len() / 3 + 1) - } else { - 4 * (of.len() / 3) - } -} |