heh
Diffstat (limited to 'src/main.rs')
| -rw-r--r-- | src/main.rs | 219 |
1 files changed, 167 insertions, 52 deletions
diff --git a/src/main.rs b/src/main.rs index 94605aa..389b0df 100644 --- a/src/main.rs +++ b/src/main.rs @@ -19,6 +19,7 @@ array_windows, try_blocks, slice_take, + portable_simd, test, slice_as_chunks, array_chunks, @@ -31,67 +32,181 @@ use atools::prelude::*; pub use util::prelude::*; const SIZE: usize = 140; +#[derive(Copy, Clone, Default, Debug, PartialEq, Eq)] +struct bitset140([u64; 3]); +impl bitset140 { + #[inline] + fn count_ones(self) -> u32 { + self.0.iter().copied().map(u64::count_ones).sum() + } +} + +impl std::ops::Shr<u32> for bitset140 { + type Output = Self; + + #[inline] + fn shr(self, rhs: u32) -> Self::Output { + let bitset140([a, b, c]) = self; + Self([ + a >> rhs, + b >> rhs | a << (u64::BITS - rhs), + c >> rhs | b << (u64::BITS - rhs), + ]) + } +} +impl std::ops::Shl<u32> for bitset140 { + type Output = Self; + + #[inline] + fn shl(self, rhs: u32) -> Self::Output { + let bitset140([a, b, c]) = self; + Self([ + a << rhs | b >> (u64::BITS - rhs), + b << rhs | c >> (u64::BITS - rhs), + c << rhs, + ]) + } +} + +impl std::ops::BitAnd<bitset140> for bitset140 { + type Output = Self; + + #[inline] + fn bitand(self, bitset140(rhs): bitset140) -> Self::Output { + bitset140(self.0.zip(rhs).map(|(a, b)| a & b)) + } +} + +use std::simd::prelude::*; #[no_mangle] pub fn run(i: &str) -> impl Display { - let grid = unsafe { i.as_bytes().as_chunks_unchecked::<{ SIZE + 1 }>() }; - let get = |x: isize, y: isize| -> Option<u8> { - (x >= 0 && y >= 0 && x < SIZE as isize && y < SIZE as isize) - .then(|| unsafe { grid.get_unchecked(y as usize)[x as usize] }) - }; - macro_rules! ck { - ($x:expr,$y:expr, $eq:literal) => {{ - get($x, $y) == Some($eq) - }}; + fn load(i: &[u8; SIZE + 1]) -> [bitset140; 4] { + let loaded = [ + u8x64::load_or_default(&i[128..]), + u8x64::from_slice(&i[64..128]), + u8x64::from_slice(&i[..64]), + ]; + + [ + bitset140(loaded.map(|x| x.simd_eq(Simd::splat(b'X')).to_bitmask())), + bitset140(loaded.map(|x| x.simd_eq(Simd::splat(b'M')).to_bitmask())), + bitset140(loaded.map(|x| x.simd_eq(Simd::splat(b'A')).to_bitmask())), + bitset140(loaded.map(|x| x.simd_eq(Simd::splat(b'S')).to_bitmask())), + ] } - let mut sum = 0; - for (x, y) in C! { &grid[..SIZE] }.iter().enumerate().flat_map(|(y, e)| { - memchr::memchr_iter(b'X', &e[..SIZE]).map(move |x| (x as isize, y as isize)) - }) { - macro_rules! triple { - ($x: literal, $y:literal) => { - (ck!(x + ($x * 3), y + ($y * 3), b'S') - && ck!(x + ($x * 2), y + ($y * 2), b'A') - && ck!(x + $x, y + $y, b'M')) as u32 - }; + unsafe { + let grid = i.as_bytes().as_chunks_unchecked::<{ SIZE + 1 }>(); + // let masked: [masks; SIZE] = std::array::from_fn(|i| load(grid.get_unchecked(i))); + let mut sum = 0; + let mut prev_x = [bitset140::default(); 4]; + let mut prev_m = [bitset140::default(); 4]; + let mut prev_a = [bitset140::default(); 4]; + let mut prev_s = [bitset140::default(); 4]; + + for y in 0..=2 { + let [x, m, a, s] = load(grid.get_unchecked(y)); + sum += (x & m << 1 & a << 2 & s << 3).count_ones(); + sum += (x & m >> 1 & a >> 2 & s >> 3).count_ones(); + prev_x[y + 1] = x; + prev_m[y + 1] = m; + prev_a[y + 1] = a; + prev_s[y + 1] = s; + // prev[y + 1] = masks; } - sum += 0 // a - + triple!(1, 0) + triple!(-1, 0) - + triple!(0, 1) + triple!(0, -1) + for y in 3..SIZE { + let [x, m, a, s] = load(grid.get_unchecked(y)); + let [_, p1, p2, p3] = prev_x; + prev_x = [p1, p2, p3, x]; + let [_, p1, p2, p3] = prev_m; + prev_m = [p1, p2, p3, m]; + let [_, p1, p2, p3] = prev_a; + prev_a = [p1, p2, p3, a]; + let [_, p1, p2, p3] = prev_s; + prev_s = [p1, p2, p3, s]; + + // println!("{a:?}"); + // // ↘→←↓↙→←↑↗↖↗↖↗→↖↗↑↖18 + // // overlap right + sum += (x & m << 1 & a << 2 & s << 3).count_ones(); + // // overlap left + sum += (x & m >> 1 & a >> 2 & s >> 3).count_ones(); + + // // idea is to overlap and then count + // // idea comes from gioschii + // if y >= 3 + // + { + let x = x; + let m = prev_m[3 - 1]; + let a = prev_a[3 - 2]; + let s = prev_s[3 - 3]; + + // down diagonal + sum += (x & m << 1 & a << 2 & s << 3).count_ones(); + sum += (x & m >> 1 & a >> 2 & s >> 3).count_ones(); + + // vertical + sum += (x & m & a & s).count_ones(); + } + // + { + let s = s; + let a = prev_a[3 - 1]; + let m = prev_m[3 - 2]; + let x = prev_x[3 - 3]; - + triple!(1, 1) + triple!(-1, 1) + triple!(-1, -1) + triple!(1, -1); + // up diagonals + sum += (x & m << 1 & a << 2 & s << 3).count_ones(); + sum += (x & m >> 1 & a >> 2 & s >> 3).count_ones(); + + sum += (x & m & a & s).count_ones(); + } + // println!( + // "{y}: {:064b}{:064b}{:064b}", + // // sum - sum_, + // x.0[0], + // x.0[1], + // x.0[2] + // ); + } + sum } - sum } - +#[no_mangle] pub fn two(i: &str) -> impl Display { - let grid = unsafe { i.as_bytes().as_chunks_unchecked::<{ SIZE + 1 }>() }; - let get = |x: isize, y: isize| -> Option<u8> { - (x >= 0 && y >= 0 && x < SIZE as isize && y < SIZE as isize) - .then(|| unsafe { grid.get_unchecked(y as usize)[x as usize] }) - }; + // let grid = i.as_bytes(); + // let get = |x: usize, y: usize| -> u8 { unsafe { *grid.get_unchecked(y * (SIZE + 1) + x) } }; + // let mut sum = 0; + // for y in 1..SIZE - 1 { + // for x in 1..SIZE - 1 { + // let a = get(x - 1, y - 1); + // let b = get(x + 1, y + 1); + // let c = get(x + 1, y - 1); + // let d = get(x - 1, y + 1); + // sum += + // (((a ^ b) + (c ^ d) == (b'S' ^ b'M') + (b'S' ^ b'M')) & (get(x, y) == b'A')) as u32; + // } + // } + // sum + + let g = i.as_bytes(); + assert_eq!(g.len(), 141 * 140); let mut sum = 0; - for (x, y) in grid[..SIZE - 1] - .iter() - .enumerate() - .skip(1) - .flat_map(|(y, e)| { - memchr::memchr_iter(b'A', &e[1..SIZE - 1]).map(move |x| (x as isize + 1, y as isize)) - }) - { - // println!("{x} {y}"); - // for (y, x) in (0..SIZE as isize).flat_map(|x| (0..SIZE as isize).map(move |y| (x, y))) { - // if get(x, y) == Some(b'A') { - let n: Option<u32> = try { - let a = get(x - 1, y - 1)?; - let b = get(x + 1, y + 1)?; - ((a == b'M' && b == b'S') || (a == b'S' && b == b'M')).then(|| { - let a = get(x + 1, y - 1).ψ(); - let b = get(x - 1, y + 1).ψ(); - (a == b'M' && b == b'S') || (a == b'S' && b == b'M') - })? as u32 - }; - sum += n.unwrap_or(0); - // } + // chunks of 64 + for i in 0..304 { + // tl + let a = u8x64::from_slice(&g[64 * i..]); + // trd + let b = u8x64::from_slice(&g[64 * i + 2..]); + // mid + let current = u8x64::from_slice(&g[141 + 64 * i + 1..]); + // bl + let d = u8x64::from_slice(&g[141 * 2 + 64 * i..]); + // br + let c = u8x64::from_slice(&g[141 * 2 + 64 * i + 2..]); + let wz = ((a ^ c) + (b ^ d)).simd_eq(u8x64::splat(60)); + let a = current.simd_eq(u8x64::splat(b'A')); + sum += (wz & a).to_bitmask().count_ones(); } sum } |