heh
speedinate
bendn 3 months ago
parent 9c2f585 · commit 8f8199e
-rw-r--r--src/main.rs363
-rw-r--r--src/util.rs25
2 files changed, 367 insertions, 21 deletions
diff --git a/src/main.rs b/src/main.rs
index 6c4c765..506c65a 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -49,8 +49,8 @@
extern crate test;
pub mod util;
-pub use atools::prelude::*;
-use itertools::chain;
+use atools::Split;
+// pub use atools::prelude::*;
use lower::apply;
use memchr::memmem;
use regex::bytes::Regex;
@@ -69,36 +69,359 @@ use std::{
};
use swizzle::array;
pub use util::prelude::*;
+
#[unsafe(no_mangle)]
-#[implicit_fn::implicit_fn]
-pub unsafe fn p1(x: &'static [u8; ISIZE]) -> impl Debug {
- let mut grid = x.chunked::<{ 136 + 1 }>();
- let mut tot = 0;
- loop {
- let mut removed = 0;
- for roll in grid.clone().find_iter(b'@') {
- let nb = util::nb(roll)
- .map(|(x, y)| grid.get(y).and_then(_.get(x)).is_some_and(*_ == b'@') as u8)
- .sum();
- if nb < 4 {
- grid[roll.1][roll.0] = b'.';
- removed += 1;
+fn p2(v: &[u8]) -> usize {
+ let rows = memchr::memchr(b'\n', v).ψ();
+ let cols = rows + 1;
+
+ let mut degree = vec![0u8; rows * cols];
+ let mut queue = VecDeque::with_capacity(2000);
+
+ for i in memchr::memchr_iter(b'@', v) {
+ let count = util::nb((i / cols, i % cols))
+ .map(|(x, y)| (v.get(x * cols + y) == Some(&b'@')) as u8)
+ .into_iter()
+ .sum::<u8>()
+ + 1;
+
+ degree[i] = count as _;
+
+ if count < 4 + 1 {
+ queue.push_back(i as u16);
+ }
+ }
+
+ let mut sum = 0;
+ while let Some(i) = queue.pop_front() {
+ let i = i as usize;
+ if degree[i] == 0 {
+ continue;
+ }
+
+ degree[i] = 0;
+ sum += 1;
+
+ util::nb((i / cols, i % cols))
+ .into_iter()
+ .map(|(x, y)| x * cols + y)
+ .for_each(|i| {
+ if let Some(d) = degree.get_mut(i)
+ && *d != 0
+ {
+ *d -= 1;
+ if *d < 4 + 1 {
+ queue.push_back(i as u16);
+ }
+ }
+ });
+ }
+ sum
+}
+
+macro_rules! nbors {
+ ($width: literal, $tail:literal, $fname:ident) => {
+ #[unsafe(no_mangle)]
+ fn $fname(x: &[u8]) -> u32 {
+ unsafe { core::hint::assert_unchecked(x.len() == $width * ($width + 1)) };
+ let winc = $width + 1;
+ let mut sum = 0;
+ let mut l_tl;
+ let mut r_tl;
+ let mut t_tl;
+
+ let mut l_tc;
+ let mut r_tc;
+ let mut t_tc;
+
+ let mut l_tr;
+ let mut r_tr;
+ let mut t_tr;
+
+ let t_mask = std::simd::Mask::<i8, _>::from([
+ true, true, true, true, true, true, true, true, true, true, false, false, false,
+ false, false, false,
+ ]);
+ {
+ // first line
+ // overlaps with right by 1
+ let left = {
+ let f = |i| u8x64::from_slice(&x[i..i + 64]) & Simd::splat(2);
+ let cl =
+ u8x64::from_slice(&x[..64]).shift_elements_right::<1>(2) & Simd::splat(2);
+ l_tl = cl;
+ l_tc = f(0);
+ let cr = f(1);
+ l_tr = cr;
+ let bl = f(winc - 1);
+ let bc = f(winc);
+ let br = f(winc + 1);
+
+ let b_ = cl + cr;
+ let c_ = bl + bc + br;
+ (b_ + c_).simd_ge(Simd::splat(4)) & l_tc.simd_eq(u8x64::splat(0))
+ };
+ // overlaps with left by 1
+ // overlaps with tail by 1
+ // covers the span of 63..127
+ let right = {
+ let f = |i| {
+ let i = i + 63;
+ u8x64::from_slice(&x[i..i + 64]) & Simd::splat(2)
+ };
+
+ let cl = u8x64::from_slice(&x[63 - 1..63 - 1 + 64]) & Simd::splat(2); // f(-1)
+ r_tl = cl;
+ // r_tl = u8x64::from_slice(&x[63 - 1..63 - 1 + 64]) &Simd::splat(2);
+ r_tc = u8x64::from_slice(&x[63..63 + 64]) & Simd::splat(2);
+ let cr = f(1);
+ r_tr = cr;
+ let bl = f(winc - 1);
+ let bc = f(winc);
+ let br = f(winc + 1);
+
+ let b_ = cl + cr;
+ let c_ = bl + bc + br;
+
+ (b_ + c_).simd_ge(Simd::splat(4)) & r_tc.simd_eq(u8x64::splat(0))
+ };
+ // covers the span of 126..136
+ let tail = {
+ // last 10 (or 14)
+ let f = |i| {
+ let i = i + 126;
+ u8x16::from_slice(&x[i..i + 16]) & Simd::splat(2)
+ };
+
+ let cl = u8x16::from_slice(&x[126 - 1..126 - 1 + 16]) & Simd::splat(2);
+ t_tl = cl;
+ t_tc = u8x16::load_or_default(&x[126..126 + $tail]) & Simd::splat(2);
+ let cr = f(1);
+ t_tr = cr;
+ let bl = f(winc - 1);
+ let bc = f(winc);
+ let br = f(winc + 1);
+
+ let b_ = cl + cr;
+ let c_ = bl + bc + br;
+ (b_ + c_).simd_ge(Simd::splat(4)) & t_tc.simd_eq(Simd::splat(0))
+ };
+
+ // for &el in left.to_array().iter().take(63) {
+ // if el == true {
+ // print!("x");
+ // } else {
+ // print!(" ");
+ // }
+ // }
+
+ // for el in right.to_array().into_iter().take(63) {
+ // if el == true {
+ // print!("x");
+ // } else {
+ // print!(" ");
+ // }
+ // }
+ // for el in tail.to_array().into_iter().take(10) {
+ // if el == true {
+ // print!("x");
+ // } else {
+ // print!(" ");
+ // }
+ // }
+ // println!();
+ let l = (left.to_bitmask() << 1).count_ones();
+ let r = (right.to_bitmask() << 1).count_ones();
+ let t = ((tail & t_mask).to_bitmask()).count_ones();
+ sum += l + r + t;
+ }
+ for y in 1..$width - 1 {
+ // overlaps with right by 1
+ let left = {
+ let f = |i| u8x64::from_slice(&x[i..i + 64]) & Simd::splat(2);
+ let cl = f(y * winc - 1);
+ // let rel =
+ // u8x64::from_slice(&x[y * winc..y * winc + 64]).simd_eq(u8x64::splat(b'@'));
+ let cr = f(y * winc + 1);
+ let bl = f((y + 1) * winc - 1);
+ let bc = f((y + 1) * winc);
+ let br = f((y + 1) * winc + 1);
+
+ let a_ = l_tl + l_tc + l_tr;
+ l_tc = u8x64::from_slice(&x[y * winc..y * winc + 64]) & Simd::splat(2);
+ (l_tl, l_tr) = (cl, cr);
+ let b_ = cl + cr;
+ let c_ = bl + bc + br;
+
+ (a_ + b_ + c_).simd_ge(Simd::splat(10)) & l_tc.simd_eq(u8x64::splat(0))
+ };
+ // overlaps with left by 1
+ // overlaps with tail by 1
+ // covers the span of 63..127
+ let right = {
+ let f = |i| {
+ let i = i + 63;
+ u8x64::from_slice(&x[i..i + 64]) & Simd::splat(2)
+ };
+ let cl = f(y * winc - 1);
+ let cr = f(y * winc + 1);
+ let bl = f((y + 1) * winc - 1);
+ let bc = f((y + 1) * winc);
+ let br = f((y + 1) * winc + 1);
+
+ let a_ = r_tl + r_tc + r_tr;
+ (r_tl, r_tr) = (cl, cr);
+ r_tc =
+ u8x64::from_slice(&x[y * winc + 63..y * winc + 63 + 64]) & Simd::splat(2);
+
+ let b_ = cl + cr;
+ let c_ = bl + bc + br;
+ // it double
+ (a_ + b_ + c_).simd_ge(Simd::splat(10)) & r_tc.simd_eq(u8x64::splat(0))
+ };
+ // covers the span of 126..136
+ let tail = {
+ // last 8
+ let f = |i| {
+ let i = i + 126;
+ u8x16::load_or_default(&x[i..i + $tail]) & Simd::splat(2)
+ };
+
+ let cl = f(y * winc - 1);
+ let cr = f(y * winc + 1);
+ let bl = f((y + 1) * winc - 1);
+ let bc = f((y + 1) * winc);
+ let br = f((y + 1) * winc + 1);
+
+ let a_ = t_tl + t_tc + t_tr;
+ (t_tl, t_tr) = (cl, cr);
+ t_tc = u8x16::load_or_default(&x[y * winc + 126..y * winc + 126 + $tail])
+ & Simd::splat(2);
+ let b_ = cl + cr;
+ let c_ = bl + bc + br;
+ (a_ + b_ + c_).simd_ge(Simd::splat(10)) & t_tc.simd_eq(Simd::splat(0))
+ };
+ let l = (left.to_bitmask() << 1).count_ones();
+ let r = (right.to_bitmask() << 1).count_ones();
+ let t = ((tail & t_mask).to_bitmask()).count_ones();
+ sum += l + r + t;
+ }
+ {
+ // last line
+ let y = $width - 1;
+ // overlaps with right by 1
+ let left = {
+ let f = |i| u8x64::from_slice(&x[i..i + 64]) & Simd::splat(2);
+ let cl = f(y * winc - 1);
+ let rel =
+ u8x64::from_slice(&x[y * winc..y * winc + 64]).simd_eq(u8x64::splat(b'@'));
+ let cr = f(y * winc + 1);
+
+ let a_ = l_tl + l_tc + l_tr;
+ let b_ = cl + cr;
+
+ (a_ + b_).simd_ge(Simd::splat(4)) & rel
+ };
+ // overlaps with left by 1
+ // overlaps with tail by 1
+ // covers the span of 63..127
+ let right = {
+ let f = |i| {
+ let i = i + 63;
+ u8x64::from_slice(&x[i..i + 64]) & Simd::splat(2)
+ };
+ let cl = f(y * winc - 1);
+ let rel = u8x64::from_slice(&x[y * winc + 63..y * winc + 63 + 64])
+ .simd_eq(u8x64::splat(b'@'));
+ let cr = f(y * winc + 1);
+
+ let a_ = r_tl + r_tc + r_tr;
+ let b_ = cl + cr;
+
+ (a_ + b_).simd_ge(Simd::splat(4)) & rel
+ };
+ // covers the span of 126..136
+ let tail = {
+ // last 8
+ let f = |i| {
+ let i = i + 126;
+ u8x16::load_or_default(&x[i..i + $tail]) & Simd::splat(2)
+ };
+ let cl = f(y * winc - 1);
+ let rel = u8x16::load_or_default(&x[y * winc + 126..y * winc + 126 + $tail])
+ .simd_eq(u8x16::splat(b'@'));
+ let cr = f(y * winc + 1);
+
+ let a_ = t_tl + t_tc + t_tr;
+ let b_ = cl + cr;
+ (a_ + b_).simd_ge(Simd::splat(4)) & rel
+ };
+ let l = (left.to_bitmask() << 1).count_ones();
+ let r = (right.to_bitmask() << 1).count_ones();
+ let t = ((tail & t_mask).to_bitmask()).count_ones();
+ sum += l + r + t;
}
+ sum
}
- tot += removed;
- if removed == 0 {
- return tot;
+ };
+}
+nbors!(136, 10, nbors136);
+nbors!(140, 14, nbors140);
+
+#[unsafe(no_mangle)]
+#[implicit_fn::implicit_fn]
+pub unsafe fn p1(x: &'static [u8]) -> impl Debug {
+ let grid = x.行().collect::<Vec<_>>();
+ let mut tot = 0;
+ // want nbors of each cell
+ // [0, @ , 0]
+ // [0, [@], @]
+ // [@, 0 , 0]
+ // hmm
+
+ // get nbor left and right
+ // above + above >> 1 + above << 2
+ let mut to = vec![vec![b'.'; 136]; 136];
+ for roll in grid.clone().as_slice().find_iter(b'@') {
+ let nb = util::nb(roll)
+ .map(|(x, y)| {
+ grid.get(y)
+ .and_then(|y| y.get(x))
+ .is_some_and(|x| *x == b'@') as u8
+ })
+ .into_iter()
+ .sum::<u8>();
+ if nb < 4 {
+ tot += 1;
+ // to[roll.1][roll.0] = b'x';
+ } else {
+ // to[roll.1][roll.0] = b'@';
}
}
+ // for l in to {
+ // println!("{}", l.str());
+ // }
+ tot
}
const ISIZE: usize = include_bytes!("inp.txt").len();
fn main() {
- unsafe { println!("{:?}", p1(include_bytes!("inp.txt"))) };
+ use atools::prelude::*;
+
+ unsafe {
+ println!(
+ "{:?}",
+ p1(include_bytes!(
+ "/home/os/aoc_inputs_2025_4_53616c7465645f5f5021a2c6f2f1477957cf648aa98f5c84aded26838a2ede90785e4813a18ec8ac8a3d220956ea04b6b445865b32aee7b2e54bb761dc1fa967"
+ ))
+ )
+ };
+
+ // unsafe { println!("{:?}", p1(include_bytes!("inp.txt"))) };
}
#[bench]
fn benc(b: &mut test::Bencher) {
let i = boxd(include_bytes!("inp.txt"));
- b.iter(|| unsafe { p1(i) });
+ b.iter(|| unsafe { nbors136(i) });
}
diff --git a/src/util.rs b/src/util.rs
index 42b0686..30ac3d5 100644
--- a/src/util.rs
+++ b/src/util.rs
@@ -2065,7 +2065,30 @@ impl GridFind for &[&[u8]] {
}
fn find_iter(&self, c: u8) -> impl Iterator<Item = (usize, usize)> {
- empty()
+ self.iter().zip(0..).flat_map(move |(x, y)| {
+ x.iter()
+ .ι::<usize>()
+ .fl(move |&x| x == c)
+ .map(move |(_, x)| (x, y))
+ })
+ }
+}
+
+impl GridFind for &[Vec<u8>] {
+ fn find(self, c: u8) -> (usize, usize) {
+ self.iter()
+ .zip(0..)
+ .find_map(|(x, y)| x.iter().position(|&x| x == c).map(|x| (x, y)))
+ .unwrap()
+ }
+
+ fn find_iter(&self, c: u8) -> impl Iterator<Item = (usize, usize)> {
+ self.iter().zip(0..).flat_map(move |(x, y)| {
+ x.iter()
+ .ι::<usize>()
+ .fl(move |&x| x == c)
+ .map(move |(_, x)| (x, y))
+ })
}
}