heh
speedinate
bendn 3 months ago
parent e84efce · commit ce819b7
-rw-r--r--src/main.rs9
-rw-r--r--src/rah.rs247
2 files changed, 151 insertions, 105 deletions
diff --git a/src/main.rs b/src/main.rs
index 3286a79..9e93ca1 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -76,6 +76,7 @@ use atools::prelude::*;
#[implicit_fn::implicit_fn]
pub unsafe fn p1(x: &'static [u8; ISIZE]) -> impl Debug {
let i = x.chunked::<{ 141 + 1 }>();
+ assert_eq!(i.len(), 142);
let start = i.find(b'S');
util::memo_countg(
@@ -97,10 +98,10 @@ pub unsafe fn p1(x: &'static [u8; ISIZE]) -> impl Debug {
const ISIZE: usize = include_bytes!("inp.txt").len();
fn main() {
use atools::prelude::*;
- unsafe { println!("{:?}", p1(include_bytes!("inp.txt"))) };
- // unsafe { println!("{:?}", p1(include_str!("../1"))) };
- // unsafe { println!("{:?}", p1(include_str!("../2"))) };
- // unsafe { println!("{:?}", p1(include_str!("../3"))) };
+ unsafe { println!("{:?}", rah::run(include_bytes!("inp.txt"))) };
+ unsafe { println!("{:?}", rah::run(include_bytes!("../1"))) };
+ unsafe { println!("{:?}", rah::run(include_bytes!("../2"))) };
+ unsafe { println!("{:?}", rah::run(include_bytes!("../3"))) };
}
#[bench]
diff --git a/src/rah.rs b/src/rah.rs
index 6a5a50c..7f8bf67 100644
--- a/src/rah.rs
+++ b/src/rah.rs
@@ -1,108 +1,153 @@
-#![feature(generic_const_exprs, portable_simd)]
-use std::{ops::Range, simd::prelude::*};
+#![feature(generic_const_exprs, portable_simd, stmt_expr_attributes)]
+use std::{hint::assert_unchecked, ops::Range, simd::prelude::*};
#[unsafe(no_mangle)]
-pub unsafe fn run(x: &'static [u8]) -> u64 {
- let width = 3712 + memchr::memchr(b'\n', x.get_unchecked(3712..)).unwrap_unchecked();
- let numbers = [
- x,
- x.get_unchecked(width + 1..),
- x.get_unchecked((width + 1) * 2..),
- x.get_unchecked((width + 1) * 3..),
- ];
- let mut tot = 0;
- let ll = x.get_unchecked((width + 1) * 4..(width + 1) * 5 - 1);
- let mut at = 0;
- while let Some((index, mut mask)) = {
- let sl = ll.get(at..).unwrap_unchecked();
- let x = u8x64::load_or(sl, Simd::splat(b' '));
- let mask = x.simd_ne(Simd::splat(b' ')).to_bitmask() as u64;
- (mask != 0).then(|| {
- (
- at + mask.trailing_zeros() as usize,
- mask ^ (mask & mask.wrapping_neg()),
- )
- })
- } {
- let mut x = Simd::splat(0u16);
- for i in 0..4 {
- let c = u8x64::from_slice(numbers.get_unchecked(i).get_unchecked(index..index + 64));
- let m = c.simd_ne(Simd::splat(b' '));
- x = m
- .cast::<i16>()
- .select(x * Simd::splat(10) + (c - Simd::splat(b'0')).cast(), x);
- }
- let f = |ix: Range<usize>| {
- // let iter1 = ix.clone().map(|i| {
- // numbers
- // .iter()
- // .filter(|x| *x.get_unchecked(index + i) != b' ')
- // .fold(0, |acc, x| {
- // acc * 10 + (x.get_unchecked(index + i) - b'0') as u64
- // })
- // });
- // let mut tot_ = 0;
- // match *ll.get_unchecked(index + ix.start) {
- // b'+' => tot_ += iter1.clone().sum::<u64>(),
- // b'*' => tot_ += iter1.clone().product::<u64>(),
- // _ => panic!(),
- // }
-
- // print!("{:?} = ", iter1.clone().collect::<Vec<_>>());
- // println!("{:?} {}", &x[ix.clone()], ll[index + ix.start] as char);
- const IDX: u32x64 = u32x64::from_array([
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
- 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
- 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
- ]);
-
- let i = x
- .as_array()
- .get_unchecked(ix.clone())
- .iter()
- .map(|&x| x as u64);
- let x = match *ll.get_unchecked(index + ix.start) {
- b'+' => {
- let mask = IDX.simd_lt(Simd::splat(ix.end as _))
- & IDX.simd_ge(Simd::splat(ix.start as _));
- (mask.to_int().cast::<u32>() & x.cast::<u32>()).reduce_sum() as u64
- }
- _ => i.product::<u64>(),
- };
- // assert_eq!(x, tot_);
- x
- };
- if mask.leading_zeros() as usize == 64 {
- return tot + f(0..ll.len() - index);
- }
-
- let mut last = 0;
- let mut hold: Option<Range<usize>> = None;
+pub unsafe fn run(x: &[u8]) -> u64 {
+ let i = unsafe { x.as_chunks_unchecked::<{ 141 + 1 }>() };
+ #[rustfmt::skip]
+ static LUT: [(usize, usize); 141] = [(0, 0), (0, 0), (70, 70), (0, 0), (69, 71), (0, 0), (68, 72), (0, 0), (67, 73), (0, 0), (66, 74), (0, 0), (65, 75), (0, 0), (64, 76), (0, 0), (63, 77), (0, 0), (62, 78), (0, 0), (61, 79), (0, 0), (60, 80), (0, 0), (59, 81), (0, 0), (58, 82), (0, 0), (57, 83), (0, 0), (56, 84), (0, 0), (55, 85), (0, 0), (54, 86), (0, 0), (53, 87), (0, 0), (52, 88), (0, 0), (51, 89), (0, 0), (50, 90), (0, 0), (49, 91), (0, 0), (48, 92), (0, 0), (47, 93), (0, 0), (46, 94), (0, 0), (45, 95), (0, 0), (44, 96), (0, 0), (43, 97), (0, 0), (42, 98), (0, 0), (41, 99), (0, 0), (40, 100), (0, 0), (39, 101), (0, 0), (38, 102), (0, 0), (37, 103), (0, 0), (36, 104), (0, 0), (35, 105), (0, 0), (34, 106), (0, 0), (33, 107), (0, 0), (32, 108), (0, 0), (31, 109), (0, 0), (30, 110), (0, 0), (29, 111), (0, 0), (28, 112), (0, 0), (27, 113), (0, 0), (26, 114), (0, 0), (25, 115), (0, 0), (24, 116), (0, 0), (23, 117), (0, 0), (22, 118), (0, 0), (21, 119), (0, 0), (20, 120), (0, 0), (19, 121), (0, 0), (18, 122), (0, 0), (17, 123), (0, 0), (16, 124), (0, 0), (15, 125), (0, 0), (14, 126), (0, 0), (13, 127), (0, 0), (12, 128), (0, 0), (11, 129), (0, 0), (10, 130), (0, 0), (9, 131), (0, 0), (8, 132), (0, 0), (7, 133), (0, 0), (6, 134), (0, 0), (5, 135), (0, 0), (4, 136), (0, 0), (3, 137), (0, 0), (2, 138), (0, 0), (1, 139)];
- while mask != 0 {
- let next = mask.trailing_zeros() as usize;
- mask ^= mask & mask.wrapping_neg();
- if *ll.get_unchecked(index + last) == b'+' {
- if let Some(r) = &mut hold {
- r.end = next;
- } else {
- hold = Some(last..next);
- }
- } else {
- // *
- if let Some(r) = hold.take() {
- tot += f(r);
- }
- tot += f(last..next - 1);
+ for n in (1..141).rev().step_by(2) {
+ let lem = &i[n];
+ println!("{n}: {}", lem.iter().filter(|x| **x == b'^').count());
+ // assert_eq!((start, end), LUT[n]);
+ // assert_eq!(LUT[n], (start, end));
+ // println!("({start}, {end}),");
+ }
+ // println!("{LUT:?}");
+ assert_unchecked(i.len() == 142);
+ let mut v = [1u64; 141];
+ let split = Simd::splat(b'^');
+ let mut f = #[inline(always)]
+ |(star, end): (usize, usize), line: &[u8; 142]| {
+ let mut f = #[inline(always)]
+ |mut mask: u64, o: usize| {
+ while mask != 0 {
+ let x = mask.trailing_zeros() as usize + o;
+ mask ^= mask & mask.wrapping_neg();
+ *v.get_unchecked_mut(x) = *v.get_unchecked(x - 1) + *v.get_unchecked(x + 1);
}
-
- last = next;
+ };
+ // let (star, end) = LUT[ix];
+ if (end - star) < 64 {
+ let mask = u8x64::load_or_default(&line[star..])
+ .simd_eq(split)
+ .to_bitmask();
+ f(mask, star);
+ } else if (end - star) < 128 {
+ let mask = u8x64::load_or_default(&line[star..])
+ .simd_eq(split)
+ .to_bitmask();
+ f(mask, star);
+ let mask = u8x64::load_or_default(&line[star + 64..])
+ .simd_eq(split)
+ .to_bitmask();
+ f(mask, star + 64);
+ } else {
+ let mask = u8x64::load_or_default(&line[star..])
+ .simd_eq(split)
+ .to_bitmask();
+ f(mask, star);
+ let mask = u8x64::load_or_default(&line[star + 64..])
+ .simd_eq(split)
+ .to_bitmask();
+ f(mask, star + 64);
+ let mask = u8x64::load_or_default(&line[star + 128..])
+ .simd_eq(split)
+ .to_bitmask();
+ f(mask, star + 128);
}
+ // if ix > 65 {
+ // let mask = u8x64::load_or_default(&line[..39])
+ // .simd_eq(split)
+ // .to_bitmask();
+ // f(mask, 0);
+ // }
+ // let middle = u8x64::load_or_default(&line[39..])
+ // .simd_eq(split)
+ // .to_bitmask();
+ // f(middle, 39);
+ // if ix > 65 {
+ // let end = u8x64::load_or_default(&line[39 + 64..])
+ // .simd_eq(split)
+ // .to_bitmask();
+ // f(end, 39 + 64);
+ // }
+ };
- if let Some(r) = hold {
- tot += f(r);
- }
+ f(const { LUT[140] }, &i[140]);
+ f(const { LUT[138] }, &i[138]);
+ f(const { LUT[136] }, &i[136]);
+ f(const { LUT[134] }, &i[134]);
+ f(const { LUT[132] }, &i[132]);
+ f(const { LUT[130] }, &i[130]);
+ f(const { LUT[128] }, &i[128]);
+ f(const { LUT[126] }, &i[126]);
+ f(const { LUT[124] }, &i[124]);
+ f(const { LUT[122] }, &i[122]);
+ f(const { LUT[120] }, &i[120]);
+ f(const { LUT[118] }, &i[118]);
+ f(const { LUT[116] }, &i[116]);
+ f(const { LUT[114] }, &i[114]);
+ f(const { LUT[112] }, &i[112]);
+ f(const { LUT[110] }, &i[110]);
+ f(const { LUT[108] }, &i[108]);
+ f(const { LUT[106] }, &i[106]);
+ f(const { LUT[104] }, &i[104]);
+ f(const { LUT[102] }, &i[102]);
+ f(const { LUT[100] }, &i[100]);
+ f(const { LUT[98] }, &i[98]);
+ f(const { LUT[96] }, &i[96]);
+ f(const { LUT[94] }, &i[94]);
+ f(const { LUT[92] }, &i[92]);
+ f(const { LUT[90] }, &i[90]);
+ f(const { LUT[88] }, &i[88]);
+ f(const { LUT[86] }, &i[86]);
+ f(const { LUT[84] }, &i[84]);
+ f(const { LUT[82] }, &i[82]);
+ f(const { LUT[80] }, &i[80]);
+ f(const { LUT[78] }, &i[78]);
+ f(const { LUT[76] }, &i[76]);
+ f(const { LUT[74] }, &i[74]);
+ f(const { LUT[72] }, &i[72]);
+ f(const { LUT[70] }, &i[70]);
+ f(const { LUT[68] }, &i[68]);
+ f(const { LUT[66] }, &i[66]);
+ f(const { LUT[64] }, &i[64]);
+ f(const { LUT[62] }, &i[62]);
+ f(const { LUT[60] }, &i[60]);
+ f(const { LUT[58] }, &i[58]);
+ f(const { LUT[56] }, &i[56]);
+ f(const { LUT[54] }, &i[54]);
+ f(const { LUT[52] }, &i[52]);
+ f(const { LUT[50] }, &i[50]);
+ f(const { LUT[48] }, &i[48]);
+ f(const { LUT[46] }, &i[46]);
+ f(const { LUT[44] }, &i[44]);
+ f(const { LUT[42] }, &i[42]);
+ f(const { LUT[40] }, &i[40]);
+ f(const { LUT[38] }, &i[38]);
+ f(const { LUT[36] }, &i[36]);
+ f(const { LUT[34] }, &i[34]);
+ f(const { LUT[32] }, &i[32]);
+ f(const { LUT[30] }, &i[30]);
+ f(const { LUT[28] }, &i[28]);
+ f(const { LUT[26] }, &i[26]);
+ f(const { LUT[24] }, &i[24]);
+ f(const { LUT[22] }, &i[22]);
+ f(const { LUT[20] }, &i[20]);
+ f(const { LUT[18] }, &i[18]);
+ f(const { LUT[16] }, &i[16]);
+ f(const { LUT[14] }, &i[14]);
+ f(const { LUT[12] }, &i[12]);
+ f(const { LUT[10] }, &i[10]);
+ f(const { LUT[8] }, &i[8]);
+ f(const { LUT[6] }, &i[6]);
+ f(const { LUT[4] }, &i[4]);
+ f(const { LUT[2] }, &i[2]);
+ // for n in (1..141).rev().step_by(2) {
+ // println!("f({n}, &i[{n}]);");
+ // f(n, &i[n]);
+ // }
- at += last;
- }
- tot
+ *v.get_unchecked(70)
}