heh
speed
| -rw-r--r-- | Cargo.toml | 2 | ||||
| -rw-r--r-- | src/main.rs | 136 | ||||
| -rw-r--r-- | src/util.rs | 8 |
3 files changed, 130 insertions, 16 deletions
@@ -11,7 +11,7 @@ car = "0.1.1" collar = "1.0.1" implicit-fn = "0.1.0" # hinted = "0.0.1" -itertools = "0.12.0" +itertools = "0.14.0" lower = "0.2.0" lower-macros = "0.2.3" mattr = "1.0.0" diff --git a/src/main.rs b/src/main.rs index e7df108..ee8aab6 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,4 +1,5 @@ #![allow( + overflowing_literals, unexpected_cfgs, confusable_idents, uncommon_codepoints, @@ -55,8 +56,10 @@ use memchr::memmem; use regex::bytes::Regex; use rustc_hash::FxBuildHasher; use std::{ + arch::x86_64::*, cmp::{Reverse, minmax}, hash::Hash, + hint::assert_unchecked, mem::take, ops::{Coroutine, Deref}, pin::Pin, @@ -66,36 +69,147 @@ use std::{ }; use swizzle::array; pub use util::prelude::*; +#[unsafe(no_mangle)] +pub unsafe fn max(data: &[u8; 99]) -> u8 { + let v0 = u8x64::from_slice(&data[0..64]); + let v1 = u8x32::from_slice(&data[64..96]).resize::<64>(0); + let mut mx = v0.simd_max(v1).reduce_max(); + for &b in &data[96..] { + if b > mx { + mx = b; + } + } + mx +} -use crate::util::{MapF, UnionFind}; +#[unsafe(no_mangle)] +pub unsafe fn max89(data: &[u8; 89]) -> u8 { + let v0 = u8x64::from_slice(&data[0..64]); + let v1 = u8x32::load_or_default(&data[64..]).resize::<64>(0); + v0.simd_max(v1).reduce_max() +} +pub unsafe fn max2(data: &[u8]) -> u8 { + if data.len() == 1 { + return data[0]; + } + let start = u8x64::load_or_default(data); + if data.len() > 64 { + let extra = u8x64::load_or_default(&data[64..]); + start.simd_max(extra).reduce_max() + } else { + start.reduce_max() + } +} +#[unsafe(no_mangle)] +#[inline(always)] +// https://stackoverflow.com/a/77709227 +unsafe fn maxi32(v: u8x32) -> u8 { + let v: u16x16 = transmute(v); + //indexes + const even: u16x16 = u16x16::from_array([ + 0xff00, 0xff02, 0xff04, 0xff06, 0xff08, 0xff0a, 0xff0c, 0xff0e, 0xff10, 0xff12, 0xff14, + 0xff16, 0xff18, 0xff1a, 0xff1c, 0xff1e, + ]); + let odd = even + u16x16::splat(1); + let vu16 = (v & u16x16::splat(0xff00u16) ^ odd).simd_min(v << 8 ^ even); + _mm_cvtsi128_si32(_mm_minpos_epu16(transmute( + vu16.extract::<0, 8>().simd_min(vu16.extract::<8, 8>()), + ))) as u8 +} +#[unsafe(no_mangle)] +unsafe fn maxi(data: &[u8]) -> (u8, u8) { + if data.len() > 96 { + let f1 = maxi32(u8x32::from_slice(&data[..32])); + let f2 = 32 + maxi32(u8x32::from_slice(&data[32..64])); + let f3 = 64 + maxi32(u8x32::from_slice(&data[64..96])); + + let Left(a, b) = Left(data.get(98).copied().unwrap_or(0), 98) + .max(Left(data.get(97).copied().unwrap_or(0), 97)) + .max(Left(data.get(96).copied().unwrap_or(0), 96)) + .max(Left(*data.get_unchecked(f3 as usize), f3)) + .max(Left(*data.get_unchecked(f2 as usize), f2)) + .max(Left(*data.get_unchecked(f1 as usize), f1)); + (a, b) + } else if data.len() > 64 { + let start = u8x32::from_slice(&data[..32]); + let extra = u8x32::from_slice(&data[32..64]); + let f1 = maxi32(start); + let f2 = 32 + maxi32(extra); + let f3 = 64 + maxi32(u8x32::load_or_default(&data[64..])); + let Left(a, b) = Left(*data.get_unchecked(f3 as usize), f3) + .max(Left(*data.get_unchecked(f2 as usize), f2)) + .max(Left(*data.get_unchecked(f1 as usize), f1)); + (a, b) + } else if data.len() > 32 { + let start = u8x32::from_slice(&data[..32]); + let extra = u8x32::load_or_default(&data[32..]); + let f1 = maxi32(start); + let f2 = 32 + maxi32(extra); + let Left(a, b) = std::cmp::max( + Left(*data.get_unchecked(f2 as usize), f2), + Left(*data.get_unchecked(f1 as usize), f1), + ); + (a, b) + } else { + let start = u8x32::load_or_default(&data); + let x = maxi32(start); + (*data.get_unchecked(x as usize), x) + } +} #[unsafe(no_mangle)] #[implicit_fn::implicit_fn] pub unsafe fn p1(x: &'static [u8; ISIZE]) -> impl Debug { core::hint::assert_unchecked(x.len() == 200 * 101); - x.chunked::<101>() + x.as_chunks_unchecked::<101>() + .into_iter() + .map(|l| { + let l = l.get_unchecked(..100); + let l_ = l.get_unchecked(..l.len() - 1); + let el = max(unsafe { &*(l_.as_ptr() as *const [u8; 99]) }); + let i = memchr::memchr(el, l_).unwrap_unchecked(); + + let n = (el - b'0') as u64; + let l_ = l.get_unchecked(i as usize + 1..); + let el = max2(l_); + (n * 10) + (el - b'0') as u64 + }) + .sum::<u64>() +} + +#[unsafe(no_mangle)] +pub unsafe fn p2(x: &'static [u8; ISIZE]) -> impl Debug { + core::hint::assert_unchecked(x.len() == 200 * 101); + x.as_chunks_unchecked::<101>() .into_iter() .map(|l| { - let mut l = &l[..100]; - let mut n = 0; - for i in 0..12 { - let l_ = &l[..l.len() + i - (12 - 1)]; - let (el, i) = l_.iter().copied().ι::<usize>().fmax_by_left(); + let mut l = l.get_unchecked(..100); + + let l_ = l.get_unchecked(..l.len() - 11); + let (el, i) = maxi(l_); + let mut n = (el - b'0') as u64; + l = l.get_unchecked(i as usize + 1..); + + for i in 1..11 { + let l_ = l.get_unchecked(..l.len() + i - 11); + let (el, i) = maxi(l_); n *= 10; n += (el - b'0') as u64; - l = &l[i + 1..]; + l = l.get_unchecked(i as usize + 1..); } - n + + let el = max2(l); + n * 10 + (el - b'0') as u64 }) .sum::<u64>() } const ISIZE: usize = include_bytes!("inp.txt").len(); fn main() { - unsafe { println!("{:?}", p1(include_bytes!("inp.txt"))) }; + unsafe { println!("{:?}", p2(include_bytes!("inp.txt"))) }; } #[bench] fn benc(b: &mut test::Bencher) { let i = boxd(include_bytes!("inp.txt")); - b.iter(|| unsafe { p1(i) }); + b.iter(|| unsafe { p2(i) }); } diff --git a/src/util.rs b/src/util.rs index da668b9..e781181 100644 --- a/src/util.rs +++ b/src/util.rs @@ -22,7 +22,7 @@ use std::{ pub mod prelude { pub use super::{ AndF, BoolTools, DigiCount, Dir, FilterBy, FilterBy3, FirstMax, GreekTools, GridFind, - IntoCombinations, IntoLines, IterͶ, MapWith, NumTupleIterTools, PRead, ParseIter, + IntoCombinations, IntoLines, IterͶ, Left, MapWith, NumTupleIterTools, PRead, ParseIter, PartitionByKey, Position, Printable, Skip, Splib, SplitU8, Str, TakeLine, TupleIterTools2, TupleIterTools2R, TupleIterTools3, TupleUtils, TwoWayMapCollect, UnifiedTupleUtils, UnsoundUtilities, Widen, countmap, even, gcd, gt, infinite_successors, l, lcm, lt, nail, @@ -519,7 +519,7 @@ pub fn dijkstra_h<N: Debug + Eq + Hash + Copy + Ord, I: Iterator<Item = (N, u16) dang!() } #[derive(Debug, Clone, Copy)] -pub struct Left<T, U>(T, U); +pub struct Left<T, U>(pub T, pub U); impl<T: Hash, U> Hash for Left<T, U> { fn hash<H: std::hash::Hasher>(&self, state: &mut H) { @@ -1304,8 +1304,8 @@ pub trait Ι<T>: Iterator<Item = T> + Sized { } impl<T, I: Iterator<Item = T>> Ι<T> for I {} -pub fn nail<const N: usize, T: Copy>(x: &[T]) -> [T; N] { - unsafe { (x.as_ptr() as *const [T; N]).read() } +pub fn nail<const N: usize, T: Copy>(x: &[T]) -> &[T; N] { + unsafe { &*(x.as_ptr() as *const [T; N]) } } pub mod reading { |