heh
-rw-r--r--Cargo.toml2
-rw-r--r--src/main.rs136
-rw-r--r--src/util.rs8
3 files changed, 130 insertions, 16 deletions
diff --git a/Cargo.toml b/Cargo.toml
index dfef5c8..cd48940 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -11,7 +11,7 @@ car = "0.1.1"
collar = "1.0.1"
implicit-fn = "0.1.0"
# hinted = "0.0.1"
-itertools = "0.12.0"
+itertools = "0.14.0"
lower = "0.2.0"
lower-macros = "0.2.3"
mattr = "1.0.0"
diff --git a/src/main.rs b/src/main.rs
index e7df108..ee8aab6 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,4 +1,5 @@
#![allow(
+ overflowing_literals,
unexpected_cfgs,
confusable_idents,
uncommon_codepoints,
@@ -55,8 +56,10 @@ use memchr::memmem;
use regex::bytes::Regex;
use rustc_hash::FxBuildHasher;
use std::{
+ arch::x86_64::*,
cmp::{Reverse, minmax},
hash::Hash,
+ hint::assert_unchecked,
mem::take,
ops::{Coroutine, Deref},
pin::Pin,
@@ -66,36 +69,147 @@ use std::{
};
use swizzle::array;
pub use util::prelude::*;
+#[unsafe(no_mangle)]
+pub unsafe fn max(data: &[u8; 99]) -> u8 {
+ let v0 = u8x64::from_slice(&data[0..64]);
+ let v1 = u8x32::from_slice(&data[64..96]).resize::<64>(0);
+ let mut mx = v0.simd_max(v1).reduce_max();
+ for &b in &data[96..] {
+ if b > mx {
+ mx = b;
+ }
+ }
+ mx
+}
-use crate::util::{MapF, UnionFind};
+#[unsafe(no_mangle)]
+pub unsafe fn max89(data: &[u8; 89]) -> u8 {
+ let v0 = u8x64::from_slice(&data[0..64]);
+ let v1 = u8x32::load_or_default(&data[64..]).resize::<64>(0);
+ v0.simd_max(v1).reduce_max()
+}
+pub unsafe fn max2(data: &[u8]) -> u8 {
+ if data.len() == 1 {
+ return data[0];
+ }
+ let start = u8x64::load_or_default(data);
+ if data.len() > 64 {
+ let extra = u8x64::load_or_default(&data[64..]);
+ start.simd_max(extra).reduce_max()
+ } else {
+ start.reduce_max()
+ }
+}
+#[unsafe(no_mangle)]
+#[inline(always)]
+// https://stackoverflow.com/a/77709227
+unsafe fn maxi32(v: u8x32) -> u8 {
+ let v: u16x16 = transmute(v);
+ //indexes
+ const even: u16x16 = u16x16::from_array([
+ 0xff00, 0xff02, 0xff04, 0xff06, 0xff08, 0xff0a, 0xff0c, 0xff0e, 0xff10, 0xff12, 0xff14,
+ 0xff16, 0xff18, 0xff1a, 0xff1c, 0xff1e,
+ ]);
+ let odd = even + u16x16::splat(1);
+ let vu16 = (v & u16x16::splat(0xff00u16) ^ odd).simd_min(v << 8 ^ even);
+ _mm_cvtsi128_si32(_mm_minpos_epu16(transmute(
+ vu16.extract::<0, 8>().simd_min(vu16.extract::<8, 8>()),
+ ))) as u8
+}
+#[unsafe(no_mangle)]
+unsafe fn maxi(data: &[u8]) -> (u8, u8) {
+ if data.len() > 96 {
+ let f1 = maxi32(u8x32::from_slice(&data[..32]));
+ let f2 = 32 + maxi32(u8x32::from_slice(&data[32..64]));
+ let f3 = 64 + maxi32(u8x32::from_slice(&data[64..96]));
+
+ let Left(a, b) = Left(data.get(98).copied().unwrap_or(0), 98)
+ .max(Left(data.get(97).copied().unwrap_or(0), 97))
+ .max(Left(data.get(96).copied().unwrap_or(0), 96))
+ .max(Left(*data.get_unchecked(f3 as usize), f3))
+ .max(Left(*data.get_unchecked(f2 as usize), f2))
+ .max(Left(*data.get_unchecked(f1 as usize), f1));
+ (a, b)
+ } else if data.len() > 64 {
+ let start = u8x32::from_slice(&data[..32]);
+ let extra = u8x32::from_slice(&data[32..64]);
+ let f1 = maxi32(start);
+ let f2 = 32 + maxi32(extra);
+ let f3 = 64 + maxi32(u8x32::load_or_default(&data[64..]));
+ let Left(a, b) = Left(*data.get_unchecked(f3 as usize), f3)
+ .max(Left(*data.get_unchecked(f2 as usize), f2))
+ .max(Left(*data.get_unchecked(f1 as usize), f1));
+ (a, b)
+ } else if data.len() > 32 {
+ let start = u8x32::from_slice(&data[..32]);
+ let extra = u8x32::load_or_default(&data[32..]);
+ let f1 = maxi32(start);
+ let f2 = 32 + maxi32(extra);
+ let Left(a, b) = std::cmp::max(
+ Left(*data.get_unchecked(f2 as usize), f2),
+ Left(*data.get_unchecked(f1 as usize), f1),
+ );
+ (a, b)
+ } else {
+ let start = u8x32::load_or_default(&data);
+ let x = maxi32(start);
+ (*data.get_unchecked(x as usize), x)
+ }
+}
#[unsafe(no_mangle)]
#[implicit_fn::implicit_fn]
pub unsafe fn p1(x: &'static [u8; ISIZE]) -> impl Debug {
core::hint::assert_unchecked(x.len() == 200 * 101);
- x.chunked::<101>()
+ x.as_chunks_unchecked::<101>()
+ .into_iter()
+ .map(|l| {
+ let l = l.get_unchecked(..100);
+ let l_ = l.get_unchecked(..l.len() - 1);
+ let el = max(unsafe { &*(l_.as_ptr() as *const [u8; 99]) });
+ let i = memchr::memchr(el, l_).unwrap_unchecked();
+
+ let n = (el - b'0') as u64;
+ let l_ = l.get_unchecked(i as usize + 1..);
+ let el = max2(l_);
+ (n * 10) + (el - b'0') as u64
+ })
+ .sum::<u64>()
+}
+
+#[unsafe(no_mangle)]
+pub unsafe fn p2(x: &'static [u8; ISIZE]) -> impl Debug {
+ core::hint::assert_unchecked(x.len() == 200 * 101);
+ x.as_chunks_unchecked::<101>()
.into_iter()
.map(|l| {
- let mut l = &l[..100];
- let mut n = 0;
- for i in 0..12 {
- let l_ = &l[..l.len() + i - (12 - 1)];
- let (el, i) = l_.iter().copied().ι::<usize>().fmax_by_left();
+ let mut l = l.get_unchecked(..100);
+
+ let l_ = l.get_unchecked(..l.len() - 11);
+ let (el, i) = maxi(l_);
+ let mut n = (el - b'0') as u64;
+ l = l.get_unchecked(i as usize + 1..);
+
+ for i in 1..11 {
+ let l_ = l.get_unchecked(..l.len() + i - 11);
+ let (el, i) = maxi(l_);
n *= 10;
n += (el - b'0') as u64;
- l = &l[i + 1..];
+ l = l.get_unchecked(i as usize + 1..);
}
- n
+
+ let el = max2(l);
+ n * 10 + (el - b'0') as u64
})
.sum::<u64>()
}
const ISIZE: usize = include_bytes!("inp.txt").len();
fn main() {
- unsafe { println!("{:?}", p1(include_bytes!("inp.txt"))) };
+ unsafe { println!("{:?}", p2(include_bytes!("inp.txt"))) };
}
#[bench]
fn benc(b: &mut test::Bencher) {
let i = boxd(include_bytes!("inp.txt"));
- b.iter(|| unsafe { p1(i) });
+ b.iter(|| unsafe { p2(i) });
}
diff --git a/src/util.rs b/src/util.rs
index da668b9..e781181 100644
--- a/src/util.rs
+++ b/src/util.rs
@@ -22,7 +22,7 @@ use std::{
pub mod prelude {
pub use super::{
AndF, BoolTools, DigiCount, Dir, FilterBy, FilterBy3, FirstMax, GreekTools, GridFind,
- IntoCombinations, IntoLines, IterͶ, MapWith, NumTupleIterTools, PRead, ParseIter,
+ IntoCombinations, IntoLines, IterͶ, Left, MapWith, NumTupleIterTools, PRead, ParseIter,
PartitionByKey, Position, Printable, Skip, Splib, SplitU8, Str, TakeLine, TupleIterTools2,
TupleIterTools2R, TupleIterTools3, TupleUtils, TwoWayMapCollect, UnifiedTupleUtils,
UnsoundUtilities, Widen, countmap, even, gcd, gt, infinite_successors, l, lcm, lt, nail,
@@ -519,7 +519,7 @@ pub fn dijkstra_h<N: Debug + Eq + Hash + Copy + Ord, I: Iterator<Item = (N, u16)
dang!()
}
#[derive(Debug, Clone, Copy)]
-pub struct Left<T, U>(T, U);
+pub struct Left<T, U>(pub T, pub U);
impl<T: Hash, U> Hash for Left<T, U> {
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
@@ -1304,8 +1304,8 @@ pub trait Ι<T>: Iterator<Item = T> + Sized {
}
impl<T, I: Iterator<Item = T>> Ι<T> for I {}
-pub fn nail<const N: usize, T: Copy>(x: &[T]) -> [T; N] {
- unsafe { (x.as_ptr() as *const [T; N]).read() }
+pub fn nail<const N: usize, T: Copy>(x: &[T]) -> &[T; N] {
+ unsafe { &*(x.as_ptr() as *const [T; N]) }
}
pub mod reading {