checkpoint
| -rw-r--r-- | Cargo.lock | 235 | ||||
| -rw-r--r-- | Cargo.toml | 8 | ||||
| -rw-r--r-- | src/lexer.rs | 80 | ||||
| -rw-r--r-- | src/main.rs | 9 | ||||
| -rw-r--r-- | src/parser.rs | 77 | ||||
| -rw-r--r-- | src/parser/fun.rs | 151 | ||||
| -rw-r--r-- | src/parser/types.rs | 68 | ||||
| -rw-r--r-- | src/parser/util.rs | 2 | ||||
| -rw-r--r-- | src/ui.rs | 74 | ||||
| -rw-r--r-- | stackd | 43 |
10 files changed, 509 insertions, 238 deletions
@@ -1,17 +1,14 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. -version = 3 +version = 4 [[package]] -name = "ahash" -version = "0.8.7" +name = "aho-corasick" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77c3a9648d43b9cd48db467b3f87fdd6e146bcc88ab0180006cef2179fe11d01" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" dependencies = [ - "cfg-if", - "once_cell", - "version_check", - "zerocopy", + "memchr", ] [[package]] @@ -53,18 +50,22 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1" [[package]] -name = "cfg-if" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" - -[[package]] name = "chumsky" -version = "1.0.0-alpha.6" -source = "git+https://github.com/zesterer/chumsky#8b8cf0a04b157df30799d4f385ddedc1dca85014" +version = "1.0.0-alpha.8" +source = "git+https://github.com/zesterer/chumsky#d12869af1701647e0f0034aef1cce14fa2e170be" dependencies = [ "hashbrown", "unicode-ident", + "unicode-segmentation", +] + +[[package]] +name = "codespan-reporting" +version = "0.11.1" +source = "git+https://github.com/brendanzab/codespan#4d7189e36bc2e6f6093a474395e8f7c7607d9a4b" +dependencies = [ + "termcolor", + "unicode-width", ] [[package]] @@ -79,19 +80,47 @@ dependencies = [ ] [[package]] +name = "either" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] name = "fnv" version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" [[package]] +name = "foldhash" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0d2fde1f7b3d48b8395d5f2de76c18a528bd6a9cdde438df747bfcba3e05d6f" + +[[package]] name = "hashbrown" -version = "0.14.3" +version = "0.15.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" +checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" dependencies = [ - "ahash", "allocator-api2", + "equivalent", + "foldhash", +] + +[[package]] +name = "itertools" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", ] [[package]] @@ -100,15 +129,24 @@ version = "0.1.0" dependencies = [ "beef", "chumsky", + "codespan-reporting", "comat", + "itertools", "lerr", "logos", "match_deref", "paste", + "regex", "tinyvec", ] [[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] name = "lerr" version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -121,32 +159,34 @@ dependencies = [ [[package]] name = "logos" -version = "0.13.0" +version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c000ca4d908ff18ac99b93a062cb8958d331c3220719c52e77cb19cc6ac5d2c1" +checksum = "ab6f536c1af4c7cc81edf73da1f8029896e7e1e16a219ef09b184e76a296f3db" dependencies = [ "logos-derive", ] [[package]] name = "logos-codegen" -version = "0.13.0" +version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc487311295e0002e452025d6b580b77bb17286de87b57138f3b5db711cded68" +checksum = "189bbfd0b61330abea797e5e9276408f2edbe4f822d7ad08685d67419aafb34e" dependencies = [ "beef", "fnv", + "lazy_static", "proc-macro2", "quote", "regex-syntax", + "rustc_version", "syn 2.0.48", ] [[package]] name = "logos-derive" -version = "0.13.0" +version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbfc0d229f1f42d790440136d941afd806bc9e949e2bcb8faa813b0f00d1267e" +checksum = "ebfe8e1a19049ddbfccbd14ac834b215e11b85b90bab0c2dba7c7b92fb5d5cba" dependencies = [ "logos-codegen", ] @@ -163,10 +203,10 @@ dependencies = [ ] [[package]] -name = "once_cell" -version = "1.19.0" +name = "memchr" +version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" [[package]] name = "paste" @@ -193,10 +233,48 @@ dependencies = [ ] [[package]] +name = "regex" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] name = "regex-syntax" -version = "0.6.29" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" + +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver", +] + +[[package]] +name = "semver" +version = "1.0.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" +checksum = "f79dfe2d285b0488816f30e700a7438c5a73d816b5b7d3ac72fbc48b0d185e03" [[package]] name = "syn" @@ -221,6 +299,15 @@ dependencies = [ ] [[package]] +name = "termcolor" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" +dependencies = [ + "winapi-util", +] + +[[package]] name = "tinyvec" version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -242,6 +329,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" [[package]] +name = "unicode-segmentation" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" + +[[package]] name = "unicode-width" version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -254,27 +347,83 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" [[package]] -name = "version_check" -version = "0.9.4" +name = "winapi-util" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" +dependencies = [ + "windows-sys", +] [[package]] -name = "zerocopy" -version = "0.7.32" +name = "windows-sys" +version = "0.59.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" dependencies = [ - "zerocopy-derive", + "windows-targets", ] [[package]] -name = "zerocopy-derive" -version = "0.7.32" +name = "windows-targets" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.48", + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", ] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" @@ -6,14 +6,16 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -chumsky = { git = "https://github.com/zesterer/chumsky", version = "1.0.0-alpha.6", features = [ - "label", +chumsky = { git = "https://github.com/zesterer/chumsky", version = "1.0.0-alpha.8", features = [ "nightly", ], default-features = false } beef = "0.5.2" -logos = "0.13.0" +logos = "0.15.0" tinyvec = { version = "1.6.0", features = ["alloc"] } comat = "0.1.3" lerr = "0.1.5" match_deref = "0.1.1" paste = "1.0.14" +regex = "1.11.1" +itertools = "0.14.0" +codespan-reporting = { git = "https://github.com/brendanzab/codespan", version = "0.11.1" } diff --git a/src/lexer.rs b/src/lexer.rs index d4d1476..e3943d4 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -1,7 +1,9 @@ use beef::lean::Cow; -use chumsky::span::SimpleSpan; +use chumsky::span::{SimpleSpan, Span}; use logos::{Lexer as RealLexer, Logos, SpannedIter}; - +use regex::Regex; +use std::sync::LazyLock; +static EMOJI: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"[\p{Emoji}&&[^0-9]]").unwrap()); macro_rules! tokens { ($($z:literal $( | $y:literal)? => $v:ident,)+) => { #[derive(Logos, Debug, PartialEq, Clone)] @@ -22,7 +24,12 @@ macro_rules! tokens { #[regex(r"'.'", |lex| lex.slice().as_bytes()[1] as char)] Char(char), // todo ignore alot - #[regex(r"[^\s\(\)\[\]\{\}0-9λ'\-←→=≢≡🐢🦆✊🪣🔓🐘🍴🐈↖⤵️☎️🔭+×*√≠<≤>≥⏪⏩\-¯∧∨⊻÷%🔎🚧⬅➡⏭️➡️↘️🐋🐳][^\(\)\[\]\{\}λ←→=≢≡'🐢🐘🍴✊🐈↖⤵️+🪣×🔓*√🦆≠<≤>≥☎️🔭⏪⏩¯∧∨⊻÷%🔎🚧⬅➡⏭️➡️↘️🐋🐳\s]*", priority = 7)] + #[regex(r"[^\s\(\)\[\]\{\}⎬0-9λ'\-←→=≡+×\|*√<\-¯∧∨⊻÷%]", priority = 7, callback = |lex| { + EMOJI.is_match(lex.slice()) + .then_some(logos::Filter::Skip) + .unwrap_or(logos::Filter::Emit(lex.slice())) + })] + #[regex(r"'[^']+'", priority = 8, callback = |lex| &lex.slice()[1..lex.slice().len() - 1])] Ident(&'strings str), #[token("[", chr::<'['>)] #[token("(", chr::<'('>)] @@ -34,12 +41,15 @@ macro_rules! tokens { ClosingBracket(char), $(#[token($z, priority = 8)] $(#[token($y, priority = 8)])? $v,)+ + + Unknown, } impl std::fmt::Display for Token<'_> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> { match self { $(Self::$v => write!(f, $z),)+ + Self::Unknown => write!(f, "unknown"), Self::Char(x) => write!(f, "'{x}'"), Self::String(s) => write!(f, "{s}"), Self::Float(n) => write!(f, "{n}"), @@ -55,47 +65,52 @@ macro_rules! tokens { tokens! { "λ" => Lambda, - "←" => Place, - "→" => Ret, + "←" => Ret, + "⎬" => Array, + "→" => Place, "≡" => Eq, - "🐢" => Dup, - "🐘" => Both, - "🍴" => Fork, - "🪣" => Gap, - "✊" => Hold, - "🐈" => Flip, - "🦆" => Duck, - "↖" => Reverse, + "^" => Dup, + // "🐘" => Both, + "&" => And, + "|" => Both, + "🔀" => Flip, "⤵️" => Zap, + + "⬇" => With, + "⬆" => Merge, + "⏫" => Range, "+" => Add, "-" => Sub, "×" => Mul, "*" => Pow, "√" => Sqrt, - "≢" => Ne, "<" => Lt, - "≤" => Le, ">" => Gt, + "≤" => Le, "≥" => Ge, + "🪪" => Type, + "📏" => Length, + "👩👩👧👧" => Group, "⏪" => Shl, "⏩" => Shr, "¯" => Neg, - "∧" => And, + "📶" => Sort, + "∧" => BitAnd, "∨" => Or, - "⊻" => Xor, + "⊕" => Xor, "÷" => Div, "%" => Mod, - "🔓" => Keep, + "🔓" => Mask, + "🔒" => Index, "🚧" => Split, "⬅" => First, "➡" => Last, - "⏭️" => Each, - "➡️" => Reduce, - "↘️" => ReduceStack, - "⬆️" => Range, + "↘️" => Reduce, + "🗺" => Map, "🐋" => If, - "🐳" => Else, - "☎️" => Call, + "🐬" => EagerIf, + "🇳🇿" => Zip, + "." => Call, } @@ -116,10 +131,11 @@ impl<'s> Iterator for Lexer<'s> { type Item = (Token<'s>, SimpleSpan<usize>); fn next(&mut self) -> Option<Self::Item> { - self.inner.find_map(|(x, s)| match x.ok()? { - Token::Comment(_) => None, - x => Some((x, SimpleSpan::new(s.start, s.end))), - }) + self.inner + .find_map(|(x, s)| match x.unwrap_or(Token::Unknown) { + Token::Comment(_) => None, + x => Some((x, SimpleSpan::new((), s))), + }) } } @@ -135,13 +151,6 @@ fn lexer() { 10×+ ) - 🐢≠'\n'🚧 - / run function on all values, pushing to the stack / - ⏭️line - / reduce the stack / - ↘️+ - - true 🐋 (+ 🐳 -) / if true { + } else { - } /"#); // while let Some((x, _)) = lex.next() { // print!("{x} "); @@ -152,5 +161,4 @@ fn lexer() { assert_eq!(lex.next(), None); }} } - test! [String("1abc25hriwm4") Ident("line") Place Lambda OpeningBracket('(') Char('0') Gt Keep Char('9') Lt Keep Char('9') Sub Both First Last Int(10u64) Mul Add ClosingBracket(')') Dup Ne Ident("\\n") Split Each Ident("line") ReduceStack Add Ident("true") If OpeningBracket('(') Add Else Sub ClosingBracket(')') ] } diff --git a/src/main.rs b/src/main.rs index 4499d9b..2efcf49 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,8 +1,13 @@ -#![feature(iter_intersperse)] +#![feature(iter_intersperse, formatting_options)] + +use parser::types::Ast; mod array; mod lexer; mod parser; mod ui; fn main() { - println!("Hello, world!"); + parser::parse_s( + &std::fs::read_to_string(std::env::args().nth(1).unwrap()).unwrap(), + Ast::parse(), + ); } diff --git a/src/parser.rs b/src/parser.rs index e90f11e..b1cf87d 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,10 +1,6 @@ pub mod types; use crate::lexer::{Lexer, Token}; -use chumsky::{ - input::{SpannedInput, Stream}, - prelude::*, - Parser, -}; +use chumsky::{input::Stream, prelude::*, Parser}; pub mod fun; pub mod util; use types::*; @@ -15,6 +11,7 @@ use self::fun::Function; impl<'s> Value<'s> { pub fn parse() -> parser![Self] { select! { + Token::Char(x) => Value::Int(x as _), Token::Int(x) => Value::Int(x), Token::Float(x) => Value::Float(x), Token::String(s) => Value::String(s), @@ -26,80 +23,48 @@ impl<'s> Value<'s> { impl<'s> Expr<'s> { pub fn parse() -> parser![Self] { recursive::<_, Expr, _, _, _>(|expr| { - let inline_expr = recursive(|inline_expr| { - let val = select! { - Token::Int(x) => Expr::Value(Value::Int(x)), - Token::Float(x) => Expr::Value(Value::Float(x)), - Token::String(s) => Expr::Value(Value::String(s)), - } - .labelled("value"); - - choice((t![ident].map(Expr::Ident), val)).boxed() - }); + let inline_expr = Value::parse().map(Expr::Value); let λ = Λ::parse(expr.clone()); - let decl = t![ident] - .then_ignore(t![<-]) - .then(inline_expr.clone().or(λ.clone().map(Expr::Lambda))) - .map(|(name, body)| Expr::Let { - name, - rhs: Box::new(body), - }) - .labelled("declare") - .boxed(); - - let r#if = t![if] - .ignore_then( - expr.clone() - .then(t![else].or_not().ignore_then(expr.clone().or_not())) - .delimited_by(t!['('], t![')']), - ) - .map(|(a, b)| Expr::If { - then: Box::new(a), - or: Box::new(b.unwrap_or_else(|| Expr::Value(Value::Unit))), - }) - .labelled("🐋") - .boxed(); choice(( - decl, - r#if, inline_expr, - λ.clone().map(Expr::Lambda), - Function::parse(λ).map(Expr::Function), + Function::parse(λ.clone()).map(Expr::Function), + λ.map(Expr::Lambda), )) .labelled("expr") }) } } +impl<'s> Ast<'s> { + pub fn parse() -> parser![Self] { + Expr::parse().repeated().collect().map(Ast::Module) + } +} #[test] fn parse_expr() { - parse_s("a ← λ ( +- 🍴 )", Expr::parse()); - // dbg!(Expr::parse().parse(code("a ← λ ( +- 🍴 )")).unwrap()); + // parse_s("a ← λ ( +-🍴 )", Expr::parse()); + let src = r#"⏫⏫⏫ + "#; + println!( + "{:?}", + crate::lexer::lex(src).map(|x| x.0).collect::<Vec<_>>() + ); + parse_s(src, Ast::parse()); } -pub fn stream(lexer: Lexer<'_>, len: usize) -> SpannedInput<Token<'_>, Span, Stream<Lexer<'_>>> { - Stream::from_iter(lexer).spanned((len..len).into()) +pub fn stream(lexer: Lexer<'_>, len: usize) -> types::Input<'_> { + Stream::from_iter(lexer).map(SimpleSpan::new((), len..len), |x| x) } -#[cfg(test)] -pub fn code<'s>(x: &'s str) -> SpannedInput<Token<'s>, Span, Stream<Lexer<'s>>> { +pub fn code<'s>(x: &'s str) -> types::Input<'s> { stream(crate::lexer::lex(x), x.len()) } -#[cfg(test)] pub fn parse_s<'s, T: std::fmt::Debug>(x: &'s str, p: parser![T]) -> T { match crate::ui::display(p.parse(code(x)).into_result(), x) { Ok(x) => dbg!(x), Err(()) => panic!(), } } - -pub fn parse(tokens: Lexer<'_>, len: usize) -> Result<Ast<'_>, Vec<Error<'_>>> { - parser().parse(stream(tokens, len)).into_result() -} - -fn parser<'s>() -> parser![Ast<'s>] { - Expr::parse().repeated().collect().map(Ast::Module) -} diff --git a/src/parser/fun.rs b/src/parser/fun.rs index 3a4c01c..a763a81 100644 --- a/src/parser/fun.rs +++ b/src/parser/fun.rs @@ -4,55 +4,67 @@ use crate::lexer::Token; use chumsky::{prelude::*, Parser}; #[derive(Debug, Clone)] +enum NumberΛ<'s> { + Number(u64), + Λ(Λ<'s>), +} + +#[derive(Debug, Clone)] pub enum Function<'s> { + Both(Λ<'s>), + And(Λ<'s>, Λ<'s>), + If { then: Λ<'s>, or: Λ<'s> }, + Array(Option<NumberΛ<'s>>), + Map(Λ<'s>), Dup, - Both(Λ<'s>, Λ<'s>), - Fork(Λ<'s>, Λ<'s>), - Gap(Λ<'s>), - Hold(Λ<'s>), Flip, - Duck(Λ<'s>), + Eq, Reverse, Zap, Add, Sub, + Not, Mul, Pow, + Type, + Merge, Sqrt, - Ne, Lt, - Le, Gt, Ge, + Le, Shl, Shr, Neg, - And, + BitAnd, + Length, Or, Xor, Div, Mod, - Keep, + Index, + Mask, + Group(Λ<'s>), Split, First, Last, - Each(Λ<'s>), Reduce(Λ<'s>), - ReduceStack(Λ<'s>), Range, + With, Call, + Sort, + Zip, + Ident(&'s str), + Define(&'s str), } impl<'s> Λ<'s> { pub fn parse(exp: parser![Expr<'s>]) -> parser![Self] { - let mut λ = Recursive::declare(); - λ.define(choice(( - t![λ] - .ignore_then(exp.repeated().collect().delimited_by(t!['('], t![')'])) - .map(|x| Self(x)), - Function::parse(λ.clone()).map(|x| Λ(vec![Expr::Function(x)])), - ))); - λ.labelled("λ") + exp.repeated() + .collect() + .delimited_by(t!['('], t![')']) + .map(|x| Self(x)) + .labelled("lambda") } } @@ -62,43 +74,106 @@ impl<'s> Function<'s> { let basic = select! { Token::Dup => Dup, Token::Flip => Flip, - Token::Reverse => Reverse, + // Token::Reverse => Reverse, Token::Zap => Zap, Token::Add => Add, Token::Sub => Sub, Token::Mul => Mul, Token::Pow => Pow, Token::Sqrt => Sqrt, - Token::Ne => Ne, Token::Lt => Lt, - Token::Le => Le, - Token::Gt => Gt, - Token::Ge => Ge, + Token::Index => Index, + Token::Merge => Merge, Token::Shl => Shl, Token::Shr => Shr, Token::Neg => Neg, - Token::And => And, + Token::Eq => Eq, + Token::Gt => Gt, + Token::Ge => Ge, + Token::Length => Length, + Token::Range => Range, + Token::Le => Le, + Token::BitAnd => BitAnd, Token::Or => Or, Token::Xor => Xor, + Token::Sort => Sort, + Token::Zip => Zip, Token::Div => Div, Token::Mod => Mod, - Token::Keep => Keep, + Token::Mask => Mask, + Token::With => With, Token::Split => Split, Token::First => First, + Token::Type => Type, Token::Last => Last, - }; + Token::Ident(x) => Ident(x), + } + .labelled("token"); + + let fn_param = choice(( + basic + .map(|x| Λ(vec![Expr::Function(x)])) + .labelled("function"), + λ.clone(), + )) + .labelled("operand"); + + macro_rules! one { + ($name:ident) => { + fn_param + .clone() + .then_ignore(just(Token::$name)) + .map($name) + .labelled(stringify!($name)) + }; + } macro_rules! two { - ($name:ident) => {{ - let mut p = Recursive::declare(); - p.define( - λ.clone() - .then(λ.clone()) - .then_ignore(just(Token::$name)) - .map(|(a, b)| $name(a, b)), - ); - p - }}; + ($name:ident) => { + fn_param + .clone() + .then(fn_param.clone()) + .then_ignore(just(Token::$name)) + .map(|(a, b)| $name(a, b)) + .labelled(stringify!($name)) + }; } - choice((basic, two![Both], two![Fork])) + choice(( + two![And], + one![Both], + one![Reduce], + one![Map], + λ.clone().then_ignore(just(Token::Group)).map(Group), + just(Token::Array) + .ignore_then( + fn_param + .clone() + .map(NumberΛ::Λ) + .or(select! { Token::Int(x) => NumberΛ::Number(x)}), + ) + .map(Some) + .map(Array) + .labelled("array") + .boxed(), + fn_param + .clone() + .then(fn_param.clone()) + .then_ignore(just(Token::If)) + .map(|(then, or)| If { then, or }) + .labelled("if-else") + .boxed(), + fn_param + .clone() + .then_ignore(just(Token::EagerIf).labelled("if")) + .map(|then| If { + then, + or: Λ::default(), + }) + .labelled("if") + .boxed(), + t![->].ignore_then(t![ident]).map(Define).labelled("def"), + basic, + )) + .boxed() + .labelled("function") } } diff --git a/src/parser/types.rs b/src/parser/types.rs index e050890..b9197e6 100644 --- a/src/parser/types.rs +++ b/src/parser/types.rs @@ -1,22 +1,49 @@ -use std::ops::Deref; +use std::{ + fmt::{Debug, FormattingOptions}, + ops::Deref, +}; use crate::lexer::Token; use beef::lean::Cow; use chumsky::{ - input::{SpannedInput, Stream}, + input::{MappedInput, Stream}, prelude::*, }; -use match_deref::match_deref; pub type Span = SimpleSpan<usize>; pub type Error<'s> = Rich<'s, Token<'s>, Span>; -pub type Input<'s> = SpannedInput<Token<'s>, SimpleSpan, Stream<crate::lexer::Lexer<'s>>>; - +pub type Input<'s> = MappedInput< + Token<'s>, + Span, + Stream<crate::lexer::Lexer<'s>>, + fn((Token<'_>, SimpleSpan)) -> (Token<'_>, SimpleSpan), +>; + +#[derive(Debug)] pub enum Ast<'s> { Module(Vec<Expr<'s>>), } -#[derive(Clone, Debug)] +#[derive(Clone, Default)] pub struct Λ<'s>(pub Vec<Expr<'s>>); +impl std::fmt::Debug for Λ<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match &*self.0 { + [] => write!(f, "λ()"), + [a] => f.write_fmt(format_args!("λ({a:?})")), + x => { + if x.len() < 5 { + f.write_fmt(format_args!("λ({x:?})")) + } else { + write!(f, "λ")?; + f.with_options(*FormattingOptions::new().alternate(true)) + .debug_list() + .entries(&self.0) + .finish() + } + } + } + } +} #[derive(Clone)] pub enum Value<'s> { @@ -29,29 +56,34 @@ pub enum Value<'s> { impl std::fmt::Debug for Value<'_> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - Self::Float(x) => write!(f, "{x}f"), - Self::Int(x) => write!(f, "{x}i"), + Self::Float(x) => write!(f, "{x}"), + Self::Int(x) => write!(f, "{x}"), Self::String(x) => write!(f, "\"{x}\""), Self::Unit => write!(f, "()"), } } } -#[derive(Clone, Debug)] +impl std::fmt::Debug for Expr<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::NoOp => write!(f, "nop"), + Self::Function(x) => x.fmt(f), + Self::Value(x) => x.fmt(f), + Self::Ident(x) => x.fmt(f), + Self::Lambda(x) => x.fmt(f), + Self::Let { name, rhs } => write!(f, "let({rhs:?} -> {name})"), + } + } +} +#[derive(Clone)] pub enum Expr<'s> { NoOp, Function(super::fun::Function<'s>), Value(Value<'s>), Ident(&'s str), Lambda(Λ<'s>), - Let { - name: &'s str, - rhs: Box<Expr<'s>>, - }, - If { - then: Box<Expr<'s>>, - or: Box<Expr<'s>>, - }, + Let { name: &'s str, rhs: Box<Expr<'s>> }, } #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] @@ -79,7 +111,7 @@ impl<T> Spanned<T> { pub fn dummy(inner: T) -> Spanned<T> { Spanned { inner, - span: SimpleSpan::new(0, 0), + span: SimpleSpan::new((), 0..0), } } diff --git a/src/parser/util.rs b/src/parser/util.rs index 42ff5d2..d01d405 100644 --- a/src/parser/util.rs +++ b/src/parser/util.rs @@ -26,7 +26,7 @@ macro_rules! t { just(Token::Colon) }; (->) => { - just(Token::ThinArrow) + just(Token::Place) }; (()) => { just(Token::Call) @@ -1,6 +1,13 @@ use crate::parser::types::Error; use chumsky::{error::RichReason, prelude::*}; +use codespan_reporting::diagnostic::LabelStyle::{Primary, Secondary}; +use codespan_reporting::diagnostic::{Diagnostic, Label}; +use codespan_reporting::files::SimpleFiles; +use codespan_reporting::term::termcolor::{ColorChoice, StandardStream}; + +use codespan_reporting::term::Chars; use comat::cformat as cmt; +use itertools::Itertools; pub fn display<T>(result: Result<T, Vec<Error>>, code: &str) -> Result<T, ()> { let e = match result { @@ -8,35 +15,60 @@ pub fn display<T>(result: Result<T, Vec<Error>>, code: &str) -> Result<T, ()> { Err(e) => e, }; + let mut files = SimpleFiles::new(); + files.add("x.kale", code); + for e in e.into_iter().map(|e| e.map_token(|c| c.to_string())) { - let mut o = lerr::Error::new(code); - o.label((e.span().into_range(), "here")); + let mut d = Diagnostic::<usize>::new(codespan_reporting::diagnostic::Severity::Error); + // let mut o = lerr::Error::new(code); + d = d.with_label(Label { + style: Primary, + file_id: 0, + range: e.span().into_range(), + message: "here".into(), + }); + // o.label((e.span().into_range(), "here")); match e.reason() { RichReason::Custom(x) => { - o.message(cmt!("{red}error{reset}: {x}")); + d = d.with_message(cmt!("{red}error{reset}: {x}")); + // o.message(cmt!("{red}error{reset}: {x}")); } RichReason::ExpectedFound { .. } => { - o.message(cmt!("{red}error{reset}: {e}")); - } - RichReason::Many(x) => { - match &x[..] { - [x, rest @ ..] => { - o.message(cmt!("{red}error{reset}: {x}")); - for elem in rest { - o.note(cmt!("{yellow}also{reset}: {elem}")); - } - } - _ => unreachable!(), - }; - } + d = d.with_message(format!("{e}")); + // o.message(cmt!("{red}error{reset}: {e}")); + } // RichReason::Many(x) => { + // match &x[..] { + // [x, rest @ ..] => { + // o.message(cmt!("{red}error{reset}: {x}")); + // for elem in rest { + // o.note(cmt!("{yellow}also{reset}: {elem}")); + // } + // } + // _ => unreachable!(), + // }; + // } } + dbg!(e.contexts().collect::<Vec<_>>()); + for (l, span) in e.contexts() { - o.label(( - span.into_range(), - cmt!("{yellow}while parsing this{reset}: {l}"), - )); + d = d.with_label(Label { + style: Secondary, + file_id: 0, + range: span.into_range(), + message: cmt!("{yellow}while parsing this{reset}: {l}"), + }) + // o.label(( + // span.into_range(), + // cmt!("{yellow}while parsing this{reset}: {l}"), + // )); } - eprintln!("{o}"); + // eprintln!("{o}"); + + let writer = StandardStream::stderr(ColorChoice::Always); + let mut config = codespan_reporting::term::Config::default(); + config.chars = Chars::box_drawing(); + + codespan_reporting::term::emit(&mut writer.lock(), &config, &files, &d).unwrap(); } Err(()) } @@ -1,26 +1,29 @@ "1abc25hriwm4" -// usage: 0 🐢 λ(<5) λ(1+) 🐬☎️ -🐬 ← λ ( - / evaluate the condition / - λ(🐢🪣☎️)✊✊ - 🐋 ( - / evaluate the body / - 🐢☎️ - / recurse / - 🐬☎️ - ) -) -// { str → int } -line ← λ ( - '0'>🔓'9'<🔓 - '9'- - ⬅➡🍴 - 10×+ -) +/ { str → int } / +( + (('0'≥)🗺) + (('9'≤)🗺)&∧ + 🔓0 1¯🔒 + ⬇10×+⬆ +) → l + +(^0<¯🐬) → ⟐ + +/ [x] -> [x] int / +(^(1)🗺+↘️) → '📏' -🐢≢'\n'🚧 / run function on all values, pushing to the stack / ⏭️line / reduce the stack / -↘️+ ++↘️ + +("\n"≢)👩👩👧👧 l🗺 + +(📶|🇳🇿(-⟐)🗺+↘️) → a +( + 0 1 🔒 < → s + 2🪟 (s-(🔀-)🐋 (≥1)(≤3)&s∧!)🗺+↘️0≡ +) → 'd2p1' + +(📏1+⏫( / array, int / ^🇽 'd2p1')🗺+↘️0≢) → 'd2p2' |