smol lang
basic parsing
| -rw-r--r-- | .gitignore | 3 |
| -rw-r--r-- | Cargo.toml | 13 |
| -rw-r--r-- | draft.kl | 109 |
| -rw-r--r-- | src/lexer.rs | 271 |
| -rw-r--r-- | src/main.rs | 52 |
| -rw-r--r-- | src/parser.rs | 339 |
| -rw-r--r-- | src/parser/types.rs | 183 |
| -rw-r--r-- | src/parser/util.rs | 124 |
8 files changed, 1094 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d292c90 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +/target +Cargo.lock +*.kl diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..845a3d4 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "klunk" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +beef = "0.5.2" +chumsky = { git = "https://github.com/zesterer/chumsky", version = "1.0.0-alpha.6", features = ["label", "nightly"] } +comat = "0.1.3" +lerr = "0.1.5" +logos = "0.13.0" diff --git a/draft.kl b/draft.kl new file mode 100644 index 0000000..2f3b5c7 --- /dev/null +++ b/draft.kl @@ -0,0 +1,109 @@ +sint: typeset = { i8, i16, i32, i64 } +uint: typeset = { u8, u16, u32, u64 } +int: typeset = sint ∪ uint; +float: typeset = { f32, f64 }; +numeric: typeset = integer ∪ float; + +mod intrinsics { + eq ((a: T, b: T) -> bool) { compiler_defined } [prefix, infix] + and ((a: T, b: T) -> T) { compiler_defined } [prefix, infix, T ∈ int] + or ((a: T, b: T) -> T) { compiler_defined } [prefix, infix, T ∈ int] + // .. 
+} + +enum bool { true, false } + +each (x: set[T] -> for[T]) { idk } [prefix] +each (x: typeset[T] -> for[T]) { idk } [prefix] +each (x: slice[T] -> for[T]) { idk } [prefix] +each (x: vec[T] -> for[T]) { idk } [prefix] +each (x: array[T, N] -> for[T]) { idk } [prefix, N ∈ ℤ] + +¬ (x: T -> bool) [alias { !, not }, prefix { tighter_than all }] +- (x: T -> T) [alias neg, prefix { like ¬ }] + +× ((a: T, b: T) -> T) [alias { *, ⋅, mul }, infix { associativity <, looser_than ¬ }] +÷ ((a: T, b: T) -> T) [alias { /, div }, infix { like × }] +rem ((a: T, b: T) -> T) [infix { like × }] +mod ((a: T, b: T) -> T) [infix { like × }] +∣ ((a: T, b: T) -> T) { a mod b == 0 } [infix { like × }] +∤ ((a: T, b: T) -> T) { ¬(a ∣ b) } [infix { like × }] + ++ ((a: T, b: T) -> T) [alias add, infix { associativity <, looser_than × }] +- ((a: T, b: T) -> T) [alias { −, sub }, infix { like + }] + +« ((a: T, b: T) -> T) [alias { <<, shl }, infix { associativity <, looser_than + }] +» ((a: T, b: T) -> T) [alias { >>, shr }, infix { like « }] + +∧ ((a: bool, b: bool) -> bool) { a & b } [infix { associativity <, looser_than « }] +& ((a: T, b: T) -> T) [alias and, infix { like ∧ }] +impl bool { + ∧ ((a, b: λ(() -> me)) -> me) { + match a { + true => b (), + false => false, + } + } +} + +^ ((a: T, b: T) -> T) [alias xor, infix { associativity <, looser_than & }] + +∨ ((a: bool, b: bool) -> bool) { a | b } [infix { associativity <, looser_than ^}] +impl bool { + ∨ ((a, b: λ(() -> me)) -> me) { + match a { + true => true, + false => b (), + } + } +} +| ((a: T, b: T) -> T) [alias or, infix { like ∨ }] + +≡ ((a: T, b: T) -> bool) [alias { ==, eq }, infix { associativity none, looser_than | }] +≢ ((a: T, b: T) -> bool) { ¬(a ≡ b) } [alias { ≠, !=, ne }, infix { like ≡ }] +> ((a: T, b: T) -> bool) [alias { gt }, infix { like ≡ }] +< ((a: T, b: T) -> bool) [alias { lt }, infix { like ≡ }] +≤ ((a: T, b: T) -> bool) [alias { <=, ≯, le }, infix { like ≡ }] +≥ ((a: T, b: T) -> bool) [alias { >=, ≮, ge }, 
infix { like ≡ }] + +impl each int { + ¬ (a -> me) { intrinsics::complement a } + - (a -> me) { intrinsics::negate a } + × ((a, b) -> me) { a intrinsics::mul b } + ÷ ((a, b) -> me) { a intrinsics::div b } + rem ((a, b) -> me) { a intrinsics::rem b } + .. + ≡ ((a, b) -> bool) { a intrinsics::eq b } + & ((a, b) -> me) { a intrinsics::bitand b } +} + +impl each uint { + mod ((a, b) -> me) rem +} + +impl each sint { + abs ((x) -> me) { + match x { + ..0 => x × -1, + 0.. => x, + } + } + mod ((a, b) -> me) { + match a % b { + x @ ..0 => x + abs x, + x @ 0.. => x, + } + } +} + +mod iterators { + next (me => T) [postfix] + map ((i: T, fn: λ (T -> U)) -> mapper[T, U]) { mapper { inner: i, fn } } [postfix] + + struct mapper { + inner: T, + fn: λ (T -> U) + } + + next (me { inner, fn }: mapper[T] -> U) { fn (inner next) } +} diff --git a/src/lexer.rs b/src/lexer.rs new file mode 100644 index 0000000..a3f8c60 --- /dev/null +++ b/src/lexer.rs @@ -0,0 +1,271 @@ +use beef::lean::Cow; +use chumsky::span::SimpleSpan; +use logos::{Lexer as RealLexer, Logos, SpannedIter}; + +macro_rules! tokens { + ($($z:literal $( | $y:literal)? 
=> $v:ident,)+) => { + #[derive(Logos, Debug, PartialEq, Clone)] + #[logos(skip r"[\n\s]+")] + pub enum Token<'strings> { + #[regex("//[^\n]+", priority = 8)] + // #[regex(r"/\*[\s\S]+\*/", priority = 8)] + Comment(&'strings str), + #[regex(r"[0-9]+", |lex| lex.slice().parse().ok())] + #[regex(r"0[xX][0-9a-fA-F]+", |lex| u64::from_str_radix(&lex.slice()[2..], 16).ok())] + #[regex(r"0[bB][01]+", |lex| u64::from_str_radix(&lex.slice()[2..], 2).ok())] + Int(u64), + #[regex(r"[0-9]+\.[0-9]+", |lex| lex.slice().parse().ok())] + Float(f64), + #[regex(r#""([^\\"\n])*""#, callback = |lex| Cow::from(&lex.slice()[1..lex.slice().len()-1]), priority = 12)] + #[regex(r#""[^"]*""#, callback = |lex| Cow::from(lex.slice()[1..lex.slice().len()-1].replace(r"\n", "\n")), priority = 8)] + String(Cow<'strings, str>), + #[regex(r"[a-z_α-ωA-Z]['̇A-Za-z0-9_α-ω]*", priority = 7)] + Ident(&'strings str), + #[regex(r"[^\{\[\(\)\]\}:λ0-9,\?\s][^'̇\?\{\[\(\)\]\},:\s]*", priority = 6)] + FnIdent(&'strings str), + + #[token("{", chr::<'{'>)] + #[token("[", chr::<'['>)] + #[token("(", chr::<'('>)] + OpeningBracket(char), + #[token("}", chr::<'}'>)] + #[token("]", chr::<']'>)] + #[token(")", chr::<')'>)] + ClosingBracket(char), + + $(#[token($z, priority = 8)] $(#[token($y, priority = 8)])? $v,)+ + } + + impl std::fmt::Display for Token<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> { + match self { + $(Self::$v => write!(f, $z),)+ + Self::FnIdent(s) | Self::Ident(s) | Self::Comment(s) => write!(f, "{s}"), + Self::String(s) => write!(f, "{s}"), + Self::Float(n) => write!(f, "{n}"), + Self::Int(n) => write!(f, "{n}"), + Self::OpeningBracket(x) | Self::ClosingBracket(x) => write!(f,"{x}"), + } + } + } + } +} + +tokens! 
{ + "mut" => Mut, + "let" => Let, + "static" => Static, + "impl" => Impl, + "mod" => Mod, + "match" => Match, + "for" => For, + "break" => Break, + "enum" => Enum, + "union" => Union, + "pub" => Public, + "typeck" => TypeCheck, + "struct" => Struct, + "if" => If, + "else" => Else, + "=>" | "⇒" => FatArrow, + "->" | "→" => ThinArrow, + "," => Comma, + ":" => Colon, + ";" => Semicolon, + "::" | "∷" => Access, + "=" => Equal, + "λ" => Lamba, + "()" => Unit, + "prefix" => Prefix, + "infix" => Infix, + "postfix" => Postfix, + "alias" => Alias, + "associativity" => Associativity, + "looser_than" => LooserThan, + "tighter_than" => TighterThan, + "like" => Like, +} + +pub fn lex(s: &str) -> Lexer { + Lexer { + inner: Token::lexer(s).spanned(), + } +} + +fn chr<'src, const CHR: char>(_: &mut RealLexer<'src, Token<'src>>) -> Result<char, ()> { + Ok(CHR) +} +pub struct Lexer<'s> { + inner: SpannedIter<'s, Token<'s>>, +} + +impl<'s> Iterator for Lexer<'s> { + type Item = (Token<'s>, SimpleSpan<usize>); + + fn next(&mut self) -> Option<Self::Item> { + self.inner.find_map(|(x, s)| match x.ok()? { + Token::Comment(_) => None, + x => Some((x, SimpleSpan::new(s.start, s.end))), + }) + } +} + +#[test] +fn lexer() { + let mut lex = lex(r#" +let sint: typeset = { i8, i16 } +- (x: T -> T) [alias neg, prefix { like ¬ }] +mod intrinsics { + or ((a: T, b: T) -> T) { compiler_defined } [prefix, infix, T ∈ int] +} +enum bool { true, false } +∧ ((a: bool, b: bool) -> bool) { a & b } [infix { associativity <, looser_than « }] +impl bool { + ∧ ((a, b: λ(() -> me)) -> me) { + match a { + true => b (), + false => false, + } + } +} + "#); + // while let Some(x) = lex.next() { print!("{x} "); } + macro_rules! test { + ($($tok:ident$(($var:literal))?)+) => {{ + $(assert_eq!(lex.next().map(|(x,_)|x), Some(Token::$tok$(($var.into()))?));)+ + assert_eq!(lex.next(), None); + }} + } + test! 
[ + Let + Ident("sint") + Colon + Ident("typeset") + Equal + OpeningBracket('{') + Ident("i8") + Comma + Ident("i16") + ClosingBracket('}') + FnIdent("-") + OpeningBracket('(') + Ident("x") + Colon + FnIdent("T") + ThinArrow + FnIdent("T") + ClosingBracket(')') + OpeningBracket('[') + Alias + Ident("neg") + Comma + Prefix + OpeningBracket('{') + Like + FnIdent("¬") + ClosingBracket('}') + ClosingBracket(']') + Mod + Ident("intrinsics") + OpeningBracket('{') + Ident("or") + OpeningBracket('(') + OpeningBracket('(') + Ident("a") + Colon + FnIdent("T") + Comma + Ident("b") + Colon + FnIdent("T") + ClosingBracket(')') + ThinArrow + FnIdent("T") + ClosingBracket(')') + OpeningBracket('{') + Ident("compiler_defined") + ClosingBracket('}') + OpeningBracket('[') + Prefix + Comma + Infix + Comma + FnIdent("T") + FnIdent("∈") + Ident("int") + ClosingBracket(']') + ClosingBracket('}') + Enum + Ident("bool") + OpeningBracket('{') + Ident("true") + Comma + Ident("false") + ClosingBracket('}') + FnIdent("∧") + OpeningBracket('(') + OpeningBracket('(') + Ident("a") + Colon + Ident("bool") + Comma + Ident("b") + Colon + Ident("bool") + ClosingBracket(')') + ThinArrow + Ident("bool") + ClosingBracket(')') + OpeningBracket('{') + Ident("a") + FnIdent("&") + Ident("b") + ClosingBracket('}') + OpeningBracket('[') + Infix + OpeningBracket('{') + Associativity + FnIdent("<") + Comma + LooserThan + FnIdent("«") + ClosingBracket('}') + ClosingBracket(']') + Impl + Ident("bool") + OpeningBracket('{') + FnIdent("∧") + OpeningBracket('(') + OpeningBracket('(') + Ident("a") + Comma + Ident("b") + Colon + Lamba + OpeningBracket('(') + Unit + ThinArrow + Ident("me") + ClosingBracket(')') + ClosingBracket(')') + ThinArrow + Ident("me") + ClosingBracket(')') + OpeningBracket('{') + Match + Ident("a") + OpeningBracket('{') + Ident("true") + FatArrow + Ident("b") + Unit + Comma + Ident("false") + FatArrow + Ident("false") + Comma + ClosingBracket('}') + ClosingBracket('}') + ClosingBracket('}') + ] 
+} diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..2ad7194 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,52 @@ +#![feature(iter_intersperse)] +use chumsky::error::RichReason; +use comat::cformat as cmt; +use std::process::ExitCode; +mod lexer; +mod parser; + +fn main() -> ExitCode { + let Some(Ok(code)) = std::env::args().nth(1).map(std::fs::read_to_string) else { + comat::cprintln!("{bold_red}error{reset}: where file?"); + return ExitCode::FAILURE; + }; + let lexer = lexer::lex(&code); + match parser::parse(lexer, code.len()) { + Ok(x) => dbg!(x), + Err(e) => { + for e in e.into_iter().map(|e| e.map_token(|c| c.to_string())) { + let mut o = lerr::Error::new(&code); + o.label((e.span().into_range(), "here")); + match e.reason() { + RichReason::Custom(x) => { + o.message(cmt!("{red}error{reset}: {x}")); + } + RichReason::ExpectedFound { .. } => { + o.message(cmt!("{red}error{reset}: {e}")); + } + RichReason::Many(x) => { + match &x[..] { + [x, rest @ ..] => { + o.message(cmt!("{red}error{reset}: {x}")); + for elem in rest { + o.note(cmt!("{yellow}also{reset}: {elem}")); + } + } + _ => unreachable!(), + }; + } + } + for (l, span) in e.contexts() { + o.label(( + span.into_range(), + cmt!("{yellow}while parsing this{reset}: {l}"), + )); + } + eprintln!("{o}"); + } + return ExitCode::FAILURE; + } + }; + + ExitCode::SUCCESS +} diff --git a/src/parser.rs b/src/parser.rs new file mode 100644 index 0000000..18aa536 --- /dev/null +++ b/src/parser.rs @@ -0,0 +1,339 @@ +use crate::lexer::{Lexer, Token}; +use chumsky::{ + input::{SpannedInput, Stream}, + prelude::*, + Parser, +}; +mod types; +mod util; +use types::*; +use util::*; + +impl<'s> FixMetaData<'s> { + pub fn from_pieces( + x: &[Spanned<FixMetaPiece<'s>>], + fixness: Fix, + e: Span, + ) -> Result<Self, Error<'s>> { + let mut looser_than = None; + let mut tighter_than = None; + let mut assoc = None; + for &(x, span) in x { + match x { + FixMetaPiece::A(_) if assoc.is_some() => { + return 
Err(Rich::custom(span, "duplicate associatity meta elements")) + } + FixMetaPiece::L(_) if looser_than.is_some() => { + return Err(Rich::custom(span, "duplicate loosseness meta elements")) + } + FixMetaPiece::T(_) if tighter_than.is_some() => { + return Err(Rich::custom(span, "duplicate tightness meta elements")) + } + FixMetaPiece::A(x) => assoc = Some(x), + FixMetaPiece::L(x) => looser_than = Some(x), + FixMetaPiece::T(x) => tighter_than = Some(x), + } + } + + Ok(FixMetaData { + looser_than, + tighter_than, + fixness, + assoc, + }) + .and_then(|x| match x.looser_than.empty().or(x.tighter_than.empty()) { + None => Err(Rich::custom(e, "precedence meta required")), + Some(()) => Ok(x), + }) + } +} + +fn expr<'s>() -> parser![Expr<'s>] { + recursive::<_, Expr, _, _, _>(|expr| { + let inline_expr = recursive(|inline_expr| { + let val = select! { + Token::Unit => Expr::Value(Value::Unit), + Token::Int(x) => Expr::Value(Value::Int(x)), + Token::Float(x) => Expr::Value(Value::Float(x)), + Token::String(s) => Expr::Value(Value::String(s)), + } + .labelled("value"); + + let decl = tok![let] + .ignore_then(tok![ident]) + .then_ignore(tok![=]) + .then(inline_expr.clone()) + .map(|(name, body)| Expr::Let { + name, + rhs: Box::new(body), + }) + .labelled("declare") + .boxed(); + + choice(( + tok![ident].map(Expr::Ident), + decl, + val, + expr.clone().delimited_by(tok![lparen], tok![rparen]), + )) + .boxed() + }); + + let block = expr + .clone() + .delimited_by(tok![lbrace], tok![rbrace]) + .boxed(); + + let r#if = recursive(|if_| { + tok![if] + .ignore_then(expr.clone()) + .then(block.clone()) + .then(tok![else].ignore_then(block.clone().or(if_)).or_not()) + .map(|((cond, a), b)| Expr::If { + cond: Box::new(cond), + when: Box::new(a), + or: Box::new(b.unwrap_or_else(|| Expr::Value(Value::Unit))), + }) + }); + + let block_expr = block.or(r#if); + + let block_chain = block_expr + .clone() + .foldl(block_expr.clone().repeated(), |a, b| { + Expr::Semicolon(Box::new(a), 
Box::new(b)) + }); + + block_chain.labelled("block").or(inline_expr.clone()).foldl( + tok![;].ignore_then(expr.or_not()).repeated(), + |a, b| { + Expr::Semicolon( + Box::new(a), + Box::new(b.unwrap_or_else(|| Expr::Value(Value::Unit))), + ) + }, + ) + }) +} + +impl<'s> Type<'s> { + pub fn parse() -> parser![Type<'s>] { + recursive(|ty| { + let tuple = ty + .separated_by(tok![,]) + .allow_trailing() + .collect::<Vec<_>>() + .delimited_by(tok![lparen], tok![rparen]) + .map(|x| Type::Tuple(x.into_boxed_slice())) + .boxed(); + let unit = tok![()].map(|_| Type::Unit); + let path = tok![ident].map(Type::Path); + + choice((path, tuple, unit)) + }) + .labelled("type") + } +} + +#[derive(Debug, Copy, Clone)] +pub enum FixMetaPiece<'s> { + A(Associativity), + L(&'s str), + T(&'s str), +} + +impl<'s> Meta<'s> { + fn fns() -> parser![Vec<&'s str>] { + tok![fnname] + .separated_by(tok![,]) + .collect::<Vec<_>>() + .delimited_by(tok![lbrace].or_not(), tok![rbrace].or_not()) + .labelled("functions") + } + + fn associativity() -> parser![Spanned<Associativity>] { + tok![associativity] + .ignore_then(select! 
{ + Token::FnIdent("<") => Associativity::Left, + Token::Ident("none") => Associativity::None, + Token::FnIdent(">") => Associativity::Right + }) + .map_with(spanned!()) + } + fn looser() -> parser![Spanned<&'s str>] { + tok![looser_than] + .ignore_then(tok![fnname]) + .map_with(spanned!()) + .labelled("looser than") + } + + fn tighter() -> parser![Spanned<&'s str>] { + tok![tighter_than] + .ignore_then(tok![fnname]) + .map_with(spanned!()) + .labelled("tighter than") + } + + fn like() -> parser![&'s str] { + tok![like] + .ignore_then(tok![fnname]) + .delimited_by(tok![lbrace], tok![rbrace]) + .labelled("like") + } + + fn alias() -> parser![Meta<'s>] { + tok![alias] + .ignore_then(Meta::fns()) + .map(Meta::Alias) + .labelled("alias meta") + } + + fn pieces() -> parser![Vec<Spanned<FixMetaPiece<'s>>>] { + use FixMetaPiece::*; + choice(( + Meta::associativity().map(|x| x.ml(A)), + Meta::looser().map(|x| x.ml(L)), + Meta::tighter().map(|x| x.ml(T)), + )) + .separated_by(tok![,]) + .collect::<Vec<_>>() + .labelled("meta pices") + } + + fn prefix() -> parser![Meta<'s>] { + tok![prefix] + .ignore_then(choice(( + Meta::like().map(|x| Meta::Fix(FixMeta::Like(Fix::Pre, x))), + Meta::pieces() + .try_map(|x, e| { + FixMetaData::from_pieces(&x, Fix::Pre, e) + .and_then(|x| match x.assoc { + Some(_) => { + Err(Rich::custom(e, "prefix does not have associativity")) + } + None => Ok(x), + }) + .map(|x| Meta::Fix(FixMeta::Data(x))) + }) + .delimited_by(tok![lbrace], tok![rbrace]), + empty().map(|()| Meta::Fix(FixMeta::Default(Fix::Pre))), + ))) + .labelled("prefix meta") + } + + fn postfix() -> parser![Meta<'s>] { + tok![postfix] + .ignore_then(choice(( + Meta::like().map(|x| Meta::Fix(FixMeta::Like(Fix::Post, x))), + Meta::pieces() + .try_map(|x, e| { + FixMetaData::from_pieces(&x, Fix::Post, e) + .and_then(|x| match x.assoc { + Some(_) => { + Err(Rich::custom(e, "postfix does not have associativity")) + } + None => Ok(x), + }) + .map(|x| Meta::Fix(FixMeta::Data(x))) + }) + 
.delimited_by(tok![lbrace], tok![rbrace]), + empty().map(|()| Meta::Fix(FixMeta::Default(Fix::Post))), + ))) + .labelled("postfix meta") + } + + fn infix() -> parser![Meta<'s>] { + tok![infix] + .ignore_then(choice(( + Meta::like().map(|x| Meta::Fix(FixMeta::Like(Fix::In, x))), + Meta::pieces() + .try_map(|x, e| { + FixMetaData::from_pieces(&x, Fix::In, e) + .and_then(|x| match x.assoc { + None => Err(Rich::custom(e, "infix requires associativity")), + Some(_) => Ok(x), + }) + .map(|x| Meta::Fix(FixMeta::Data(x))) + }) + .delimited_by(tok![lbrace], tok![rbrace]), + empty().map(|()| Meta::Fix(FixMeta::Default(Fix::In))), + ))) + .labelled("infix meta") + } + + fn parse() -> parser![Vec<Meta<'s>>] { + choice(( + Meta::alias(), + Meta::infix(), + Meta::prefix(), + Meta::postfix(), + )) + .separated_by(tok![,]) + .collect::<Vec<_>>() + .delimited_by(tok![lbrack], tok![rbrack]) + .labelled("meta") + } +} + +impl<'s> FnDef<'s> { + pub fn args() -> parser![(Vec<(Type<'s>, Type<'s>)>, Option<Type<'s>>)] { + Type::parse() + .then_ignore(tok![:]) + .then(Type::parse()) + .separated_by(tok![,]) + .collect::<Vec<_>>() + // note: `(a: _, b: _) -> c` is technically invalid, and should be (a, b): (_, _) -> c, but + .delimited_by(tok![lparen].or_not(), tok![rparen].or_not()) + .then(tok![->].ignore_then(Type::parse()).or_not()) + .labelled("function signature") + } + + fn block() -> parser![Expr<'s>] { + expr() + .clone() + .delimited_by(tok![lbrace], tok![rbrace]) + .labelled("block") + } + + fn parse() -> parser![Self] { + tok![fnname] + .then(Self::args().delimited_by(tok![lparen], tok![rparen])) + .then(Self::block().or_not()) + .then(Meta::parse()) + .map(|(((name, (args, ret)), block), meta)| Self { + name, + args, + ret: ret.unwrap_or(Type::Unit), + block, + meta, + }) + .labelled("function") + } +} + +fn parser<'s>() -> parser![Ast<'s>] { + FnDef::parse() + .map(Stmt::Fn) + .repeated() + .collect() + .map(Ast::Module) +} + +pub fn stream(lexer: Lexer<'_>, len: usize) -> 
SpannedInput<Token<'_>, Span, Stream<Lexer<'_>>> { + Stream::from_iter(lexer).spanned((len..len).into()) +} + +#[cfg(test)] +pub fn code<'s>(x: &'s str) -> SpannedInput<Token<'s>, Span, Stream<Lexer<'s>>> { + stream(crate::lexer::lex(x), x.len()) +} + +pub fn parse(tokens: Lexer<'_>, len: usize) -> Result<Ast<'_>, Vec<Error<'_>>> { + parser().parse(stream(tokens, len)).into_result() +} + +#[test] +fn fn_test() { + assert_eq!(Meta::fns().parse(code("{ !, not }")).unwrap(), ["!", "not"]); + assert_eq!(Meta::fns().parse(code("!")).unwrap(), ["!"]); +} diff --git a/src/parser/types.rs b/src/parser/types.rs new file mode 100644 index 0000000..f06554a --- /dev/null +++ b/src/parser/types.rs @@ -0,0 +1,183 @@ +use crate::lexer::Token; +use beef::lean::Cow; +use chumsky::prelude::*; +pub type Span = SimpleSpan<usize>; +pub type Error<'s> = Rich<'s, Token<'s>, Span>; + +#[derive(Clone)] +pub struct FnDef<'s> { + pub name: &'s str, + pub args: Vec<(Type<'s>, Type<'s>)>, + pub ret: Type<'s>, + pub block: Option<Expr<'s>>, + pub meta: Vec<Meta<'s>>, +} + +impl std::fmt::Debug for FnDef<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{} {:?} -> {:?}", self.name, self.args, self.ret)?; + if let Some(b) = &self.block { + write!(f, " {b:?}")?; + } + write!(f, " {:?}", self.meta) + } +} + +#[derive(Debug, Clone)] +pub enum Stmt<'s> { + Fn(FnDef<'s>), +} + +#[derive(Debug, Clone)] +pub enum Ast<'s> { + Module(Vec<Stmt<'s>>), +} + +#[derive(Clone)] +pub enum Value<'s> { + Float(f64), + Int(u64), + String(Cow<'s, str>), + Unit, +} + +impl std::fmt::Debug for Value<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Float(x) => write!(f, "{x}f"), + Self::Int(x) => write!(f, "{x}i"), + Self::String(x) => write!(f, "\"{x}\""), + Self::Unit => write!(f, "()"), + } + } +} + +#[derive(Clone, Debug, Copy)] +pub enum Associativity { + Left, + Right, + None, +} + +#[derive(Clone, Debug, Copy)] +pub enum 
Fix { + Pre, + Post, + In, +} + +#[derive(Clone, Copy)] +pub struct FixMetaData<'s> { + pub looser_than: Option<&'s str>, + pub tighter_than: Option<&'s str>, + pub fixness: Fix, + pub assoc: Option<Associativity>, +} + +impl std::fmt::Debug for FixMetaData<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{:?} {{", self.fixness)?; + if let Some(x) = self.assoc { + write!(f, " assoc {x:?}")?; + } + if let Some(x) = self.looser_than { + write!(f, " looser {x}")?; + } + if let Some(x) = self.tighter_than { + write!(f, " tighter {x}")?; + } + write!(f, " }}") + } +} + +#[derive(Clone, Copy)] +pub enum FixMeta<'s> { + Default(Fix), // function precedence + Like(Fix, &'s str), + Data(FixMetaData<'s>), +} + +impl std::fmt::Debug for FixMeta<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Default(x) => write!(f, "{x:?}"), + Self::Like(x, y) => write!(f, "{x:?} {{ like {y} }}"), + Self::Data(x) => write!(f, "{x:?}"), + } + } +} + +#[derive(Clone)] +pub enum Meta<'s> { + Fix(FixMeta<'s>), + Alias(Vec<&'s str>), +} + +impl std::fmt::Debug for Meta<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Fix(fix) => write!(f, "{fix:?}"), + Self::Alias(what) => write!(f, "alias {what:?}"), + } + } +} + +#[derive(Clone)] +pub enum Expr<'s> { + Value(Value<'s>), + Ident(&'s str), + + Let { + name: &'s str, + rhs: Box<Expr<'s>>, + }, + If { + cond: Box<Expr<'s>>, + when: Box<Expr<'s>>, + or: Box<Expr<'s>>, + }, + Semicolon(Box<Expr<'s>>, Box<Expr<'s>>), + Call(&'s str, Vec<Expr<'s>>), +} + +impl std::fmt::Debug for Expr<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Value(x) => write!(f, "{x:?}"), + Self::Ident(x) => write!(f, "{x}"), + Self::Let { name, rhs } => write!(f, "let {name} = {rhs:?}"), + Self::If { cond, when, or } => { + write!(f, "if {cond:?} {{ {when:?} }} else {{ {or:?} }}") + } 
+ Self::Semicolon(arg0, arg1) => f.debug_list().entries([arg0, arg1]).finish(), + Self::Call(arg, x) => f.debug_tuple("callu").field(arg).field(x).finish(), + } + } +} + +pub type Spanned<T> = (T, Span); + +#[derive(Clone)] +pub enum Type<'s> { + Tuple(Box<[Type<'s>]>), + Path(&'s str), + Unit, +} + +impl std::fmt::Debug for Type<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Tuple(x) => write!( + f, + "{}", + std::iter::once("(".to_string()) + .chain(x.iter().map(|x| format!("{x:?}")).intersperse(", ".into()),) + .chain([")".to_string()]) + .reduce(|acc, x| acc + &x) + .unwrap() + ), + Self::Path(x) => write!(f, "{x}"), + Self::Unit => write!(f, "()"), + } + } +} diff --git a/src/parser/util.rs b/src/parser/util.rs new file mode 100644 index 0000000..744e202 --- /dev/null +++ b/src/parser/util.rs @@ -0,0 +1,124 @@ +use super::types::*; + +macro_rules! tok { + (ident) => { + select! { Token::Ident(ident) => ident }.labelled("ident") + }; + (fnname) => { + select! 
{ Token::Ident(x) | Token::FnIdent(x) => x }.labelled("function name") + }; + (let) => { + just(Token::Let) + }; + (looser_than) => { + just(Token::LooserThan) + }; + (tighter_than) => { + just(Token::TighterThan) + }; + (associativity) => { + just(Token::Associativity) + }; + (if) => { + just(Token::If) + }; + (alias) => { + just(Token::Alias) + }; + (like) => { + just(Token::Like) + }; + (infix) => { + just(Token::Infix) + }; + (prefix) => { + just(Token::Prefix) + }; + (postfix) => { + just(Token::Postfix) + }; + (else) => { + just(Token::Else) + }; + (=) => { + just(Token::Equal) + }; + (;) => { + just(Token::Semicolon) + }; + (,) => { + just(Token::Comma) + }; + (:) => { + just(Token::Colon) + }; + (->) => { + just(Token::ThinArrow) + }; + (()) => { + just(Token::Unit) + }; + (lparen) => { + just(Token::OpeningBracket('(')) + }; + (rparen) => { + just(Token::ClosingBracket(')')) + }; + (lbrack) => { + just(Token::OpeningBracket('[')) + }; + (rbrack) => { + just(Token::ClosingBracket(']')) + }; + (lbrace) => { + just(Token::OpeningBracket('{')) + }; + (rbrace) => { + just(Token::ClosingBracket('}')) + }; +} +macro_rules! parser { + ($t:ty) => { + impl Parser<'s, SpannedInput<Token<'s>, SimpleSpan, Stream<Lexer<'s>>>, $t, extra::Err<Error<'s>>> + Clone + } +} + +macro_rules! spanned { + () => { + |a, extra| (a, extra.span()) + }; +} + +pub(crate) use parser; +pub(crate) use spanned; +pub(crate) use tok; + +pub trait Unit<T> { + fn empty(&self) -> T; +} + +impl<T> Unit<Option<()>> for Option<T> { + fn empty(&self) -> Option<()> { + self.as_ref().map(|_| ()) + } +} + +pub trait Spanner { + fn spun(self, s: Span) -> Spanned<Self> + where + Self: Sized, + { + (self, s) + } +} +impl<T> Spanner for T {} + +pub trait MapLeft<T, V> { + fn ml<U>(self, f: impl Fn(T) -> U) -> (U, V); +} + +impl<T, V> MapLeft<T, V> for (T, V) { + fn ml<U>(self, f: impl Fn(T) -> U) -> (U, V) { + (f(self.0), self.1) + } +} |