Diffstat (limited to 'src/lexer.rs')
| -rw-r--r-- | src/lexer.rs | 116 |
1 file changed, 116 insertions, 0 deletions
diff --git a/src/lexer.rs b/src/lexer.rs new file mode 100644 index 0000000..3345887 --- /dev/null +++ b/src/lexer.rs @@ -0,0 +1,116 @@ +use beef::lean::Cow; +use chumsky::span::SimpleSpan; +use logos::{Lexer as RealLexer, Logos, SpannedIter}; + +macro_rules! tokens { + ($($z:literal $( | $y:literal)? => $v:ident,)+) => { + #[derive(Logos, Debug, PartialEq, Clone)] + #[logos(skip r"[\n\s]+")] + pub enum Token<'strings> { + #[regex("/[^\n/]+/", priority = 8)] + Comment(&'strings str), + #[regex(r"[0-9]+", |lex| lex.slice().parse().ok())] + #[regex(r"0[xX][0-9a-fA-F]+", |lex| u64::from_str_radix(&lex.slice()[2..], 16).ok())] + #[regex(r"0[bB][01]+", |lex| u64::from_str_radix(&lex.slice()[2..], 2).ok())] + Int(u64), + #[regex(r"[0-9]+\.[0-9]+", |lex| lex.slice().parse().ok())] + Float(f64), + #[regex(r#""([^\\"\n])*""#, callback = |lex| Cow::from(&lex.slice()[1..lex.slice().len()-1]), priority = 12)] + #[regex(r#""[^"]*""#, callback = |lex| Cow::from(lex.slice()[1..lex.slice().len()-1].replace(r"\n", "\n")), priority = 8)] + String(Cow<'strings, str>), + // todo ignore alot + #[regex(r"[^\s0-9][^\s]*", priority = 7)] + Ident(&'strings str), + + #[token("[", chr::<'['>)] + #[token("(", chr::<'('>)] + #[token("{", chr::<'{'>)] + OpeningBracket(char), + #[token("]", chr::<']'>)] + #[token(")", chr::<')'>)] + #[token("}", chr::<'}'>)] + ClosingBracket(char), + + $(#[token($z, priority = 8)] $(#[token($y, priority = 8)])? $v,)+ + } + + impl std::fmt::Display for Token<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> { + match self { + $(Self::$v => write!(f, $z),)+ + Self::FnIdent(s) | Self::Ident(s) | Self::Comment(s) => write!(f, "{s}"), + Self::String(s) => write!(f, "{s}"), + Self::Float(n) => write!(f, "{n}"), + Self::Int(n) => write!(f, "{n}"), + Self::OpeningBracket(x) | Self::ClosingBracket(x) => write!(f,"{x}"), + } + } + } + } +} + +tokens! { + "λ" => Lamba, + "←" => Place, + "→" => Ret, + "=" => Eq, + "." 
=> Dup, + ":" => Flip, + "⤵️" => Pop, + "+" => Add, + "×" => Mul, + "*" => Pow, + "√" => Sqrt, + "≠" => Ne, + "<" => Lt, + "≤" | "≯" => Le, + ">" => Gt, + "≥" | "≮" => Ge, + "«" => Shl, + "»" => Shr, + "¯" => Neg, + "&" => And, + "|" => Or, + "^" => Xor, + "÷" => Div, + "%" => Mod, + "🔎" => Keep, + "🚧" => Split, + +} + +pub fn lex(s: &str) -> Lexer { + Lexer { + inner: Token::lexer(s).spanned(), + } +} + +fn chr<'src, const CHR: char>(_: &mut RealLexer<'src, Token<'src>>) -> Result<char, ()> { + Ok(CHR) +} +pub struct Lexer<'s> { + inner: SpannedIter<'s, Token<'s>>, +} + +impl<'s> Iterator for Lexer<'s> { + type Item = (Token<'s>, SimpleSpan<usize>); + + fn next(&mut self) -> Option<Self::Item> { + self.inner.find_map(|(x, s)| match x.ok()? { + Token::Comment(_) => None, + x => Some((x, SimpleSpan::new(s.start, s.end))), + }) + } +} + +#[test] +fn lexer() { + let mut lex = lex(r#""#); + // while let Some(x) = lex.next() { print!("{x} "); } + macro_rules! test { + ($($tok:ident$(($var:literal))?)+) => {{ + $(assert_eq!(lex.next().map(|(x,_)|x), Some(Token::$tok$(($var.into()))?));)+ + assert_eq!(lex.next(), None); + }} + } +} |