Unnamed repository; edit this file 'description' to name the repository.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
//! Ungrammar -- a DSL for specifying concrete syntax tree grammar.
//!
//! Producing a parser is an explicit non-goal -- it's ok for this grammar to be
//! ambiguous, non LL, non LR, etc.
//!
//! See this
//! [introductory post](https://rust-analyzer.github.io/blog/2020/10/24/introducing-ungrammar.html)
//! for details.

#![deny(missing_debug_implementations)]
#![deny(missing_docs)]
#![deny(rust_2018_idioms)]

mod error;
mod lexer;
mod parser;

use std::{ops, str::FromStr};

pub use error::{Error, Result};

/// A node, like `A = 'b' | 'c'`.
///
/// Indexing into a [`Grammar`] with a [`Node`] returns a reference to a
/// [`NodeData`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Node(usize);

/// A token, denoted with single quotes, like `'+'` or `'struct'`.
///
/// Indexing into a [`Grammar`] with a [`Token`] returns a reference to a
/// [`TokenData`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Token(usize);

/// An Ungrammar grammar.
#[derive(Default, Debug)]
pub struct Grammar {
    nodes: Vec<NodeData>,
    tokens: Vec<TokenData>,
}

impl FromStr for Grammar {
    type Err = Error;
    fn from_str(s: &str) -> Result<Self> {
        let tokens = lexer::tokenize(s)?;
        parser::parse(tokens)
    }
}

impl Grammar {
    /// Returns an iterator over all nodes in the grammar.
    pub fn iter(&self) -> impl Iterator<Item = Node> + '_ {
        (0..self.nodes.len()).map(Node)
    }

    /// Returns an iterator over all tokens in the grammar.
    pub fn tokens(&self) -> impl Iterator<Item = Token> + '_ {
        (0..self.tokens.len()).map(Token)
    }
}

impl ops::Index<Node> for Grammar {
    type Output = NodeData;
    fn index(&self, Node(index): Node) -> &NodeData {
        &self.nodes[index]
    }
}

impl ops::Index<Token> for Grammar {
    type Output = TokenData;
    fn index(&self, Token(index): Token) -> &TokenData {
        &self.tokens[index]
    }
}

/// Data about a node.
#[derive(Debug)]
pub struct NodeData {
    /// The name of the node.
    ///
    /// In the rule `A = 'b' | 'c'`, this is `"A"`.
    pub name: String,
    /// The rule for this node.
    ///
    /// In the rule `A = 'b' | 'c'`, this represents `'b' | 'c'`.
    pub rule: Rule,
}

/// Data about a token.
#[derive(Debug)]
pub struct TokenData {
    /// The name of the token.
    pub name: String,
}

/// A production rule.
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum Rule {
    /// A labeled rule, like `a:B` (`"a"` is the label, `B` is the rule).
    Labeled {
        /// The label.
        label: String,
        /// The rule.
        rule: Box<Rule>,
    },
    /// A node, like `A`.
    Node(Node),
    /// A token, like `'struct'`.
    Token(Token),
    /// A sequence of rules, like `'while' '(' Expr ')' Stmt`.
    Seq(Vec<Rule>),
    /// An alternative between many rules, like `'+' | '-' | '*' | '/'`.
    Alt(Vec<Rule>),
    /// An optional rule, like `A?`.
    Opt(Box<Rule>),
    /// A repeated rule, like `A*`.
    Rep(Box<Rule>),
}

#[test]
fn smoke() {
    let grammar = include_str!("../ungrammar.ungram");
    let grammar = grammar.parse::<Grammar>().unwrap();
    drop(grammar)
}