Unnamed repository; edit this file 'description' to name the repository.
Diffstat (limited to 'xtask/src/codegen/grammar/ast_src.rs')
-rw-r--r-- xtask/src/codegen/grammar/ast_src.rs 381
1 files changed, 147 insertions, 234 deletions
diff --git a/xtask/src/codegen/grammar/ast_src.rs b/xtask/src/codegen/grammar/ast_src.rs
index c246ee9950..3444f89908 100644
--- a/xtask/src/codegen/grammar/ast_src.rs
+++ b/xtask/src/codegen/grammar/ast_src.rs
@@ -1,241 +1,154 @@
//! Defines input for code generation process.
-pub(crate) struct KindsSrc<'a> {
- pub(crate) punct: &'a [(&'a str, &'a str)],
- pub(crate) keywords: &'a [&'a str],
- pub(crate) contextual_keywords: &'a [&'a str],
- pub(crate) literals: &'a [&'a str],
- pub(crate) tokens: &'a [&'a str],
- pub(crate) nodes: &'a [&'a str],
+use crate::codegen::grammar::to_upper_snake_case;
+
+#[derive(Copy, Clone, Debug)]
+pub(crate) struct KindsSrc {
+ pub(crate) punct: &'static [(&'static str, &'static str)],
+ pub(crate) keywords: &'static [&'static str],
+ pub(crate) contextual_keywords: &'static [&'static str],
+ pub(crate) literals: &'static [&'static str],
+ pub(crate) tokens: &'static [&'static str],
+ pub(crate) nodes: &'static [&'static str],
}
-pub(crate) const KINDS_SRC: KindsSrc<'_> = KindsSrc {
- punct: &[
- (";", "SEMICOLON"),
- (",", "COMMA"),
- ("(", "L_PAREN"),
- (")", "R_PAREN"),
- ("{", "L_CURLY"),
- ("}", "R_CURLY"),
- ("[", "L_BRACK"),
- ("]", "R_BRACK"),
- ("<", "L_ANGLE"),
- (">", "R_ANGLE"),
- ("@", "AT"),
- ("#", "POUND"),
- ("~", "TILDE"),
- ("?", "QUESTION"),
- ("$", "DOLLAR"),
- ("&", "AMP"),
- ("|", "PIPE"),
- ("+", "PLUS"),
- ("*", "STAR"),
- ("/", "SLASH"),
- ("^", "CARET"),
- ("%", "PERCENT"),
- ("_", "UNDERSCORE"),
- (".", "DOT"),
- ("..", "DOT2"),
- ("...", "DOT3"),
- ("..=", "DOT2EQ"),
- (":", "COLON"),
- ("::", "COLON2"),
- ("=", "EQ"),
- ("==", "EQ2"),
- ("=>", "FAT_ARROW"),
- ("!", "BANG"),
- ("!=", "NEQ"),
- ("-", "MINUS"),
- ("->", "THIN_ARROW"),
- ("<=", "LTEQ"),
- (">=", "GTEQ"),
- ("+=", "PLUSEQ"),
- ("-=", "MINUSEQ"),
- ("|=", "PIPEEQ"),
- ("&=", "AMPEQ"),
- ("^=", "CARETEQ"),
- ("/=", "SLASHEQ"),
- ("*=", "STAREQ"),
- ("%=", "PERCENTEQ"),
- ("&&", "AMP2"),
- ("||", "PIPE2"),
- ("<<", "SHL"),
- (">>", "SHR"),
- ("<<=", "SHLEQ"),
- (">>=", "SHREQ"),
- ],
- keywords: &[
- "abstract", "as", "async", "await", "become", "box", "break", "const", "continue", "crate",
- "do", "dyn", "else", "enum", "extern", "false", "final", "fn", "for", "if", "impl", "in",
- "let", "loop", "macro", "match", "mod", "move", "mut", "override", "priv", "pub", "ref",
- "return", "self", "Self", "static", "struct", "super", "trait", "true", "try", "type",
- "typeof", "unsafe", "unsized", "use", "virtual", "where", "while", "yield",
- ],
- contextual_keywords: &[
- "auto",
- "builtin",
- "default",
- "existential",
- "union",
- "raw",
- "macro_rules",
- "yeet",
- "offset_of",
- "asm",
- "format_args",
- ],
- literals: &["INT_NUMBER", "FLOAT_NUMBER", "CHAR", "BYTE", "STRING", "BYTE_STRING", "C_STRING"],
- tokens: &["ERROR", "IDENT", "WHITESPACE", "LIFETIME_IDENT", "COMMENT", "SHEBANG"],
- nodes: &[
- "SOURCE_FILE",
- "STRUCT",
- "UNION",
- "ENUM",
- "FN",
- "RET_TYPE",
- "EXTERN_CRATE",
- "MODULE",
- "USE",
- "STATIC",
- "CONST",
- "TRAIT",
- "TRAIT_ALIAS",
- "IMPL",
- "TYPE_ALIAS",
- "MACRO_CALL",
- "MACRO_RULES",
- "MACRO_ARM",
- "TOKEN_TREE",
- "MACRO_DEF",
- "PAREN_TYPE",
- "TUPLE_TYPE",
- "MACRO_TYPE",
- "NEVER_TYPE",
- "PATH_TYPE",
- "PTR_TYPE",
- "ARRAY_TYPE",
- "SLICE_TYPE",
- "REF_TYPE",
- "INFER_TYPE",
- "FN_PTR_TYPE",
- "FOR_TYPE",
- "IMPL_TRAIT_TYPE",
- "DYN_TRAIT_TYPE",
- "OR_PAT",
- "PAREN_PAT",
- "REF_PAT",
- "BOX_PAT",
- "IDENT_PAT",
- "WILDCARD_PAT",
- "REST_PAT",
- "PATH_PAT",
- "RECORD_PAT",
- "RECORD_PAT_FIELD_LIST",
- "RECORD_PAT_FIELD",
- "TUPLE_STRUCT_PAT",
- "TUPLE_PAT",
- "SLICE_PAT",
- "RANGE_PAT",
- "LITERAL_PAT",
- "MACRO_PAT",
- "CONST_BLOCK_PAT",
- // atoms
- "TUPLE_EXPR",
- "ARRAY_EXPR",
- "PAREN_EXPR",
- "PATH_EXPR",
- "CLOSURE_EXPR",
- "IF_EXPR",
- "WHILE_EXPR",
- "LOOP_EXPR",
- "FOR_EXPR",
- "CONTINUE_EXPR",
- "BREAK_EXPR",
- "LABEL",
- "BLOCK_EXPR",
- "STMT_LIST",
- "RETURN_EXPR",
- "BECOME_EXPR",
- "YIELD_EXPR",
- "YEET_EXPR",
- "LET_EXPR",
- "UNDERSCORE_EXPR",
- "MACRO_EXPR",
- "MATCH_EXPR",
- "MATCH_ARM_LIST",
- "MATCH_ARM",
- "MATCH_GUARD",
- "RECORD_EXPR",
- "RECORD_EXPR_FIELD_LIST",
- "RECORD_EXPR_FIELD",
- "OFFSET_OF_EXPR",
- "ASM_EXPR",
- "FORMAT_ARGS_EXPR",
- "FORMAT_ARGS_ARG",
- // postfix
- "CALL_EXPR",
- "INDEX_EXPR",
- "METHOD_CALL_EXPR",
- "FIELD_EXPR",
- "AWAIT_EXPR",
- "TRY_EXPR",
- "CAST_EXPR",
- // unary
- "REF_EXPR",
- "PREFIX_EXPR",
- "RANGE_EXPR", // just weird
- "BIN_EXPR",
- "EXTERN_BLOCK",
- "EXTERN_ITEM_LIST",
- "VARIANT",
- "RECORD_FIELD_LIST",
- "RECORD_FIELD",
- "TUPLE_FIELD_LIST",
- "TUPLE_FIELD",
- "VARIANT_LIST",
- "ITEM_LIST",
- "ASSOC_ITEM_LIST",
- "ATTR",
- "META",
- "USE_TREE",
- "USE_TREE_LIST",
- "PATH",
- "PATH_SEGMENT",
- "LITERAL",
- "RENAME",
- "VISIBILITY",
- "WHERE_CLAUSE",
- "WHERE_PRED",
- "ABI",
- "NAME",
- "NAME_REF",
- "LET_STMT",
- "LET_ELSE",
- "EXPR_STMT",
- "GENERIC_PARAM_LIST",
- "GENERIC_PARAM",
- "LIFETIME_PARAM",
- "TYPE_PARAM",
- "RETURN_TYPE_ARG",
- "CONST_PARAM",
- "GENERIC_ARG_LIST",
- "LIFETIME",
- "LIFETIME_ARG",
- "TYPE_ARG",
- "ASSOC_TYPE_ARG",
- "CONST_ARG",
- "PARAM_LIST",
- "PARAM",
- "SELF_PARAM",
- "ARG_LIST",
- "TYPE_BOUND",
- "TYPE_BOUND_LIST",
- // macro related
- "MACRO_ITEMS",
- "MACRO_STMTS",
- "MACRO_EAGER_INPUT",
- ],
-};
+/// The punctuations of the language.
+const PUNCT: &[(&str, &str)] = &[
+ // KEEP THE DOLLAR AT THE TOP, IT'S SPECIAL: generate_kind_src always marks index 0 as used.
+ ("$", "DOLLAR"),
+ (";", "SEMICOLON"),
+ (",", "COMMA"),
+ ("(", "L_PAREN"),
+ (")", "R_PAREN"),
+ ("{", "L_CURLY"),
+ ("}", "R_CURLY"),
+ ("[", "L_BRACK"),
+ ("]", "R_BRACK"),
+ ("<", "L_ANGLE"),
+ (">", "R_ANGLE"),
+ ("@", "AT"),
+ ("#", "POUND"),
+ ("~", "TILDE"),
+ ("?", "QUESTION"),
+ ("&", "AMP"),
+ ("|", "PIPE"),
+ ("+", "PLUS"),
+ ("*", "STAR"),
+ ("/", "SLASH"),
+ ("^", "CARET"),
+ ("%", "PERCENT"),
+ ("_", "UNDERSCORE"),
+ (".", "DOT"),
+ ("..", "DOT2"),
+ ("...", "DOT3"),
+ ("..=", "DOT2EQ"),
+ (":", "COLON"),
+ ("::", "COLON2"),
+ ("=", "EQ"),
+ ("==", "EQ2"),
+ ("=>", "FAT_ARROW"),
+ ("!", "BANG"),
+ ("!=", "NEQ"),
+ ("-", "MINUS"),
+ ("->", "THIN_ARROW"),
+ ("<=", "LTEQ"),
+ (">=", "GTEQ"),
+ ("+=", "PLUSEQ"),
+ ("-=", "MINUSEQ"),
+ ("|=", "PIPEEQ"),
+ ("&=", "AMPEQ"),
+ ("^=", "CARETEQ"),
+ ("/=", "SLASHEQ"),
+ ("*=", "STAREQ"),
+ ("%=", "PERCENTEQ"),
+ ("&&", "AMP2"),
+ ("||", "PIPE2"),
+ ("<<", "SHL"),
+ (">>", "SHR"),
+ ("<<=", "SHLEQ"),
+ (">>=", "SHREQ"),
+];
+const TOKENS: &[&str] = &["ERROR", "WHITESPACE", "NEWLINE", "COMMENT"];
+// Previous hard-coded token list: &["ERROR", "IDENT", "WHITESPACE", "LIFETIME_IDENT", "COMMENT", "SHEBANG"]
+
+const EOF: &str = "EOF";
+
+const RESERVED: &[&str] = &[
+ "abstract", "become", "box", "do", "final", "macro", "override", "priv", "typeof", "unsized",
+ "virtual", "yield", "try",
+];
+const CONTEXTUAL_RESERVED: &[&str] = &[];
+
+pub(crate) fn generate_kind_src(
+ nodes: &[AstNodeSrc],
+ enums: &[AstEnumSrc],
+ grammar: &ungrammar::Grammar,
+) -> KindsSrc {
+ let mut keywords: Vec<&_> = Vec::new();
+ let mut contextual_keywords: Vec<&_> = Vec::new();
+ let mut tokens: Vec<&_> = TOKENS.to_vec();
+ let mut literals: Vec<&_> = Vec::new();
+ let mut used_puncts = vec![false; PUNCT.len()];
+ // Mark $ as used
+ used_puncts[0] = true;
+ grammar.tokens().for_each(|token| {
+ let name = &*grammar[token].name;
+ if name == EOF {
+ return;
+ }
+ match name.split_at(1) {
+ ("@", lit) if !lit.is_empty() => {
+ literals.push(String::leak(to_upper_snake_case(lit)));
+ }
+ ("#", token) if !token.is_empty() => {
+ tokens.push(String::leak(to_upper_snake_case(token)));
+ }
+ ("?", kw) if !kw.is_empty() => {
+ contextual_keywords.push(String::leak(kw.to_owned()));
+ }
+ _ if name.chars().all(char::is_alphabetic) => {
+ keywords.push(String::leak(name.to_owned()));
+ }
+ _ => {
+ let idx = PUNCT
+ .iter()
+ .position(|(punct, _)| punct == &name)
+ .unwrap_or_else(|| panic!("Grammar references unknown punctuation {name:?}"));
+ used_puncts[idx] = true;
+ }
+ }
+ });
+ PUNCT.iter().zip(used_puncts).filter(|(_, used)| !used).for_each(|((punct, _), _)| {
+ panic!("Punctuation {punct:?} is not used in grammar");
+ });
+ keywords.extend(RESERVED.iter().copied());
+ keywords.sort();
+ keywords.dedup();
+ contextual_keywords.extend(CONTEXTUAL_RESERVED.iter().copied());
+ contextual_keywords.sort();
+ contextual_keywords.dedup();
+
+ // We leak things here for simplicity; that way we don't have to deal with lifetimes.
+ // The execution is a one-shot job, so that's fine.
+ let nodes = nodes
+ .iter()
+ .map(|it| &it.name)
+ .chain(enums.iter().map(|it| &it.name))
+ .map(|it| to_upper_snake_case(it))
+ .map(String::leak)
+ .map(|it| &*it)
+ .collect();
+ let nodes = Vec::leak(nodes);
+ nodes.sort();
+ let keywords = Vec::leak(keywords);
+ let contextual_keywords = Vec::leak(contextual_keywords);
+ let literals = Vec::leak(literals);
+ literals.sort();
+ let tokens = Vec::leak(tokens);
+ tokens.sort();
+
+ KindsSrc { punct: PUNCT, nodes, keywords, contextual_keywords, literals, tokens }
+}
#[derive(Default, Debug)]
pub(crate) struct AstSrc {