Unnamed repository; edit this file 'description' to name the repository.
-rw-r--r--lib/ungrammar/.github/ci.rs114
-rw-r--r--lib/ungrammar/.github/workflows/ci.yaml36
-rw-r--r--lib/ungrammar/.gitignore3
-rw-r--r--lib/ungrammar/Cargo.toml15
-rw-r--r--lib/ungrammar/README.md21
-rw-r--r--lib/ungrammar/bors.toml2
-rw-r--r--lib/ungrammar/rust.ungram666
-rw-r--r--lib/ungrammar/src/error.rs50
-rw-r--r--lib/ungrammar/src/lexer.rs129
-rw-r--r--lib/ungrammar/src/lib.rs137
-rw-r--r--lib/ungrammar/src/parser.rs225
-rw-r--r--lib/ungrammar/ungrammar.ungram16
-rw-r--r--lib/ungrammar/ungrammar2json/Cargo.toml12
-rw-r--r--lib/ungrammar/ungrammar2json/src/main.rs77
14 files changed, 1503 insertions, 0 deletions
diff --git a/lib/ungrammar/.github/ci.rs b/lib/ungrammar/.github/ci.rs
new file mode 100644
index 0000000000..87eb307d63
--- /dev/null
+++ b/lib/ungrammar/.github/ci.rs
@@ -0,0 +1,114 @@
+use std::{
+ env, fs,
+ process::{self, Command, ExitStatus, Stdio},
+ time::Instant,
+};
+
+type Error = Box<dyn std::error::Error>;
+type Result<T> = std::result::Result<T, Error>;
+
+fn main() {
+ if let Err(err) = try_main() {
+ eprintln!("{}", err);
+ process::exit(1);
+ }
+}
+
+fn try_main() -> Result<()> {
+ let cwd = env::current_dir()?;
+ let cargo_toml = cwd.join("Cargo.toml");
+ assert!(
+ cargo_toml.exists(),
+ "Cargo.toml not found, cwd: {}",
+ cwd.display()
+ );
+
+ {
+ let _s = Section::new("BUILD");
+ shell("cargo test --workspace --no-run")?;
+ }
+
+ {
+ let _s = Section::new("TEST");
+ shell("cargo test --workspace")?;
+ }
+
+ let current_branch = shell_output("git branch --show-current")?;
+ if &current_branch == "master" {
+ let _s = Section::new("PUBLISH");
+ let manifest = fs::read_to_string(&cargo_toml)?;
+ let version = get_field(&manifest, "version")?;
+ let tag = format!("v{}", version);
+ let tags = shell_output("git tag --list")?;
+
+ if !tags.contains(&tag) {
+ let token = env::var("CRATES_IO_TOKEN").unwrap();
+ shell(&format!("git tag v{}", version))?;
+ shell(&format!("cargo publish --token {}", token))?;
+ shell("git push --tags")?;
+ }
+ }
+ Ok(())
+}
+
+fn get_field<'a>(text: &'a str, name: &str) -> Result<&'a str> {
+ for line in text.lines() {
+ let words = line.split_ascii_whitespace().collect::<Vec<_>>();
+ match words.as_slice() {
+ [n, "=", v, ..] if n.trim() == name => {
+ assert!(v.starts_with('"') && v.ends_with('"'));
+ return Ok(&v[1..v.len() - 1]);
+ }
+ _ => (),
+ }
+ }
+ Err(format!("can't find `{}` in\n----\n{}\n----\n", name, text))?
+}
+
+fn shell(cmd: &str) -> Result<()> {
+ let status = command(cmd).status()?;
+ check_status(status)
+}
+
+fn shell_output(cmd: &str) -> Result<String> {
+ let output = command(cmd).stderr(Stdio::inherit()).output()?;
+ check_status(output.status)?;
+ let res = String::from_utf8(output.stdout)?;
+ Ok(res.trim().to_string())
+}
+
+fn command(cmd: &str) -> Command {
+ eprintln!("> {}", cmd);
+ let words = cmd.split_ascii_whitespace().collect::<Vec<_>>();
+ let (cmd, args) = words.split_first().unwrap();
+ let mut res = Command::new(cmd);
+ res.args(args);
+ res
+}
+
+fn check_status(status: ExitStatus) -> Result<()> {
+ if !status.success() {
+ Err(format!("$status: {}", status))?;
+ }
+ Ok(())
+}
+
+struct Section {
+ name: &'static str,
+ start: Instant,
+}
+
+impl Section {
+ fn new(name: &'static str) -> Section {
+ println!("::group::{}", name);
+ let start = Instant::now();
+ Section { name, start }
+ }
+}
+
+impl Drop for Section {
+ fn drop(&mut self) {
+ eprintln!("{}: {:.2?}", self.name, self.start.elapsed());
+ println!("::endgroup::");
+ }
+}
diff --git a/lib/ungrammar/.github/workflows/ci.yaml b/lib/ungrammar/.github/workflows/ci.yaml
new file mode 100644
index 0000000000..88f133867e
--- /dev/null
+++ b/lib/ungrammar/.github/workflows/ci.yaml
@@ -0,0 +1,36 @@
+name: CI
+on:
+ pull_request:
+ push:
+ branches:
+ - master
+ - staging
+ - trying
+
+env:
+ CARGO_INCREMENTAL: 0
+ CARGO_NET_RETRY: 10
+ CI: 1
+ RUST_BACKTRACE: short
+ RUSTFLAGS: -D warnings
+ RUSTUP_MAX_RETRIES: 10
+
+jobs:
+ rust:
+ name: Rust
+ runs-on: ubuntu-latest
+
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v2
+
+ - name: Install Rust toolchain
+ uses: actions-rs/toolchain@v1
+ with:
+ toolchain: stable
+ profile: minimal
+ override: true
+
+ - run: rustc ./.github/ci.rs && ./ci
+ env:
+ CRATES_IO_TOKEN: ${{ secrets.CRATES_IO_TOKEN }}
diff --git a/lib/ungrammar/.gitignore b/lib/ungrammar/.gitignore
new file mode 100644
index 0000000000..e3bd43f693
--- /dev/null
+++ b/lib/ungrammar/.gitignore
@@ -0,0 +1,3 @@
+/ci
+/Cargo.lock
+/target
diff --git a/lib/ungrammar/Cargo.toml b/lib/ungrammar/Cargo.toml
new file mode 100644
index 0000000000..920d9ef49d
--- /dev/null
+++ b/lib/ungrammar/Cargo.toml
@@ -0,0 +1,15 @@
+[package]
+name = "ungrammar"
+description = "A DSL for describing concrete syntax trees"
+version = "1.16.1"
+license = "MIT OR Apache-2.0"
+repository = "https://github.com/rust-analyzer/ungrammar"
+edition = "2018"
+
+exclude = ["/bors.toml", "/.github"]
+
+[workspace]
+members = ["ungrammar2json"]
+
+[dependencies]
+# nope
diff --git a/lib/ungrammar/README.md b/lib/ungrammar/README.md
new file mode 100644
index 0000000000..a5e130fedf
--- /dev/null
+++ b/lib/ungrammar/README.md
@@ -0,0 +1,21 @@
+# ungrammar
+
+A DSL for specifying concrete syntax trees.
+
+See the [blog post][post] for an introduction.
+
+See [./rust.ungram](./rust.ungram) for an example.
+
+## Editor support
+
+- Vim
+ - [vim-ungrammar][]
+ - [ungrammar.vim][]
+- VSCode
+ - [ungrammar-tools][]
+
+[post]:
+ https://rust-analyzer.github.io/blog/2020/10/24/introducing-ungrammar.html
+[vim-ungrammar]: https://github.com/Iron-E/vim-ungrammar
+[ungrammar.vim]: https://github.com/drtychai/ungrammar.vim
+[ungrammar-tools]: https://github.com/azdavis/ungrammar-tools
diff --git a/lib/ungrammar/bors.toml b/lib/ungrammar/bors.toml
new file mode 100644
index 0000000000..b92b99ac30
--- /dev/null
+++ b/lib/ungrammar/bors.toml
@@ -0,0 +1,2 @@
+status = [ "Rust" ]
+delete_merged_branches = true
diff --git a/lib/ungrammar/rust.ungram b/lib/ungrammar/rust.ungram
new file mode 100644
index 0000000000..7781e719e2
--- /dev/null
+++ b/lib/ungrammar/rust.ungram
@@ -0,0 +1,666 @@
+// Note: this grammar file is no longer maintained and does not reflect the current Rust language.
+
+// Rust Un-Grammar.
+//
+// This grammar specifies the structure of Rust's concrete syntax tree.
+// It does not specify parsing rules (ambiguities, precedence, etc are out of scope).
+// Tokens are processed -- contextual keywords are recognised, compound operators glued.
+//
+// Legend:
+//
+// // -- comment
+// Name = -- non-terminal definition
+// 'ident' -- token (terminal)
+// A B -- sequence
+// A | B -- alternation
+// A* -- zero or more repetition
+// A? -- zero or one repetition
+// (A) -- same as A
+// label:A -- suggested name for field of AST node
+
+//*************************//
+// Names, Paths and Macros //
+//*************************//
+
+Name =
+ 'ident' | 'self'
+
+NameRef =
+ 'ident' | 'int_number' | 'self' | 'super' | 'crate' | 'Self'
+
+Lifetime =
+ 'lifetime_ident'
+
+Path =
+ (qualifier:Path '::')? segment:PathSegment
+
+PathSegment =
+ '::'? NameRef
+| NameRef GenericArgList?
+| NameRef ParamList RetType?
+| '<' PathType ('as' PathType)? '>'
+
+GenericArgList =
+ '::'? '<' (GenericArg (',' GenericArg)* ','?)? '>'
+
+GenericArg =
+ TypeArg
+| AssocTypeArg
+| LifetimeArg
+| ConstArg
+
+TypeArg =
+ Type
+
+AssocTypeArg =
+ NameRef GenericParamList? (':' TypeBoundList | '=' Type)
+
+LifetimeArg =
+ Lifetime
+
+ConstArg =
+ Expr
+
+MacroCall =
+ Attr* Path '!' TokenTree ';'?
+
+TokenTree =
+ '(' ')'
+| '{' '}'
+| '[' ']'
+
+MacroItems =
+ Item*
+
+MacroStmts =
+ statements:Stmt*
+ Expr?
+
+//*************************//
+// Items //
+//*************************//
+
+SourceFile =
+ 'shebang'?
+ Attr*
+ Item*
+
+Item =
+ Const
+| Enum
+| ExternBlock
+| ExternCrate
+| Fn
+| Impl
+| MacroCall
+| MacroRules
+| MacroDef
+| Module
+| Static
+| Struct
+| Trait
+| TypeAlias
+| Union
+| Use
+
+MacroRules =
+ Attr* Visibility?
+ 'macro_rules' '!' Name
+ TokenTree
+
+MacroDef =
+ Attr* Visibility?
+ 'macro' Name args:TokenTree?
+ body:TokenTree
+
+Module =
+ Attr* Visibility?
+ 'mod' Name
+ (ItemList | ';')
+
+ItemList =
+ '{' Attr* Item* '}'
+
+ExternCrate =
+ Attr* Visibility?
+ 'extern' 'crate' NameRef Rename? ';'
+
+Rename =
+ 'as' (Name | '_')
+
+Use =
+ Attr* Visibility?
+ 'use' UseTree ';'
+
+UseTree =
+ (Path? '::')? ('*' | UseTreeList)
+| Path Rename?
+
+UseTreeList =
+ '{' (UseTree (',' UseTree)* ','?)? '}'
+
+Fn =
+ Attr* Visibility?
+ 'default'? 'const'? 'async'? 'unsafe'? Abi?
+ 'fn' Name GenericParamList? ParamList RetType? WhereClause?
+ (body:BlockExpr | ';')
+
+Abi =
+ 'extern' 'string'?
+
+ParamList =
+ '('(
+ SelfParam
+ | (SelfParam ',')? (Param (',' Param)* ','?)?
+ )')'
+| '|' (Param (',' Param)* ','?)? '|'
+
+SelfParam =
+ Attr* (
+ ('&' Lifetime?)? 'mut'? Name
+ | 'mut'? Name ':' Type
+ )
+
+Param =
+ Attr* (
+ Pat (':' Type)?
+ | Type
+ | '...'
+ )
+
+RetType =
+ '->' Type
+
+TypeAlias =
+ Attr* Visibility?
+ 'default'?
+ 'type' Name GenericParamList? (':' TypeBoundList?)? WhereClause?
+ ('=' Type)? ';'
+
+Struct =
+ Attr* Visibility?
+ 'struct' Name GenericParamList? (
+ WhereClause? (RecordFieldList | ';')
+ | TupleFieldList WhereClause? ';'
+ )
+
+RecordFieldList =
+ '{' fields:(RecordField (',' RecordField)* ','?)? '}'
+
+RecordField =
+ Attr* Visibility?
+ Name ':' Type
+
+TupleFieldList =
+ '(' fields:(TupleField (',' TupleField)* ','?)? ')'
+
+TupleField =
+ Attr* Visibility?
+ Type
+
+FieldList =
+ RecordFieldList
+| TupleFieldList
+
+Enum =
+ Attr* Visibility?
+ 'enum' Name GenericParamList? WhereClause?
+ VariantList
+
+VariantList =
+ '{' (Variant (',' Variant)* ','?)? '}'
+
+Variant =
+ Attr* Visibility?
+ Name FieldList? ('=' Expr)?
+
+Union =
+ Attr* Visibility?
+ 'union' Name GenericParamList? WhereClause?
+ RecordFieldList
+
+// A Data Type.
+//
+// Not used directly in the grammar, but handy to have anyway.
+Adt =
+ Enum
+| Struct
+| Union
+
+Const =
+ Attr* Visibility?
+ 'default'?
+ 'const' (Name | '_') ':' Type
+ ('=' body:Expr)? ';'
+
+Static =
+ Attr* Visibility?
+ 'static' 'mut'? Name ':' Type
+ ('=' body:Expr)? ';'
+
+Trait =
+ Attr* Visibility?
+ 'unsafe'? 'auto'?
+ 'trait' Name GenericParamList? (':' TypeBoundList?)? WhereClause?
+ AssocItemList
+
+AssocItemList =
+ '{' Attr* AssocItem* '}'
+
+AssocItem =
+ Const
+| Fn
+| MacroCall
+| TypeAlias
+
+Impl =
+ Attr* Visibility?
+ 'default'? 'unsafe'?
+ 'impl' GenericParamList? ('const'? '!'? trait:Type 'for')? self_ty:Type WhereClause?
+ AssocItemList
+
+ExternBlock =
+ Attr* 'unsafe'? Abi ExternItemList
+
+ExternItemList =
+ '{' Attr* ExternItem* '}'
+
+ExternItem =
+ Fn
+| MacroCall
+| Static
+| TypeAlias
+
+GenericParamList =
+ '<' (GenericParam (',' GenericParam)* ','?)? '>'
+
+GenericParam =
+ ConstParam
+| LifetimeParam
+| TypeParam
+
+TypeParam =
+ Attr* Name (':' TypeBoundList?)?
+ ('=' default_type:Type)?
+
+ConstParam =
+ Attr* 'const' Name ':' Type
+ ('=' default_val:Expr)?
+
+LifetimeParam =
+ Attr* Lifetime (':' TypeBoundList?)?
+
+WhereClause =
+ 'where' predicates:(WherePred (',' WherePred)* ','?)
+
+WherePred =
+ ('for' GenericParamList)? (Lifetime | Type) ':' TypeBoundList?
+
+Visibility =
+ 'pub' ('(' 'in'? Path ')')?
+
+Attr =
+ '#' '!'? '[' Meta ']'
+
+Meta =
+ Path ('=' Expr | TokenTree)?
+
+//****************************//
+// Statements and Expressions //
+//****************************//
+
+Stmt =
+ ';'
+| ExprStmt
+| Item
+| LetStmt
+
+LetStmt =
+ Attr* 'let' Pat (':' Type)?
+ '=' initializer:Expr
+ LetElse?
+ ';'
+
+LetElse =
+ 'else' BlockExpr
+
+ExprStmt =
+ Expr ';'?
+
+Expr =
+ ArrayExpr
+| AwaitExpr
+| BinExpr
+| BlockExpr
+| BoxExpr
+| BreakExpr
+| CallExpr
+| CastExpr
+| ClosureExpr
+| ContinueExpr
+| FieldExpr
+| ForExpr
+| IfExpr
+| IndexExpr
+| Literal
+| LoopExpr
+| MacroCall
+| MacroStmts
+| MatchExpr
+| MethodCallExpr
+| ParenExpr
+| PathExpr
+| PrefixExpr
+| RangeExpr
+| RecordExpr
+| RefExpr
+| ReturnExpr
+| TryExpr
+| TupleExpr
+| WhileExpr
+| YieldExpr
+| LetExpr
+| UnderscoreExpr
+
+Literal =
+ Attr* value:(
+ 'int_number' | 'float_number'
+ | 'string' | 'raw_string'
+ | 'byte_string' | 'raw_byte_string'
+ | 'true' | 'false'
+ | 'char' | 'byte'
+ )
+
+PathExpr =
+ Attr* Path
+
+StmtList =
+ '{'
+ Attr*
+ statements:Stmt*
+ tail_expr:Expr?
+ '}'
+
+RefExpr =
+ Attr* '&' ('raw' | 'mut' | 'const') Expr
+
+TryExpr =
+ Attr* Expr '?'
+
+BlockExpr =
+ Attr* Label? ('try' | 'unsafe' | 'async' | 'const') StmtList
+
+PrefixExpr =
+ Attr* op:('-' | '!' | '*') Expr
+
+BinExpr =
+ Attr*
+ lhs:Expr
+ op:(
+ '||' | '&&'
+ | '==' | '!=' | '<=' | '>=' | '<' | '>'
+ | '+' | '*' | '-' | '/' | '%' | '<<' | '>>' | '^' | '|' | '&'
+ | '=' | '+=' | '/=' | '*=' | '%=' | '>>=' | '<<=' | '-=' | '|=' | '&=' | '^='
+ )
+ rhs:Expr
+
+CastExpr =
+ Attr* Expr 'as' Type
+
+ParenExpr =
+ Attr* '(' Attr* Expr ')'
+
+ArrayExpr =
+ Attr* '[' Attr* (
+ (Expr (',' Expr)* ','?)?
+ | Expr ';' Expr
+ ) ']'
+
+IndexExpr =
+ Attr* base:Expr '[' index:Expr ']'
+
+TupleExpr =
+ Attr* '(' Attr* fields:(Expr (',' Expr)* ','?)? ')'
+
+RecordExpr =
+ Path RecordExprFieldList
+
+RecordExprFieldList =
+ '{'
+ Attr*
+ fields:(RecordExprField (',' RecordExprField)* ','?)?
+ ('..' spread:Expr?)?
+ '}'
+
+RecordExprField =
+ Attr* (NameRef ':')? Expr
+
+CallExpr =
+ Attr* Expr ArgList
+
+ArgList =
+ '(' args:(Expr (',' Expr)* ','?)? ')'
+
+MethodCallExpr =
+ Attr* receiver:Expr '.' NameRef GenericArgList? ArgList
+
+FieldExpr =
+ Attr* Expr '.' NameRef
+
+ClosureExpr =
+ Attr* 'static'? 'async'? 'move'? ParamList RetType?
+ body:Expr
+
+IfExpr =
+ Attr* 'if' condition:Expr then_branch:BlockExpr
+ ('else' else_branch:(IfExpr | BlockExpr))?
+
+LoopExpr =
+ Attr* Label? 'loop'
+ loop_body:BlockExpr
+
+ForExpr =
+ Attr* Label? 'for' Pat 'in' iterable:Expr
+ loop_body:BlockExpr
+
+WhileExpr =
+ Attr* Label? 'while' condition:Expr
+ loop_body:BlockExpr
+
+Label =
+ Lifetime ':'
+
+BreakExpr =
+ Attr* 'break' Lifetime? Expr?
+
+ContinueExpr =
+ Attr* 'continue' Lifetime?
+
+RangeExpr =
+ Attr* start:Expr? op:('..' | '..=') end:Expr?
+
+MatchExpr =
+ Attr* 'match' Expr MatchArmList
+
+MatchArmList =
+ '{'
+ Attr*
+ arms:MatchArm*
+ '}'
+
+MatchArm =
+ Attr* Pat guard:MatchGuard? '=>' Expr ','?
+
+MatchGuard =
+ 'if' condition:Expr
+
+ReturnExpr =
+ Attr* 'return' Expr?
+
+YieldExpr =
+ Attr* 'yield' Expr?
+
+LetExpr =
+ Attr* 'let' Pat '=' Expr
+
+UnderscoreExpr =
+ Attr* '_'
+
+AwaitExpr =
+ Attr* Expr '.' 'await'
+
+BoxExpr =
+ Attr* 'box' Expr
+
+//*************************//
+// Types //
+//*************************//
+
+Type =
+ ArrayType
+| DynTraitType
+| FnPtrType
+| ForType
+| ImplTraitType
+| InferType
+| MacroType
+| NeverType
+| ParenType
+| PathType
+| PtrType
+| RefType
+| SliceType
+| TupleType
+
+ParenType =
+ '(' Type ')'
+
+NeverType =
+ '!'
+
+MacroType =
+ MacroCall
+
+PathType =
+ Path
+
+TupleType =
+ '(' fields:(Type (',' Type)* ','?)? ')'
+
+PtrType =
+ '*' ('const' | 'mut') Type
+
+RefType =
+ '&' Lifetime? 'mut'? Type
+
+ArrayType =
+ '[' Type ';' Expr ']'
+
+SliceType =
+ '[' Type ']'
+
+InferType =
+ '_'
+
+FnPtrType =
+ 'const'? 'async'? 'unsafe'? Abi? 'fn' ParamList RetType?
+
+ForType =
+ 'for' GenericParamList Type
+
+ImplTraitType =
+ 'impl' TypeBoundList
+
+DynTraitType =
+ 'dyn' TypeBoundList
+
+TypeBoundList =
+ bounds:(TypeBound ('+' TypeBound)* '+'?)
+
+TypeBound =
+ Lifetime
+| ('?' | '~' 'const')? Type
+
+//************************//
+// Patterns //
+//************************//
+
+Pat =
+ IdentPat
+| BoxPat
+| RestPat
+| LiteralPat
+| MacroPat
+| OrPat
+| ParenPat
+| PathPat
+| WildcardPat
+| RangePat
+| RecordPat
+| RefPat
+| SlicePat
+| TuplePat
+| TupleStructPat
+| ConstBlockPat
+
+LiteralPat =
+ Literal
+
+IdentPat =
+ Attr* 'ref'? 'mut'? Name ('@' Pat)?
+
+WildcardPat =
+ '_'
+
+RangePat =
+ // 1..
+ start:Pat op:('..' | '..=')
+ // 1..2
+ | start:Pat op:('..' | '..=') end:Pat
+ // ..2
+ | op:('..' | '..=') end:Pat
+
+RefPat =
+ '&' 'mut'? Pat
+
+RecordPat =
+ Path RecordPatFieldList
+
+RecordPatFieldList =
+ '{'
+ fields:(RecordPatField (',' RecordPatField)* ','?)?
+ RestPat?
+ '}'
+
+RecordPatField =
+ Attr* (NameRef ':')? Pat
+
+TupleStructPat =
+ Path '(' fields:(Pat (',' Pat)* ','?)? ')'
+
+TuplePat =
+ '(' fields:(Pat (',' Pat)* ','?)? ')'
+
+ParenPat =
+ '(' Pat ')'
+
+SlicePat =
+ '[' (Pat (',' Pat)* ','?)? ']'
+
+PathPat =
+ Path
+
+OrPat =
+ (Pat ('|' Pat)* '|'?)
+
+BoxPat =
+ 'box' Pat
+
+RestPat =
+ Attr* '..'
+
+MacroPat =
+ MacroCall
+
+ConstBlockPat =
+ 'const' BlockExpr
diff --git a/lib/ungrammar/src/error.rs b/lib/ungrammar/src/error.rs
new file mode 100644
index 0000000000..355e0b7ebc
--- /dev/null
+++ b/lib/ungrammar/src/error.rs
@@ -0,0 +1,50 @@
+//! Boilerplate error definitions.
+use std::fmt;
+
+use crate::lexer::Location;
+
+/// A type alias for std's Result with the Error as our error type.
+pub type Result<T, E = Error> = std::result::Result<T, E>;
+
+/// An error encountered when parsing a Grammar.
+#[derive(Debug)]
+pub struct Error {
+ pub(crate) message: String,
+ pub(crate) location: Option<Location>,
+}
+
+impl fmt::Display for Error {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ if let Some(loc) = self.location {
+ // Report 1-based indices, to match text editors
+ write!(f, "{}:{}: ", loc.line + 1, loc.column + 1)?
+ }
+ write!(f, "{}", self.message)
+ }
+}
+
+impl std::error::Error for Error {}
+
+impl Error {
+ pub(crate) fn with_location(self, location: Location) -> Error {
+ Error {
+ location: Some(location),
+ ..self
+ }
+ }
+}
+
+macro_rules! _format_err {
+ ($($tt:tt)*) => {
+ $crate::error::Error {
+ message: format!($($tt)*),
+ location: None,
+ }
+ };
+}
+pub(crate) use _format_err as format_err;
+
+macro_rules! _bail {
+ ($($tt:tt)*) => { return Err($crate::error::format_err!($($tt)*)) };
+}
+pub(crate) use _bail as bail;
diff --git a/lib/ungrammar/src/lexer.rs b/lib/ungrammar/src/lexer.rs
new file mode 100644
index 0000000000..f4c979b5bd
--- /dev/null
+++ b/lib/ungrammar/src/lexer.rs
@@ -0,0 +1,129 @@
+//! Simple hand-written ungrammar lexer
+use crate::error::{bail, Result};
+
+#[derive(Debug, Eq, PartialEq)]
+pub(crate) enum TokenKind {
+ Node(String),
+ Token(String),
+ Eq,
+ Star,
+ Pipe,
+ QMark,
+ Colon,
+ LParen,
+ RParen,
+}
+
+#[derive(Debug)]
+pub(crate) struct Token {
+ pub(crate) kind: TokenKind,
+ pub(crate) loc: Location,
+}
+
+#[derive(Copy, Clone, Default, Debug)]
+pub(crate) struct Location {
+ pub(crate) line: usize,
+ pub(crate) column: usize,
+}
+
+impl Location {
+ fn advance(&mut self, text: &str) {
+ match text.rfind('\n') {
+ Some(idx) => {
+ self.line += text.chars().filter(|&it| it == '\n').count();
+ self.column = text[idx + 1..].chars().count();
+ }
+ None => self.column += text.chars().count(),
+ }
+ }
+}
+
+pub(crate) fn tokenize(mut input: &str) -> Result<Vec<Token>> {
+ let mut res = Vec::new();
+ let mut loc = Location::default();
+ while !input.is_empty() {
+ let old_input = input;
+ skip_ws(&mut input);
+ skip_comment(&mut input);
+ if old_input.len() == input.len() {
+ match advance(&mut input) {
+ Ok(kind) => {
+ res.push(Token { kind, loc });
+ }
+ Err(err) => return Err(err.with_location(loc)),
+ }
+ }
+ let consumed = old_input.len() - input.len();
+ loc.advance(&old_input[..consumed]);
+ }
+
+ Ok(res)
+}
+
+fn skip_ws(input: &mut &str) {
+ *input = input.trim_start_matches(is_whitespace)
+}
+fn skip_comment(input: &mut &str) {
+ if input.starts_with("//") {
+ let idx = input.find('\n').map_or(input.len(), |it| it + 1);
+ *input = &input[idx..]
+ }
+}
+
+fn advance(input: &mut &str) -> Result<TokenKind> {
+ let mut chars = input.chars();
+ let c = chars.next().unwrap();
+ let res = match c {
+ '=' => TokenKind::Eq,
+ '*' => TokenKind::Star,
+ '?' => TokenKind::QMark,
+ '(' => TokenKind::LParen,
+ ')' => TokenKind::RParen,
+ '|' => TokenKind::Pipe,
+ ':' => TokenKind::Colon,
+ '\'' => {
+ let mut buf = String::new();
+ loop {
+ match chars.next() {
+ None => bail!("unclosed token literal"),
+ Some('\\') => match chars.next() {
+ Some(c) if is_escapable(c) => buf.push(c),
+ _ => bail!("invalid escape in token literal"),
+ },
+ Some('\'') => break,
+ Some(c) => buf.push(c),
+ }
+ }
+ TokenKind::Token(buf)
+ }
+ c if is_ident_char(c) => {
+ let mut buf = String::new();
+ buf.push(c);
+ loop {
+ match chars.clone().next() {
+ Some(c) if is_ident_char(c) => {
+ chars.next();
+ buf.push(c);
+ }
+ _ => break,
+ }
+ }
+ TokenKind::Node(buf)
+ }
+ '\r' => bail!("unexpected `\\r`, only Unix-style line endings allowed"),
+ c => bail!("unexpected character: `{}`", c),
+ };
+
+ *input = chars.as_str();
+ Ok(res)
+}
+
+fn is_escapable(c: char) -> bool {
+ matches!(c, '\\' | '\'')
+}
+fn is_whitespace(c: char) -> bool {
+ matches!(c, ' ' | '\t' | '\n')
+}
+fn is_ident_char(c: char) -> bool {
+ matches!(c, 'a'..='z' | 'A'..='Z' | '_')
+}
diff --git a/lib/ungrammar/src/lib.rs b/lib/ungrammar/src/lib.rs
new file mode 100644
index 0000000000..6adf8ef8ea
--- /dev/null
+++ b/lib/ungrammar/src/lib.rs
@@ -0,0 +1,137 @@
+//! Ungrammar -- a DSL for specifying concrete syntax tree grammar.
+//!
+//! Producing a parser is an explicit non-goal -- it's ok for this grammar to be
+//! ambiguous, non LL, non LR, etc.
+//!
+//! See this
+//! [introductory post](https://rust-analyzer.github.io/blog/2020/10/24/introducing-ungrammar.html)
+//! for details.
+
+#![deny(missing_debug_implementations)]
+#![deny(missing_docs)]
+#![deny(rust_2018_idioms)]
+
+mod error;
+mod lexer;
+mod parser;
+
+use std::{ops, str::FromStr};
+
+pub use error::{Error, Result};
+
+/// Returns a Rust grammar.
+pub fn rust_grammar() -> Grammar {
+ let src = include_str!("../rust.ungram");
+ src.parse().unwrap()
+}
+
+/// A node, like `A = 'b' | 'c'`.
+///
+/// Indexing into a [`Grammar`] with a [`Node`] returns a reference to a
+/// [`NodeData`].
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct Node(usize);
+
+/// A token, denoted with single quotes, like `'+'` or `'struct'`.
+///
+/// Indexing into a [`Grammar`] with a [`Token`] returns a reference to a
+/// [`TokenData`].
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct Token(usize);
+
+/// An Ungrammar grammar.
+#[derive(Default, Debug)]
+pub struct Grammar {
+ nodes: Vec<NodeData>,
+ tokens: Vec<TokenData>,
+}
+
+impl FromStr for Grammar {
+ type Err = Error;
+ fn from_str(s: &str) -> Result<Self> {
+ let tokens = lexer::tokenize(s)?;
+ parser::parse(tokens)
+ }
+}
+
+impl Grammar {
+ /// Returns an iterator over all nodes in the grammar.
+ pub fn iter(&self) -> impl Iterator<Item = Node> + '_ {
+ (0..self.nodes.len()).map(Node)
+ }
+
+ /// Returns an iterator over all tokens in the grammar.
+ pub fn tokens(&self) -> impl Iterator<Item = Token> + '_ {
+ (0..self.tokens.len()).map(Token)
+ }
+}
+
+impl ops::Index<Node> for Grammar {
+ type Output = NodeData;
+ fn index(&self, Node(index): Node) -> &NodeData {
+ &self.nodes[index]
+ }
+}
+
+impl ops::Index<Token> for Grammar {
+ type Output = TokenData;
+ fn index(&self, Token(index): Token) -> &TokenData {
+ &self.tokens[index]
+ }
+}
+
+/// Data about a node.
+#[derive(Debug)]
+pub struct NodeData {
+ /// The name of the node.
+ ///
+ /// In the rule `A = 'b' | 'c'`, this is `"A"`.
+ pub name: String,
+ /// The rule for this node.
+ ///
+ /// In the rule `A = 'b' | 'c'`, this represents `'b' | 'c'`.
+ pub rule: Rule,
+}
+
+/// Data about a token.
+#[derive(Debug)]
+pub struct TokenData {
+ /// The name of the token.
+ pub name: String,
+}
+
+/// A production rule.
+#[derive(Debug, Clone, Eq, PartialEq)]
+pub enum Rule {
+ /// A labeled rule, like `a:B` (`"a"` is the label, `B` is the rule).
+ Labeled {
+ /// The label.
+ label: String,
+ /// The rule.
+ rule: Box<Rule>,
+ },
+ /// A node, like `A`.
+ Node(Node),
+ /// A token, like `'struct'`.
+ Token(Token),
+ /// A sequence of rules, like `'while' '(' Expr ')' Stmt`.
+ Seq(Vec<Rule>),
+ /// An alternative between many rules, like `'+' | '-' | '*' | '/'`.
+ Alt(Vec<Rule>),
+ /// An optional rule, like `A?`.
+ Opt(Box<Rule>),
+ /// A repeated rule, like `A*`.
+ Rep(Box<Rule>),
+}
+
+#[test]
+fn smoke() {
+ let grammar = include_str!("../ungrammar.ungram");
+ let grammar = grammar.parse::<Grammar>().unwrap();
+ drop(grammar)
+}
+
+#[test]
+fn test_rust_grammar() {
+ let _ = rust_grammar();
+}
diff --git a/lib/ungrammar/src/parser.rs b/lib/ungrammar/src/parser.rs
new file mode 100644
index 0000000000..a4ce9c1202
--- /dev/null
+++ b/lib/ungrammar/src/parser.rs
@@ -0,0 +1,225 @@
+//! Simple hand-written ungrammar parser.
+use std::collections::HashMap;
+
+use crate::{
+ error::{bail, format_err, Result},
+ lexer::{self, TokenKind},
+ Grammar, Node, NodeData, Rule, Token, TokenData,
+};
+
+macro_rules! bail {
+ ($loc:expr, $($tt:tt)*) => {{
+ let err = $crate::error::format_err!($($tt)*)
+ .with_location($loc);
+ return Err(err);
+ }};
+}
+
+pub(crate) fn parse(tokens: Vec<lexer::Token>) -> Result<Grammar> {
+ let mut p = Parser::new(tokens);
+ while !p.is_eof() {
+ node(&mut p)?;
+ }
+ p.finish()
+}
+
+#[derive(Default)]
+struct Parser {
+ grammar: Grammar,
+ tokens: Vec<lexer::Token>,
+ node_table: HashMap<String, Node>,
+ token_table: HashMap<String, Token>,
+}
+
+const DUMMY_RULE: Rule = Rule::Node(Node(!0));
+
+impl Parser {
+ fn new(mut tokens: Vec<lexer::Token>) -> Parser {
+ tokens.reverse();
+ Parser {
+ tokens,
+ ..Parser::default()
+ }
+ }
+
+ fn peek(&self) -> Option<&lexer::Token> {
+ self.peek_n(0)
+ }
+ fn peek_n(&self, n: usize) -> Option<&lexer::Token> {
+ self.tokens.iter().nth_back(n)
+ }
+ fn bump(&mut self) -> Result<lexer::Token> {
+ self.tokens
+ .pop()
+ .ok_or_else(|| format_err!("unexpected EOF"))
+ }
+ fn expect(&mut self, kind: TokenKind, what: &str) -> Result<()> {
+ let token = self.bump()?;
+ if token.kind != kind {
+ bail!(token.loc, "unexpected token, expected `{}`", what);
+ }
+ Ok(())
+ }
+ fn is_eof(&self) -> bool {
+ self.tokens.is_empty()
+ }
+ fn finish(self) -> Result<Grammar> {
+ for node_data in &self.grammar.nodes {
+ if matches!(node_data.rule, DUMMY_RULE) {
+ crate::error::bail!("Undefined node: {}", node_data.name)
+ }
+ }
+ Ok(self.grammar)
+ }
+ fn intern_node(&mut self, name: String) -> Node {
+ let len = self.node_table.len();
+ let grammar = &mut self.grammar;
+ *self.node_table.entry(name.clone()).or_insert_with(|| {
+ grammar.nodes.push(NodeData {
+ name,
+ rule: DUMMY_RULE,
+ });
+ Node(len)
+ })
+ }
+ fn intern_token(&mut self, name: String) -> Token {
+ let len = self.token_table.len();
+ let grammar = &mut self.grammar;
+ *self.token_table.entry(name.clone()).or_insert_with(|| {
+ grammar.tokens.push(TokenData { name });
+ Token(len)
+ })
+ }
+}
+
+fn node(p: &mut Parser) -> Result<()> {
+ let token = p.bump()?;
+ let node = match token.kind {
+ TokenKind::Node(it) => p.intern_node(it),
+ _ => bail!(token.loc, "expected ident"),
+ };
+ p.expect(TokenKind::Eq, "=")?;
+ if !matches!(p.grammar[node].rule, DUMMY_RULE) {
+ bail!(token.loc, "duplicate rule: `{}`", p.grammar[node].name)
+ }
+
+ let rule = rule(p)?;
+ p.grammar.nodes[node.0].rule = rule;
+ Ok(())
+}
+
+fn rule(p: &mut Parser) -> Result<Rule> {
+ if let Some(lexer::Token { kind: TokenKind::Pipe, loc }) = p.peek() {
+ bail!(
+ *loc,
+ "The first element in a sequence of productions or alternatives \
+ must not have a leading pipe (`|`)"
+ );
+ }
+
+ let lhs = seq_rule(p)?;
+ let mut alt = vec![lhs];
+ while let Some(token) = p.peek() {
+ if token.kind != TokenKind::Pipe {
+ break;
+ }
+ p.bump()?;
+ let rule = seq_rule(p)?;
+ alt.push(rule)
+ }
+ let res = if alt.len() == 1 {
+ alt.pop().unwrap()
+ } else {
+ Rule::Alt(alt)
+ };
+ Ok(res)
+}
+
+fn seq_rule(p: &mut Parser) -> Result<Rule> {
+ let lhs = atom_rule(p)?;
+
+ let mut seq = vec![lhs];
+ while let Some(rule) = opt_atom_rule(p)? {
+ seq.push(rule)
+ }
+ let res = if seq.len() == 1 {
+ seq.pop().unwrap()
+ } else {
+ Rule::Seq(seq)
+ };
+ Ok(res)
+}
+
+fn atom_rule(p: &mut Parser) -> Result<Rule> {
+ match opt_atom_rule(p)? {
+ Some(it) => Ok(it),
+ None => {
+ let token = p.bump()?;
+ bail!(token.loc, "unexpected token")
+ }
+ }
+}
+
+fn opt_atom_rule(p: &mut Parser) -> Result<Option<Rule>> {
+ let token = match p.peek() {
+ Some(it) => it,
+ None => return Ok(None),
+ };
+ let mut res = match &token.kind {
+ TokenKind::Node(name) => {
+ if let Some(lookahead) = p.peek_n(1) {
+ match lookahead.kind {
+ TokenKind::Eq => return Ok(None),
+ TokenKind::Colon => {
+ let label = name.clone();
+ p.bump()?;
+ p.bump()?;
+ let rule = atom_rule(p)?;
+ let res = Rule::Labeled {
+ label,
+ rule: Box::new(rule),
+ };
+ return Ok(Some(res));
+ }
+ _ => (),
+ }
+ }
+ match p.peek_n(1) {
+ Some(token) if token.kind == TokenKind::Eq => return Ok(None),
+ _ => (),
+ }
+ let name = name.clone();
+ p.bump()?;
+ let node = p.intern_node(name);
+ Rule::Node(node)
+ }
+ TokenKind::Token(name) => {
+ let name = name.clone();
+ p.bump()?;
+ let token = p.intern_token(name);
+ Rule::Token(token)
+ }
+ TokenKind::LParen => {
+ p.bump()?;
+ let rule = rule(p)?;
+ p.expect(TokenKind::RParen, ")")?;
+ rule
+ }
+ _ => return Ok(None),
+ };
+
+ if let Some(token) = p.peek() {
+ match &token.kind {
+ TokenKind::QMark => {
+ p.bump()?;
+ res = Rule::Opt(Box::new(res));
+ }
+ TokenKind::Star => {
+ p.bump()?;
+ res = Rule::Rep(Box::new(res));
+ }
+ _ => (),
+ }
+ }
+ Ok(Some(res))
+}
diff --git a/lib/ungrammar/ungrammar.ungram b/lib/ungrammar/ungrammar.ungram
new file mode 100644
index 0000000000..856a6cede0
--- /dev/null
+++ b/lib/ungrammar/ungrammar.ungram
@@ -0,0 +1,16 @@
+// The Ungrammar grammar, described in Ungrammar itself.
+Grammar =
+ Node *
+
+Node =
+ name:'ident' '=' Rule
+
+Rule =
+ 'ident'
+| 'token_ident'
+| Rule *
+| Rule ( '|' Rule) *
+| Rule '?'
+| Rule '*'
+| '(' Rule ')'
+| label:'ident' ':' Rule
diff --git a/lib/ungrammar/ungrammar2json/Cargo.toml b/lib/ungrammar/ungrammar2json/Cargo.toml
new file mode 100644
index 0000000000..19ca3d8324
--- /dev/null
+++ b/lib/ungrammar/ungrammar2json/Cargo.toml
@@ -0,0 +1,12 @@
+[package]
+name = "ungrammar2json"
+description = "Convert ungrammar files to JSON"
+version = "1.0.0"
+license = "MIT OR Apache-2.0"
+repository = "https://github.com/matklad/ungrammar"
+authors = ["Aleksey Kladov <[email protected]>"]
+edition = "2018"
+
+[dependencies]
+write-json = "0.1.1"
+ungrammar = { path = "../", version = "1.1.0" }
diff --git a/lib/ungrammar/ungrammar2json/src/main.rs b/lib/ungrammar/ungrammar2json/src/main.rs
new file mode 100644
index 0000000000..f588ed5eb6
--- /dev/null
+++ b/lib/ungrammar/ungrammar2json/src/main.rs
@@ -0,0 +1,77 @@
+use std::{
+ env,
+ io::{self, Read},
+ process,
+};
+
+use ungrammar::{Grammar, Rule};
+
+fn main() {
+ if let Err(err) = try_main() {
+ eprintln!("{}", err);
+ process::exit(101);
+ }
+}
+
+fn try_main() -> io::Result<()> {
+ if env::args().count() != 1 {
+ eprintln!("Usage: ungrammar2json < grammar.ungram > grammar.json");
+ return Ok(());
+ }
+ let grammar = read_stdin()?;
+ let grammar = grammar
+ .parse::<Grammar>()
+ .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err))?;
+
+ let mut buf = String::new();
+ grammar_to_json(&grammar, write_json::object(&mut buf));
+ println!("{}", buf);
+ Ok(())
+}
+
+fn read_stdin() -> io::Result<String> {
+ let mut buf = String::new();
+ io::stdin().lock().read_to_string(&mut buf)?;
+ Ok(buf)
+}
+
+fn grammar_to_json(grammar: &Grammar, mut obj: write_json::Object<'_>) {
+ for node in grammar.iter() {
+ let node = &grammar[node];
+ rule_to_json(grammar, &node.rule, obj.object(&node.name));
+ }
+}
+
+fn rule_to_json(grammar: &Grammar, rule: &Rule, mut obj: write_json::Object) {
+ match rule {
+ Rule::Labeled { label, rule } => {
+ obj.string("label", label);
+ rule_to_json(grammar, rule, obj.object("rule"))
+ }
+ Rule::Node(node) => {
+ obj.string("node", &grammar[*node].name);
+ }
+ Rule::Token(token) => {
+ obj.string("token", &grammar[*token].name);
+ }
+ Rule::Seq(rules) | Rule::Alt(rules) => {
+ let tag = match rule {
+ Rule::Seq(_) => "seq",
+ Rule::Alt(_) => "alt",
+ _ => unreachable!(),
+ };
+ let mut array = obj.array(tag);
+ for rule in rules {
+ rule_to_json(grammar, rule, array.object());
+ }
+ }
+ Rule::Opt(arg) | Rule::Rep(arg) => {
+ let tag = match rule {
+ Rule::Opt(_) => "opt",
+ Rule::Rep(_) => "rep",
+ _ => unreachable!(),
+ };
+ rule_to_json(grammar, arg, obj.object(tag));
+ }
+ }
+}