Unnamed repository; edit this file 'description' to name the repository.
-rw-r--r--lib/ungrammar/.github/ci.rs114
-rw-r--r--lib/ungrammar/.github/workflows/ci.yaml36
-rw-r--r--lib/ungrammar/.gitignore3
-rw-r--r--lib/ungrammar/Cargo.toml15
-rw-r--r--lib/ungrammar/README.md21
-rw-r--r--lib/ungrammar/bors.toml2
-rw-r--r--lib/ungrammar/rust.ungram666
-rw-r--r--lib/ungrammar/src/error.rs50
-rw-r--r--lib/ungrammar/src/lexer.rs129
-rw-r--r--lib/ungrammar/src/lib.rs137
-rw-r--r--lib/ungrammar/src/parser.rs225
-rw-r--r--lib/ungrammar/ungrammar.ungram16
-rw-r--r--lib/ungrammar/ungrammar2json/Cargo.toml12
-rw-r--r--lib/ungrammar/ungrammar2json/src/main.rs77
14 files changed, 1503 insertions, 0 deletions
diff --git a/lib/ungrammar/.github/ci.rs b/lib/ungrammar/.github/ci.rs
new file mode 100644
index 0000000000..87eb307d63
--- /dev/null
+++ b/lib/ungrammar/.github/ci.rs
@@ -0,0 +1,114 @@
+use std::{
+ env, fs,
+ process::{self, Command, ExitStatus, Stdio},
+ time::Instant,
+};
+
+type Error = Box<dyn std::error::Error>;
+type Result<T> = std::result::Result<T, Error>;
+
+fn main() {
+ if let Err(err) = try_main() {
+ eprintln!("{}", err);
+ process::exit(1);
+ }
+}
+
+fn try_main() -> Result<()> {
+ let cwd = env::current_dir()?;
+ let cargo_toml = cwd.join("Cargo.toml");
+ assert!(
+ cargo_toml.exists(),
+ "Cargo.toml not found, cwd: {}",
+ cwd.display()
+ );
+
+ {
+ let _s = Section::new("BUILD");
+ shell("cargo test --workspace --no-run")?;
+ }
+
+ {
+ let _s = Section::new("TEST");
+ shell("cargo test --workspace")?;
+ }
+
+ let current_branch = shell_output("git branch --show-current")?;
+ if &current_branch == "master" {
+ let _s = Section::new("PUBLISH");
+ let manifest = fs::read_to_string(&cargo_toml)?;
+ let version = get_field(&manifest, "version")?;
+ let tag = format!("v{}", version);
+ let tags = shell_output("git tag --list")?;
+
+ if !tags.contains(&tag) {
+ let token = env::var("CRATES_IO_TOKEN").unwrap();
+ shell(&format!("git tag v{}", version))?;
+ shell(&format!("cargo publish --token {}", token))?;
+ shell("git push --tags")?;
+ }
+ }
+ Ok(())
+}
+
+fn get_field<'a>(text: &'a str, name: &str) -> Result<&'a str> {
+ for line in text.lines() {
+ let words = line.split_ascii_whitespace().collect::<Vec<_>>();
+ match words.as_slice() {
+ [n, "=", v, ..] if n.trim() == name => {
+ assert!(v.starts_with('"') && v.ends_with('"'));
+ return Ok(&v[1..v.len() - 1]);
+ }
+ _ => (),
+ }
+ }
+ Err(format!("can't find `{}` in\n----\n{}\n----\n", name, text))?
+}
+
+fn shell(cmd: &str) -> Result<()> {
+ let status = command(cmd).status()?;
+ check_status(status)
+}
+
+fn shell_output(cmd: &str) -> Result<String> {
+ let output = command(cmd).stderr(Stdio::inherit()).output()?;
+ check_status(output.status)?;
+ let res = String::from_utf8(output.stdout)?;
+ Ok(res.trim().to_string())
+}
+
+fn command(cmd: &str) -> Command {
+ eprintln!("> {}", cmd);
+ let words = cmd.split_ascii_whitespace().collect::<Vec<_>>();
+ let (cmd, args) = words.split_first().unwrap();
+ let mut res = Command::new(cmd);
+ res.args(args);
+ res
+}
+
+fn check_status(status: ExitStatus) -> Result<()> {
+ if !status.success() {
+ Err(format!("$status: {}", status))?;
+ }
+ Ok(())
+}
+
+struct Section {
+ name: &'static str,
+ start: Instant,
+}
+
+impl Section {
+ fn new(name: &'static str) -> Section {
+ println!("::group::{}", name);
+ let start = Instant::now();
+ Section { name, start }
+ }
+}
+
+impl Drop for Section {
+ fn drop(&mut self) {
+ eprintln!("{}: {:.2?}", self.name, self.start.elapsed());
+ println!("::endgroup::");
+ }
+}
diff --git a/lib/ungrammar/.github/workflows/ci.yaml b/lib/ungrammar/.github/workflows/ci.yaml
new file mode 100644
index 0000000000..88f133867e
--- /dev/null
+++ b/lib/ungrammar/.github/workflows/ci.yaml
@@ -0,0 +1,36 @@
+name: CI
+on:
+ pull_request:
+ push:
+ branches:
+ - master
+ - staging
+ - trying
+
+env:
+ CARGO_INCREMENTAL: 0
+ CARGO_NET_RETRY: 10
+ CI: 1
+ RUST_BACKTRACE: short
+ RUSTFLAGS: -D warnings
+ RUSTUP_MAX_RETRIES: 10
+
+jobs:
+ rust:
+ name: Rust
+ runs-on: ubuntu-latest
+
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v2
+
+ - name: Install Rust toolchain
+ uses: actions-rs/toolchain@v1
+ with:
+ toolchain: stable
+ profile: minimal
+ override: true
+
+ - run: rustc ./.github/ci.rs && ./ci
+ env:
+ CRATES_IO_TOKEN: ${{ secrets.CRATES_IO_TOKEN }}
diff --git a/lib/ungrammar/.gitignore b/lib/ungrammar/.gitignore
new file mode 100644
index 0000000000..e3bd43f693
--- /dev/null
+++ b/lib/ungrammar/.gitignore
@@ -0,0 +1,3 @@
+/ci
+/Cargo.lock
+/target
diff --git a/lib/ungrammar/Cargo.toml b/lib/ungrammar/Cargo.toml
new file mode 100644
index 0000000000..920d9ef49d
--- /dev/null
+++ b/lib/ungrammar/Cargo.toml
@@ -0,0 +1,15 @@
+[package]
+name = "ungrammar"
+description = "A DSL for describing concrete syntax trees"
+version = "1.16.1"
+license = "MIT OR Apache-2.0"
+repository = "https://github.com/rust-analyzer/ungrammar"
+edition = "2018"
+
+exclude = ["/bors.toml", "/.github"]
+
+[workspace]
+members = ["ungrammar2json"]
+
+[dependencies]
+# nope
diff --git a/lib/ungrammar/README.md b/lib/ungrammar/README.md
new file mode 100644
index 0000000000..a5e130fedf
--- /dev/null
+++ b/lib/ungrammar/README.md
@@ -0,0 +1,21 @@
+# ungrammar
+
+A DSL for specifying concrete syntax trees.
+
+See the [blog post][post] for an introduction.
+
+See [./rust.ungram](./rust.ungram) for an example.
+
+## Editor support
+
+- Vim
+ - [vim-ungrammar][]
+ - [ungrammar.vim][]
+- VSCode
+ - [ungrammar-tools][]
+
+[post]:
+ https://rust-analyzer.github.io/blog/2020/10/24/introducing-ungrammar.html
+[vim-ungrammar]: https://github.com/Iron-E/vim-ungrammar
+[ungrammar.vim]: https://github.com/drtychai/ungrammar.vim
+[ungrammar-tools]: https://github.com/azdavis/ungrammar-tools
diff --git a/lib/ungrammar/bors.toml b/lib/ungrammar/bors.toml
new file mode 100644
index 0000000000..b92b99ac30
--- /dev/null
+++ b/lib/ungrammar/bors.toml
@@ -0,0 +1,2 @@
+status = [ "Rust" ]
+delete_merged_branches = true
diff --git a/lib/ungrammar/rust.ungram b/lib/ungrammar/rust.ungram
new file mode 100644
index 0000000000..7781e719e2
--- /dev/null
+++ b/lib/ungrammar/rust.ungram
@@ -0,0 +1,666 @@
+// Note: this grammar file is no longer maintained and does not reflect the current Rust language.
+
+// Rust Un-Grammar.
+//
+// This grammar specifies the structure of Rust's concrete syntax tree.
+// It does not specify parsing rules (ambiguities, precedence, etc are out of scope).
+// Tokens are processed -- contextual keywords are recognised, compound operators glued.
+//
+// Legend:
+//
+// // -- comment
+// Name = -- non-terminal definition
+// 'ident' -- token (terminal)
+// A B -- sequence
+// A | B -- alternation
+// A* -- zero or more repetition
+// A? -- zero or one repetition
+// (A) -- same as A
+// label:A -- suggested name for field of AST node
+
+//*************************//
+// Names, Paths and Macros //
+//*************************//
+
+Name =
+ 'ident' | 'self'
+
+NameRef =
+ 'ident' | 'int_number' | 'self' | 'super' | 'crate' | 'Self'
+
+Lifetime =
+ 'lifetime_ident'
+
+Path =
+ (qualifier:Path '::')? segment:PathSegment
+
+PathSegment =
+ '::'? NameRef
+| NameRef GenericArgList?
+| NameRef ParamList RetType?
+| '<' PathType ('as' PathType)? '>'
+
+GenericArgList =
+ '::'? '<' (GenericArg (',' GenericArg)* ','?)? '>'
+
+GenericArg =
+ TypeArg
+| AssocTypeArg
+| LifetimeArg
+| ConstArg
+
+TypeArg =
+ Type
+
+AssocTypeArg =
+ NameRef GenericParamList? (':' TypeBoundList | '=' Type)
+
+LifetimeArg =
+ Lifetime
+
+ConstArg =
+ Expr
+
+MacroCall =
+ Attr* Path '!' TokenTree ';'?
+
+TokenTree =
+ '(' ')'
+| '{' '}'
+| '[' ']'
+
+MacroItems =
+ Item*
+
+MacroStmts =
+ statements:Stmt*
+ Expr?
+
+//*************************//
+// Items //
+//*************************//
+
+SourceFile =
+ 'shebang'?
+ Attr*
+ Item*
+
+Item =
+ Const
+| Enum
+| ExternBlock
+| ExternCrate
+| Fn
+| Impl
+| MacroCall
+| MacroRules
+| MacroDef
+| Module
+| Static
+| Struct
+| Trait
+| TypeAlias
+| Union
+| Use
+
+MacroRules =
+ Attr* Visibility?
+ 'macro_rules' '!' Name
+ TokenTree
+
+MacroDef =
+ Attr* Visibility?
+ 'macro' Name args:TokenTree?
+ body:TokenTree
+
+Module =
+ Attr* Visibility?
+ 'mod' Name
+ (ItemList | ';')
+
+ItemList =
+ '{' Attr* Item* '}'
+
+ExternCrate =
+ Attr* Visibility?
+ 'extern' 'crate' NameRef Rename? ';'
+
+Rename =
+ 'as' (Name | '_')
+
+Use =
+ Attr* Visibility?
+ 'use' UseTree ';'
+
+UseTree =
+ (Path? '::')? ('*' | UseTreeList)
+| Path Rename?
+
+UseTreeList =
+ '{' (UseTree (',' UseTree)* ','?)? '}'
+
+Fn =
+ Attr* Visibility?
+ 'default'? 'const'? 'async'? 'unsafe'? Abi?
+ 'fn' Name GenericParamList? ParamList RetType? WhereClause?
+ (body:BlockExpr | ';')
+
+Abi =
+ 'extern' 'string'?
+
+ParamList =
+ '('(
+ SelfParam
+ | (SelfParam ',')? (Param (',' Param)* ','?)?
+ )')'
+| '|' (Param (',' Param)* ','?)? '|'
+
+SelfParam =
+ Attr* (
+ ('&' Lifetime?)? 'mut'? Name
+ | 'mut'? Name ':' Type
+ )
+
+Param =
+ Attr* (
+ Pat (':' Type)?
+ | Type
+ | '...'
+ )
+
+RetType =
+ '->' Type
+
+TypeAlias =
+ Attr* Visibility?
+ 'default'?
+ 'type' Name GenericParamList? (':' TypeBoundList?)? WhereClause?
+ ('=' Type)? ';'
+
+Struct =
+ Attr* Visibility?
+ 'struct' Name GenericParamList? (
+ WhereClause? (RecordFieldList | ';')
+ | TupleFieldList WhereClause? ';'
+ )
+
+RecordFieldList =
+ '{' fields:(RecordField (',' RecordField)* ','?)? '}'
+
+RecordField =
+ Attr* Visibility?
+ Name ':' Type
+
+TupleFieldList =
+ '(' fields:(TupleField (',' TupleField)* ','?)? ')'
+
+TupleField =
+ Attr* Visibility?
+ Type
+
+FieldList =
+ RecordFieldList
+| TupleFieldList
+
+Enum =
+ Attr* Visibility?
+ 'enum' Name GenericParamList? WhereClause?
+ VariantList
+
+VariantList =
+ '{' (Variant (',' Variant)* ','?)? '}'
+
+Variant =
+ Attr* Visibility?
+ Name FieldList? ('=' Expr)?
+
+Union =
+ Attr* Visibility?
+ 'union' Name GenericParamList? WhereClause?
+ RecordFieldList
+
+// A Data Type.
+//
+// Not used directly in the grammar, but handy to have anyway.
+Adt =
+ Enum
+| Struct
+| Union
+
+Const =
+ Attr* Visibility?
+ 'default'?
+ 'const' (Name | '_') ':' Type
+ ('=' body:Expr)? ';'
+
+Static =
+ Attr* Visibility?
+ 'static' 'mut'? Name ':' Type
+ ('=' body:Expr)? ';'
+
+Trait =
+ Attr* Visibility?
+ 'unsafe'? 'auto'?
+ 'trait' Name GenericParamList? (':' TypeBoundList?)? WhereClause?
+ AssocItemList
+
+AssocItemList =
+ '{' Attr* AssocItem* '}'
+
+AssocItem =
+ Const
+| Fn
+| MacroCall
+| TypeAlias
+
+Impl =
+ Attr* Visibility?
+ 'default'? 'unsafe'?
+ 'impl' GenericParamList? ('const'? '!'? trait:Type 'for')? self_ty:Type WhereClause?
+ AssocItemList
+
+ExternBlock =
+ Attr* 'unsafe'? Abi ExternItemList
+
+ExternItemList =
+ '{' Attr* ExternItem* '}'
+
+ExternItem =
+ Fn
+| MacroCall
+| Static
+| TypeAlias
+
+GenericParamList =
+ '<' (GenericParam (',' GenericParam)* ','?)? '>'
+
+GenericParam =
+ ConstParam
+| LifetimeParam
+| TypeParam
+
+TypeParam =
+ Attr* Name (':' TypeBoundList?)?
+ ('=' default_type:Type)?
+
+ConstParam =
+ Attr* 'const' Name ':' Type
+ ('=' default_val:Expr)?
+
+LifetimeParam =
+ Attr* Lifetime (':' TypeBoundList?)?
+
+WhereClause =
+ 'where' predicates:(WherePred (',' WherePred)* ','?)
+
+WherePred =
+ ('for' GenericParamList)? (Lifetime | Type) ':' TypeBoundList?
+
+Visibility =
+ 'pub' ('(' 'in'? Path ')')?
+
+Attr =
+ '#' '!'? '[' Meta ']'
+
+Meta =
+ Path ('=' Expr | TokenTree)?
+
+//****************************//
+// Statements and Expressions //
+//****************************//
+
+Stmt =
+ ';'
+| ExprStmt
+| Item
+| LetStmt
+
+LetStmt =
+ Attr* 'let' Pat (':' Type)?
+ '=' initializer:Expr
+ LetElse?
+ ';'
+
+LetElse =
+ 'else' BlockExpr
+
+ExprStmt =
+ Expr ';'?
+
+Expr =
+ ArrayExpr
+| AwaitExpr
+| BinExpr
+| BlockExpr
+| BoxExpr
+| BreakExpr
+| CallExpr
+| CastExpr
+| ClosureExpr
+| ContinueExpr
+| FieldExpr
+| ForExpr
+| IfExpr
+| IndexExpr
+| Literal
+| LoopExpr
+| MacroCall
+| MacroStmts
+| MatchExpr
+| MethodCallExpr
+| ParenExpr
+| PathExpr
+| PrefixExpr
+| RangeExpr
+| RecordExpr
+| RefExpr
+| ReturnExpr
+| TryExpr
+| TupleExpr
+| WhileExpr
+| YieldExpr
+| LetExpr
+| UnderscoreExpr
+
+Literal =
+ Attr* value:(
+ 'int_number' | 'float_number'
+ | 'string' | 'raw_string'
+ | 'byte_string' | 'raw_byte_string'
+ | 'true' | 'false'
+ | 'char' | 'byte'
+ )
+
+PathExpr =
+ Attr* Path
+
+StmtList =
+ '{'
+ Attr*
+ statements:Stmt*
+ tail_expr:Expr?
+ '}'
+
+RefExpr =
+ Attr* '&' ('raw' | 'mut' | 'const') Expr
+
+TryExpr =
+ Attr* Expr '?'
+
+BlockExpr =
+ Attr* Label? ('try' | 'unsafe' | 'async' | 'const') StmtList
+
+PrefixExpr =
+ Attr* op:('-' | '!' | '*') Expr
+
+BinExpr =
+ Attr*
+ lhs:Expr
+ op:(
+ '||' | '&&'
+ | '==' | '!=' | '<=' | '>=' | '<' | '>'
+ | '+' | '*' | '-' | '/' | '%' | '<<' | '>>' | '^' | '|' | '&'
+ | '=' | '+=' | '/=' | '*=' | '%=' | '>>=' | '<<=' | '-=' | '|=' | '&=' | '^='
+ )
+ rhs:Expr
+
+CastExpr =
+ Attr* Expr 'as' Type
+
+ParenExpr =
+ Attr* '(' Attr* Expr ')'
+
+ArrayExpr =
+ Attr* '[' Attr* (
+ (Expr (',' Expr)* ','?)?
+ | Expr ';' Expr
+ ) ']'
+
+IndexExpr =
+ Attr* base:Expr '[' index:Expr ']'
+
+TupleExpr =
+ Attr* '(' Attr* fields:(Expr (',' Expr)* ','?)? ')'
+
+RecordExpr =
+ Path RecordExprFieldList
+
+RecordExprFieldList =
+ '{'
+ Attr*
+ fields:(RecordExprField (',' RecordExprField)* ','?)?
+ ('..' spread:Expr?)?
+ '}'
+
+RecordExprField =
+ Attr* (NameRef ':')? Expr
+
+CallExpr =
+ Attr* Expr ArgList
+
+ArgList =
+ '(' args:(Expr (',' Expr)* ','?)? ')'
+
+MethodCallExpr =
+ Attr* receiver:Expr '.' NameRef GenericArgList? ArgList
+
+FieldExpr =
+ Attr* Expr '.' NameRef
+
+ClosureExpr =
+ Attr* 'static'? 'async'? 'move'? ParamList RetType?
+ body:Expr
+
+IfExpr =
+ Attr* 'if' condition:Expr then_branch:BlockExpr
+ ('else' else_branch:(IfExpr | BlockExpr))?
+
+LoopExpr =
+ Attr* Label? 'loop'
+ loop_body:BlockExpr
+
+ForExpr =
+ Attr* Label? 'for' Pat 'in' iterable:Expr
+ loop_body:BlockExpr
+
+WhileExpr =
+ Attr* Label? 'while' condition:Expr
+ loop_body:BlockExpr
+
+Label =
+ Lifetime ':'
+
+BreakExpr =
+ Attr* 'break' Lifetime? Expr?
+
+ContinueExpr =
+ Attr* 'continue' Lifetime?
+
+RangeExpr =
+ Attr* start:Expr? op:('..' | '..=') end:Expr?
+
+MatchExpr =
+ Attr* 'match' Expr MatchArmList
+
+MatchArmList =
+ '{'
+ Attr*
+ arms:MatchArm*
+ '}'
+
+MatchArm =
+ Attr* Pat guard:MatchGuard? '=>' Expr ','?
+
+MatchGuard =
+ 'if' condition:Expr
+
+ReturnExpr =
+ Attr* 'return' Expr?
+
+YieldExpr =
+ Attr* 'yield' Expr?
+
+LetExpr =
+ Attr* 'let' Pat '=' Expr
+
+UnderscoreExpr =
+ Attr* '_'
+
+AwaitExpr =
+ Attr* Expr '.' 'await'
+
+BoxExpr =
+ Attr* 'box' Expr
+
+//*************************//
+// Types //
+//*************************//
+
+Type =
+ ArrayType
+| DynTraitType
+| FnPtrType
+| ForType
+| ImplTraitType
+| InferType
+| MacroType
+| NeverType
+| ParenType
+| PathType
+| PtrType
+| RefType
+| SliceType
+| TupleType
+
+ParenType =
+ '(' Type ')'
+
+NeverType =
+ '!'
+
+MacroType =
+ MacroCall
+
+PathType =
+ Path
+
+TupleType =
+ '(' fields:(Type (',' Type)* ','?)? ')'
+
+PtrType =
+ '*' ('const' | 'mut') Type
+
+RefType =
+ '&' Lifetime? 'mut'? Type
+
+ArrayType =
+ '[' Type ';' Expr ']'
+
+SliceType =
+ '[' Type ']'
+
+InferType =
+ '_'
+
+FnPtrType =
+ 'const'? 'async'? 'unsafe'? Abi? 'fn' ParamList RetType?
+
+ForType =
+ 'for' GenericParamList Type
+
+ImplTraitType =
+ 'impl' TypeBoundList
+
+DynTraitType =
+ 'dyn' TypeBoundList
+
+TypeBoundList =
+ bounds:(TypeBound ('+' TypeBound)* '+'?)
+
+TypeBound =
+ Lifetime
+| ('?' | '~' 'const')? Type
+
+//************************//
+// Patterns //
+//************************//
+
+Pat =
+ IdentPat
+| BoxPat
+| RestPat
+| LiteralPat
+| MacroPat
+| OrPat
+| ParenPat
+| PathPat
+| WildcardPat
+| RangePat
+| RecordPat
+| RefPat
+| SlicePat
+| TuplePat
+| TupleStructPat
+| ConstBlockPat
+
+LiteralPat =
+ Literal
+
+IdentPat =
+ Attr* 'ref'? 'mut'? Name ('@' Pat)?
+
+WildcardPat =
+ '_'
+
+RangePat =
+ // 1..
+ start:Pat op:('..' | '..=')
+ // 1..2
+ | start:Pat op:('..' | '..=') end:Pat
+ // ..2
+ | op:('..' | '..=') end:Pat
+
+RefPat =
+ '&' 'mut'? Pat
+
+RecordPat =
+ Path RecordPatFieldList
+
+RecordPatFieldList =
+ '{'
+ fields:(RecordPatField (',' RecordPatField)* ','?)?
+ RestPat?
+ '}'
+
+RecordPatField =
+ Attr* (NameRef ':')? Pat
+
+TupleStructPat =
+ Path '(' fields:(Pat (',' Pat)* ','?)? ')'
+
+TuplePat =
+ '(' fields:(Pat (',' Pat)* ','?)? ')'
+
+ParenPat =
+ '(' Pat ')'
+
+SlicePat =
+ '[' (Pat (',' Pat)* ','?)? ']'
+
+PathPat =
+ Path
+
+OrPat =
+ (Pat ('|' Pat)* '|'?)
+
+BoxPat =
+ 'box' Pat
+
+RestPat =
+ Attr* '..'
+
+MacroPat =
+ MacroCall
+
+ConstBlockPat =
+ 'const' BlockExpr
diff --git a/lib/ungrammar/src/error.rs b/lib/ungrammar/src/error.rs
new file mode 100644
index 0000000000..355e0b7ebc
--- /dev/null
+++ b/lib/ungrammar/src/error.rs
@@ -0,0 +1,50 @@
+//! Boilerplate error definitions.
+use std::fmt;
+
+use crate::lexer::Location;
+
+/// A type alias for std's Result with the Error as our error type.
+pub type Result<T, E = Error> = std::result::Result<T, E>;
+
+/// An error encountered when parsing a Grammar.
+#[derive(Debug)]
+pub struct Error {
+ pub(crate) message: String,
+ pub(crate) location: Option<Location>,
+}
+
+impl fmt::Display for Error {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ if let Some(loc) = self.location {
+ // Report 1-based indices, to match text editors
+ write!(f, "{}:{}: ", loc.line + 1, loc.column + 1)?
+ }
+ write!(f, "{}", self.message)
+ }
+}
+
+impl std::error::Error for Error {}
+
+impl Error {
+ pub(crate) fn with_location(self, location: Location) -> Error {
+ Error {
+ location: Some(location),
+ ..self
+ }
+ }
+}
+
+macro_rules! _format_err {
+ ($($tt:tt)*) => {
+ $crate::error::Error {
+ message: format!($($tt)*),
+ location: None,
+ }
+ };
+}
+pub(crate) use _format_err as format_err;
+
+macro_rules! _bail {
+ ($($tt:tt)*) => { return Err($crate::error::format_err!($($tt)*)) };
+}
+pub(crate) use _bail as bail;
diff --git a/lib/ungrammar/src/lexer.rs b/lib/ungrammar/src/lexer.rs
new file mode 100644
index 0000000000..f4c979b5bd
--- /dev/null
+++ b/lib/ungrammar/src/lexer.rs
@@ -0,0 +1,129 @@
+//! Simple hand-written ungrammar lexer
+use crate::error::{bail, Result};
+
+#[derive(Debug, Eq, PartialEq)]
+pub(crate) enum TokenKind {
+ Node(String),
+ Token(String),
+ Eq,
+ Star,
+ Pipe,
+ QMark,
+ Colon,
+ LParen,
+ RParen,
+}
+
+#[derive(Debug)]
+pub(crate) struct Token {
+ pub(crate) kind: TokenKind,
+ pub(crate) loc: Location,
+}
+
+#[derive(Copy, Clone, Default, Debug)]
+pub(crate) struct Location {
+ pub(crate) line: usize,
+ pub(crate) column: usize,
+}
+
+impl Location {
+ fn advance(&mut self, text: &str) {
+ match text.rfind('\n') {
+ Some(idx) => {
+ self.line += text.chars().filter(|&it| it == '\n').count();
+ self.column = text[idx + 1..].chars().count();
+ }
+ None => self.column += text.chars().count(),
+ }
+ }
+}
+
+pub(crate) fn tokenize(mut input: &str) -> Result<Vec<Token>> {
+ let mut res = Vec::new();
+ let mut loc = Location::default();
+ while !input.is_empty() {
+ let old_input = input;
+ skip_ws(&mut input);
+ skip_comment(&mut input);
+ if old_input.len() == input.len() {
+ match advance(&mut input) {
+ Ok(kind) => {
+ res.push(Token { kind, loc });
+ }
+ Err(err) => return Err(err.with_location(loc)),
+ }
+ }
+ let consumed = old_input.len() - input.len();
+ loc.advance(&old_input[..consumed]);
+ }
+
+ Ok(res)
+}
+
+fn skip_ws(input: &mut &str) {
+ *input = input.trim_start_matches(is_whitespace)
+}
+fn skip_comment(input: &mut &str) {
+ if input.starts_with("//") {
+ let idx = input.find('\n').map_or(input.len(), |it| it + 1);
+ *input = &input[idx..]
+ }
+}
+
+fn advance(input: &mut &str) -> Result<TokenKind> {
+ let mut chars = input.chars();
+ let c = chars.next().unwrap();
+ let res = match c {
+ '=' => TokenKind::Eq,
+ '*' => TokenKind::Star,
+ '?' => TokenKind::QMark,
+ '(' => TokenKind::LParen,
+ ')' => TokenKind::RParen,
+ '|' => TokenKind::Pipe,
+ ':' => TokenKind::Colon,
+ '\'' => {
+ let mut buf = String::new();
+ loop {
+ match chars.next() {
+ None => bail!("unclosed token literal"),
+ Some('\\') => match chars.next() {
+ Some(c) if is_escapable(c) => buf.push(c),
+ _ => bail!("invalid escape in token literal"),
+ },
+ Some('\'') => break,
+ Some(c) => buf.push(c),
+ }
+ }
+ TokenKind::Token(buf)
+ }
+ c if is_ident_char(c) => {
+ let mut buf = String::new();
+ buf.push(c);
+ loop {
+ match chars.clone().next() {
+ Some(c) if is_ident_char(c) => {
+ chars.next();
+ buf.push(c);
+ }
+ _ => break,
+ }
+ }
+ TokenKind::Node(buf)
+ }
+ '\r' => bail!("unexpected `\\r`, only Unix-style line endings allowed"),
+ c => bail!("unexpected character: `{}`", c),
+ };
+
+ *input = chars.as_str();
+ Ok(res)
+}
+
+fn is_escapable(c: char) -> bool {
+ matches!(c, '\\' | '\'')
+}
+fn is_whitespace(c: char) -> bool {
+ matches!(c, ' ' | '\t' | '\n')
+}
+fn is_ident_char(c: char) -> bool {
+ matches!(c, 'a'..='z' | 'A'..='Z' | '_')
+}
diff --git a/lib/ungrammar/src/lib.rs b/lib/ungrammar/src/lib.rs
new file mode 100644
index 0000000000..6adf8ef8ea
--- /dev/null
+++ b/lib/ungrammar/src/lib.rs
@@ -0,0 +1,137 @@
+//! Ungrammar -- a DSL for specifying concrete syntax tree grammar.
+//!
+//! Producing a parser is an explicit non-goal -- it's ok for this grammar to be
+//! ambiguous, non LL, non LR, etc.
+//!
+//! See this
+//! [introductory post](https://rust-analyzer.github.io/blog/2020/10/24/introducing-ungrammar.html)
+//! for details.
+
+#![deny(missing_debug_implementations)]
+#![deny(missing_docs)]
+#![deny(rust_2018_idioms)]
+
+mod error;
+mod lexer;
+mod parser;
+
+use std::{ops, str::FromStr};
+
+pub use error::{Error, Result};
+
+/// Returns a Rust grammar.
+pub fn rust_grammar() -> Grammar {
+ let src = include_str!("../rust.ungram");
+ src.parse().unwrap()
+}
+
+/// A node, like `A = 'b' | 'c'`.
+///
+/// Indexing into a [`Grammar`] with a [`Node`] returns a reference to a
+/// [`NodeData`].
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct Node(usize);
+
+/// A token, denoted with single quotes, like `'+'` or `'struct'`.
+///
+/// Indexing into a [`Grammar`] with a [`Token`] returns a reference to a
+/// [`TokenData`].
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct Token(usize);
+
+/// An Ungrammar grammar.
+#[derive(Default, Debug)]
+pub struct Grammar {
+ nodes: Vec<NodeData>,
+ tokens: Vec<TokenData>,
+}
+
+impl FromStr for Grammar {
+ type Err = Error;
+ fn from_str(s: &str) -> Result<Self> {
+ let tokens = lexer::tokenize(s)?;
+ parser::parse(tokens)
+ }
+}
+
+impl Grammar {
+ /// Returns an iterator over all nodes in the grammar.
+ pub fn iter(&self) -> impl Iterator<Item = Node> + '_ {
+ (0..self.nodes.len()).map(Node)
+ }
+
+ /// Returns an iterator over all tokens in the grammar.
+ pub fn tokens(&self) -> impl Iterator<Item = Token> + '_ {
+ (0..self.tokens.len()).map(Token)
+ }
+}
+
+impl ops::Index<Node> for Grammar {
+ type Output = NodeData;
+ fn index(&self, Node(index): Node) -> &NodeData {
+ &self.nodes[index]
+ }
+}
+
+impl ops::Index<Token> for Grammar {
+ type Output = TokenData;
+ fn index(&self, Token(index): Token) -> &TokenData {
+ &self.tokens[index]
+ }
+}
+
+/// Data about a node.
+#[derive(Debug)]
+pub struct NodeData {
+ /// The name of the node.
+ ///
+ /// In the rule `A = 'b' | 'c'`, this is `"A"`.
+ pub name: String,
+ /// The rule for this node.
+ ///
+ /// In the rule `A = 'b' | 'c'`, this represents `'b' | 'c'`.
+ pub rule: Rule,
+}
+
+/// Data about a token.
+#[derive(Debug)]
+pub struct TokenData {
+ /// The name of the token.
+ pub name: String,
+}
+
+/// A production rule.
+#[derive(Debug, Clone, Eq, PartialEq)]
+pub enum Rule {
+ /// A labeled rule, like `a:B` (`"a"` is the label, `B` is the rule).
+ Labeled {
+ /// The label.
+ label: String,
+ /// The rule.
+ rule: Box<Rule>,
+ },
+ /// A node, like `A`.
+ Node(Node),
+ /// A token, like `'struct'`.
+ Token(Token),
+ /// A sequence of rules, like `'while' '(' Expr ')' Stmt`.
+ Seq(Vec<Rule>),
+ /// An alternative between many rules, like `'+' | '-' | '*' | '/'`.
+ Alt(Vec<Rule>),
+ /// An optional rule, like `A?`.
+ Opt(Box<Rule>),
+ /// A repeated rule, like `A*`.
+ Rep(Box<Rule>),
+}
+
+#[test]
+fn smoke() {
+ let grammar = include_str!("../ungrammar.ungram");
+ let grammar = grammar.parse::<Grammar>().unwrap();
+ drop(grammar)
+}
+
+#[test]
+fn test_rust_grammar() {
+ let _ = rust_grammar();
+}
diff --git a/lib/ungrammar/src/parser.rs b/lib/ungrammar/src/parser.rs
new file mode 100644
index 0000000000..a4ce9c1202
--- /dev/null
+++ b/lib/ungrammar/src/parser.rs
@@ -0,0 +1,225 @@
+//! Simple hand-written ungrammar parser.
+use std::collections::HashMap;
+
+use crate::{
+ error::{bail, format_err, Result},
+ lexer::{self, TokenKind},
+ Grammar, Node, NodeData, Rule, Token, TokenData,
+};
+
+macro_rules! bail {
+ ($loc:expr, $($tt:tt)*) => {{
+ let err = $crate::error::format_err!($($tt)*)
+ .with_location($loc);
+ return Err(err);
+ }};
+}
+
+pub(crate) fn parse(tokens: Vec<lexer::Token>) -> Result<Grammar> {
+ let mut p = Parser::new(tokens);
+ while !p.is_eof() {
+ node(&mut p)?;
+ }
+ p.finish()
+}
+
+#[derive(Default)]
+struct Parser {
+ grammar: Grammar,
+ tokens: Vec<lexer::Token>,
+ node_table: HashMap<String, Node>,
+ token_table: HashMap<String, Token>,
+}
+
+const DUMMY_RULE: Rule = Rule::Node(Node(!0));
+
+impl Parser {
+ fn new(mut tokens: Vec<lexer::Token>) -> Parser {
+ tokens.reverse();
+ Parser {
+ tokens,
+ ..Parser::default()
+ }
+ }
+
+ fn peek(&self) -> Option<&lexer::Token> {
+ self.peek_n(0)
+ }
+ fn peek_n(&self, n: usize) -> Option<&lexer::Token> {
+ self.tokens.iter().nth_back(n)
+ }
+ fn bump(&mut self) -> Result<lexer::Token> {
+ self.tokens
+ .pop()
+ .ok_or_else(|| format_err!("unexpected EOF"))
+ }
+ fn expect(&mut self, kind: TokenKind, what: &str) -> Result<()> {
+ let token = self.bump()?;
+ if token.kind != kind {
+ bail!(token.loc, "unexpected token, expected `{}`", what);
+ }
+ Ok(())
+ }
+ fn is_eof(&self) -> bool {
+ self.tokens.is_empty()
+ }
+ fn finish(self) -> Result<Grammar> {
+ for node_data in &self.grammar.nodes {
+ if matches!(node_data.rule, DUMMY_RULE) {
+ crate::error::bail!("Undefined node: {}", node_data.name)
+ }
+ }
+ Ok(self.grammar)
+ }
+ fn intern_node(&mut self, name: String) -> Node {
+ let len = self.node_table.len();
+ let grammar = &mut self.grammar;
+ *self.node_table.entry(name.clone()).or_insert_with(|| {
+ grammar.nodes.push(NodeData {
+ name,
+ rule: DUMMY_RULE,
+ });
+ Node(len)
+ })
+ }
+ fn intern_token(&mut self, name: String) -> Token {
+ let len = self.token_table.len();
+ let grammar = &mut self.grammar;
+ *self.token_table.entry(name.clone()).or_insert_with(|| {
+ grammar.tokens.push(TokenData { name });
+ Token(len)
+ })
+ }
+}
+
+fn node(p: &mut Parser) -> Result<()> {
+ let token = p.bump()?;
+ let node = match token.kind {
+ TokenKind::Node(it) => p.intern_node(it),
+ _ => bail!(token.loc, "expected ident"),
+ };
+ p.expect(TokenKind::Eq, "=")?;
+ if !matches!(p.grammar[node].rule, DUMMY_RULE) {
+ bail!(token.loc, "duplicate rule: `{}`", p.grammar[node].name)
+ }
+
+ let rule = rule(p)?;
+ p.grammar.nodes[node.0].rule = rule;
+ Ok(())
+}
+
+fn rule(p: &mut Parser) -> Result<Rule> {
+ if let Some(lexer::Token { kind: TokenKind::Pipe, loc }) = p.peek() {
+ bail!(
+ *loc,
+ "The first element in a sequence of productions or alternatives \
+ must not have a leading pipe (`|`)"
+ );
+ }
+
+ let lhs = seq_rule(p)?;
+ let mut alt = vec![lhs];
+ while let Some(token) = p.peek() {
+ if token.kind != TokenKind::Pipe {
+ break;
+ }
+ p.bump()?;
+ let rule = seq_rule(p)?;
+ alt.push(rule)
+ }
+ let res = if alt.len() == 1 {
+ alt.pop().unwrap()
+ } else {
+ Rule::Alt(alt)
+ };
+ Ok(res)
+}
+
+fn seq_rule(p: &mut Parser) -> Result<Rule> {
+ let lhs = atom_rule(p)?;
+
+ let mut seq = vec![lhs];
+ while let Some(rule) = opt_atom_rule(p)? {
+ seq.push(rule)
+ }
+ let res = if seq.len() == 1 {
+ seq.pop().unwrap()
+ } else {
+ Rule::Seq(seq)
+ };
+ Ok(res)
+}
+
+fn atom_rule(p: &mut Parser) -> Result<Rule> {
+ match opt_atom_rule(p)? {
+ Some(it) => Ok(it),
+ None => {
+ let token = p.bump()?;
+ bail!(token.loc, "unexpected token")
+ }
+ }
+}
+
+fn opt_atom_rule(p: &mut Parser) -> Result<Option<Rule>> {
+ let token = match p.peek() {
+ Some(it) => it,
+ None => return Ok(None),
+ };
+ let mut res = match &token.kind {
+ TokenKind::Node(name) => {
+ if let Some(lookahead) = p.peek_n(1) {
+ match lookahead.kind {
+ TokenKind::Eq => return Ok(None),
+ TokenKind::Colon => {
+ let label = name.clone();
+ p.bump()?;
+ p.bump()?;
+ let rule = atom_rule(p)?;
+ let res = Rule::Labeled {
+ label,
+ rule: Box::new(rule),
+ };
+ return Ok(Some(res));
+ }
+ _ => (),
+ }
+ }
+ match p.peek_n(1) {
+ Some(token) if token.kind == TokenKind::Eq => return Ok(None),
+ _ => (),
+ }
+ let name = name.clone();
+ p.bump()?;
+ let node = p.intern_node(name);
+ Rule::Node(node)
+ }
+ TokenKind::Token(name) => {
+ let name = name.clone();
+ p.bump()?;
+ let token = p.intern_token(name);
+ Rule::Token(token)
+ }
+ TokenKind::LParen => {
+ p.bump()?;
+ let rule = rule(p)?;
+ p.expect(TokenKind::RParen, ")")?;
+ rule
+ }
+ _ => return Ok(None),
+ };
+
+ if let Some(token) = p.peek() {
+ match &token.kind {
+ TokenKind::QMark => {
+ p.bump()?;
+ res = Rule::Opt(Box::new(res));
+ }
+ TokenKind::Star => {
+ p.bump()?;
+ res = Rule::Rep(Box::new(res));
+ }
+ _ => (),
+ }
+ }
+ Ok(Some(res))
+}
diff --git a/lib/ungrammar/ungrammar.ungram b/lib/ungrammar/ungrammar.ungram
new file mode 100644
index 0000000000..856a6cede0
--- /dev/null
+++ b/lib/ungrammar/ungrammar.ungram
@@ -0,0 +1,16 @@
+// The Ungrammar grammar, described in Ungrammar itself.
+Grammar =
+ Node *
+
+Node =
+ name:'ident' '=' Rule
+
+Rule =
+ 'ident'
+| 'token_ident'
+| Rule *
+| Rule ( '|' Rule) *
+| Rule '?'
+| Rule '*'
+| '(' Rule ')'
+| label:'ident' ':' Rule
diff --git a/lib/ungrammar/ungrammar2json/Cargo.toml b/lib/ungrammar/ungrammar2json/Cargo.toml
new file mode 100644
index 0000000000..19ca3d8324
--- /dev/null
+++ b/lib/ungrammar/ungrammar2json/Cargo.toml
@@ -0,0 +1,12 @@
+[package]
+name = "ungrammar2json"
+description = "Convert ungrammar files to JSON"
+version = "1.0.0"
+license = "MIT OR Apache-2.0"
+repository = "https://github.com/matklad/ungrammar"
+authors = ["Aleksey Kladov <[email protected]>"]
+edition = "2018"
+
+[dependencies]
+write-json = "0.1.1"
+ungrammar = { path = "../", version = "1.1.0" }
diff --git a/lib/ungrammar/ungrammar2json/src/main.rs b/lib/ungrammar/ungrammar2json/src/main.rs
new file mode 100644
index 0000000000..f588ed5eb6
--- /dev/null
+++ b/lib/ungrammar/ungrammar2json/src/main.rs
@@ -0,0 +1,77 @@
+use std::{
+ env,
+ io::{self, Read},
+ process,
+};
+
+use ungrammar::{Grammar, Rule};
+
+fn main() {
+ if let Err(err) = try_main() {
+ eprintln!("{}", err);
+ process::exit(101);
+ }
+}
+
+fn try_main() -> io::Result<()> {
+ if env::args().count() != 1 {
+ eprintln!("Usage: ungrammar2json < grammar.ungram > grammar.json");
+ return Ok(());
+ }
+ let grammar = read_stdin()?;
+ let grammar = grammar
+ .parse::<Grammar>()
+ .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err))?;
+
+ let mut buf = String::new();
+ grammar_to_json(&grammar, write_json::object(&mut buf));
+ println!("{}", buf);
+ Ok(())
+}
+
+fn read_stdin() -> io::Result<String> {
+ let mut buf = String::new();
+ io::stdin().lock().read_to_string(&mut buf)?;
+ Ok(buf)
+}
+
+fn grammar_to_json(grammar: &Grammar, mut obj: write_json::Object<'_>) {
+ for node in grammar.iter() {
+ let node = &grammar[node];
+ rule_to_json(grammar, &node.rule, obj.object(&node.name));
+ }
+}
+
+fn rule_to_json(grammar: &Grammar, rule: &Rule, mut obj: write_json::Object) {
+ match rule {
+ Rule::Labeled { label, rule } => {
+ obj.string("label", label);
+ rule_to_json(grammar, rule, obj.object("rule"))
+ }
+ Rule::Node(node) => {
+ obj.string("node", &grammar[*node].name);
+ }
+ Rule::Token(token) => {
+ obj.string("token", &grammar[*token].name);
+ }
+ Rule::Seq(rules) | Rule::Alt(rules) => {
+ let tag = match rule {
+ Rule::Seq(_) => "seq",
+ Rule::Alt(_) => "alt",
+ _ => unreachable!(),
+ };
+ let mut array = obj.array(tag);
+ for rule in rules {
+ rule_to_json(grammar, rule, array.object());
+ }
+ }
+ Rule::Opt(arg) | Rule::Rep(arg) => {
+ let tag = match rule {
+ Rule::Opt(_) => "opt",
+ Rule::Rep(_) => "rep",
+ _ => unreachable!(),
+ };
+ rule_to_json(grammar, arg, obj.object(tag));
+ }
+ }
+}