Unnamed repository; edit this file 'description' to name the repository.
Diffstat (limited to 'helix-core/src/snippets/parser.rs')
| -rw-r--r-- | helix-core/src/snippets/parser.rs | 935 |
1 files changed, 0 insertions, 935 deletions
diff --git a/helix-core/src/snippets/parser.rs b/helix-core/src/snippets/parser.rs deleted file mode 100644 index 74e940da..00000000 --- a/helix-core/src/snippets/parser.rs +++ /dev/null @@ -1,935 +0,0 @@ -/*! -A parser for LSP/VSCode style snippet syntax -See <https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#snippet_syntax>. - -``` text -any ::= tabstop | placeholder | choice | variable | text -tabstop ::= '$' int | '${' int '}' -placeholder ::= '${' int ':' any '}' -choice ::= '${' int '|' text (',' text)* '|}' -variable ::= '$' var | '${' var }' - | '${' var ':' any '}' - | '${' var '/' regex '/' (format | text)+ '/' options '}' -format ::= '$' int | '${' int '}' - | '${' int ':' '/upcase' | '/downcase' | '/capitalize' '}' - | '${' int ':+' if '}' - | '${' int ':?' if ':' else '}' - | '${' int ':-' else '}' | '${' int ':' else '}' -regex ::= Regular Expression value (ctor-string) -options ::= Regular Expression option (ctor-options) -var ::= [_a-zA-Z] [_a-zA-Z0-9]* -int ::= [0-9]+ -text ::= .* -if ::= text -else ::= text -``` -*/ - -use crate::Tendril; -use helix_parsec::*; - -#[derive(Debug, PartialEq, Eq, Clone, Copy)] -pub enum CaseChange { - Upcase, - Downcase, - Capitalize, - PascalCase, - CamelCase, -} - -#[derive(Debug, PartialEq, Eq)] -pub enum FormatItem { - Text(Tendril), - Capture(usize), - CaseChange(usize, CaseChange), - Conditional(usize, Tendril, Tendril), -} - -#[derive(Debug, PartialEq, Eq)] -pub struct Transform { - pub regex: Tendril, - pub replacement: Vec<FormatItem>, - pub options: Tendril, -} - -#[derive(Debug, PartialEq, Eq)] -pub enum SnippetElement { - Tabstop { - tabstop: usize, - transform: Option<Transform>, - }, - Placeholder { - tabstop: usize, - value: Vec<SnippetElement>, - }, - Choice { - tabstop: usize, - choices: Vec<Tendril>, - }, - Variable { - name: Tendril, - default: Option<Vec<SnippetElement>>, - transform: Option<Transform>, - }, - Text(Tendril), -} - -pub fn parse(s: &str) -> Result<Vec<SnippetElement>, &str> { - snippet().parse(s).and_then(|(remainder, snippet)| { - if remainder.is_empty() { - Ok(snippet) - } else { - Err(remainder) - } - }) -} - -fn var<'a>() -> impl Parser<'a, Output = &'a str> { - // var = [_a-zA-Z][_a-zA-Z0-9]* - move |input: &'a str| { - input - .char_indices() - .take_while(|(p, c)| { - *c == '_' - || if *p == 0 { - c.is_ascii_alphabetic() - } else { - c.is_ascii_alphanumeric() - } - }) - .last() - .map(|(index, c)| { - let index = index + c.len_utf8(); - (&input[index..], &input[0..index]) - }) - .ok_or(input) - } -} - -const TEXT_ESCAPE_CHARS: &[char] = &['\\', '}', '$']; -const CHOICE_TEXT_ESCAPE_CHARS: &[char] = &['\\', '|', ',']; - -fn text<'a>( - escape_chars: &'static [char], - term_chars: &'static [char], -) -> impl Parser<'a, Output = Tendril> { - move |input: &'a str| { - let mut chars = input.char_indices().peekable(); - let mut res = Tendril::new(); - while let Some((i, c)) = chars.next() { - match c { - '\\' => { - if let Some(&(_, c)) = chars.peek() { - if escape_chars.contains(&c) { - chars.next(); - res.push(c); - continue; - } - } - res.push('\\'); - } - c if term_chars.contains(&c) => return Ok((&input[i..], res)), - c => res.push(c), - } - } - - Ok(("", res)) - } -} - -fn digit<'a>() -> impl Parser<'a, Output = usize> { - filter_map(take_while(|c| c.is_ascii_digit()), |s| s.parse().ok()) -} - -fn case_change<'a>() -> impl Parser<'a, Output = CaseChange> { - use CaseChange::*; - - choice!( - map("upcase", |_| Upcase), - map("downcase", |_| Downcase), - map("capitalize", |_| Capitalize), - map("pascalcase", |_| PascalCase), - map("camelcase", |_| CamelCase), - ) -} - -fn format<'a>() -> impl Parser<'a, Output = FormatItem> { - use FormatItem::*; - - choice!( - // '$' int - map(right("$", digit()), Capture), - // '${' int '}' - map(seq!("${", digit(), "}"), |seq| Capture(seq.1)), - // '${' int ':' '/upcase' | '/downcase' | '/capitalize' '}' - map(seq!("${", digit(), ":/", case_change(), "}"), |seq| { - CaseChange(seq.1, seq.3) - }), - // '${' int ':+' if '}' - map( - seq!("${", digit(), ":+", text(TEXT_ESCAPE_CHARS, &['}']), "}"), - |seq| { Conditional(seq.1, seq.3, Tendril::new()) } - ), - // '${' int ':?' if ':' else '}' - map( - seq!( - "${", - digit(), - ":?", - text(TEXT_ESCAPE_CHARS, &[':']), - ":", - text(TEXT_ESCAPE_CHARS, &['}']), - "}" - ), - |seq| { Conditional(seq.1, seq.3, seq.5) } - ), - // '${' int ':-' else '}' | '${' int ':' else '}' - map( - seq!( - "${", - digit(), - ":", - optional("-"), - text(TEXT_ESCAPE_CHARS, &['}']), - "}" - ), - |seq| { Conditional(seq.1, Tendril::new(), seq.4) } - ), - ) -} - -fn regex<'a>() -> impl Parser<'a, Output = Transform> { - map( - seq!( - "/", - // TODO parse as ECMAScript and convert to rust regex - text(&['/'], &['/']), - "/", - zero_or_more(choice!( - format(), - // text doesn't parse $, if format fails we just accept the $ as text - map("$", |_| FormatItem::Text("$".into())), - map(text(&['\\', '/'], &['/', '$']), FormatItem::Text), - )), - "/", - // vscode really doesn't allow escaping } here - // so it's impossible to write a regex escape containing a } - // we can consider deviating here and allowing the escape - text(&[], &['}']), - ), - |(_, value, _, replacement, _, options)| Transform { - regex: value, - replacement, - options, - }, - ) -} - -fn tabstop<'a>() -> impl Parser<'a, Output = SnippetElement> { - map( - or( - map(right("$", digit()), |i| (i, None)), - map( - seq!("${", digit(), optional(regex()), "}"), - |(_, i, transform, _)| (i, transform), - ), - ), - |(tabstop, transform)| SnippetElement::Tabstop { tabstop, transform }, - ) -} - -fn placeholder<'a>() -> impl Parser<'a, Output = SnippetElement> { - map( - seq!( - "${", - digit(), - ":", - // according to the grammar there is just a single anything here. - // However in the prose it is explained that placeholders can be nested. - // The example there contains both a placeholder text and a nested placeholder - // which indicates a list. Looking at the VSCode sourcecode, the placeholder - // is indeed parsed as zero_or_more so the grammar is simply incorrect here - zero_or_more(anything(TEXT_ESCAPE_CHARS, true)), - "}" - ), - |seq| SnippetElement::Placeholder { - tabstop: seq.1, - value: seq.3, - }, - ) -} - -fn choice<'a>() -> impl Parser<'a, Output = SnippetElement> { - map( - seq!( - "${", - digit(), - "|", - sep(text(CHOICE_TEXT_ESCAPE_CHARS, &['|', ',']), ","), - "|}", - ), - |seq| SnippetElement::Choice { - tabstop: seq.1, - choices: seq.3, - }, - ) -} - -fn variable<'a>() -> impl Parser<'a, Output = SnippetElement> { - choice!( - // $var - map(right("$", var()), |name| SnippetElement::Variable { - name: name.into(), - default: None, - transform: None, - }), - // ${var} - map(seq!("${", var(), "}",), |values| SnippetElement::Variable { - name: values.1.into(), - default: None, - transform: None, - }), - // ${var:default} - map( - seq!( - "${", - var(), - ":", - zero_or_more(anything(TEXT_ESCAPE_CHARS, true)), - "}", - ), - |values| SnippetElement::Variable { - name: values.1.into(), - default: Some(values.3), - transform: None, - } - ), - // ${var/value/format/options} - map(seq!("${", var(), regex(), "}"), |values| { - SnippetElement::Variable { - name: values.1.into(), - default: None, - transform: Some(values.2), - } - }), - ) -} - -fn anything<'a>( - escape_chars: &'static [char], - end_at_brace: bool, -) -> impl Parser<'a, Output = SnippetElement> { - let term_chars: &[_] = if end_at_brace { &['$', '}'] } else { &['$'] }; - move |input: &'a str| { - let parser = choice!( - tabstop(), - placeholder(), - choice(), - variable(), - map("$", |_| SnippetElement::Text("$".into())), - map(text(escape_chars, term_chars), SnippetElement::Text), - ); - parser.parse(input) - } -} - -fn snippet<'a>() -> impl Parser<'a, Output = Vec<SnippetElement>> { - one_or_more(anything(TEXT_ESCAPE_CHARS, false)) -} - -#[cfg(test)] -mod test { - use crate::snippets::{Snippet, SnippetRenderCtx}; - - use super::SnippetElement::*; - use super::*; - - #[test] - fn empty_string_is_error() { - assert_eq!(Err(""), parse("")); - } - - #[test] - fn parse_placeholders_in_function_call() { - assert_eq!( - Ok(vec![ - Text("match(".into()), - Placeholder { - tabstop: 1, - value: vec![Text("Arg1".into())], - }, - Text(")".into()), - ]), - parse("match(${1:Arg1})") - ); - // The `$0` tabstop should not have placeholder text. The parser should handle this case - // normally and then the placeholder text should be discarded during elaboration. - assert_eq!( - Ok(vec![ - Text("sizeof(".into()), - Placeholder { - tabstop: 0, - value: vec![Text("expression-or-type".into())], - }, - Text(")".into()), - ]), - parse("sizeof(${0:expression-or-type})") - ); - } - - #[test] - fn unterminated_placeholder() { - assert_eq!( - Ok(vec![ - Text("match(".into()), - Text("$".into()), - Text("{1:)".into()) - ]), - parse("match(${1:)") - ) - } - - #[test] - fn parse_empty_placeholder() { - assert_eq!( - Ok(vec![ - Text("match(".into()), - Placeholder { - tabstop: 1, - value: vec![], - }, - Text(")".into()), - ]), - parse("match(${1:})") - ) - } - - #[test] - fn parse_placeholders_in_statement() { - assert_eq!( - Ok(vec![ - Text("local ".into()), - Placeholder { - tabstop: 1, - value: vec![Text("var".into())], - }, - Text(" = ".into()), - Placeholder { - tabstop: 1, - value: vec![Text("value".into())], - }, - ]), - parse("local ${1:var} = ${1:value}") - ) - } - - #[test] - fn parse_tabstop_nested_in_placeholder() { - assert_eq!( - Ok(vec![Placeholder { - tabstop: 1, - value: vec![ - Text("var, ".into()), - Tabstop { - tabstop: 2, - transform: None - } - ], - }]), - parse("${1:var, $2}") - ) - } - - #[test] - fn parse_placeholder_nested_in_placeholder() { - assert_eq!( - Ok({ - vec![Placeholder { - tabstop: 1, - value: vec![ - Text("foo ".into()), - Placeholder { - tabstop: 2, - value: vec![Text("bar".into())], - }, - ], - }] - }), - parse("${1:foo ${2:bar}}") - ) - } - - #[test] - fn parse_all() { - assert_eq!( - Ok(vec![ - Text("hello ".into()), - Tabstop { - tabstop: 1, - transform: None - }, - Tabstop { - tabstop: 2, - transform: None - }, - Text(" ".into()), - Choice { - tabstop: 1, - choices: vec!["one".into(), "two".into(), "three".into()], - }, - Text(" ".into()), - Variable { - name: "name".into(), - default: Some(vec![Text("foo".into())]), - transform: None, - }, - Text(" ".into()), - Variable { - name: "var".into(), - default: None, - transform: None, - }, - Text(" ".into()), - Variable { - name: "TM".into(), - default: None, - transform: None, - }, - ]), - parse("hello $1${2} ${1|one,two,three|} ${name:foo} $var $TM") - ); - } - - #[test] - fn regex_capture_replace() { - assert_eq!( - Ok({ - vec![Variable { - name: "TM_FILENAME".into(), - default: None, - transform: Some(Transform { - regex: "(.*).+$".into(), - replacement: vec![FormatItem::Capture(1), FormatItem::Text("$".into())], - options: Tendril::new(), - }), - }] - }), - parse("${TM_FILENAME/(.*).+$/$1$/}") - ); - } - - #[test] - fn rust_macro() { - assert_eq!( - Ok({ - vec![ - Text("macro_rules! ".into()), - Tabstop { - tabstop: 1, - transform: None, - }, - Text(" {\n (".into()), - Tabstop { - tabstop: 2, - transform: None, - }, - Text(") => {\n ".into()), - Tabstop { - tabstop: 0, - transform: None, - }, - Text("\n };\n}".into()), - ] - }), - parse("macro_rules! $1 {\n ($2) => {\n $0\n };\n}") - ); - } - - fn assert_text(snippet: &str, parsed_text: &str) { - let snippet = Snippet::parse(snippet).unwrap(); - let mut rendered_snippet = snippet.prepare_render(); - let rendered_text = snippet - .render_at( - &mut rendered_snippet, - "".into(), - false, - &mut SnippetRenderCtx::test_ctx(), - 0, - ) - .0; - assert_eq!(rendered_text, parsed_text) - } - - #[test] - fn robust_parsing() { - assert_text("$", "$"); - assert_text("\\\\$", "\\$"); - assert_text("{", "{"); - assert_text("\\}", "}"); - assert_text("\\abc", "\\abc"); - assert_text("foo${f:\\}}bar", "foo}bar"); - assert_text("\\{", "\\{"); - assert_text("I need \\\\\\$", "I need \\$"); - assert_text("\\", "\\"); - assert_text("\\{{", "\\{{"); - assert_text("{{", "{{"); - assert_text("{{dd", "{{dd"); - assert_text("}}", "}}"); - assert_text("ff}}", "ff}}"); - assert_text("farboo", "farboo"); - assert_text("far{{}}boo", "far{{}}boo"); - assert_text("far{{123}}boo", "far{{123}}boo"); - assert_text("far\\{{123}}boo", "far\\{{123}}boo"); - assert_text("far{{id:bern}}boo", "far{{id:bern}}boo"); - assert_text("far{{id:bern {{basel}}}}boo", "far{{id:bern {{basel}}}}boo"); - assert_text( - "far{{id:bern {{id:basel}}}}boo", - "far{{id:bern {{id:basel}}}}boo", - ); - assert_text( - "far{{id:bern {{id2:basel}}}}boo", - "far{{id:bern {{id2:basel}}}}boo", - ); - assert_text("${}$\\a\\$\\}\\\\", "${}$\\a$}\\"); - assert_text("farboo", "farboo"); - assert_text("far{{}}boo", "far{{}}boo"); - assert_text("far{{123}}boo", "far{{123}}boo"); - assert_text("far\\{{123}}boo", "far\\{{123}}boo"); - assert_text("far`123`boo", "far`123`boo"); - assert_text("far\\`123\\`boo", "far\\`123\\`boo"); - assert_text("\\$far-boo", "$far-boo"); - } - - fn assert_snippet(snippet: &str, expect: &[SnippetElement]) { - let elements = parse(snippet).unwrap(); - assert_eq!(elements, expect.to_owned()) - } - - #[test] - fn parse_variable() { - use SnippetElement::*; - assert_snippet( - "$far-boo", - &[ - Variable { - name: "far".into(), - default: None, - transform: None, - }, - Text("-boo".into()), - ], - ); - assert_snippet( - "far$farboo", - &[ - Text("far".into()), - Variable { - name: "farboo".into(), - transform: None, - default: None, - }, - ], - ); - assert_snippet( - "far${farboo}", - &[ - Text("far".into()), - Variable { - name: "farboo".into(), - transform: None, - default: None, - }, - ], - ); - assert_snippet( - "$123", - &[Tabstop { - tabstop: 123, - transform: None, - }], - ); - assert_snippet( - "$farboo", - &[Variable { - name: "farboo".into(), - transform: None, - default: None, - }], - ); - assert_snippet( - "$far12boo", - &[Variable { - name: "far12boo".into(), - transform: None, - default: None, - }], - ); - assert_snippet( - "000_${far}_000", - &[ - Text("000_".into()), - Variable { - name: "far".into(), - transform: None, - default: None, - }, - Text("_000".into()), - ], - ); - } - - #[test] - fn parse_variable_transform() { - assert_snippet( - "${foo///}", - &[Variable { - name: "foo".into(), - transform: Some(Transform { - regex: Tendril::new(), - replacement: Vec::new(), - options: Tendril::new(), - }), - default: None, - }], - ); - assert_snippet( - "${foo/regex/format/gmi}", - &[Variable { - name: "foo".into(), - transform: Some(Transform { - regex: "regex".into(), - replacement: vec![FormatItem::Text("format".into())], - options: "gmi".into(), - }), - default: None, - }], - ); - assert_snippet( - "${foo/([A-Z][a-z])/format/}", - &[Variable { - name: "foo".into(), - transform: Some(Transform { - regex: "([A-Z][a-z])".into(), - replacement: vec![FormatItem::Text("format".into())], - options: Tendril::new(), - }), - default: None, - }], - ); - - // invalid regex TODO: reneable tests once we actually parse this regex flavor - // assert_text( - // "${foo/([A-Z][a-z])/format/GMI}", - // "${foo/([A-Z][a-z])/format/GMI}", - // ); - // assert_text( - // "${foo/([A-Z][a-z])/format/funky}", - // "${foo/([A-Z][a-z])/format/funky}", - // ); - // assert_text("${foo/([A-Z][a-z]/format/}", "${foo/([A-Z][a-z]/format/}"); - assert_text( - "${foo/regex\\/format/options}", - "${foo/regex\\/format/options}", - ); - - // tricky regex - assert_snippet( - "${foo/m\\/atch/$1/i}", - &[Variable { - name: "foo".into(), - transform: Some(Transform { - regex: "m/atch".into(), - replacement: vec![FormatItem::Capture(1)], - options: "i".into(), - }), - default: None, - }], - ); - - // incomplete - assert_text("${foo///", "${foo///"); - assert_text("${foo/regex/format/options", "${foo/regex/format/options"); - - // format string - assert_snippet( - "${foo/.*/${0:fooo}/i}", - &[Variable { - name: "foo".into(), - transform: Some(Transform { - regex: ".*".into(), - replacement: vec![FormatItem::Conditional(0, Tendril::new(), "fooo".into())], - options: "i".into(), - }), - default: None, - }], - ); - assert_snippet( - "${foo/.*/${1}/i}", - &[Variable { - name: "foo".into(), - transform: Some(Transform { - regex: ".*".into(), - replacement: vec![FormatItem::Capture(1)], - options: "i".into(), - }), - default: None, - }], - ); - assert_snippet( - "${foo/.*/$1/i}", - &[Variable { - name: "foo".into(), - transform: Some(Transform { - regex: ".*".into(), - replacement: vec![FormatItem::Capture(1)], - options: "i".into(), - }), - default: None, - }], - ); - assert_snippet( - "${foo/.*/This-$1-encloses/i}", - &[Variable { - name: "foo".into(), - transform: Some(Transform { - regex: ".*".into(), - replacement: vec![ - FormatItem::Text("This-".into()), - FormatItem::Capture(1), - FormatItem::Text("-encloses".into()), - ], - options: "i".into(), - }), - default: None, - }], - ); - assert_snippet( - "${foo/.*/complex${1:else}/i}", - &[Variable { - name: "foo".into(), - transform: Some(Transform { - regex: ".*".into(), - replacement: vec![ - FormatItem::Text("complex".into()), - FormatItem::Conditional(1, Tendril::new(), "else".into()), - ], - options: "i".into(), - }), - default: None, - }], - ); - assert_snippet( - "${foo/.*/complex${1:-else}/i}", - &[Variable { - name: "foo".into(), - transform: Some(Transform { - regex: ".*".into(), - replacement: vec![ - FormatItem::Text("complex".into()), - FormatItem::Conditional(1, Tendril::new(), "else".into()), - ], - options: "i".into(), - }), - default: None, - }], - ); - assert_snippet( - "${foo/.*/complex${1:+if}/i}", - &[Variable { - name: "foo".into(), - transform: Some(Transform { - regex: ".*".into(), - replacement: vec![ - FormatItem::Text("complex".into()), - FormatItem::Conditional(1, "if".into(), Tendril::new()), - ], - options: "i".into(), - }), - default: None, - }], - ); - assert_snippet( - "${foo/.*/complex${1:?if:else}/i}", - &[Variable { - name: "foo".into(), - transform: Some(Transform { - regex: ".*".into(), - replacement: vec![ - FormatItem::Text("complex".into()), - FormatItem::Conditional(1, "if".into(), "else".into()), - ], - options: "i".into(), - }), - default: None, - }], - ); - assert_snippet( - "${foo/.*/complex${1:/upcase}/i}", - &[Variable { - name: "foo".into(), - transform: Some(Transform { - regex: ".*".into(), - replacement: vec![ - FormatItem::Text("complex".into()), - FormatItem::CaseChange(1, CaseChange::Upcase), - ], - options: "i".into(), - }), - default: None, - }], - ); - assert_snippet( - "${TM_DIRECTORY/src\\//$1/}", - &[Variable { - name: "TM_DIRECTORY".into(), - transform: Some(Transform { - regex: "src/".into(), - replacement: vec![FormatItem::Capture(1)], - options: Tendril::new(), - }), - default: None, - }], - ); - assert_snippet( - "${TM_SELECTED_TEXT/a/\\/$1/g}", - &[Variable { - name: "TM_SELECTED_TEXT".into(), - transform: Some(Transform { - regex: "a".into(), - replacement: vec![FormatItem::Text("/".into()), FormatItem::Capture(1)], - options: "g".into(), - }), - default: None, - }], - ); - assert_snippet( - "${TM_SELECTED_TEXT/a/in\\/$1ner/g}", - &[Variable { - name: "TM_SELECTED_TEXT".into(), - transform: Some(Transform { - regex: "a".into(), - replacement: vec![ - FormatItem::Text("in/".into()), - FormatItem::Capture(1), - FormatItem::Text("ner".into()), - ], - options: "g".into(), - }), - default: None, - }], - ); - assert_snippet( - "${TM_SELECTED_TEXT/a/end\\//g}", - &[Variable { - name: "TM_SELECTED_TEXT".into(), - transform: Some(Transform { - regex: "a".into(), - replacement: vec![FormatItem::Text("end/".into())], - options: "g".into(), - }), - default: None, - }], - ); - } - // TODO port more tests from https://github.com/microsoft/vscode/blob/dce493cb6e36346ef2714e82c42ce14fc461b15c/src/vs/editor/contrib/snippet/test/browser/snippetParser.test.ts -} |