rust-analyzer

Unnamed repository; edit this file 'description' to name the repository.

master · 2 Branches · 0 Tags

Clone

HTTPS

SSH

Open with VS Code

Diffstat (limited to 'crates/parser/src/lexed_str.rs')

-rw-r--r--

crates/parser/src/lexed_str.rs

137

1 file changed, 64 insertions, 73 deletions

diff --git a/crates/parser/src/lexed_str.rs b/crates/parser/src/lexed_str.rs
index 0fa9a26454..8fff1c3db7 100644
--- a/crates/parser/src/lexed_str.rs
+++ b/crates/parser/src/lexed_str.rs

@@ -11,7 +11,8 @@

use std::ops;

use rustc_literal_escaper::{

- EscapeError, Mode, unescape_byte, unescape_char, unescape_mixed, unescape_unicode,

+ EscapeError, Mode, unescape_byte, unescape_byte_str, unescape_c_str, unescape_char,

+ unescape_str,

};

use crate::{

@@ -43,7 +44,9 @@ impl<'a> LexedStr<'a> {

// Re-create the tokenizer from scratch every token because `GuardedStrPrefix` is one token in the lexer

// but we want to split it to two in edition <2024.

- while let Some(token) = rustc_lexer::tokenize(&text[conv.offset..]).next() {

+ while let Some(token) =

+ rustc_lexer::tokenize(&text[conv.offset..], rustc_lexer::FrontmatterAllowed::No).next()

+ {

let token_text = &text[conv.offset..][..token.len as usize];

conv.extend_token(&token.kind, token_text);

@@ -57,7 +60,7 @@ impl<'a> LexedStr<'a> {

return None;

}

- let token = rustc_lexer::tokenize(text).next()?;

+ let token = rustc_lexer::tokenize(text, rustc_lexer::FrontmatterAllowed::No).next()?;

if token.len as usize != text.len() {

return None;

}

@@ -151,14 +154,14 @@ impl<'a> Converter<'a> {

self.res

}

- fn push(&mut self, kind: SyntaxKind, len: usize, err: Option<&str>) {

+ fn push(&mut self, kind: SyntaxKind, len: usize, errors: Vec<String>) {

self.res.push(kind, self.offset);

self.offset += len;

- if let Some(err) = err {

- let token = self.res.len() as u32;

- let msg = err.to_owned();

- self.res.error.push(LexError { msg, token });

+ for msg in errors {

+ if !msg.is_empty() {

+ self.res.error.push(LexError { msg, token: self.res.len() as u32 });

+ }

}

@@ -167,14 +170,16 @@ impl<'a> Converter<'a> {

// We drop some useful information here (see patterns with double dots `..`)

// Storing that info in `SyntaxKind` is not possible due to its layout requirements of

// being `u16` that come from `rowan::SyntaxKind`.

- let mut err = "";

+ let mut errors: Vec<String> = vec![];

let syntax_kind = {

match kind {

rustc_lexer::TokenKind::LineComment { doc_style: _ } => COMMENT,

rustc_lexer::TokenKind::BlockComment { doc_style: _, terminated } => {

if !terminated {

- err = "Missing trailing `*/` symbols to terminate the block comment";

+ errors.push(

+ "Missing trailing `*/` symbols to terminate the block comment".into(),

+ );

}

COMMENT

}

@@ -184,9 +189,9 @@ impl<'a> Converter<'a> {

invalid_infostring,

} => {

if *has_invalid_preceding_whitespace {

- err = "invalid preceding whitespace for frontmatter opening"

+ errors.push("invalid preceding whitespace for frontmatter opening".into());

} else if *invalid_infostring {

- err = "invalid infostring for frontmatter"

+ errors.push("invalid infostring for frontmatter".into());

}

FRONTMATTER

}

@@ -198,7 +203,7 @@ impl<'a> Converter<'a> {

SyntaxKind::from_keyword(token_text, self.edition).unwrap_or(IDENT)

}

rustc_lexer::TokenKind::InvalidIdent => {

- err = "Ident contains invalid characters";

+ errors.push("Ident contains invalid characters".into());

IDENT

}

@@ -206,7 +211,7 @@ impl<'a> Converter<'a> {

rustc_lexer::TokenKind::GuardedStrPrefix if self.edition.at_least_2024() => {

// FIXME: rustc does something better for recovery.

- err = "Invalid string literal (reserved syntax)";

+ errors.push("Invalid string literal (reserved syntax)".into());

ERROR

}

rustc_lexer::TokenKind::GuardedStrPrefix => {

@@ -222,12 +227,12 @@ impl<'a> Converter<'a> {

rustc_lexer::TokenKind::Lifetime { starts_with_number } => {

if *starts_with_number {

- err = "Lifetime name cannot start with a number";

+ errors.push("Lifetime name cannot start with a number".into());

}

LIFETIME_IDENT

}

rustc_lexer::TokenKind::UnknownPrefixLifetime => {

- err = "Unknown lifetime prefix";

+ errors.push("Unknown lifetime prefix".into());

LIFETIME_IDENT

}

rustc_lexer::TokenKind::RawLifetime => LIFETIME_IDENT,

@@ -262,119 +267,128 @@ impl<'a> Converter<'a> {

rustc_lexer::TokenKind::Unknown => ERROR,

rustc_lexer::TokenKind::UnknownPrefix if token_text == "builtin" => IDENT,

rustc_lexer::TokenKind::UnknownPrefix => {

- err = "unknown literal prefix";

+ errors.push("unknown literal prefix".into());

IDENT

}

rustc_lexer::TokenKind::Eof => EOF,

}

};

- let err = if err.is_empty() { None } else { Some(err) };

- self.push(syntax_kind, token_text.len(), err);

+ self.push(syntax_kind, token_text.len(), errors);

}

fn extend_literal(&mut self, len: usize, kind: &rustc_lexer::LiteralKind) {

- let mut err = "";

+ let invalid_raw_msg = String::from("Invalid raw string literal");

+ let mut errors = vec![];

+ let mut no_end_quote = |c: char, kind: &str| {

+ errors.push(format!("Missing trailing `{c}` symbol to terminate the {kind} literal"));

+ };

let syntax_kind = match *kind {

rustc_lexer::LiteralKind::Int { empty_int, base: _ } => {

if empty_int {

- err = "Missing digits after the integer base prefix";

+ errors.push("Missing digits after the integer base prefix".into());

}

INT_NUMBER

}

rustc_lexer::LiteralKind::Float { empty_exponent, base: _ } => {

if empty_exponent {

- err = "Missing digits after the exponent symbol";

+ errors.push("Missing digits after the exponent symbol".into());

}

FLOAT_NUMBER

}

rustc_lexer::LiteralKind::Char { terminated } => {

if !terminated {

- err = "Missing trailing `'` symbol to terminate the character literal";

+ no_end_quote('\'', "character");

} else {

let text = &self.res.text[self.offset + 1..][..len - 1];

- let i = text.rfind('\'').unwrap();

- let text = &text[..i];

+ let text = &text[..text.rfind('\'').unwrap()];

if let Err(e) = unescape_char(text) {

- err = error_to_diagnostic_message(e, Mode::Char);

+ errors.push(err_to_msg(e, Mode::Char));

}

CHAR

}

rustc_lexer::LiteralKind::Byte { terminated } => {

if !terminated {

- err = "Missing trailing `'` symbol to terminate the byte literal";

+ no_end_quote('\'', "byte");

} else {

let text = &self.res.text[self.offset + 2..][..len - 2];

- let i = text.rfind('\'').unwrap();

- let text = &text[..i];

+ let text = &text[..text.rfind('\'').unwrap()];

if let Err(e) = unescape_byte(text) {

- err = error_to_diagnostic_message(e, Mode::Byte);

+ errors.push(err_to_msg(e, Mode::Byte));

}

BYTE

}

rustc_lexer::LiteralKind::Str { terminated } => {

if !terminated {

- err = "Missing trailing `\"` symbol to terminate the string literal";

+ no_end_quote('"', "string");

} else {

let text = &self.res.text[self.offset + 1..][..len - 1];

- let i = text.rfind('"').unwrap();

- let text = &text[..i];

- err = unescape_string_error_message(text, Mode::Str);

+ let text = &text[..text.rfind('"').unwrap()];

+ unescape_str(text, |_, res| {

+ if let Err(e) = res {

+ errors.push(err_to_msg(e, Mode::Str));

+ }

+ });

}

STRING

}

rustc_lexer::LiteralKind::ByteStr { terminated } => {

if !terminated {

- err = "Missing trailing `\"` symbol to terminate the byte string literal";

+ no_end_quote('"', "byte string");

} else {

let text = &self.res.text[self.offset + 2..][..len - 2];

- let i = text.rfind('"').unwrap();

- let text = &text[..i];

- err = unescape_string_error_message(text, Mode::ByteStr);

+ let text = &text[..text.rfind('"').unwrap()];

+ unescape_byte_str(text, |_, res| {

+ if let Err(e) = res {

+ errors.push(err_to_msg(e, Mode::ByteStr));

+ }

+ });

}

BYTE_STRING

}

rustc_lexer::LiteralKind::CStr { terminated } => {

if !terminated {

- err = "Missing trailing `\"` symbol to terminate the string literal";

+ no_end_quote('"', "C string")

} else {

let text = &self.res.text[self.offset + 2..][..len - 2];

- let i = text.rfind('"').unwrap();

- let text = &text[..i];

- err = unescape_string_error_message(text, Mode::CStr);

+ let text = &text[..text.rfind('"').unwrap()];

+ unescape_c_str(text, |_, res| {

+ if let Err(e) = res {

+ errors.push(err_to_msg(e, Mode::CStr));

+ }

+ });

}

C_STRING

}

rustc_lexer::LiteralKind::RawStr { n_hashes } => {

if n_hashes.is_none() {

- err = "Invalid raw string literal";

+ errors.push(invalid_raw_msg);

}

STRING

}

rustc_lexer::LiteralKind::RawByteStr { n_hashes } => {

if n_hashes.is_none() {

- err = "Invalid raw string literal";

+ errors.push(invalid_raw_msg);

}

BYTE_STRING

}

rustc_lexer::LiteralKind::RawCStr { n_hashes } => {

if n_hashes.is_none() {

- err = "Invalid raw string literal";

+ errors.push(invalid_raw_msg);

}

C_STRING

}

};

- let err = if err.is_empty() { None } else { Some(err) };

- self.push(syntax_kind, len, err);

+ self.push(syntax_kind, len, errors);

}

-fn error_to_diagnostic_message(error: EscapeError, mode: Mode) -> &'static str {

+fn err_to_msg(error: EscapeError, mode: Mode) -> String {

match error {

EscapeError::ZeroChars => "empty character literal",

EscapeError::MoreThanOneChar => "character literal may only contain one codepoint",

@@ -410,28 +424,5 @@ fn error_to_diagnostic_message(error: EscapeError, mode: Mode) -> &'static str {

EscapeError::UnskippedWhitespaceWarning => "",

EscapeError::MultipleSkippedLinesWarning => "",

}

-fn unescape_string_error_message(text: &str, mode: Mode) -> &'static str {

- let mut error_message = "";

- match mode {

- Mode::CStr => {

- unescape_mixed(text, mode, &mut |_, res| {

- if let Err(e) = res {

- error_message = error_to_diagnostic_message(e, mode);

- }

- });

- }

- Mode::ByteStr | Mode::Str => {

- unescape_unicode(text, mode, &mut |_, res| {

- if let Err(e) = res {

- error_message = error_to_diagnostic_message(e, mode);

- }

- });

- }

- _ => {

- // Other Modes are not supported yet or do not apply

- }

- error_message

+ .into()

}