Unnamed repository; edit this file 'description' to name the repository.
Diffstat (limited to 'crates/parser/src/lexed_str.rs')
-rw-r--r-- crates/parser/src/lexed_str.rs 137
1 files changed, 64 insertions, 73 deletions
diff --git a/crates/parser/src/lexed_str.rs b/crates/parser/src/lexed_str.rs
index 0fa9a26454..8fff1c3db7 100644
--- a/crates/parser/src/lexed_str.rs
+++ b/crates/parser/src/lexed_str.rs
@@ -11,7 +11,8 @@
use std::ops;
use rustc_literal_escaper::{
- EscapeError, Mode, unescape_byte, unescape_char, unescape_mixed, unescape_unicode,
+ EscapeError, Mode, unescape_byte, unescape_byte_str, unescape_c_str, unescape_char,
+ unescape_str,
};
use crate::{
@@ -43,7 +44,9 @@ impl<'a> LexedStr<'a> {
// Re-create the tokenizer from scratch every token because `GuardedStrPrefix` is one token in the lexer
// but we want to split it to two in edition <2024.
- while let Some(token) = rustc_lexer::tokenize(&text[conv.offset..]).next() {
+ while let Some(token) =
+ rustc_lexer::tokenize(&text[conv.offset..], rustc_lexer::FrontmatterAllowed::No).next()
+ {
let token_text = &text[conv.offset..][..token.len as usize];
conv.extend_token(&token.kind, token_text);
@@ -57,7 +60,7 @@ impl<'a> LexedStr<'a> {
return None;
}
- let token = rustc_lexer::tokenize(text).next()?;
+ let token = rustc_lexer::tokenize(text, rustc_lexer::FrontmatterAllowed::No).next()?;
if token.len as usize != text.len() {
return None;
}
@@ -151,14 +154,14 @@ impl<'a> Converter<'a> {
self.res
}
- fn push(&mut self, kind: SyntaxKind, len: usize, err: Option<&str>) {
+ fn push(&mut self, kind: SyntaxKind, len: usize, errors: Vec<String>) {
self.res.push(kind, self.offset);
self.offset += len;
- if let Some(err) = err {
- let token = self.res.len() as u32;
- let msg = err.to_owned();
- self.res.error.push(LexError { msg, token });
+ for msg in errors {
+ if !msg.is_empty() {
+ self.res.error.push(LexError { msg, token: self.res.len() as u32 });
+ }
}
}
@@ -167,14 +170,16 @@ impl<'a> Converter<'a> {
// We drop some useful information here (see patterns with double dots `..`)
// Storing that info in `SyntaxKind` is not possible due to its layout requirements of
// being `u16` that come from `rowan::SyntaxKind`.
- let mut err = "";
+ let mut errors: Vec<String> = vec![];
let syntax_kind = {
match kind {
rustc_lexer::TokenKind::LineComment { doc_style: _ } => COMMENT,
rustc_lexer::TokenKind::BlockComment { doc_style: _, terminated } => {
if !terminated {
- err = "Missing trailing `*/` symbols to terminate the block comment";
+ errors.push(
+ "Missing trailing `*/` symbols to terminate the block comment".into(),
+ );
}
COMMENT
}
@@ -184,9 +189,9 @@ impl<'a> Converter<'a> {
invalid_infostring,
} => {
if *has_invalid_preceding_whitespace {
- err = "invalid preceding whitespace for frontmatter opening"
+ errors.push("invalid preceding whitespace for frontmatter opening".into());
} else if *invalid_infostring {
- err = "invalid infostring for frontmatter"
+ errors.push("invalid infostring for frontmatter".into());
}
FRONTMATTER
}
@@ -198,7 +203,7 @@ impl<'a> Converter<'a> {
SyntaxKind::from_keyword(token_text, self.edition).unwrap_or(IDENT)
}
rustc_lexer::TokenKind::InvalidIdent => {
- err = "Ident contains invalid characters";
+ errors.push("Ident contains invalid characters".into());
IDENT
}
@@ -206,7 +211,7 @@ impl<'a> Converter<'a> {
rustc_lexer::TokenKind::GuardedStrPrefix if self.edition.at_least_2024() => {
// FIXME: rustc does something better for recovery.
- err = "Invalid string literal (reserved syntax)";
+ errors.push("Invalid string literal (reserved syntax)".into());
ERROR
}
rustc_lexer::TokenKind::GuardedStrPrefix => {
@@ -222,12 +227,12 @@ impl<'a> Converter<'a> {
rustc_lexer::TokenKind::Lifetime { starts_with_number } => {
if *starts_with_number {
- err = "Lifetime name cannot start with a number";
+ errors.push("Lifetime name cannot start with a number".into());
}
LIFETIME_IDENT
}
rustc_lexer::TokenKind::UnknownPrefixLifetime => {
- err = "Unknown lifetime prefix";
+ errors.push("Unknown lifetime prefix".into());
LIFETIME_IDENT
}
rustc_lexer::TokenKind::RawLifetime => LIFETIME_IDENT,
@@ -262,119 +267,128 @@ impl<'a> Converter<'a> {
rustc_lexer::TokenKind::Unknown => ERROR,
rustc_lexer::TokenKind::UnknownPrefix if token_text == "builtin" => IDENT,
rustc_lexer::TokenKind::UnknownPrefix => {
- err = "unknown literal prefix";
+ errors.push("unknown literal prefix".into());
IDENT
}
rustc_lexer::TokenKind::Eof => EOF,
}
};
- let err = if err.is_empty() { None } else { Some(err) };
- self.push(syntax_kind, token_text.len(), err);
+ self.push(syntax_kind, token_text.len(), errors);
}
fn extend_literal(&mut self, len: usize, kind: &rustc_lexer::LiteralKind) {
- let mut err = "";
+ let invalid_raw_msg = String::from("Invalid raw string literal");
+
+ let mut errors = vec![];
+ let mut no_end_quote = |c: char, kind: &str| {
+ errors.push(format!("Missing trailing `{c}` symbol to terminate the {kind} literal"));
+ };
let syntax_kind = match *kind {
rustc_lexer::LiteralKind::Int { empty_int, base: _ } => {
if empty_int {
- err = "Missing digits after the integer base prefix";
+ errors.push("Missing digits after the integer base prefix".into());
}
INT_NUMBER
}
rustc_lexer::LiteralKind::Float { empty_exponent, base: _ } => {
if empty_exponent {
- err = "Missing digits after the exponent symbol";
+ errors.push("Missing digits after the exponent symbol".into());
}
FLOAT_NUMBER
}
rustc_lexer::LiteralKind::Char { terminated } => {
if !terminated {
- err = "Missing trailing `'` symbol to terminate the character literal";
+ no_end_quote('\'', "character");
} else {
let text = &self.res.text[self.offset + 1..][..len - 1];
- let i = text.rfind('\'').unwrap();
- let text = &text[..i];
+ let text = &text[..text.rfind('\'').unwrap()];
if let Err(e) = unescape_char(text) {
- err = error_to_diagnostic_message(e, Mode::Char);
+ errors.push(err_to_msg(e, Mode::Char));
}
}
CHAR
}
rustc_lexer::LiteralKind::Byte { terminated } => {
if !terminated {
- err = "Missing trailing `'` symbol to terminate the byte literal";
+ no_end_quote('\'', "byte");
} else {
let text = &self.res.text[self.offset + 2..][..len - 2];
- let i = text.rfind('\'').unwrap();
- let text = &text[..i];
+ let text = &text[..text.rfind('\'').unwrap()];
if let Err(e) = unescape_byte(text) {
- err = error_to_diagnostic_message(e, Mode::Byte);
+ errors.push(err_to_msg(e, Mode::Byte));
}
}
-
BYTE
}
rustc_lexer::LiteralKind::Str { terminated } => {
if !terminated {
- err = "Missing trailing `\"` symbol to terminate the string literal";
+ no_end_quote('"', "string");
} else {
let text = &self.res.text[self.offset + 1..][..len - 1];
- let i = text.rfind('"').unwrap();
- let text = &text[..i];
- err = unescape_string_error_message(text, Mode::Str);
+ let text = &text[..text.rfind('"').unwrap()];
+ unescape_str(text, |_, res| {
+ if let Err(e) = res {
+ errors.push(err_to_msg(e, Mode::Str));
+ }
+ });
}
STRING
}
rustc_lexer::LiteralKind::ByteStr { terminated } => {
if !terminated {
- err = "Missing trailing `\"` symbol to terminate the byte string literal";
+ no_end_quote('"', "byte string");
} else {
let text = &self.res.text[self.offset + 2..][..len - 2];
- let i = text.rfind('"').unwrap();
- let text = &text[..i];
- err = unescape_string_error_message(text, Mode::ByteStr);
+ let text = &text[..text.rfind('"').unwrap()];
+ unescape_byte_str(text, |_, res| {
+ if let Err(e) = res {
+ errors.push(err_to_msg(e, Mode::ByteStr));
+ }
+ });
}
BYTE_STRING
}
rustc_lexer::LiteralKind::CStr { terminated } => {
if !terminated {
- err = "Missing trailing `\"` symbol to terminate the string literal";
+ no_end_quote('"', "C string")
} else {
let text = &self.res.text[self.offset + 2..][..len - 2];
- let i = text.rfind('"').unwrap();
- let text = &text[..i];
- err = unescape_string_error_message(text, Mode::CStr);
+ let text = &text[..text.rfind('"').unwrap()];
+ unescape_c_str(text, |_, res| {
+ if let Err(e) = res {
+ errors.push(err_to_msg(e, Mode::CStr));
+ }
+ });
}
C_STRING
}
rustc_lexer::LiteralKind::RawStr { n_hashes } => {
if n_hashes.is_none() {
- err = "Invalid raw string literal";
+ errors.push(invalid_raw_msg);
}
STRING
}
rustc_lexer::LiteralKind::RawByteStr { n_hashes } => {
if n_hashes.is_none() {
- err = "Invalid raw string literal";
+ errors.push(invalid_raw_msg);
}
BYTE_STRING
}
rustc_lexer::LiteralKind::RawCStr { n_hashes } => {
if n_hashes.is_none() {
- err = "Invalid raw string literal";
+ errors.push(invalid_raw_msg);
}
C_STRING
}
};
- let err = if err.is_empty() { None } else { Some(err) };
- self.push(syntax_kind, len, err);
+ self.push(syntax_kind, len, errors);
}
}
-fn error_to_diagnostic_message(error: EscapeError, mode: Mode) -> &'static str {
+fn err_to_msg(error: EscapeError, mode: Mode) -> String {
match error {
EscapeError::ZeroChars => "empty character literal",
EscapeError::MoreThanOneChar => "character literal may only contain one codepoint",
@@ -410,28 +424,5 @@ fn error_to_diagnostic_message(error: EscapeError, mode: Mode) -> &'static str {
EscapeError::UnskippedWhitespaceWarning => "",
EscapeError::MultipleSkippedLinesWarning => "",
}
-}
-
-fn unescape_string_error_message(text: &str, mode: Mode) -> &'static str {
- let mut error_message = "";
- match mode {
- Mode::CStr => {
- unescape_mixed(text, mode, &mut |_, res| {
- if let Err(e) = res {
- error_message = error_to_diagnostic_message(e, mode);
- }
- });
- }
- Mode::ByteStr | Mode::Str => {
- unescape_unicode(text, mode, &mut |_, res| {
- if let Err(e) = res {
- error_message = error_to_diagnostic_message(e, mode);
- }
- });
- }
- _ => {
- // Other Modes are not supported yet or do not apply
- }
- }
- error_message
+ .into()
}