Unnamed repository; edit this file 'description' to name the repository.
add diagnosis messages for chars and byte literal errors
Pol Valletbó 2023-10-11
parent 8a23314 · commit 1fe6ac8
-rw-r--r--crates/parser/src/lexed_str.rs55
-rw-r--r--crates/parser/test_data/lexer/err/byte_char_literals.rast92
-rw-r--r--crates/parser/test_data/lexer/err/byte_char_literals.rs47
-rw-r--r--crates/parser/test_data/lexer/err/char_literals.rast92
-rw-r--r--crates/parser/test_data/lexer/err/char_literals.rs47
-rw-r--r--crates/parser/test_data/lexer/ok/byte_strings.rast6
-rw-r--r--crates/parser/test_data/lexer/ok/byte_strings.rs6
-rw-r--r--crates/parser/test_data/lexer/ok/chars.rast2
-rw-r--r--crates/parser/test_data/lexer/ok/chars.rs2
9 files changed, 337 insertions, 12 deletions
diff --git a/crates/parser/src/lexed_str.rs b/crates/parser/src/lexed_str.rs
index 30c1c4f8c7..031ac27724 100644
--- a/crates/parser/src/lexed_str.rs
+++ b/crates/parser/src/lexed_str.rs
@@ -9,8 +9,11 @@
//! include info about comments and whitespace.
use rustc_dependencies::lexer as rustc_lexer;
+
use std::ops;
+use rustc_lexer::unescape::{Mode, EscapeError};
+
use crate::{
SyntaxKind::{self, *},
T,
@@ -254,13 +257,28 @@ impl<'a> Converter<'a> {
rustc_lexer::LiteralKind::Char { terminated } => {
if !terminated {
err = "Missing trailing `'` symbol to terminate the character literal";
+ } else {
+ let text = &self.res.text[self.offset + 1..][..len - 1];
+ let i = text.rfind('\'').unwrap();
+ let text = &text[..i];
+ if let Err(e) = rustc_lexer::unescape::unescape_char(text) {
+ err = error_to_diagnostic_message(e, Mode::Char);
+ }
}
CHAR
}
rustc_lexer::LiteralKind::Byte { terminated } => {
if !terminated {
err = "Missing trailing `'` symbol to terminate the byte literal";
+ } else {
+ let text = &self.res.text[self.offset + 2..][..len - 2];
+ let i = text.rfind('\'').unwrap();
+ let text = &text[..i];
+ if let Err(e) = rustc_lexer::unescape::unescape_char(text) {
+ err = error_to_diagnostic_message(e, Mode::Byte);
+ }
}
+
BYTE
}
rustc_lexer::LiteralKind::Str { terminated } => {
@@ -305,3 +323,40 @@ impl<'a> Converter<'a> {
self.push(syntax_kind, len, err);
}
}
+
+fn error_to_diagnostic_message(error: EscapeError, mode: Mode) -> &'static str {
+ match error {
+ EscapeError::ZeroChars => "empty character literal",
+ EscapeError::MoreThanOneChar => "character literal may only contain one codepoint",
+ EscapeError::LoneSlash => "",
+ EscapeError::InvalidEscape if mode == Mode::Byte || mode == Mode::ByteStr => {
+ "unknown byte escape"
+ }
+ EscapeError::InvalidEscape => "unknown character escape",
+ EscapeError::BareCarriageReturn => "",
+ EscapeError::BareCarriageReturnInRawString => "",
+ EscapeError::EscapeOnlyChar if mode == Mode::Byte => "byte constant must be escaped",
+ EscapeError::EscapeOnlyChar => "character constant must be escaped",
+ EscapeError::TooShortHexEscape => "numeric character escape is too short",
+ EscapeError::InvalidCharInHexEscape => "invalid character in numeric character escape",
+ EscapeError::OutOfRangeHexEscape => "out of range hex escape",
+ EscapeError::NoBraceInUnicodeEscape => "incorrect unicode escape sequence",
+ EscapeError::InvalidCharInUnicodeEscape => "invalid character in unicode escape",
+ EscapeError::EmptyUnicodeEscape => "empty unicode escape",
+ EscapeError::UnclosedUnicodeEscape => "unterminated unicode escape",
+ EscapeError::LeadingUnderscoreUnicodeEscape => "invalid start of unicode escape",
+ EscapeError::OverlongUnicodeEscape => "overlong unicode escape",
+ EscapeError::LoneSurrogateUnicodeEscape => "invalid unicode character escape",
+ EscapeError::OutOfRangeUnicodeEscape => "invalid unicode character escape",
+ EscapeError::UnicodeEscapeInByte => "unicode escape in byte string",
+ EscapeError::NonAsciiCharInByte if mode == Mode::Byte => {
+ "non-ASCII character in byte literal"
+ }
+ EscapeError::NonAsciiCharInByte if mode == Mode::ByteStr => {
+ "non-ASCII character in byte string literal"
+ }
+ EscapeError::NonAsciiCharInByte => "non-ASCII character in raw byte string literal",
+ EscapeError::UnskippedWhitespaceWarning => "",
+ EscapeError::MultipleSkippedLinesWarning => "",
+ }
+}
diff --git a/crates/parser/test_data/lexer/err/byte_char_literals.rast b/crates/parser/test_data/lexer/err/byte_char_literals.rast
new file mode 100644
index 0000000000..24892bc239
--- /dev/null
+++ b/crates/parser/test_data/lexer/err/byte_char_literals.rast
@@ -0,0 +1,92 @@
+BYTE "b''" error: empty character literal
+WHITESPACE "\n"
+BYTE "b'\\'" error: Missing trailing `'` symbol to terminate the byte literal
+WHITESPACE "\n"
+BYTE "b'\n'" error: byte constant must be escaped
+WHITESPACE "\n"
+BYTE "b'spam'" error: character literal may only contain one codepoint
+WHITESPACE "\n"
+BYTE "b'\\x0ff'" error: character literal may only contain one codepoint
+WHITESPACE "\n"
+BYTE "b'\\\"a'" error: character literal may only contain one codepoint
+WHITESPACE "\n"
+BYTE "b'\\na'" error: character literal may only contain one codepoint
+WHITESPACE "\n"
+BYTE "b'\\ra'" error: character literal may only contain one codepoint
+WHITESPACE "\n"
+BYTE "b'\\ta'" error: character literal may only contain one codepoint
+WHITESPACE "\n"
+BYTE "b'\\\\a'" error: character literal may only contain one codepoint
+WHITESPACE "\n"
+BYTE "b'\\'a'" error: character literal may only contain one codepoint
+WHITESPACE "\n"
+BYTE "b'\\0a'" error: character literal may only contain one codepoint
+WHITESPACE "\n"
+BYTE "b'\\u{0}x'" error: character literal may only contain one codepoint
+WHITESPACE "\n"
+BYTE "b'\\u{1F63b}}'" error: character literal may only contain one codepoint
+WHITESPACE "\n"
+BYTE "b'\\v'" error: unknown byte escape
+WHITESPACE "\n"
+BYTE "b'\\💩'" error: unknown byte escape
+WHITESPACE "\n"
+BYTE "b'\\●'" error: unknown byte escape
+WHITESPACE "\n"
+BYTE "b'\\\\\\r'" error: character literal may only contain one codepoint
+WHITESPACE "\n"
+BYTE "b'\\x'" error: numeric character escape is too short
+WHITESPACE "\n"
+BYTE "b'\\x0'" error: numeric character escape is too short
+WHITESPACE "\n"
+BYTE "b'\\xf'" error: numeric character escape is too short
+WHITESPACE "\n"
+BYTE "b'\\xa'" error: numeric character escape is too short
+WHITESPACE "\n"
+BYTE "b'\\xx'" error: invalid character in numeric character escape
+WHITESPACE "\n"
+BYTE "b'\\xы'" error: invalid character in numeric character escape
+WHITESPACE "\n"
+BYTE "b'\\x🦀'" error: invalid character in numeric character escape
+WHITESPACE "\n"
+BYTE "b'\\xtt'" error: invalid character in numeric character escape
+WHITESPACE "\n"
+BYTE "b'\\xff'" error: out of range hex escape
+WHITESPACE "\n"
+BYTE "b'\\xFF'" error: out of range hex escape
+WHITESPACE "\n"
+BYTE "b'\\x80'" error: out of range hex escape
+WHITESPACE "\n"
+BYTE "b'\\u'" error: incorrect unicode escape sequence
+WHITESPACE "\n"
+BYTE "b'\\u[0123]'" error: incorrect unicode escape sequence
+WHITESPACE "\n"
+BYTE "b'\\u{0x}'" error: invalid character in unicode escape
+WHITESPACE "\n"
+BYTE "b'\\u{'" error: unterminated unicode escape
+WHITESPACE "\n"
+BYTE "b'\\u{0000'" error: unterminated unicode escape
+WHITESPACE "\n"
+BYTE "b'\\u{}'" error: empty unicode escape
+WHITESPACE "\n"
+BYTE "b'\\u{_0000}'" error: invalid start of unicode escape
+WHITESPACE "\n"
+BYTE "b'\\u{0000000}'" error: overlong unicode escape
+WHITESPACE "\n"
+BYTE "b'\\u{FFFFFF}'" error: invalid unicode character escape
+WHITESPACE "\n"
+BYTE "b'\\u{ffffff}'" error: invalid unicode character escape
+WHITESPACE "\n"
+BYTE "b'\\u{ffffff}'" error: invalid unicode character escape
+WHITESPACE "\n"
+BYTE "b'\\u{DC00}'" error: invalid unicode character escape
+WHITESPACE "\n"
+BYTE "b'\\u{DDDD}'" error: invalid unicode character escape
+WHITESPACE "\n"
+BYTE "b'\\u{DFFF}'" error: invalid unicode character escape
+WHITESPACE "\n"
+BYTE "b'\\u{D800}'" error: invalid unicode character escape
+WHITESPACE "\n"
+BYTE "b'\\u{DAAA}'" error: invalid unicode character escape
+WHITESPACE "\n"
+BYTE "b'\\u{DBFF}'" error: invalid unicode character escape
+WHITESPACE "\n"
diff --git a/crates/parser/test_data/lexer/err/byte_char_literals.rs b/crates/parser/test_data/lexer/err/byte_char_literals.rs
new file mode 100644
index 0000000000..9f2f4309e7
--- /dev/null
+++ b/crates/parser/test_data/lexer/err/byte_char_literals.rs
@@ -0,0 +1,47 @@
+b''
+b'\'
+b'
+'
+b'spam'
+b'\x0ff'
+b'\"a'
+b'\na'
+b'\ra'
+b'\ta'
+b'\\a'
+b'\'a'
+b'\0a'
+b'\u{0}x'
+b'\u{1F63b}}'
+b'\v'
+b'\💩'
+b'\●'
+b'\\\r'
+b'\x'
+b'\x0'
+b'\xf'
+b'\xa'
+b'\xx'
+b'\xы'
+b'\x🦀'
+b'\xtt'
+b'\xff'
+b'\xFF'
+b'\x80'
+b'\u'
+b'\u[0123]'
+b'\u{0x}'
+b'\u{'
+b'\u{0000'
+b'\u{}'
+b'\u{_0000}'
+b'\u{0000000}'
+b'\u{FFFFFF}'
+b'\u{ffffff}'
+b'\u{ffffff}'
+b'\u{DC00}'
+b'\u{DDDD}'
+b'\u{DFFF}'
+b'\u{D800}'
+b'\u{DAAA}'
+b'\u{DBFF}'
diff --git a/crates/parser/test_data/lexer/err/char_literals.rast b/crates/parser/test_data/lexer/err/char_literals.rast
new file mode 100644
index 0000000000..b1e1364d4c
--- /dev/null
+++ b/crates/parser/test_data/lexer/err/char_literals.rast
@@ -0,0 +1,92 @@
+CHAR "'hello'" error: character literal may only contain one codepoint
+WHITESPACE "\n"
+CHAR "''" error: empty character literal
+WHITESPACE "\n"
+CHAR "'\n'" error: character constant must be escaped
+WHITESPACE "\n"
+CHAR "'spam'" error: character literal may only contain one codepoint
+WHITESPACE "\n"
+CHAR "'\\x0ff'" error: character literal may only contain one codepoint
+WHITESPACE "\n"
+CHAR "'\\\"a'" error: character literal may only contain one codepoint
+WHITESPACE "\n"
+CHAR "'\\na'" error: character literal may only contain one codepoint
+WHITESPACE "\n"
+CHAR "'\\ra'" error: character literal may only contain one codepoint
+WHITESPACE "\n"
+CHAR "'\\ta'" error: character literal may only contain one codepoint
+WHITESPACE "\n"
+CHAR "'\\\\a'" error: character literal may only contain one codepoint
+WHITESPACE "\n"
+CHAR "'\\'a'" error: character literal may only contain one codepoint
+WHITESPACE "\n"
+CHAR "'\\0a'" error: character literal may only contain one codepoint
+WHITESPACE "\n"
+CHAR "'\\u{0}x'" error: character literal may only contain one codepoint
+WHITESPACE "\n"
+CHAR "'\\u{1F63b}}'" error: character literal may only contain one codepoint
+WHITESPACE "\n"
+CHAR "'\\v'" error: unknown character escape
+WHITESPACE "\n"
+CHAR "'\\💩'" error: unknown character escape
+WHITESPACE "\n"
+CHAR "'\\●'" error: unknown character escape
+WHITESPACE "\n"
+CHAR "'\\\\\\r'" error: character literal may only contain one codepoint
+WHITESPACE "\n"
+CHAR "'\\x'" error: numeric character escape is too short
+WHITESPACE "\n"
+CHAR "'\\x0'" error: numeric character escape is too short
+WHITESPACE "\n"
+CHAR "'\\xf'" error: numeric character escape is too short
+WHITESPACE "\n"
+CHAR "'\\xa'" error: numeric character escape is too short
+WHITESPACE "\n"
+CHAR "'\\xx'" error: invalid character in numeric character escape
+WHITESPACE "\n"
+CHAR "'\\xы'" error: invalid character in numeric character escape
+WHITESPACE "\n"
+CHAR "'\\x🦀'" error: invalid character in numeric character escape
+WHITESPACE "\n"
+CHAR "'\\xtt'" error: invalid character in numeric character escape
+WHITESPACE "\n"
+CHAR "'\\xff'" error: out of range hex escape
+WHITESPACE "\n"
+CHAR "'\\xFF'" error: out of range hex escape
+WHITESPACE "\n"
+CHAR "'\\x80'" error: out of range hex escape
+WHITESPACE "\n"
+CHAR "'\\u'" error: incorrect unicode escape sequence
+WHITESPACE "\n"
+CHAR "'\\u[0123]'" error: incorrect unicode escape sequence
+WHITESPACE "\n"
+CHAR "'\\u{0x}'" error: invalid character in unicode escape
+WHITESPACE "\n"
+CHAR "'\\u{'" error: unterminated unicode escape
+WHITESPACE "\n"
+CHAR "'\\u{0000'" error: unterminated unicode escape
+WHITESPACE "\n"
+CHAR "'\\u{}'" error: empty unicode escape
+WHITESPACE "\n"
+CHAR "'\\u{_0000}'" error: invalid start of unicode escape
+WHITESPACE "\n"
+CHAR "'\\u{0000000}'" error: overlong unicode escape
+WHITESPACE "\n"
+CHAR "'\\u{FFFFFF}'" error: invalid unicode character escape
+WHITESPACE "\n"
+CHAR "'\\u{ffffff}'" error: invalid unicode character escape
+WHITESPACE "\n"
+CHAR "'\\u{ffffff}'" error: invalid unicode character escape
+WHITESPACE "\n"
+CHAR "'\\u{DC00}'" error: invalid unicode character escape
+WHITESPACE "\n"
+CHAR "'\\u{DDDD}'" error: invalid unicode character escape
+WHITESPACE "\n"
+CHAR "'\\u{DFFF}'" error: invalid unicode character escape
+WHITESPACE "\n"
+CHAR "'\\u{D800}'" error: invalid unicode character escape
+WHITESPACE "\n"
+CHAR "'\\u{DAAA}'" error: invalid unicode character escape
+WHITESPACE "\n"
+CHAR "'\\u{DBFF}'" error: invalid unicode character escape
+WHITESPACE "\n"
diff --git a/crates/parser/test_data/lexer/err/char_literals.rs b/crates/parser/test_data/lexer/err/char_literals.rs
new file mode 100644
index 0000000000..291f99d802
--- /dev/null
+++ b/crates/parser/test_data/lexer/err/char_literals.rs
@@ -0,0 +1,47 @@
+'hello'
+''
+'
+'
+'spam'
+'\x0ff'
+'\"a'
+'\na'
+'\ra'
+'\ta'
+'\\a'
+'\'a'
+'\0a'
+'\u{0}x'
+'\u{1F63b}}'
+'\v'
+'\💩'
+'\●'
+'\\\r'
+'\x'
+'\x0'
+'\xf'
+'\xa'
+'\xx'
+'\xы'
+'\x🦀'
+'\xtt'
+'\xff'
+'\xFF'
+'\x80'
+'\u'
+'\u[0123]'
+'\u{0x}'
+'\u{'
+'\u{0000'
+'\u{}'
+'\u{_0000}'
+'\u{0000000}'
+'\u{FFFFFF}'
+'\u{ffffff}'
+'\u{ffffff}'
+'\u{DC00}'
+'\u{DDDD}'
+'\u{DFFF}'
+'\u{D800}'
+'\u{DAAA}'
+'\u{DBFF}'
diff --git a/crates/parser/test_data/lexer/ok/byte_strings.rast b/crates/parser/test_data/lexer/ok/byte_strings.rast
index c848ac368e..fd20ca57ac 100644
--- a/crates/parser/test_data/lexer/ok/byte_strings.rast
+++ b/crates/parser/test_data/lexer/ok/byte_strings.rast
@@ -1,13 +1,9 @@
-BYTE "b''"
-WHITESPACE " "
BYTE "b'x'"
WHITESPACE " "
BYTE_STRING "b\"foo\""
WHITESPACE " "
BYTE_STRING "br\"\""
WHITESPACE "\n"
-BYTE "b''suf"
-WHITESPACE " "
BYTE_STRING "b\"\"ix"
WHITESPACE " "
BYTE_STRING "br\"\"br"
@@ -17,6 +13,4 @@ WHITESPACE " "
BYTE "b'\\\\'"
WHITESPACE " "
BYTE "b'\\''"
-WHITESPACE " "
-BYTE "b'hello'"
WHITESPACE "\n"
diff --git a/crates/parser/test_data/lexer/ok/byte_strings.rs b/crates/parser/test_data/lexer/ok/byte_strings.rs
index b54930f5e6..65460d02cb 100644
--- a/crates/parser/test_data/lexer/ok/byte_strings.rs
+++ b/crates/parser/test_data/lexer/ok/byte_strings.rs
@@ -1,3 +1,3 @@
-b'' b'x' b"foo" br""
-b''suf b""ix br""br
-b'\n' b'\\' b'\'' b'hello'
+b'x' b"foo" br""
+b""ix br""br
+b'\n' b'\\' b'\''
diff --git a/crates/parser/test_data/lexer/ok/chars.rast b/crates/parser/test_data/lexer/ok/chars.rast
index 66e58cc298..07172a4ecc 100644
--- a/crates/parser/test_data/lexer/ok/chars.rast
+++ b/crates/parser/test_data/lexer/ok/chars.rast
@@ -4,8 +4,6 @@ CHAR "' '"
WHITESPACE " "
CHAR "'0'"
WHITESPACE " "
-CHAR "'hello'"
-WHITESPACE " "
CHAR "'\\x7f'"
WHITESPACE " "
CHAR "'\\n'"
diff --git a/crates/parser/test_data/lexer/ok/chars.rs b/crates/parser/test_data/lexer/ok/chars.rs
index 454ee0a5f6..15f52c113c 100644
--- a/crates/parser/test_data/lexer/ok/chars.rs
+++ b/crates/parser/test_data/lexer/ok/chars.rs
@@ -1 +1 @@
-'x' ' ' '0' 'hello' '\x7f' '\n' '\\' '\''
+'x' ' ' '0' '\x7f' '\n' '\\' '\''