Unnamed repository; edit this file 'description' to name the repository.
Auto merge of #15746 - pvalletbo:string-literals-diagnose, r=Veykril
String literals diagnose. Continues the work from #15744 by adding diagnostic errors for the Str, ByteStr, and CStr literal kinds. Also replaces `unescape_char` with `unescape_byte` so that Byte literals are checked with the correct method.
bors 2023-10-17
parent 2910dbf · parent 6845c80 · commit d6afb4f
-rw-r--r-- crates/parser/src/lexed_str.rs 41
-rw-r--r-- crates/parser/test_data/lexer/err/byte_char_literals.rast 28
-rw-r--r-- crates/parser/test_data/lexer/err/byte_char_literals.rs 3
-rw-r--r-- crates/parser/test_data/lexer/err/byte_strings.rast 28
-rw-r--r-- crates/parser/test_data/lexer/err/byte_strings.rs 14
-rw-r--r-- crates/parser/test_data/lexer/err/c_strings.rast 28
-rw-r--r-- crates/parser/test_data/lexer/err/c_strings.rs 14
-rw-r--r-- crates/parser/test_data/lexer/err/strings.rast 28
-rw-r--r-- crates/parser/test_data/lexer/err/strings.rs 14
9 files changed, 177 insertions, 21 deletions
diff --git a/crates/parser/src/lexed_str.rs b/crates/parser/src/lexed_str.rs
index 8e8bdce1ee..b9e7566fdf 100644
--- a/crates/parser/src/lexed_str.rs
+++ b/crates/parser/src/lexed_str.rs
@@ -274,7 +274,7 @@ impl<'a> Converter<'a> {
let text = &self.res.text[self.offset + 2..][..len - 2];
let i = text.rfind('\'').unwrap();
let text = &text[..i];
- if let Err(e) = rustc_lexer::unescape::unescape_char(text) {
+ if let Err(e) = rustc_lexer::unescape::unescape_byte(text) {
err = error_to_diagnostic_message(e, Mode::Byte);
}
}
@@ -284,18 +284,33 @@ impl<'a> Converter<'a> {
rustc_lexer::LiteralKind::Str { terminated } => {
if !terminated {
err = "Missing trailing `\"` symbol to terminate the string literal";
+ } else {
+ let text = &self.res.text[self.offset + 1..][..len - 1];
+ let i = text.rfind('"').unwrap();
+ let text = &text[..i];
+ err = unescape_string_error_message(text, Mode::Str);
}
STRING
}
rustc_lexer::LiteralKind::ByteStr { terminated } => {
if !terminated {
err = "Missing trailing `\"` symbol to terminate the byte string literal";
+ } else {
+ let text = &self.res.text[self.offset + 2..][..len - 2];
+ let i = text.rfind('"').unwrap();
+ let text = &text[..i];
+ err = unescape_string_error_message(text, Mode::ByteStr);
}
BYTE_STRING
}
rustc_lexer::LiteralKind::CStr { terminated } => {
if !terminated {
err = "Missing trailing `\"` symbol to terminate the string literal";
+ } else {
+ let text = &self.res.text[self.offset + 2..][..len - 2];
+ let i = text.rfind('"').unwrap();
+ let text = &text[..i];
+ err = unescape_string_error_message(text, Mode::CStr);
}
C_STRING
}
@@ -360,3 +375,27 @@ fn error_to_diagnostic_message(error: EscapeError, mode: Mode) -> &'static str {
EscapeError::MultipleSkippedLinesWarning => "",
}
}
+
+fn unescape_string_error_message(text: &str, mode: Mode) -> &'static str {
+ let mut error_message = "";
+ match mode {
+ Mode::CStr => {
+ rustc_lexer::unescape::unescape_c_string(text, mode, &mut |_, res| {
+ if let Err(e) = res {
+ error_message = error_to_diagnostic_message(e, mode);
+ }
+ });
+ }
+ Mode::ByteStr | Mode::Str => {
+ rustc_lexer::unescape::unescape_literal(text, mode, &mut |_, res| {
+ if let Err(e) = res {
+ error_message = error_to_diagnostic_message(e, mode);
+ }
+ });
+ }
+ _ => {
+ // Other Modes are not supported yet or do not apply
+ }
+ }
+ error_message
+}
diff --git a/crates/parser/test_data/lexer/err/byte_char_literals.rast b/crates/parser/test_data/lexer/err/byte_char_literals.rast
index 24892bc239..7603c9099d 100644
--- a/crates/parser/test_data/lexer/err/byte_char_literals.rast
+++ b/crates/parser/test_data/lexer/err/byte_char_literals.rast
@@ -22,9 +22,9 @@ BYTE "b'\\'a'" error: character literal may only contain one codepoint
WHITESPACE "\n"
BYTE "b'\\0a'" error: character literal may only contain one codepoint
WHITESPACE "\n"
-BYTE "b'\\u{0}x'" error: character literal may only contain one codepoint
+BYTE "b'\\u{0}x'" error: unicode escape in byte string
WHITESPACE "\n"
-BYTE "b'\\u{1F63b}}'" error: character literal may only contain one codepoint
+BYTE "b'\\u{1F63b}}'" error: unicode escape in byte string
WHITESPACE "\n"
BYTE "b'\\v'" error: unknown byte escape
WHITESPACE "\n"
@@ -50,12 +50,6 @@ BYTE "b'\\x🦀'" error: invalid character in numeric character escape
WHITESPACE "\n"
BYTE "b'\\xtt'" error: invalid character in numeric character escape
WHITESPACE "\n"
-BYTE "b'\\xff'" error: out of range hex escape
-WHITESPACE "\n"
-BYTE "b'\\xFF'" error: out of range hex escape
-WHITESPACE "\n"
-BYTE "b'\\x80'" error: out of range hex escape
-WHITESPACE "\n"
BYTE "b'\\u'" error: incorrect unicode escape sequence
WHITESPACE "\n"
BYTE "b'\\u[0123]'" error: incorrect unicode escape sequence
@@ -72,21 +66,21 @@ BYTE "b'\\u{_0000}'" error: invalid start of unicode escape
WHITESPACE "\n"
BYTE "b'\\u{0000000}'" error: overlong unicode escape
WHITESPACE "\n"
-BYTE "b'\\u{FFFFFF}'" error: invalid unicode character escape
+BYTE "b'\\u{FFFFFF}'" error: unicode escape in byte string
WHITESPACE "\n"
-BYTE "b'\\u{ffffff}'" error: invalid unicode character escape
+BYTE "b'\\u{ffffff}'" error: unicode escape in byte string
WHITESPACE "\n"
-BYTE "b'\\u{ffffff}'" error: invalid unicode character escape
+BYTE "b'\\u{ffffff}'" error: unicode escape in byte string
WHITESPACE "\n"
-BYTE "b'\\u{DC00}'" error: invalid unicode character escape
+BYTE "b'\\u{DC00}'" error: unicode escape in byte string
WHITESPACE "\n"
-BYTE "b'\\u{DDDD}'" error: invalid unicode character escape
+BYTE "b'\\u{DDDD}'" error: unicode escape in byte string
WHITESPACE "\n"
-BYTE "b'\\u{DFFF}'" error: invalid unicode character escape
+BYTE "b'\\u{DFFF}'" error: unicode escape in byte string
WHITESPACE "\n"
-BYTE "b'\\u{D800}'" error: invalid unicode character escape
+BYTE "b'\\u{D800}'" error: unicode escape in byte string
WHITESPACE "\n"
-BYTE "b'\\u{DAAA}'" error: invalid unicode character escape
+BYTE "b'\\u{DAAA}'" error: unicode escape in byte string
WHITESPACE "\n"
-BYTE "b'\\u{DBFF}'" error: invalid unicode character escape
+BYTE "b'\\u{DBFF}'" error: unicode escape in byte string
WHITESPACE "\n"
diff --git a/crates/parser/test_data/lexer/err/byte_char_literals.rs b/crates/parser/test_data/lexer/err/byte_char_literals.rs
index 9f2f4309e7..b2d06e490b 100644
--- a/crates/parser/test_data/lexer/err/byte_char_literals.rs
+++ b/crates/parser/test_data/lexer/err/byte_char_literals.rs
@@ -25,9 +25,6 @@ b'\xx'
b'\xы'
b'\x🦀'
b'\xtt'
-b'\xff'
-b'\xFF'
-b'\x80'
b'\u'
b'\u[0123]'
b'\u{0x}'
diff --git a/crates/parser/test_data/lexer/err/byte_strings.rast b/crates/parser/test_data/lexer/err/byte_strings.rast
new file mode 100644
index 0000000000..e8d8ff8cef
--- /dev/null
+++ b/crates/parser/test_data/lexer/err/byte_strings.rast
@@ -0,0 +1,28 @@
+BYTE_STRING "b\"\\💩\"" error: unknown byte escape
+WHITESPACE "\n"
+BYTE_STRING "b\"\\●\"" error: unknown byte escape
+WHITESPACE "\n"
+BYTE_STRING "b\"\\u{_0000}\"" error: invalid start of unicode escape
+WHITESPACE "\n"
+BYTE_STRING "b\"\\u{0000000}\"" error: overlong unicode escape
+WHITESPACE "\n"
+BYTE_STRING "b\"\\u{FFFFFF}\"" error: unicode escape in byte string
+WHITESPACE "\n"
+BYTE_STRING "b\"\\u{ffffff}\"" error: unicode escape in byte string
+WHITESPACE "\n"
+BYTE_STRING "b\"\\u{ffffff}\"" error: unicode escape in byte string
+WHITESPACE "\n"
+BYTE_STRING "b\"\\u{DC00}\"" error: unicode escape in byte string
+WHITESPACE "\n"
+BYTE_STRING "b\"\\u{DDDD}\"" error: unicode escape in byte string
+WHITESPACE "\n"
+BYTE_STRING "b\"\\u{DFFF}\"" error: unicode escape in byte string
+WHITESPACE "\n"
+BYTE_STRING "b\"\\u{D800}\"" error: unicode escape in byte string
+WHITESPACE "\n"
+BYTE_STRING "b\"\\u{DAAA}\"" error: unicode escape in byte string
+WHITESPACE "\n"
+BYTE_STRING "b\"\\u{DBFF}\"" error: unicode escape in byte string
+WHITESPACE "\n"
+BYTE_STRING "b\"\\xы\"" error: invalid character in numeric character escape
+WHITESPACE "\n"
diff --git a/crates/parser/test_data/lexer/err/byte_strings.rs b/crates/parser/test_data/lexer/err/byte_strings.rs
new file mode 100644
index 0000000000..e74847137b
--- /dev/null
+++ b/crates/parser/test_data/lexer/err/byte_strings.rs
@@ -0,0 +1,14 @@
+b"\💩"
+b"\●"
+b"\u{_0000}"
+b"\u{0000000}"
+b"\u{FFFFFF}"
+b"\u{ffffff}"
+b"\u{ffffff}"
+b"\u{DC00}"
+b"\u{DDDD}"
+b"\u{DFFF}"
+b"\u{D800}"
+b"\u{DAAA}"
+b"\u{DBFF}"
+b"\xы"
diff --git a/crates/parser/test_data/lexer/err/c_strings.rast b/crates/parser/test_data/lexer/err/c_strings.rast
new file mode 100644
index 0000000000..1b4424ba5c
--- /dev/null
+++ b/crates/parser/test_data/lexer/err/c_strings.rast
@@ -0,0 +1,28 @@
+C_STRING "c\"\\💩\"" error: unknown character escape
+WHITESPACE "\n"
+C_STRING "c\"\\●\"" error: unknown character escape
+WHITESPACE "\n"
+C_STRING "c\"\\u{_0000}\"" error: invalid start of unicode escape
+WHITESPACE "\n"
+C_STRING "c\"\\u{0000000}\"" error: overlong unicode escape
+WHITESPACE "\n"
+C_STRING "c\"\\u{FFFFFF}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+C_STRING "c\"\\u{ffffff}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+C_STRING "c\"\\u{ffffff}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+C_STRING "c\"\\u{DC00}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+C_STRING "c\"\\u{DDDD}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+C_STRING "c\"\\u{DFFF}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+C_STRING "c\"\\u{D800}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+C_STRING "c\"\\u{DAAA}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+C_STRING "c\"\\u{DBFF}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+C_STRING "c\"\\xы\"" error: invalid character in numeric character escape
+WHITESPACE "\n"
diff --git a/crates/parser/test_data/lexer/err/c_strings.rs b/crates/parser/test_data/lexer/err/c_strings.rs
new file mode 100644
index 0000000000..1b78ffc28a
--- /dev/null
+++ b/crates/parser/test_data/lexer/err/c_strings.rs
@@ -0,0 +1,14 @@
+c"\💩"
+c"\●"
+c"\u{_0000}"
+c"\u{0000000}"
+c"\u{FFFFFF}"
+c"\u{ffffff}"
+c"\u{ffffff}"
+c"\u{DC00}"
+c"\u{DDDD}"
+c"\u{DFFF}"
+c"\u{D800}"
+c"\u{DAAA}"
+c"\u{DBFF}"
+c"\xы"
diff --git a/crates/parser/test_data/lexer/err/strings.rast b/crates/parser/test_data/lexer/err/strings.rast
new file mode 100644
index 0000000000..0cd1747208
--- /dev/null
+++ b/crates/parser/test_data/lexer/err/strings.rast
@@ -0,0 +1,28 @@
+STRING "\"\\💩\"" error: unknown character escape
+WHITESPACE "\n"
+STRING "\"\\●\"" error: unknown character escape
+WHITESPACE "\n"
+STRING "\"\\u{_0000}\"" error: invalid start of unicode escape
+WHITESPACE "\n"
+STRING "\"\\u{0000000}\"" error: overlong unicode escape
+WHITESPACE "\n"
+STRING "\"\\u{FFFFFF}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+STRING "\"\\u{ffffff}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+STRING "\"\\u{ffffff}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+STRING "\"\\u{DC00}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+STRING "\"\\u{DDDD}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+STRING "\"\\u{DFFF}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+STRING "\"\\u{D800}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+STRING "\"\\u{DAAA}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+STRING "\"\\u{DBFF}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+STRING "\"\\xы\"" error: invalid character in numeric character escape
+WHITESPACE "\n"
diff --git a/crates/parser/test_data/lexer/err/strings.rs b/crates/parser/test_data/lexer/err/strings.rs
new file mode 100644
index 0000000000..2499516d3f
--- /dev/null
+++ b/crates/parser/test_data/lexer/err/strings.rs
@@ -0,0 +1,14 @@
+"\💩"
+"\●"
+"\u{_0000}"
+"\u{0000000}"
+"\u{FFFFFF}"
+"\u{ffffff}"
+"\u{ffffff}"
+"\u{DC00}"
+"\u{DDDD}"
+"\u{DFFF}"
+"\u{D800}"
+"\u{DAAA}"
+"\u{DBFF}"
+"\xы"