Unnamed repository; edit this file 'description' to name the repository.
fix: handle errors for string byte string and c_string
Pol Valletbó 2023-10-11
parent 677e6f3 · commit e1aeb7f
-rw-r--r--crates/parser/src/lexed_str.rs42
-rw-r--r--crates/parser/test_data/lexer/err/byte_strings.rast28
-rw-r--r--crates/parser/test_data/lexer/err/byte_strings.rs14
-rw-r--r--crates/parser/test_data/lexer/err/c_strings.rast28
-rw-r--r--crates/parser/test_data/lexer/err/c_strings.rs14
-rw-r--r--crates/parser/test_data/lexer/err/strings.rast28
-rw-r--r--crates/parser/test_data/lexer/err/strings.rs14
7 files changed, 167 insertions, 1 deletions
diff --git a/crates/parser/src/lexed_str.rs b/crates/parser/src/lexed_str.rs
index 84cedc1fa3..c2e25daf37 100644
--- a/crates/parser/src/lexed_str.rs
+++ b/crates/parser/src/lexed_str.rs
@@ -8,7 +8,10 @@
//! Note that these tokens, unlike the tokens we feed into the parser, do
//! include info about comments and whitespace.
-use rustc_dependencies::lexer as rustc_lexer;
+use rustc_dependencies::lexer::{
+ self as rustc_lexer,
+ unescape::{unescape_c_string, unescape_literal},
+};
use std::ops;
@@ -284,18 +287,45 @@ impl<'a> Converter<'a> {
rustc_lexer::LiteralKind::Str { terminated } => {
if !terminated {
err = "Missing trailing `\"` symbol to terminate the string literal";
+ } else {
+ let text = &self.res.text[self.offset + 1..][..len - 1];
+ let i = text.rfind('"').unwrap();
+ let text = &text[..i];
+ rustc_lexer::unescape::unescape_literal(text, Mode::Str, &mut |_, res| {
+ if let Err(e) = res {
+ err = error_to_diagnostic_message(e, Mode::Str);
+ }
+ });
}
STRING
}
rustc_lexer::LiteralKind::ByteStr { terminated } => {
if !terminated {
err = "Missing trailing `\"` symbol to terminate the byte string literal";
+ } else {
+ let text = &self.res.text[self.offset + 2..][..len - 2];
+ let i = text.rfind('"').unwrap();
+ let text = &text[..i];
+ rustc_lexer::unescape::unescape_literal(text, Mode::ByteStr, &mut |_, res| {
+ if let Err(e) = res {
+ err = error_to_diagnostic_message(e, Mode::ByteStr);
+ }
+ })
}
BYTE_STRING
}
rustc_lexer::LiteralKind::CStr { terminated } => {
if !terminated {
err = "Missing trailing `\"` symbol to terminate the string literal";
+ } else {
+ let text = &self.res.text[self.offset + 2..][..len - 2];
+ let i = text.rfind('"').unwrap();
+ let text = &text[..i];
+ rustc_lexer::unescape::unescape_c_string(text, Mode::CStr, &mut |_, res| {
+ if let Err(e) = res {
+ err = error_to_diagnostic_message(e, Mode::CStr);
+ }
+ })
}
C_STRING
}
@@ -360,3 +390,13 @@ fn error_to_diagnostic_message(error: EscapeError, mode: Mode) -> &'static str {
EscapeError::MultipleSkippedLinesWarning => "",
}
}
+
+/// Unescapes `text` as a literal of kind `mode` and returns the diagnostic
+/// message for the most recently reported escape error.
+///
+/// `text` is the literal body without its prefix and surrounding quotes.
+/// `error_message` is the value handed back unchanged when no escape error is
+/// reported, so callers can pass their current message (or `""`).
+///
+/// The message is returned rather than written to the by-value
+/// `error_message` parameter: such a write is local to this function and
+/// would never be seen by the caller.
+fn fill_unescape_string_error<'a>(text: &str, mode: Mode, mut error_message: &'a str) -> &'a str {
+    match mode {
+        // `Str` and `ByteStr` are unescaped with `unescape_literal`, matching
+        // the inline handling of these literal kinds in `Converter`.
+        Mode::Str | Mode::ByteStr => {
+            rustc_lexer::unescape::unescape_literal(text, mode, &mut |_, res| {
+                if let Err(e) = res {
+                    error_message = error_to_diagnostic_message(e, mode);
+                }
+            });
+        }
+        // Every other mode keeps going through `unescape_c_string`, as before.
+        // NOTE(review): only `Mode::CStr` is known to be passed here; confirm
+        // before relying on this arm for further modes.
+        _ => {
+            rustc_lexer::unescape::unescape_c_string(text, mode, &mut |_, res| {
+                if let Err(e) = res {
+                    error_message = error_to_diagnostic_message(e, mode);
+                }
+            });
+        }
+    }
+    error_message
+}
+
diff --git a/crates/parser/test_data/lexer/err/byte_strings.rast b/crates/parser/test_data/lexer/err/byte_strings.rast
new file mode 100644
index 0000000000..e8d8ff8cef
--- /dev/null
+++ b/crates/parser/test_data/lexer/err/byte_strings.rast
@@ -0,0 +1,28 @@
+BYTE_STRING "b\"\\💩\"" error: unknown byte escape
+WHITESPACE "\n"
+BYTE_STRING "b\"\\●\"" error: unknown byte escape
+WHITESPACE "\n"
+BYTE_STRING "b\"\\u{_0000}\"" error: invalid start of unicode escape
+WHITESPACE "\n"
+BYTE_STRING "b\"\\u{0000000}\"" error: overlong unicode escape
+WHITESPACE "\n"
+BYTE_STRING "b\"\\u{FFFFFF}\"" error: unicode escape in byte string
+WHITESPACE "\n"
+BYTE_STRING "b\"\\u{ffffff}\"" error: unicode escape in byte string
+WHITESPACE "\n"
+BYTE_STRING "b\"\\u{ffffff}\"" error: unicode escape in byte string
+WHITESPACE "\n"
+BYTE_STRING "b\"\\u{DC00}\"" error: unicode escape in byte string
+WHITESPACE "\n"
+BYTE_STRING "b\"\\u{DDDD}\"" error: unicode escape in byte string
+WHITESPACE "\n"
+BYTE_STRING "b\"\\u{DFFF}\"" error: unicode escape in byte string
+WHITESPACE "\n"
+BYTE_STRING "b\"\\u{D800}\"" error: unicode escape in byte string
+WHITESPACE "\n"
+BYTE_STRING "b\"\\u{DAAA}\"" error: unicode escape in byte string
+WHITESPACE "\n"
+BYTE_STRING "b\"\\u{DBFF}\"" error: unicode escape in byte string
+WHITESPACE "\n"
+BYTE_STRING "b\"\\xы\"" error: invalid character in numeric character escape
+WHITESPACE "\n"
diff --git a/crates/parser/test_data/lexer/err/byte_strings.rs b/crates/parser/test_data/lexer/err/byte_strings.rs
new file mode 100644
index 0000000000..e74847137b
--- /dev/null
+++ b/crates/parser/test_data/lexer/err/byte_strings.rs
@@ -0,0 +1,14 @@
+b"\💩"
+b"\●"
+b"\u{_0000}"
+b"\u{0000000}"
+b"\u{FFFFFF}"
+b"\u{ffffff}"
+b"\u{ffffff}"
+b"\u{DC00}"
+b"\u{DDDD}"
+b"\u{DFFF}"
+b"\u{D800}"
+b"\u{DAAA}"
+b"\u{DBFF}"
+b"\xы"
diff --git a/crates/parser/test_data/lexer/err/c_strings.rast b/crates/parser/test_data/lexer/err/c_strings.rast
new file mode 100644
index 0000000000..1b4424ba5c
--- /dev/null
+++ b/crates/parser/test_data/lexer/err/c_strings.rast
@@ -0,0 +1,28 @@
+C_STRING "c\"\\💩\"" error: unknown character escape
+WHITESPACE "\n"
+C_STRING "c\"\\●\"" error: unknown character escape
+WHITESPACE "\n"
+C_STRING "c\"\\u{_0000}\"" error: invalid start of unicode escape
+WHITESPACE "\n"
+C_STRING "c\"\\u{0000000}\"" error: overlong unicode escape
+WHITESPACE "\n"
+C_STRING "c\"\\u{FFFFFF}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+C_STRING "c\"\\u{ffffff}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+C_STRING "c\"\\u{ffffff}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+C_STRING "c\"\\u{DC00}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+C_STRING "c\"\\u{DDDD}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+C_STRING "c\"\\u{DFFF}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+C_STRING "c\"\\u{D800}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+C_STRING "c\"\\u{DAAA}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+C_STRING "c\"\\u{DBFF}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+C_STRING "c\"\\xы\"" error: invalid character in numeric character escape
+WHITESPACE "\n"
diff --git a/crates/parser/test_data/lexer/err/c_strings.rs b/crates/parser/test_data/lexer/err/c_strings.rs
new file mode 100644
index 0000000000..1b78ffc28a
--- /dev/null
+++ b/crates/parser/test_data/lexer/err/c_strings.rs
@@ -0,0 +1,14 @@
+c"\💩"
+c"\●"
+c"\u{_0000}"
+c"\u{0000000}"
+c"\u{FFFFFF}"
+c"\u{ffffff}"
+c"\u{ffffff}"
+c"\u{DC00}"
+c"\u{DDDD}"
+c"\u{DFFF}"
+c"\u{D800}"
+c"\u{DAAA}"
+c"\u{DBFF}"
+c"\xы"
diff --git a/crates/parser/test_data/lexer/err/strings.rast b/crates/parser/test_data/lexer/err/strings.rast
new file mode 100644
index 0000000000..0cd1747208
--- /dev/null
+++ b/crates/parser/test_data/lexer/err/strings.rast
@@ -0,0 +1,28 @@
+STRING "\"\\💩\"" error: unknown character escape
+WHITESPACE "\n"
+STRING "\"\\●\"" error: unknown character escape
+WHITESPACE "\n"
+STRING "\"\\u{_0000}\"" error: invalid start of unicode escape
+WHITESPACE "\n"
+STRING "\"\\u{0000000}\"" error: overlong unicode escape
+WHITESPACE "\n"
+STRING "\"\\u{FFFFFF}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+STRING "\"\\u{ffffff}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+STRING "\"\\u{ffffff}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+STRING "\"\\u{DC00}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+STRING "\"\\u{DDDD}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+STRING "\"\\u{DFFF}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+STRING "\"\\u{D800}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+STRING "\"\\u{DAAA}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+STRING "\"\\u{DBFF}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+STRING "\"\\xы\"" error: invalid character in numeric character escape
+WHITESPACE "\n"
diff --git a/crates/parser/test_data/lexer/err/strings.rs b/crates/parser/test_data/lexer/err/strings.rs
new file mode 100644
index 0000000000..2499516d3f
--- /dev/null
+++ b/crates/parser/test_data/lexer/err/strings.rs
@@ -0,0 +1,14 @@
+"\💩"
+"\●"
+"\u{_0000}"
+"\u{0000000}"
+"\u{FFFFFF}"
+"\u{ffffff}"
+"\u{ffffff}"
+"\u{DC00}"
+"\u{DDDD}"
+"\u{DFFF}"
+"\u{D800}"
+"\u{DAAA}"
+"\u{DBFF}"
+"\xы"