Unnamed repository; edit this file 'description' to name the repository.
Auto merge of #15568 - Veykril:upstream-rustc-ap, r=Veykril
Replace format-args parser with upstream fork Turns out we can't bump rustc_abi right now because it got its generics removed accidentally https://github.com/rust-lang/rust/pull/107163
bors 2023-09-07
parent d04cff6 · parent 10b0cd7 · commit 8edd81f
-rw-r--r--Cargo.lock75
-rw-r--r--Cargo.toml7
-rw-r--r--crates/hir-def/Cargo.toml7
-rw-r--r--crates/hir-def/src/hir/format_args.rs3
-rw-r--r--crates/hir-def/src/hir/format_args/parse.rs1023
-rw-r--r--crates/hir-ty/Cargo.toml3
6 files changed, 43 insertions, 1075 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 203d7348be..bd6554bf88 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -541,7 +541,7 @@ dependencies = [
"mbe",
"once_cell",
"profile",
- "ra-ap-rustc_lexer",
+ "ra-ap-rustc_parse_format",
"rustc-hash",
"smallvec",
"stdx",
@@ -1483,16 +1483,36 @@ dependencies = [
]
[[package]]
+name = "ra-ap-rustc_index"
+version = "0.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "07b5fa61d34da18e148dc3a81f654488ea07f40938d8aefb17f8b64bb78c6120"
+dependencies = [
+ "arrayvec",
+ "smallvec",
+]
+
+[[package]]
name = "ra-ap-rustc_lexer"
-version = "0.1.0"
+version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e1c145702ed3f237918e512685185dc8a4d0edc3a5326c63d20361d8ba9b45b3"
+checksum = "f2e2f6b48422e4eed5218277ab7cc9733e60dd8f3167f4f36a49a0cafe4dc195"
dependencies = [
- "unic-emoji-char",
+ "unicode-properties",
"unicode-xid",
]
[[package]]
+name = "ra-ap-rustc_parse_format"
+version = "0.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c3c7369ad01cc79f9e3513c9f6a6326f6b980100e4862a7ac71b9991c88108bb"
+dependencies = [
+ "ra-ap-rustc_index",
+ "ra-ap-rustc_lexer",
+]
+
+[[package]]
name = "rayon"
version = "1.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -2066,47 +2086,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a3e5df347f0bf3ec1d670aad6ca5c6a1859cd9ea61d2113125794654ccced68f"
[[package]]
-name = "unic-char-property"
-version = "0.9.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a8c57a407d9b6fa02b4795eb81c5b6652060a15a7903ea981f3d723e6c0be221"
-dependencies = [
- "unic-char-range",
-]
-
-[[package]]
-name = "unic-char-range"
-version = "0.9.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0398022d5f700414f6b899e10b8348231abf9173fa93144cbc1a43b9793c1fbc"
-
-[[package]]
-name = "unic-common"
-version = "0.9.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "80d7ff825a6a654ee85a63e80f92f054f904f21e7d12da4e22f9834a4aaa35bc"
-
-[[package]]
-name = "unic-emoji-char"
-version = "0.9.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0b07221e68897210270a38bde4babb655869637af0f69407f96053a34f76494d"
-dependencies = [
- "unic-char-property",
- "unic-char-range",
- "unic-ucd-version",
-]
-
-[[package]]
-name = "unic-ucd-version"
-version = "0.9.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "96bd2f2237fe450fcd0a1d2f5f4e91711124f7857ba2e964247776ebeeb7b0c4"
-dependencies = [
- "unic-common",
-]
-
-[[package]]
name = "unicase"
version = "2.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -2137,6 +2116,12 @@ dependencies = [
]
[[package]]
+name = "unicode-properties"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c7f91c8b21fbbaa18853c3d0801c78f4fc94cdb976699bb03e832e75f7fd22f0"
+
+[[package]]
name = "unicode-segmentation"
version = "1.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
diff --git a/Cargo.toml b/Cargo.toml
index e97e58f527..cab88fc18c 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -103,4 +103,9 @@ triomphe = { version = "0.1.8", default-features = false, features = ["std"] }
# can't upgrade due to dashmap depending on 0.12.3 currently
hashbrown = { version = "0.12.3", features = ["inline-more"], default-features = false }
-rustc_lexer = { version = "0.1.0", package = "ra-ap-rustc_lexer" }
+rustc_lexer = { version = "0.10.0", package = "ra-ap-rustc_lexer" }
+rustc_parse_format = { version = "0.10.0", package = "ra-ap-rustc_parse_format", default-features = false }
+
+# Upstream broke this for us so we can't update it
+rustc_abi = { version = "0.0.20221221", package = "hkalbasi-rustc-ap-rustc_abi", default-features = false }
+rustc_index = { version = "0.0.20221221", package = "hkalbasi-rustc-ap-rustc_index", default-features = false }
diff --git a/crates/hir-def/Cargo.toml b/crates/hir-def/Cargo.toml
index 4640ee5140..8cf61ee04d 100644
--- a/crates/hir-def/Cargo.toml
+++ b/crates/hir-def/Cargo.toml
@@ -31,9 +31,10 @@ smallvec.workspace = true
hashbrown.workspace = true
triomphe.workspace = true
-rustc_abi = { version = "0.0.20221221", package = "hkalbasi-rustc-ap-rustc_abi", default-features = false }
-rustc_index = { version = "0.0.20221221", package = "hkalbasi-rustc-ap-rustc_index", default-features = false }
-rustc_lexer = { version = "0.1.0", package = "ra-ap-rustc_lexer" }
+rustc_abi.workspace = true
+rustc_index.workspace = true
+rustc_parse_format.workspace = true
+
# local deps
stdx.workspace = true
diff --git a/crates/hir-def/src/hir/format_args.rs b/crates/hir-def/src/hir/format_args.rs
index d8f8e6026a..75025a984f 100644
--- a/crates/hir-def/src/hir/format_args.rs
+++ b/crates/hir-def/src/hir/format_args.rs
@@ -2,6 +2,7 @@
use std::mem;
use hir_expand::name::Name;
+use rustc_parse_format as parse;
use syntax::{
ast::{self, IsString},
AstToken, SmolStr, TextRange,
@@ -9,8 +10,6 @@ use syntax::{
use crate::hir::ExprId;
-mod parse;
-
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FormatArgs {
pub template: Box<[FormatArgsPiece]>,
diff --git a/crates/hir-def/src/hir/format_args/parse.rs b/crates/hir-def/src/hir/format_args/parse.rs
deleted file mode 100644
index 22efa3883d..0000000000
--- a/crates/hir-def/src/hir/format_args/parse.rs
+++ /dev/null
@@ -1,1023 +0,0 @@
-//! Macro support for format strings
-//!
-//! These structures are used when parsing format strings for the compiler.
-//! Parsing does not happen at runtime: structures of `std::fmt::rt` are
-//! generated instead.
-
-// This is a copy of
-// https://github.com/Veykril/rust/blob/b89d7d6882532686fd90a89cec1a0fd386f0ade3/compiler/rustc_parse_format/src/lib.rs#L999-L1000
-// with the dependency of rustc-data-structures stripped out.
-
-// #![doc(
-// html_root_url = "https://doc.rust-lang.org/nightly/nightly-rustc/",
-// html_playground_url = "https://play.rust-lang.org/",
-// test(attr(deny(warnings)))
-// )]
-// #![deny(rustc::untranslatable_diagnostic)]
-// #![deny(rustc::diagnostic_outside_of_impl)]
-// We want to be able to build this crate with a stable compiler, so no
-// `#![feature]` attributes should be added.
-#![allow(dead_code, unreachable_pub)]
-
-use rustc_lexer::unescape;
-pub use Alignment::*;
-pub use Count::*;
-pub use Piece::*;
-pub use Position::*;
-
-use std::iter;
-use std::str;
-use std::string;
-
-// Note: copied from rustc_span
-/// Range inside of a `Span` used for diagnostics when we only have access to relative positions.
-#[derive(Copy, Clone, PartialEq, Eq, Debug)]
-pub struct InnerSpan {
- pub start: usize,
- pub end: usize,
-}
-
-impl InnerSpan {
- pub fn new(start: usize, end: usize) -> InnerSpan {
- InnerSpan { start, end }
- }
-}
-
-/// The location and before/after width of a character whose width has changed from its source code
-/// representation
-#[derive(Copy, Clone, PartialEq, Eq)]
-pub struct InnerWidthMapping {
- /// Index of the character in the source
- pub position: usize,
- /// The inner width in characters
- pub before: usize,
- /// The transformed width in characters
- pub after: usize,
-}
-
-impl InnerWidthMapping {
- pub fn new(position: usize, before: usize, after: usize) -> InnerWidthMapping {
- InnerWidthMapping { position, before, after }
- }
-}
-
-/// Whether the input string is a literal. If yes, it contains the inner width mappings.
-#[derive(Clone, PartialEq, Eq)]
-enum InputStringKind {
- NotALiteral,
- Literal { width_mappings: Vec<InnerWidthMapping> },
-}
-
-/// The type of format string that we are parsing.
-#[derive(Copy, Clone, Debug, Eq, PartialEq)]
-pub enum ParseMode {
- /// A normal format string as per `format_args!`.
- Format,
- /// An inline assembly template string for `asm!`.
- InlineAsm,
-}
-
-#[derive(Copy, Clone)]
-struct InnerOffset(usize);
-
-impl InnerOffset {
- fn to(self, end: InnerOffset) -> InnerSpan {
- InnerSpan::new(self.0, end.0)
- }
-}
-
-/// A piece is a portion of the format string which represents the next part
-/// to emit. These are emitted as a stream by the `Parser` class.
-#[derive(Clone, Debug, PartialEq)]
-pub enum Piece<'a> {
- /// A literal string which should directly be emitted
- String(&'a str),
- /// This describes that formatting should process the next argument (as
- /// specified inside) for emission.
- NextArgument(Box<Argument<'a>>),
-}
-
-/// Representation of an argument specification.
-#[derive(Copy, Clone, Debug, PartialEq)]
-pub struct Argument<'a> {
- /// Where to find this argument
- pub position: Position<'a>,
- /// The span of the position indicator. Includes any whitespace in implicit
- /// positions (`{ }`).
- pub position_span: InnerSpan,
- /// How to format the argument
- pub format: FormatSpec<'a>,
-}
-
-/// Specification for the formatting of an argument in the format string.
-#[derive(Copy, Clone, Debug, PartialEq)]
-pub struct FormatSpec<'a> {
- /// Optionally specified character to fill alignment with.
- pub fill: Option<char>,
- /// Span of the optionally specified fill character.
- pub fill_span: Option<InnerSpan>,
- /// Optionally specified alignment.
- pub align: Alignment,
- /// The `+` or `-` flag.
- pub sign: Option<Sign>,
- /// The `#` flag.
- pub alternate: bool,
- /// The `0` flag.
- pub zero_pad: bool,
- /// The `x` or `X` flag. (Only for `Debug`.)
- pub debug_hex: Option<DebugHex>,
- /// The integer precision to use.
- pub precision: Count<'a>,
- /// The span of the precision formatting flag (for diagnostics).
- pub precision_span: Option<InnerSpan>,
- /// The string width requested for the resulting format.
- pub width: Count<'a>,
- /// The span of the width formatting flag (for diagnostics).
- pub width_span: Option<InnerSpan>,
- /// The descriptor string representing the name of the format desired for
- /// this argument, this can be empty or any number of characters, although
- /// it is required to be one word.
- pub ty: &'a str,
- /// The span of the descriptor string (for diagnostics).
- pub ty_span: Option<InnerSpan>,
-}
-
-/// Enum describing where an argument for a format can be located.
-#[derive(Copy, Clone, Debug, PartialEq)]
-pub enum Position<'a> {
- /// The argument is implied to be located at an index
- ArgumentImplicitlyIs(usize),
- /// The argument is located at a specific index given in the format,
- ArgumentIs(usize),
- /// The argument has a name.
- ArgumentNamed(&'a str),
-}
-
-impl Position<'_> {
- pub fn index(&self) -> Option<usize> {
- match self {
- ArgumentIs(i, ..) | ArgumentImplicitlyIs(i) => Some(*i),
- _ => None,
- }
- }
-}
-
-/// Enum of alignments which are supported.
-#[derive(Copy, Clone, Debug, PartialEq)]
-pub enum Alignment {
- /// The value will be aligned to the left.
- AlignLeft,
- /// The value will be aligned to the right.
- AlignRight,
- /// The value will be aligned in the center.
- AlignCenter,
- /// The value will take on a default alignment.
- AlignUnknown,
-}
-
-/// Enum for the sign flags.
-#[derive(Copy, Clone, Debug, PartialEq)]
-pub enum Sign {
- /// The `+` flag.
- Plus,
- /// The `-` flag.
- Minus,
-}
-
-/// Enum for the debug hex flags.
-#[derive(Copy, Clone, Debug, PartialEq)]
-pub enum DebugHex {
- /// The `x` flag in `{:x?}`.
- Lower,
- /// The `X` flag in `{:X?}`.
- Upper,
-}
-
-/// A count is used for the precision and width parameters of an integer, and
-/// can reference either an argument or a literal integer.
-#[derive(Copy, Clone, Debug, PartialEq)]
-pub enum Count<'a> {
- /// The count is specified explicitly.
- CountIs(usize),
- /// The count is specified by the argument with the given name.
- CountIsName(&'a str, InnerSpan),
- /// The count is specified by the argument at the given index.
- CountIsParam(usize),
- /// The count is specified by a star (like in `{:.*}`) that refers to the argument at the given index.
- CountIsStar(usize),
- /// The count is implied and cannot be explicitly specified.
- CountImplied,
-}
-
-pub struct ParseError {
- pub description: string::String,
- pub note: Option<string::String>,
- pub label: string::String,
- pub span: InnerSpan,
- pub secondary_label: Option<(string::String, InnerSpan)>,
- pub should_be_replaced_with_positional_argument: bool,
-}
-
-/// The parser structure for interpreting the input format string. This is
-/// modeled as an iterator over `Piece` structures to form a stream of tokens
-/// being output.
-///
-/// This is a recursive-descent parser for the sake of simplicity, and if
-/// necessary there's probably lots of room for improvement performance-wise.
-pub struct Parser<'a> {
- mode: ParseMode,
- input: &'a str,
- cur: iter::Peekable<str::CharIndices<'a>>,
- /// Error messages accumulated during parsing
- pub errors: Vec<ParseError>,
- /// Current position of implicit positional argument pointer
- pub curarg: usize,
- /// `Some(raw count)` when the string is "raw", used to position spans correctly
- style: Option<usize>,
- /// Start and end byte offset of every successfully parsed argument
- pub arg_places: Vec<InnerSpan>,
- /// Characters whose length has been changed from their in-code representation
- width_map: Vec<InnerWidthMapping>,
- /// Span of the last opening brace seen, used for error reporting
- last_opening_brace: Option<InnerSpan>,
- /// Whether the source string is comes from `println!` as opposed to `format!` or `print!`
- append_newline: bool,
- /// Whether this formatting string was written directly in the source. This controls whether we
- /// can use spans to refer into it and give better error messages.
- /// N.B: This does _not_ control whether implicit argument captures can be used.
- pub is_source_literal: bool,
- /// Start position of the current line.
- cur_line_start: usize,
- /// Start and end byte offset of every line of the format string. Excludes
- /// newline characters and leading whitespace.
- pub line_spans: Vec<InnerSpan>,
-}
-
-impl<'a> Iterator for Parser<'a> {
- type Item = Piece<'a>;
-
- fn next(&mut self) -> Option<Piece<'a>> {
- if let Some(&(pos, c)) = self.cur.peek() {
- match c {
- '{' => {
- let curr_last_brace = self.last_opening_brace;
- let byte_pos = self.to_span_index(pos);
- let lbrace_end = InnerOffset(byte_pos.0 + self.to_span_width(pos));
- self.last_opening_brace = Some(byte_pos.to(lbrace_end));
- self.cur.next();
- if self.consume('{') {
- self.last_opening_brace = curr_last_brace;
-
- Some(String(self.string(pos + 1)))
- } else {
- let arg = self.argument(lbrace_end);
- if let Some(rbrace_pos) = self.consume_closing_brace(&arg) {
- if self.is_source_literal {
- let lbrace_byte_pos = self.to_span_index(pos);
- let rbrace_byte_pos = self.to_span_index(rbrace_pos);
-
- let width = self.to_span_width(rbrace_pos);
-
- self.arg_places.push(
- lbrace_byte_pos.to(InnerOffset(rbrace_byte_pos.0 + width)),
- );
- }
- } else {
- if let Some(&(_, maybe)) = self.cur.peek() {
- if maybe == '?' {
- self.suggest_format();
- } else {
- self.suggest_positional_arg_instead_of_captured_arg(arg);
- }
- }
- }
- Some(NextArgument(Box::new(arg)))
- }
- }
- '}' => {
- self.cur.next();
- if self.consume('}') {
- Some(String(self.string(pos + 1)))
- } else {
- let err_pos = self.to_span_index(pos);
- self.err_with_note(
- "unmatched `}` found",
- "unmatched `}`",
- "if you intended to print `}`, you can escape it using `}}`",
- err_pos.to(err_pos),
- );
- None
- }
- }
- _ => Some(String(self.string(pos))),
- }
- } else {
- if self.is_source_literal {
- let span = self.span(self.cur_line_start, self.input.len());
- if self.line_spans.last() != Some(&span) {
- self.line_spans.push(span);
- }
- }
- None
- }
- }
-}
-
-impl<'a> Parser<'a> {
- /// Creates a new parser for the given format string
- pub fn new(
- s: &'a str,
- style: Option<usize>,
- snippet: Option<string::String>,
- append_newline: bool,
- mode: ParseMode,
- ) -> Parser<'a> {
- let input_string_kind = find_width_map_from_snippet(s, snippet, style);
- let (width_map, is_source_literal) = match input_string_kind {
- InputStringKind::Literal { width_mappings } => (width_mappings, true),
- InputStringKind::NotALiteral => (Vec::new(), false),
- };
-
- Parser {
- mode,
- input: s,
- cur: s.char_indices().peekable(),
- errors: vec![],
- curarg: 0,
- style,
- arg_places: vec![],
- width_map,
- last_opening_brace: None,
- append_newline,
- is_source_literal,
- cur_line_start: 0,
- line_spans: vec![],
- }
- }
-
- /// Notifies of an error. The message doesn't actually need to be of type
- /// String, but I think it does when this eventually uses conditions so it
- /// might as well start using it now.
- fn err<S1: Into<string::String>, S2: Into<string::String>>(
- &mut self,
- description: S1,
- label: S2,
- span: InnerSpan,
- ) {
- self.errors.push(ParseError {
- description: description.into(),
- note: None,
- label: label.into(),
- span,
- secondary_label: None,
- should_be_replaced_with_positional_argument: false,
- });
- }
-
- /// Notifies of an error. The message doesn't actually need to be of type
- /// String, but I think it does when this eventually uses conditions so it
- /// might as well start using it now.
- fn err_with_note<
- S1: Into<string::String>,
- S2: Into<string::String>,
- S3: Into<string::String>,
- >(
- &mut self,
- description: S1,
- label: S2,
- note: S3,
- span: InnerSpan,
- ) {
- self.errors.push(ParseError {
- description: description.into(),
- note: Some(note.into()),
- label: label.into(),
- span,
- secondary_label: None,
- should_be_replaced_with_positional_argument: false,
- });
- }
-
- /// Optionally consumes the specified character. If the character is not at
- /// the current position, then the current iterator isn't moved and `false` is
- /// returned, otherwise the character is consumed and `true` is returned.
- fn consume(&mut self, c: char) -> bool {
- self.consume_pos(c).is_some()
- }
-
- /// Optionally consumes the specified character. If the character is not at
- /// the current position, then the current iterator isn't moved and `None` is
- /// returned, otherwise the character is consumed and the current position is
- /// returned.
- fn consume_pos(&mut self, c: char) -> Option<usize> {
- if let Some(&(pos, maybe)) = self.cur.peek() {
- if c == maybe {
- self.cur.next();
- return Some(pos);
- }
- }
- None
- }
-
- fn remap_pos(&self, mut pos: usize) -> InnerOffset {
- for width in &self.width_map {
- if pos > width.position {
- pos += width.before - width.after;
- } else if pos == width.position && width.after == 0 {
- pos += width.before;
- } else {
- break;
- }
- }
-
- InnerOffset(pos)
- }
-
- fn to_span_index(&self, pos: usize) -> InnerOffset {
- // This handles the raw string case, the raw argument is the number of #
- // in r###"..."### (we need to add one because of the `r`).
- let raw = self.style.map_or(0, |raw| raw + 1);
- let pos = self.remap_pos(pos);
- InnerOffset(raw + pos.0 + 1)
- }
-
- fn to_span_width(&self, pos: usize) -> usize {
- let pos = self.remap_pos(pos);
- match self.width_map.iter().find(|w| w.position == pos.0) {
- Some(w) => w.before,
- None => 1,
- }
- }
-
- fn span(&self, start_pos: usize, end_pos: usize) -> InnerSpan {
- let start = self.to_span_index(start_pos);
- let end = self.to_span_index(end_pos);
- start.to(end)
- }
-
- /// Forces consumption of the specified character. If the character is not
- /// found, an error is emitted.
- fn consume_closing_brace(&mut self, arg: &Argument<'_>) -> Option<usize> {
- self.ws();
-
- let pos;
- let description;
-
- if let Some(&(peek_pos, maybe)) = self.cur.peek() {
- if maybe == '}' {
- self.cur.next();
- return Some(peek_pos);
- }
-
- pos = peek_pos;
- description = format!("expected `'}}'`, found `{maybe:?}`");
- } else {
- description = "expected `'}'` but string was terminated".to_owned();
- // point at closing `"`
- pos = self.input.len() - if self.append_newline { 1 } else { 0 };
- }
-
- let pos = self.to_span_index(pos);
-
- let label = "expected `'}'`".to_owned();
- let (note, secondary_label) = if arg.format.fill == Some('}') {
- (
- Some("the character `'}'` is interpreted as a fill character because of the `:` that precedes it".to_owned()),
- arg.format.fill_span.map(|sp| ("this is not interpreted as a formatting closing brace".to_owned(), sp)),
- )
- } else {
- (
- Some("if you intended to print `{`, you can escape it using `{{`".to_owned()),
- self.last_opening_brace.map(|sp| ("because of this opening brace".to_owned(), sp)),
- )
- };
-
- self.errors.push(ParseError {
- description,
- note,
- label,
- span: pos.to(pos),
- secondary_label,
- should_be_replaced_with_positional_argument: false,
- });
-
- None
- }
-
- /// Consumes all whitespace characters until the first non-whitespace character
- fn ws(&mut self) {
- while let Some(&(_, c)) = self.cur.peek() {
- if c.is_whitespace() {
- self.cur.next();
- } else {
- break;
- }
- }
- }
-
- /// Parses all of a string which is to be considered a "raw literal" in a
- /// format string. This is everything outside of the braces.
- fn string(&mut self, start: usize) -> &'a str {
- // we may not consume the character, peek the iterator
- while let Some(&(pos, c)) = self.cur.peek() {
- match c {
- '{' | '}' => {
- return &self.input[start..pos];
- }
- '\n' if self.is_source_literal => {
- self.line_spans.push(self.span(self.cur_line_start, pos));
- self.cur_line_start = pos + 1;
- self.cur.next();
- }
- _ => {
- if self.is_source_literal && pos == self.cur_line_start && c.is_whitespace() {
- self.cur_line_start = pos + c.len_utf8();
- }
- self.cur.next();
- }
- }
- }
- &self.input[start..self.input.len()]
- }
-
- /// Parses an `Argument` structure, or what's contained within braces inside the format string.
- fn argument(&mut self, start: InnerOffset) -> Argument<'a> {
- let pos = self.position();
-
- let end = self
- .cur
- .clone()
- .find(|(_, ch)| !ch.is_whitespace())
- .map_or(start, |(end, _)| self.to_span_index(end));
- let position_span = start.to(end);
-
- let format = match self.mode {
- ParseMode::Format => self.format(),
- ParseMode::InlineAsm => self.inline_asm(),
- };
-
- // Resolve position after parsing format spec.
- let pos = match pos {
- Some(position) => position,
- None => {
- let i = self.curarg;
- self.curarg += 1;
- ArgumentImplicitlyIs(i)
- }
- };
-
- Argument { position: pos, position_span, format }
- }
-
- /// Parses a positional argument for a format. This could either be an
- /// integer index of an argument, a named argument, or a blank string.
- /// Returns `Some(parsed_position)` if the position is not implicitly
- /// consuming a macro argument, `None` if it's the case.
- fn position(&mut self) -> Option<Position<'a>> {
- if let Some(i) = self.integer() {
- Some(ArgumentIs(i))
- } else {
- match self.cur.peek() {
- Some(&(_, c)) if rustc_lexer::is_id_start(c) => Some(ArgumentNamed(self.word())),
-
- // This is an `ArgumentNext`.
- // Record the fact and do the resolution after parsing the
- // format spec, to make things like `{:.*}` work.
- _ => None,
- }
- }
- }
-
- fn current_pos(&mut self) -> usize {
- if let Some(&(pos, _)) = self.cur.peek() {
- pos
- } else {
- self.input.len()
- }
- }
-
- /// Parses a format specifier at the current position, returning all of the
- /// relevant information in the `FormatSpec` struct.
- fn format(&mut self) -> FormatSpec<'a> {
- let mut spec = FormatSpec {
- fill: None,
- fill_span: None,
- align: AlignUnknown,
- sign: None,
- alternate: false,
- zero_pad: false,
- debug_hex: None,
- precision: CountImplied,
- precision_span: None,
- width: CountImplied,
- width_span: None,
- ty: &self.input[..0],
- ty_span: None,
- };
- if !self.consume(':') {
- return spec;
- }
-
- // fill character
- if let Some(&(idx, c)) = self.cur.peek() {
- if let Some((_, '>' | '<' | '^')) = self.cur.clone().nth(1) {
- spec.fill = Some(c);
- spec.fill_span = Some(self.span(idx, idx + 1));
- self.cur.next();
- }
- }
- // Alignment
- if self.consume('<') {
- spec.align = AlignLeft;
- } else if self.consume('>') {
- spec.align = AlignRight;
- } else if self.consume('^') {
- spec.align = AlignCenter;
- }
- // Sign flags
- if self.consume('+') {
- spec.sign = Some(Sign::Plus);
- } else if self.consume('-') {
- spec.sign = Some(Sign::Minus);
- }
- // Alternate marker
- if self.consume('#') {
- spec.alternate = true;
- }
- // Width and precision
- let mut havewidth = false;
-
- if self.consume('0') {
- // small ambiguity with '0$' as a format string. In theory this is a
- // '0' flag and then an ill-formatted format string with just a '$'
- // and no count, but this is better if we instead interpret this as
- // no '0' flag and '0$' as the width instead.
- if let Some(end) = self.consume_pos('$') {
- spec.width = CountIsParam(0);
- spec.width_span = Some(self.span(end - 1, end + 1));
- havewidth = true;
- } else {
- spec.zero_pad = true;
- }
- }
-
- if !havewidth {
- let start = self.current_pos();
- spec.width = self.count(start);
- if spec.width != CountImplied {
- let end = self.current_pos();
- spec.width_span = Some(self.span(start, end));
- }
- }
-
- if let Some(start) = self.consume_pos('.') {
- if self.consume('*') {
- // Resolve `CountIsNextParam`.
- // We can do this immediately as `position` is resolved later.
- let i = self.curarg;
- self.curarg += 1;
- spec.precision = CountIsStar(i);
- } else {
- spec.precision = self.count(start + 1);
- }
- let end = self.current_pos();
- spec.precision_span = Some(self.span(start, end));
- }
-
- let ty_span_start = self.current_pos();
- // Optional radix followed by the actual format specifier
- if self.consume('x') {
- if self.consume('?') {
- spec.debug_hex = Some(DebugHex::Lower);
- spec.ty = "?";
- } else {
- spec.ty = "x";
- }
- } else if self.consume('X') {
- if self.consume('?') {
- spec.debug_hex = Some(DebugHex::Upper);
- spec.ty = "?";
- } else {
- spec.ty = "X";
- }
- } else if self.consume('?') {
- spec.ty = "?";
- } else {
- spec.ty = self.word();
- if !spec.ty.is_empty() {
- let ty_span_end = self.current_pos();
- spec.ty_span = Some(self.span(ty_span_start, ty_span_end));
- }
- }
- spec
- }
-
- /// Parses an inline assembly template modifier at the current position, returning the modifier
- /// in the `ty` field of the `FormatSpec` struct.
- fn inline_asm(&mut self) -> FormatSpec<'a> {
- let mut spec = FormatSpec {
- fill: None,
- fill_span: None,
- align: AlignUnknown,
- sign: None,
- alternate: false,
- zero_pad: false,
- debug_hex: None,
- precision: CountImplied,
- precision_span: None,
- width: CountImplied,
- width_span: None,
- ty: &self.input[..0],
- ty_span: None,
- };
- if !self.consume(':') {
- return spec;
- }
-
- let ty_span_start = self.current_pos();
- spec.ty = self.word();
- if !spec.ty.is_empty() {
- let ty_span_end = self.current_pos();
- spec.ty_span = Some(self.span(ty_span_start, ty_span_end));
- }
-
- spec
- }
-
- /// Parses a `Count` parameter at the current position. This does not check
- /// for 'CountIsNextParam' because that is only used in precision, not
- /// width.
- fn count(&mut self, start: usize) -> Count<'a> {
- if let Some(i) = self.integer() {
- if self.consume('$') {
- CountIsParam(i)
- } else {
- CountIs(i)
- }
- } else {
- let tmp = self.cur.clone();
- let word = self.word();
- if word.is_empty() {
- self.cur = tmp;
- CountImplied
- } else if let Some(end) = self.consume_pos('$') {
- let name_span = self.span(start, end);
- CountIsName(word, name_span)
- } else {
- self.cur = tmp;
- CountImplied
- }
- }
- }
-
- /// Parses a word starting at the current position. A word is the same as
- /// Rust identifier, except that it can't start with `_` character.
- fn word(&mut self) -> &'a str {
- let start = match self.cur.peek() {
- Some(&(pos, c)) if rustc_lexer::is_id_start(c) => {
- self.cur.next();
- pos
- }
- _ => {
- return "";
- }
- };
- let mut end = None;
- while let Some(&(pos, c)) = self.cur.peek() {
- if rustc_lexer::is_id_continue(c) {
- self.cur.next();
- } else {
- end = Some(pos);
- break;
- }
- }
- let end = end.unwrap_or(self.input.len());
- let word = &self.input[start..end];
- if word == "_" {
- self.err_with_note(
- "invalid argument name `_`",
- "invalid argument name",
- "argument name cannot be a single underscore",
- self.span(start, end),
- );
- }
- word
- }
-
- fn integer(&mut self) -> Option<usize> {
- let mut cur: usize = 0;
- let mut found = false;
- let mut overflow = false;
- let start = self.current_pos();
- while let Some(&(_, c)) = self.cur.peek() {
- if let Some(i) = c.to_digit(10) {
- let (tmp, mul_overflow) = cur.overflowing_mul(10);
- let (tmp, add_overflow) = tmp.overflowing_add(i as usize);
- if mul_overflow || add_overflow {
- overflow = true;
- }
- cur = tmp;
- found = true;
- self.cur.next();
- } else {
- break;
- }
- }
-
- if overflow {
- let end = self.current_pos();
- let overflowed_int = &self.input[start..end];
- self.err(
- format!(
- "integer `{}` does not fit into the type `usize` whose range is `0..={}`",
- overflowed_int,
- usize::MAX
- ),
- "integer out of range for `usize`",
- self.span(start, end),
- );
- }
-
- found.then_some(cur)
- }
-
- fn suggest_format(&mut self) {
- if let (Some(pos), Some(_)) = (self.consume_pos('?'), self.consume_pos(':')) {
- let word = self.word();
- let _end = self.current_pos();
- let pos = self.to_span_index(pos);
- self.errors.insert(
- 0,
- ParseError {
- description: "expected format parameter to occur after `:`".to_owned(),
- note: Some(format!("`?` comes after `:`, try `{}:{}` instead", word, "?")),
- label: "expected `?` to occur after `:`".to_owned(),
- span: pos.to(pos),
- secondary_label: None,
- should_be_replaced_with_positional_argument: false,
- },
- );
- }
- }
-
- fn suggest_positional_arg_instead_of_captured_arg(&mut self, arg: Argument<'a>) {
- if let Some(end) = self.consume_pos('.') {
- let byte_pos = self.to_span_index(end);
- let start = InnerOffset(byte_pos.0 + 1);
- let field = self.argument(start);
- // We can only parse `foo.bar` field access, any deeper nesting,
- // or another type of expression, like method calls, are not supported
- if !self.consume('}') {
- return;
- }
- if let ArgumentNamed(_) = arg.position {
- if let ArgumentNamed(_) = field.position {
- self.errors.insert(
- 0,
- ParseError {
- description: "field access isn't supported".to_string(),
- note: None,
- label: "not supported".to_string(),
- span: InnerSpan::new(arg.position_span.start, field.position_span.end),
- secondary_label: None,
- should_be_replaced_with_positional_argument: true,
- },
- );
- }
- }
- }
- }
-}
-
-/// Finds the indices of all characters that have been processed and differ between the actual
-/// written code (code snippet) and the `InternedString` that gets processed in the `Parser`
-/// in order to properly synthesise the intra-string `Span`s for error diagnostics.
-fn find_width_map_from_snippet(
- input: &str,
- snippet: Option<string::String>,
- str_style: Option<usize>,
-) -> InputStringKind {
- let snippet = match snippet {
- Some(ref s) if s.starts_with('"') || s.starts_with("r\"") || s.starts_with("r#") => s,
- _ => return InputStringKind::NotALiteral,
- };
-
- if str_style.is_some() {
- return InputStringKind::Literal { width_mappings: Vec::new() };
- }
-
- // Strip quotes.
- let snippet = &snippet[1..snippet.len() - 1];
-
- // Macros like `println` add a newline at the end. That technically doesn't make them "literals" anymore, but it's fine
- // since we will never need to point our spans there, so we lie about it here by ignoring it.
- // Since there might actually be newlines in the source code, we need to normalize away all trailing newlines.
- // If we only trimmed it off the input, `format!("\n")` would cause a mismatch as here we they actually match up.
- // Alternatively, we could just count the trailing newlines and only trim one from the input if they don't match up.
- let input_no_nl = input.trim_end_matches('\n');
- let Some(unescaped) = unescape_string(snippet) else {
- return InputStringKind::NotALiteral;
- };
-
- let unescaped_no_nl = unescaped.trim_end_matches('\n');
-
- if unescaped_no_nl != input_no_nl {
- // The source string that we're pointing at isn't our input, so spans pointing at it will be incorrect.
- // This can for example happen with proc macros that respan generated literals.
- return InputStringKind::NotALiteral;
- }
-
- let mut s = snippet.char_indices();
- let mut width_mappings = vec![];
- while let Some((pos, c)) = s.next() {
- match (c, s.clone().next()) {
- // skip whitespace and empty lines ending in '\\'
- ('\\', Some((_, '\n'))) => {
- let _ = s.next();
- let mut width = 2;
-
- while let Some((_, c)) = s.clone().next() {
- if matches!(c, ' ' | '\n' | '\t') {
- width += 1;
- let _ = s.next();
- } else {
- break;
- }
- }
-
- width_mappings.push(InnerWidthMapping::new(pos, width, 0));
- }
- ('\\', Some((_, 'n' | 't' | 'r' | '0' | '\\' | '\'' | '\"'))) => {
- width_mappings.push(InnerWidthMapping::new(pos, 2, 1));
- let _ = s.next();
- }
- ('\\', Some((_, 'x'))) => {
- // consume `\xAB` literal
- s.nth(2);
- width_mappings.push(InnerWidthMapping::new(pos, 4, 1));
- }
- ('\\', Some((_, 'u'))) => {
- let mut width = 2;
- let _ = s.next();
-
- if let Some((_, next_c)) = s.next() {
- if next_c == '{' {
- // consume up to 6 hexanumeric chars
- let digits_len =
- s.clone().take(6).take_while(|(_, c)| c.is_digit(16)).count();
-
- let len_utf8 = s
- .as_str()
- .get(..digits_len)
- .and_then(|digits| u32::from_str_radix(digits, 16).ok())
- .and_then(char::from_u32)
- .map_or(1, char::len_utf8);
-
- // Skip the digits, for chars that encode to more than 1 utf-8 byte
- // exclude as many digits as it is greater than 1 byte
- //
- // So for a 3 byte character, exclude 2 digits
- let required_skips = digits_len.saturating_sub(len_utf8.saturating_sub(1));
-
- // skip '{' and '}' also
- width += required_skips + 2;
-
- s.nth(digits_len);
- } else if next_c.is_digit(16) {
- width += 1;
-
- // We suggest adding `{` and `}` when appropriate, accept it here as if
- // it were correct
- let mut i = 0; // consume up to 6 hexanumeric chars
- while let (Some((_, c)), _) = (s.next(), i < 6) {
- if c.is_digit(16) {
- width += 1;
- } else {
- break;
- }
- i += 1;
- }
- }
- }
-
- width_mappings.push(InnerWidthMapping::new(pos, width, 1));
- }
- _ => {}
- }
- }
-
- InputStringKind::Literal { width_mappings }
-}
-
-fn unescape_string(string: &str) -> Option<string::String> {
- let mut buf = string::String::new();
- let mut ok = true;
- unescape::unescape_literal(string, unescape::Mode::Str, &mut |_, unescaped_char| {
- match unescaped_char {
- Ok(c) => buf.push(c),
- Err(_) => ok = false,
- }
- });
-
- ok.then_some(buf)
-}
diff --git a/crates/hir-ty/Cargo.toml b/crates/hir-ty/Cargo.toml
index abc19d63ab..b95ae05ccd 100644
--- a/crates/hir-ty/Cargo.toml
+++ b/crates/hir-ty/Cargo.toml
@@ -32,7 +32,8 @@ once_cell = "1.17.0"
triomphe.workspace = true
nohash-hasher.workspace = true
typed-arena = "2.0.1"
-rustc_index = { version = "0.0.20221221", package = "hkalbasi-rustc-ap-rustc_index", default-features = false }
+
+rustc_index.workspace = true
# local deps
stdx.workspace = true