Unnamed repository; edit this file 'description' to name the repository.
Diffstat (limited to 'helix-core/src/command_line.rs')
| -rw-r--r-- | helix-core/src/command_line.rs | 1279 |
1 files changed, 0 insertions, 1279 deletions
diff --git a/helix-core/src/command_line.rs b/helix-core/src/command_line.rs deleted file mode 100644 index 8e209d61..00000000 --- a/helix-core/src/command_line.rs +++ /dev/null @@ -1,1279 +0,0 @@ -//! Types and parsing code for command mode (`:`) input. -//! -//! Command line parsing is done in steps: -//! -//! * The `Tokenizer` iterator returns `Token`s from the command line input naively - without -//! accounting for a command's signature. -//! * When executing a command (pressing `<ret>` in command mode), tokens are expanded with -//! information from the editor like the current cursor line or column. Otherwise the tokens -//! are unwrapped to their inner content. -//! * `Args` interprets the contents (potentially expanded) as flags or positional arguments. -//! When executing a command, `Args` performs validations like checking the number of positional -//! arguments supplied and whether duplicate or unknown flags were supplied. -//! -//! `Args` is the interface used by typable command implementations. `Args` may be treated as a -//! slice of `Cow<str>` or `&str` to access positional arguments, for example `for arg in args` -//! iterates over positional args (never flags) and `&args[0]` always corresponds to the first -//! positional. Use `Args::has_flag` and `Args::get_flag` to read any specified flags. -//! -//! `Args` and `Tokenizer` are intertwined. `Args` may ask the `Tokenizer` for the rest of the -//! command line as a single token after the configured number of positionals has been reached -//! (according to `raw_after`). This is used for the custom parsing in `:set-option` and -//! `:toggle-option` for example. Outside of executing commands, the `Tokenizer` can be used -//! directly to interpret a string according to the regular tokenization rules. -//! -//! This module also defines structs for configuring the parsing of the command line for a -//! command. See `Flag` and `Signature`. - -use std::{borrow::Cow, collections::HashMap, error::Error, fmt, ops, slice, vec}; - -/// Splits a command line into the command and arguments parts. -/// -/// The third tuple member describes whether the command part is finished. When this boolean is -/// true the completion code for the command line should complete command names, otherwise -/// command arguments. -pub fn split(line: &str) -> (&str, &str, bool) { - const SEPARATOR_PATTERN: [char; 2] = [' ', '\t']; - - let (command, rest) = line.split_once(SEPARATOR_PATTERN).unwrap_or((line, "")); - - let complete_command = - command.is_empty() || (rest.trim().is_empty() && !line.ends_with(SEPARATOR_PATTERN)); - - (command, rest, complete_command) -} - -/// A Unix-like flag that a command may accept. -/// -/// For example the `:sort` command accepts a `--reverse` (or `-r` for shorthand) boolean flag -/// which controls the direction of sorting. Flags may accept an argument by setting the -/// `completions` field to `Some`. -#[derive(Debug, Clone, Copy)] -pub struct Flag { - /// The name of the flag. - /// - /// This value is also used to construct the "longhand" version of the flag. For example a - /// flag with a name "reverse" has a longhand `--reverse`. - /// - /// This value should be supplied when reading a flag out of the [Args] with [Args::get_flag] - /// and [Args::has_flag]. The `:sort` command implementation for example should ask for - /// `args.has_flag("reverse")`. - pub name: &'static str, - /// The character that can be used as a shorthand for the flag, optionally. - /// - /// For example a flag like "reverse" mentioned above might take an alias `Some('r')` to - /// allow specifying the flag as `-r`. - pub alias: Option<char>, - pub doc: &'static str, - /// The completion values to use when specifying an argument for a flag. - /// - /// This should be set to `None` for boolean flags and `Some(&["foo", "bar", "baz"])` for - /// example for flags which accept options, with the strings corresponding to values that - /// should be shown in completion. - pub completions: Option<&'static [&'static str]>, -} - -impl Flag { - // This allows defining flags with the `..Flag::DEFAULT` shorthand. The `name` and `doc` - // fields should always be overwritten. - pub const DEFAULT: Self = Self { - name: "", - doc: "", - alias: None, - completions: None, - }; -} - -/// A description of how a command's input should be handled. -/// -/// Each typable command defines a signature (with the help of `Signature::DEFAULT`) at least to -/// declare how many positional arguments it accepts. Command flags are also declared in this -/// struct. The `raw_after` option may be set optionally to avoid evaluating quotes in parts of -/// the command line (useful for shell commands for example). -#[derive(Debug, Clone, Copy)] -#[allow(clippy::manual_non_exhaustive)] -pub struct Signature { - /// The minimum and (optionally) maximum number of positional arguments a command may take. - /// - /// For example accepting exactly one positional can be specified with `(1, Some(1))` while - /// accepting zero-or-more positionals can be specified as `(0, None)`. - /// - /// The number of positionals is checked when hitting `<ret>` in command mode. If the actual - /// number of positionals is outside the declared range then the command is not executed and - /// an error is shown instead. For example `:write` accepts zero or one positional arguments - /// (`(0, Some(1))`). A command line like `:write a.txt b.txt` is outside the declared range - /// and is not accepted. - pub positionals: (usize, Option<usize>), - /// The number of **positional** arguments for the parser to read with normal quoting rules. - /// - /// Once the number has been exceeded then the tokenizer returns the rest of the input as a - /// `TokenKind::Expand` token (see `Tokenizer::rest`), meaning that quoting rules do not apply - /// and none of the remaining text may be treated as a flag. - /// - /// If this is set to `None` then the entire command line is parsed with normal quoting and - /// flag rules. - /// - /// A good example use-case for this option is `:toggle-option` which sets `Some(1)`. - /// Everything up to the first positional argument is interpreted according to normal rules - /// and the rest of the input is parsed "raw". This allows `:toggle-option` to perform custom - /// parsing on the rest of the input - namely parsing complicated values as a JSON stream. - /// `:toggle-option` could accept a flag in the future. If so, the flag would need to come - /// before the first positional argument. - /// - /// Consider these lines for `:toggle-option` which sets `Some(1)`: - /// - /// * `:toggle foo` has one positional "foo" and no flags. - /// * `:toggle foo bar` has two positionals. Expansions for `bar` are evaluated but quotes - /// and anything that looks like a flag are treated literally. - /// * `:toggle foo --bar` has two positionals: `["foo", "--bar"]`. `--bar` is not considered - /// to be a flag because it comes after the first positional. - /// * `:toggle --bar foo` has one positional "foo" and one flag "--bar". - /// * `:toggle --bar foo --baz` has two positionals `["foo", "--baz"]` and one flag "--bar". - pub raw_after: Option<u8>, - /// A set of flags that a command may accept. - /// - /// See the `Flag` struct for more info. - pub flags: &'static [Flag], - /// Do not set this field. Use `..Signature::DEFAULT` to construct a `Signature` instead. - // This field allows adding new fields later with minimal code changes. This works like a - // `#[non_exhaustive]` annotation except that it supports the `..Signature::DEFAULT` - // shorthand. - pub _dummy: (), -} - -impl Signature { - // This allows defining signatures with the `..Signature::DEFAULT` shorthand. The - // `positionals` field should always be overwritten. - pub const DEFAULT: Self = Self { - positionals: (0, None), - raw_after: None, - flags: &[], - _dummy: (), - }; - - fn check_positional_count(&self, actual: usize) -> Result<(), ParseArgsError<'static>> { - let (min, max) = self.positionals; - if min <= actual && max.unwrap_or(usize::MAX) >= actual { - Ok(()) - } else { - Err(ParseArgsError::WrongPositionalCount { min, max, actual }) - } - } -} - -#[derive(Debug, PartialEq, Eq)] -pub enum ParseArgsError<'a> { - WrongPositionalCount { - min: usize, - max: Option<usize>, - actual: usize, - }, - UnterminatedToken { - token: Token<'a>, - }, - DuplicatedFlag { - flag: &'static str, - }, - UnknownFlag { - text: Cow<'a, str>, - }, - FlagMissingArgument { - flag: &'static str, - }, - MissingExpansionDelimiter { - expansion: &'a str, - }, - UnknownExpansion { - kind: &'a str, - }, -} - -impl fmt::Display for ParseArgsError<'_> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::WrongPositionalCount { min, max, actual } => { - write!(f, "expected ")?; - let maybe_plural = |n| if n == 1 { "" } else { "s" }; - match (min, max) { - (0, Some(0)) => write!(f, "no arguments")?, - (min, Some(max)) if min == max => { - write!(f, "exactly {min} argument{}", maybe_plural(*min))? - } - (min, _) if actual < min => { - write!(f, "at least {min} argument{}", maybe_plural(*min))? - } - (_, Some(max)) if actual > max => { - write!(f, "at most {max} argument{}", maybe_plural(*max))? - } - // `actual` must be either less than `min` or greater than `max` for this type - // to be constructed. - _ => unreachable!(), - } - - write!(f, ", got {actual}") - } - Self::UnterminatedToken { token } => { - write!(f, "unterminated token {}", token.content) - } - Self::DuplicatedFlag { flag } => { - write!(f, "flag '--{flag}' specified more than once") - } - Self::UnknownFlag { text } => write!(f, "unknown flag '{text}'"), - Self::FlagMissingArgument { flag } => { - write!(f, "flag '--{flag}' missing an argument") - } - Self::MissingExpansionDelimiter { expansion } => { - if expansion.is_empty() { - write!(f, "'%' was not properly escaped. Please use '%%'") - } else { - write!(f, "missing a string delimiter after '%{expansion}'") - } - } - Self::UnknownExpansion { kind } => { - write!(f, "unknown expansion '{kind}'") - } - } - } -} - -impl Error for ParseArgsError<'_> {} - -/// The kind of expansion to use on the token's content. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum ExpansionKind { - /// Expand variables from the editor's state. - /// - /// For example `%{cursor_line}`. - Variable, - /// Treat the token contents as hexadecimal corresponding to a Unicode codepoint value. - /// - /// For example `%u{25CF}`. - Unicode, - /// Run the token's contents via the configured shell program. - /// - /// For example `%sh{echo hello}`. - Shell, -} - -impl ExpansionKind { - pub const VARIANTS: &'static [Self] = &[Self::Variable, Self::Unicode, Self::Shell]; - - pub const fn as_str(&self) -> &'static str { - match self { - Self::Variable => "", - Self::Unicode => "u", - Self::Shell => "sh", - } - } - - pub fn from_kind(name: &str) -> Option<Self> { - match name { - "" => Some(Self::Variable), - "u" => Some(Self::Unicode), - "sh" => Some(Self::Shell), - _ => None, - } - } -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum Quote { - Single, - Backtick, -} - -impl Quote { - pub const fn char(&self) -> char { - match self { - Self::Single => '\'', - Self::Backtick => '`', - } - } - - // Quotes can be escaped by doubling them: `'hello '' world'` becomes `hello ' world`. - pub const fn escape(&self) -> &'static str { - match self { - Self::Single => "''", - Self::Backtick => "``", - } - } -} - -/// The type of argument being written. -/// -/// The token kind decides how an argument in the command line will be expanded upon hitting -/// `<ret>` in command mode. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum TokenKind { - /// Unquoted text. - /// - /// For example in `:echo hello world`, "hello" and "world" are raw tokens. - Unquoted, - /// Quoted text which is interpreted literally. - /// - /// The purpose of this kind is to avoid splitting arguments on whitespace. For example - /// `:open 'a b.txt'` will result in opening a file with a single argument `"a b.txt"`. - /// - /// Using expansions within single quotes or backticks will result in the expansion text - /// being shown literally. For example `:echo '%u{0020}'` will print `"%u{0020}"` to the - /// statusline. - Quoted(Quote), - /// Text within double quote delimiters (`"`). - /// - /// The inner text of a double quoted argument can be further expanded. For example - /// `:echo "line: #%{cursor_line}"` could print `"line: #1"` to the statusline. - Expand, - /// An expansion / "percent token". - /// - /// These take the form `%[<kind>]<open><contents><close>`. See `ExpansionKind`. - Expansion(ExpansionKind), - /// A token kind that exists for the sake of completion. - /// - /// In input like `%foo` this token contains the text `"%foo"`. The content start is the byte - /// after the percent token. - /// - /// When `Tokenizer` is passed `true` for its `validate` parameter this token cannot be - /// returned: inputs that would return this token get a validation error instead. - ExpansionKind, -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct Token<'a> { - pub kind: TokenKind, - /// The byte index into the input where the token's content starts. - /// - /// For quoted text this means the byte after the quote. For expansions this means the byte - /// after the opening delimiter. - pub content_start: usize, - /// The inner content of the token. - /// - /// Usually this content borrows from the input but an owned value may be used in cases of - /// escaping. On Unix systems a raw token like `a\ b` has the contents `"a b"`. - pub content: Cow<'a, str>, - /// Whether the token's opening delimiter is closed. - /// - /// For example a quote `"foo"` is closed but not `"foo` or an expansion `%sh{..}` is closed - /// but not `%sh{echo {}`. - pub is_terminated: bool, -} - -impl<'a> Token<'a> { - pub fn empty_at(content_start: usize) -> Self { - Self { - kind: TokenKind::Unquoted, - content_start, - content: Cow::Borrowed(""), - is_terminated: false, - } - } - - pub fn expand(content: impl Into<Cow<'a, str>>) -> Self { - Self { - kind: TokenKind::Expand, - content_start: 0, - content: content.into(), - is_terminated: true, - } - } -} - -#[derive(Debug)] -pub struct Tokenizer<'a> { - input: &'a str, - /// Whether to return errors in the iterator for failed validations like unterminated strings - /// or expansions. When this is set to `false` the iterator will never return `Err`. - validate: bool, - /// The current byte index of the input being considered. - pos: usize, -} - -impl<'a> Tokenizer<'a> { - pub fn new(input: &'a str, validate: bool) -> Self { - Self { - input, - validate, - pos: 0, - } - } - - /// Returns the current byte index position of the parser in the input. - pub fn pos(&self) -> usize { - self.pos - } - - /// Returns the rest of the input as a single `TokenKind::Expand` token literally. - /// - /// Returns `None` if the tokenizer is already at the end of the input or advances the - /// tokenizer to the end of the input otherwise. Leading whitespace characters are skipped. - /// Quoting is not interpreted. - pub fn rest(&mut self) -> Option<Token<'a>> { - self.skip_blanks(); - - if self.pos == self.input.len() { - return None; - } - - let content_start = self.pos; - self.pos = self.input.len(); - Some(Token { - kind: TokenKind::Expand, - content_start, - content: Cow::Borrowed(&self.input[content_start..]), - is_terminated: false, - }) - } - - fn byte(&self) -> Option<u8> { - self.input.as_bytes().get(self.pos).copied() - } - - fn peek_byte(&self) -> Option<u8> { - self.input.as_bytes().get(self.pos + 1).copied() - } - - fn prev_byte(&self) -> Option<u8> { - self.pos - .checked_sub(1) - .map(|idx| self.input.as_bytes()[idx]) - } - - fn skip_blanks(&mut self) { - while let Some(b' ' | b'\t') = self.byte() { - self.pos += 1; - } - } - - fn parse_unquoted(&mut self) -> Cow<'a, str> { - // Note that `String::new` starts with no allocation. We only allocate if we see a - // backslash escape (on Unix only). - let mut escaped = String::new(); - let mut start = self.pos; - - while let Some(byte) = self.byte() { - if matches!(byte, b' ' | b'\t') { - if cfg!(unix) && self.prev_byte() == Some(b'\\') { - // Push everything up to but not including the backslash and then this - // whitespace character. - escaped.push_str(&self.input[start..self.pos - 1]); - escaped.push(byte as char); - start = self.pos + 1; - } else if escaped.is_empty() { - return Cow::Borrowed(&self.input[start..self.pos]); - } else { - break; - } - } - - self.pos += 1; - } - - // Special case for a trailing backslash on Unix: exclude the backslash from the content. - // This improves the behavior of completions like `":open a\\"` (trailing backslash). - let end = if cfg!(unix) && self.prev_byte() == Some(b'\\') { - self.pos - 1 - } else { - self.pos - }; - - if escaped.is_empty() { - assert_eq!(self.pos, self.input.len()); - Cow::Borrowed(&self.input[start..end]) - } else { - escaped.push_str(&self.input[start..end]); - Cow::Owned(escaped) - } - } - - /// Parses a string quoted by the given grapheme cluster. - /// - /// The position of the tokenizer is asserted to be immediately after the quote grapheme - /// cluster. - fn parse_quoted(&mut self, quote: u8) -> (Cow<'a, str>, bool) { - assert_eq!(self.byte(), Some(quote)); - self.pos += 1; - - let mut escaped = String::new(); - while let Some(offset) = self.input[self.pos..].find(quote as char) { - let idx = self.pos + offset; - if self.input.as_bytes().get(idx + 1) == Some("e) { - // Treat two quotes in a row as an escape. - escaped.push_str(&self.input[self.pos..idx + 1]); - // Advance past the escaped quote. - self.pos = idx + 2; - } else { - // Otherwise this quote string is finished. - let quoted = if escaped.is_empty() { - Cow::Borrowed(&self.input[self.pos..idx]) - } else { - escaped.push_str(&self.input[self.pos..idx]); - Cow::Owned(escaped) - }; - // Advance past the closing quote. - self.pos = idx + 1; - return (quoted, true); - } - } - - let quoted = if escaped.is_empty() { - Cow::Borrowed(&self.input[self.pos..]) - } else { - escaped.push_str(&self.input[self.pos..]); - Cow::Owned(escaped) - }; - self.pos = self.input.len(); - - (quoted, false) - } - - /// Parses the percent token expansion under the tokenizer's cursor. - /// - /// This function should only be called when the tokenizer's cursor is on a non-escaped - /// percent token. - pub fn parse_percent_token(&mut self) -> Option<Result<Token<'a>, ParseArgsError<'a>>> { - assert_eq!(self.byte(), Some(b'%')); - - self.pos += 1; - let kind_start = self.pos; - self.pos += self.input[self.pos..] - .bytes() - .take_while(|b| b.is_ascii_lowercase()) - .count(); - let kind = &self.input[kind_start..self.pos]; - - let (open, close) = match self.byte() { - // We support a couple of hard-coded chars only to make sure we can provide more - // useful errors and avoid weird behavior in case of typos. These should cover - // practical cases. - Some(b'(') => (b'(', b')'), - Some(b'[') => (b'[', b']'), - Some(b'{') => (b'{', b'}'), - Some(b'<') => (b'<', b'>'), - Some(b'\'') => (b'\'', b'\''), - Some(b'\"') => (b'\"', b'\"'), - Some(b'|') => (b'|', b'|'), - Some(_) | None => { - return Some(if self.validate { - Err(ParseArgsError::MissingExpansionDelimiter { expansion: kind }) - } else { - Ok(Token { - kind: TokenKind::ExpansionKind, - content_start: kind_start, - content: Cow::Borrowed(kind), - is_terminated: false, - }) - }); - } - }; - // The content start for expansions is the start of the content - after the opening - // delimiter grapheme. - let content_start = self.pos + 1; - let kind = match ExpansionKind::from_kind(kind) { - Some(kind) => TokenKind::Expansion(kind), - None if self.validate => { - return Some(Err(ParseArgsError::UnknownExpansion { kind })); - } - None => TokenKind::Expand, - }; - - let (content, is_terminated) = if open == close { - self.parse_quoted(open) - } else { - self.parse_quoted_balanced(open, close) - }; - - let token = Token { - kind, - content_start, - content, - is_terminated, - }; - - if self.validate && !is_terminated { - return Some(Err(ParseArgsError::UnterminatedToken { token })); - } - - Some(Ok(token)) - } - - /// Parse the next string under the cursor given an open and closing pair. - /// - /// The open and closing pair are different ASCII characters. The cursor is asserted to be - /// immediately after the opening delimiter. - /// - /// This function parses with nesting support. `%sh{echo {hello}}` for example should consume - /// the entire input and not quit after the first '}' character is found. - fn parse_quoted_balanced(&mut self, open: u8, close: u8) -> (Cow<'a, str>, bool) { - assert_eq!(self.byte(), Some(open)); - self.pos += 1; - let start = self.pos; - let mut level = 1; - - while let Some(offset) = self.input[self.pos..].find([open as char, close as char]) { - let idx = self.pos + offset; - // Move past the delimiter. - self.pos = idx + 1; - - let byte = self.input.as_bytes()[idx]; - if byte == open { - level += 1; - } else if byte == close { - level -= 1; - if level == 0 { - break; - } - } else { - unreachable!() - } - } - - let is_terminated = level == 0; - let end = if is_terminated { - // Exclude the closing delimiter from the token's content. - self.pos - 1 - } else { - // When the token is not closed, advance to the end of the input. - self.pos = self.input.len(); - self.pos - }; - - (Cow::Borrowed(&self.input[start..end]), is_terminated) - } -} - -impl<'a> Iterator for Tokenizer<'a> { - type Item = Result<Token<'a>, ParseArgsError<'a>>; - - fn next(&mut self) -> Option<Self::Item> { - self.skip_blanks(); - - let byte = self.byte()?; - match byte { - b'"' | b'\'' | b'`' => { - let content_start = self.pos + 1; - let (content, is_terminated) = self.parse_quoted(byte); - let token = Token { - kind: match byte { - b'"' => TokenKind::Expand, - b'\'' => TokenKind::Quoted(Quote::Single), - b'`' => TokenKind::Quoted(Quote::Backtick), - _ => unreachable!(), - }, - content_start, - content, - is_terminated, - }; - - Some(if self.validate && !is_terminated { - Err(ParseArgsError::UnterminatedToken { token }) - } else { - Ok(token) - }) - } - b'%' => self.parse_percent_token(), - _ => { - let content_start = self.pos; - - // Allow backslash escaping on Unix for quotes or expansions - if cfg!(unix) - && byte == b'\\' - && matches!(self.peek_byte(), Some(b'"' | b'\'' | b'`' | b'%')) - { - self.pos += 1; - } - - Some(Ok(Token { - kind: TokenKind::Unquoted, - content_start, - content: self.parse_unquoted(), - is_terminated: false, - })) - } - } - } -} - -#[derive(Debug, Default, Clone, Copy)] -pub enum CompletionState { - #[default] - Positional, - Flag(Option<Flag>), - FlagArgument(Flag), -} - -/// A set of arguments provided to a command on the command line. -/// -/// Regular arguments are called "positional" arguments (or "positionals" for short). Command line -/// input might also specify "flags" which can modify a command's behavior. -/// -/// ```rust,ignore -/// // Say that the command accepts a "bar" flag which doesn't accept an argument itself. -/// // This input has two positionals, "foo" and "baz" and one flag "--bar". -/// let args = Args::parse("foo --bar baz", /* .. */); -/// // `Args` may be treated like a slice to access positionals. -/// assert_eq!(args.len(), 2); -/// assert_eq!(&args[0], "foo"); -/// assert_eq!(&args[1], "baz"); -/// // Use `has_flag` or `get_flag` to access flags. -/// assert!(args.has_flag("bar")); -/// ``` -/// -/// The `Args` type can be treated mostly the same as a slice when accessing positional arguments. -/// Common slice methods like `len`, `get`, `first` and `join` only expose positional arguments. -/// Additionally, common syntax like `for arg in args` or `&args[idx]` is supported for accessing -/// positional arguments. -/// -/// To look up flags, use `Args::get_flag` for flags which should accept an argument or -/// `Args::has_flag` for boolean flags. -/// -/// The way that `Args` is parsed from the input depends on a command's `Signature`. See the -/// `Signature` type for more details. -#[derive(Debug)] -pub struct Args<'a> { - signature: Signature, - /// Whether to validate the arguments. - /// See the `ParseArgsError` type for the validations. - validate: bool, - /// Whether args pushed with `Self::push` should be treated as positionals even if they - /// start with '-'. - only_positionals: bool, - state: CompletionState, - positionals: Vec<Cow<'a, str>>, - flags: HashMap<&'static str, Cow<'a, str>>, -} - -impl Default for Args<'_> { - fn default() -> Self { - Self { - signature: Signature::DEFAULT, - validate: Default::default(), - only_positionals: Default::default(), - state: CompletionState::default(), - positionals: Default::default(), - flags: Default::default(), - } - } -} - -impl<'a> Args<'a> { - pub fn new(signature: Signature, validate: bool) -> Self { - Self { - signature, - validate, - only_positionals: false, - positionals: Vec::new(), - flags: HashMap::new(), - state: CompletionState::default(), - } - } - - /// Reads the next token out of the given parser. - /// - /// If the command's signature sets a maximum number of positionals (via `raw_after`) then - /// the token may contain the rest of the parser's input. - pub fn read_token<'p>( - &mut self, - parser: &mut Tokenizer<'p>, - ) -> Result<Option<Token<'p>>, ParseArgsError<'p>> { - if self - .signature - .raw_after - .is_some_and(|max| self.len() >= max as usize) - { - self.only_positionals = true; - Ok(parser.rest()) - } else { - parser.next().transpose() - } - } - - /// Parses the given command line according to a command's signature. - /// - /// The `try_map_fn` function can be used to try changing each token before it is considered - /// as an argument - this is used for variable expansion. - pub fn parse<M>( - line: &'a str, - signature: Signature, - validate: bool, - mut try_map_fn: M, - ) -> Result<Self, Box<dyn Error + 'a>> - where - // Note: this is a `FnMut` in case we decide to allow caching expansions in the future. - // The `mut` is not currently used. - M: FnMut(Token<'a>) -> Result<Cow<'a, str>, Box<dyn Error>>, - { - let mut tokenizer = Tokenizer::new(line, validate); - let mut args = Self::new(signature, validate); - - while let Some(token) = args.read_token(&mut tokenizer)? { - let arg = try_map_fn(token)?; - args.push(arg)?; - } - - args.finish()?; - - Ok(args) - } - - /// Adds the given argument token. - /// - /// Once all arguments have been added, `Self::finish` should be called to perform any - /// closing validations. - pub fn push(&mut self, arg: Cow<'a, str>) -> Result<(), ParseArgsError<'a>> { - if !self.only_positionals && arg == "--" { - // "--" marks the end of flags, everything after is a positional even if it starts - // with '-'. - self.only_positionals = true; - self.state = CompletionState::Flag(None); - } else if let Some(flag) = self.flag_awaiting_argument() { - // If the last token was a flag which accepts an argument, treat this token as a flag - // argument. - self.flags.insert(flag.name, arg); - self.state = CompletionState::FlagArgument(flag); - } else if !self.only_positionals && arg.starts_with('-') { - // If the token starts with '-' and we are not only accepting positional arguments, - // treat this token as a flag. - let flag = if let Some(longhand) = arg.strip_prefix("--") { - self.signature - .flags - .iter() - .find(|flag| flag.name == longhand) - } else { - let shorthand = arg.strip_prefix('-').unwrap(); - self.signature.flags.iter().find(|flag| { - flag.alias - .is_some_and(|ch| shorthand == ch.encode_utf8(&mut [0; 4])) - }) - }; - - let Some(flag) = flag else { - if self.validate { - return Err(ParseArgsError::UnknownFlag { text: arg }); - } - - self.positionals.push(arg); - self.state = CompletionState::Flag(None); - return Ok(()); - }; - - if self.validate && self.flags.contains_key(flag.name) { - return Err(ParseArgsError::DuplicatedFlag { flag: flag.name }); - } - - self.flags.insert(flag.name, Cow::Borrowed("")); - self.state = CompletionState::Flag(Some(*flag)); - } else { - // Otherwise this token is a positional argument. - self.positionals.push(arg); - self.state = CompletionState::Positional; - } - - Ok(()) - } - - /// Performs any validations that must be done after the input args are finished being pushed - /// with `Self::push`. - fn finish(&self) -> Result<(), ParseArgsError<'a>> { - if !self.validate { - return Ok(()); - }; - - if let Some(flag) = self.flag_awaiting_argument() { - return Err(ParseArgsError::FlagMissingArgument { flag: flag.name }); - } - self.signature - .check_positional_count(self.positionals.len())?; - - Ok(()) - } - - fn flag_awaiting_argument(&self) -> Option<Flag> { - match self.state { - CompletionState::Flag(flag) => flag.filter(|f| f.completions.is_some()), - _ => None, - } - } - - /// Returns the kind of argument the last token is considered to be. - /// - /// For example if the last argument in the command line is `--foo` then the argument may be - /// considered to be a flag. - pub fn completion_state(&self) -> CompletionState { - self.state - } - - /// Returns the number of positionals supplied in the input. - /// - /// This number does not account for any flags passed in the input. - pub fn len(&self) -> usize { - self.positionals.len() - } - - /// Checks whether the arguments contain no positionals. - /// - /// Note that this function returns `true` if there are no positional arguments even if the - /// input contained flags. - pub fn is_empty(&self) -> bool { - self.positionals.is_empty() - } - - /// Gets the first positional argument, if one exists. - pub fn first(&'a self) -> Option<&'a str> { - self.positionals.first().map(AsRef::as_ref) - } - - /// Gets the positional argument at the given index, if one exists. - pub fn get(&'a self, index: usize) -> Option<&'a str> { - self.positionals.get(index).map(AsRef::as_ref) - } - - /// Flattens all positional arguments together with the given separator between each - /// positional. - pub fn join(&self, sep: &str) -> String { - self.positionals.join(sep) - } - - /// Returns an iterator over all positional arguments. - pub fn iter(&self) -> slice::Iter<'_, Cow<'_, str>> { - self.positionals.iter() - } - - /// Gets the value associated with a flag's long name if the flag was provided. - /// - /// This function should be preferred over [Self::has_flag] when the flag accepts an argument. - pub fn get_flag(&'a self, name: &'static str) -> Option<&'a str> { - debug_assert!( - self.signature.flags.iter().any(|flag| flag.name == name), - "flag '--{name}' does not belong to the command's signature" - ); - debug_assert!( - self.signature - .flags - .iter() - .any(|flag| flag.name == name && flag.completions.is_some()), - "Args::get_flag was used for '--{name}' but should only be used for flags with arguments, use Args::has_flag instead" - ); - - self.flags.get(name).map(AsRef::as_ref) - } - - /// Checks if a flag was provided in the arguments. - /// - /// This function should be preferred over [Self::get_flag] for boolean flags - flags that - /// either are present or not. - pub fn has_flag(&self, name: &'static str) -> bool { - debug_assert!( - self.signature.flags.iter().any(|flag| flag.name == name), - "flag '--{name}' does not belong to the command's signature" - ); - debug_assert!( - self.signature - .flags - .iter() - .any(|flag| flag.name == name && flag.completions.is_none()), - "Args::has_flag was used for '--{name}' but should only be used for flags without arguments, use Args::get_flag instead" - ); - - self.flags.contains_key(name) - } -} - -// `arg[n]` -impl ops::Index<usize> for Args<'_> { - type Output = str; - - fn index(&self, index: usize) -> &Self::Output { - self.positionals[index].as_ref() - } -} - -// `for arg in args { .. }` -impl<'a> IntoIterator for Args<'a> { - type Item = Cow<'a, str>; - type IntoIter = vec::IntoIter<Cow<'a, str>>; - - fn into_iter(self) -> Self::IntoIter { - self.positionals.into_iter() - } -} - -// `for arg in &args { .. }` -impl<'i, 'a> IntoIterator for &'i Args<'a> { - type Item = &'i Cow<'a, str>; - type IntoIter = slice::Iter<'i, Cow<'a, str>>; - - fn into_iter(self) -> Self::IntoIter { - self.positionals.iter() - } -} - -#[cfg(test)] -mod test { - use super::*; - - #[track_caller] - fn assert_tokens(input: &str, expected: &[&str]) { - let actual: Vec<_> = Tokenizer::new(input, true) - .map(|arg| arg.unwrap().content) - .collect(); - let actual: Vec<_> = actual.iter().map(|c| c.as_ref()).collect(); - - assert_eq!(actual.as_slice(), expected); - } - - #[track_caller] - fn assert_incomplete_tokens(input: &str, expected: &[&str]) { - assert!( - Tokenizer::new(input, true).collect::<Result<Vec<_>, _>>().is_err(), - "`assert_incomplete_tokens` only accepts input that fails validation, consider using `assert_tokens` instead" - ); - - let actual: Vec<_> = Tokenizer::new(input, false) - .map(|arg| arg.unwrap().content) - .collect(); - let actual: Vec<_> = actual.iter().map(|c| c.as_ref()).collect(); - - assert_eq!(actual.as_slice(), expected); - } - - #[test] - fn tokenize_unquoted() { - assert_tokens("", &[]); - assert_tokens("hello", &["hello"]); - assert_tokens("hello world", &["hello", "world"]); - // Any amount of whitespace is considered a separator. - assert_tokens("hello\t \tworld", &["hello", "world"]); - } - - // This escaping behavior is specific to Unix systems. - #[cfg(unix)] - #[test] - fn tokenize_backslash_unix() { - assert_tokens(r#"hello\ world"#, &["hello world"]); - assert_tokens(r#"one\ two three"#, &["one two", "three"]); - assert_tokens(r#"one two\ three"#, &["one", "two three"]); - // Trailing backslash is ignored - this improves completions. - assert_tokens(r#"hello\"#, &["hello"]); - // The backslash at the start of the double quote makes the quote be treated as raw. - // For the backslash before the ending quote the token is already considered raw so the - // backslash and quote are treated literally. - assert_tokens( - r#"echo \"hello world\""#, - &["echo", r#""hello"#, r#"world\""#], - ); - } - - #[test] - fn tokenize_backslash() { - assert_tokens(r#"\n"#, &["\\n"]); - assert_tokens(r#"'\'"#, &["\\"]); - } - - #[test] - fn tokenize_quoting() { - // Using a quote character twice escapes it. - assert_tokens(r#"''"#, &[""]); - assert_tokens(r#""""#, &[""]); - assert_tokens(r#"``"#, &[""]); - assert_tokens(r#"echo """#, &["echo", ""]); - - assert_tokens(r#"'hello'"#, &["hello"]); - assert_tokens(r#"'hello world'"#, &["hello world"]); - - assert_tokens(r#""hello "" world""#, &["hello \" world"]); - } - - #[test] - fn tokenize_percent() { - // Pair delimiters: - assert_tokens(r#"echo %{hello world}"#, &["echo", "hello world"]); - assert_tokens(r#"echo %[hello world]"#, &["echo", "hello world"]); - assert_tokens(r#"echo %(hello world)"#, &["echo", "hello world"]); - assert_tokens(r#"echo %<hello world>"#, &["echo", "hello world"]); - assert_tokens(r#"echo %|hello world|"#, &["echo", "hello world"]); - assert_tokens(r#"echo %'hello world'"#, &["echo", "hello world"]); - assert_tokens(r#"echo %"hello world""#, &["echo", "hello world"]); - // When invoking a command, double percents can be used within a string as an escape for - // the percent. This is done in the expansion code though, not in the parser here. - assert_tokens(r#"echo "%%hello world""#, &["echo", "%%hello world"]); - // Different kinds of quotes nested: - assert_tokens( - r#"echo "%sh{echo 'hello world'}""#, - &["echo", r#"%sh{echo 'hello world'}"#], - ); - // Nesting of the expansion delimiter: - assert_tokens(r#"echo %{hello {x} world}"#, &["echo", "hello {x} world"]); - assert_tokens( - r#"echo %{hello {{😎}} world}"#, - &["echo", "hello {{😎}} world"], - ); - - // Balanced nesting: - assert_tokens( - r#"echo %{hello {}} world}"#, - &["echo", "hello {}", "world}"], - ); - - // Recursive expansions: - assert_tokens( - r#"echo %sh{echo "%{cursor_line}"}"#, - &["echo", r#"echo "%{cursor_line}""#], - ); - // Completion should provide variable names here. (Unbalanced nesting) - assert_incomplete_tokens(r#"echo %sh{echo "%{c"#, &["echo", r#"echo "%{c"#]); - assert_incomplete_tokens(r#"echo %{hello {{} world}"#, &["echo", "hello {{} world}"]); - } - - fn parse_signature<'a>( - input: &'a str, - signature: Signature, - ) -> Result<Args<'a>, Box<dyn std::error::Error + 'a>> { - Args::parse(input, signature, true, |token| Ok(token.content)) - } - - #[test] - fn signature_validation_positionals() { - let signature = Signature { - positionals: (2, Some(3)), - ..Signature::DEFAULT - }; - - assert!(parse_signature("hello world", signature).is_ok()); - assert!(parse_signature("foo bar baz", signature).is_ok()); - assert!(parse_signature(r#"a "b c" d"#, signature).is_ok()); - - assert!(parse_signature("hello", signature).is_err()); - assert!(parse_signature("foo bar baz quiz", signature).is_err()); - - let signature = Signature { - positionals: (1, None), - ..Signature::DEFAULT - }; - - assert!(parse_signature("a", signature).is_ok()); - assert!(parse_signature("a b", signature).is_ok()); - assert!(parse_signature(r#"a "b c" d"#, signature).is_ok()); - - assert!(parse_signature("", signature).is_err()); - } - - #[test] - fn flags() { - let signature = Signature { - positionals: (1, Some(2)), - flags: &[ - Flag { - name: "foo", - alias: Some('f'), - doc: "", - completions: None, - }, - Flag { - name: "bar", - alias: Some('b'), - doc: "", - completions: Some(&[]), - }, - ], - ..Signature::DEFAULT - }; - - let args = parse_signature("hello", signature).unwrap(); - assert_eq!(args.len(), 1); - assert_eq!(&args[0], "hello"); - assert!(!args.has_flag("foo")); - assert!(args.get_flag("bar").is_none()); - - let args = parse_signature("--bar abcd hello world --foo", signature).unwrap(); - assert_eq!(args.len(), 2); - assert_eq!(&args[0], "hello"); - assert_eq!(&args[1], "world"); - assert!(args.has_flag("foo")); - assert_eq!(args.get_flag("bar"), Some("abcd")); - - let args = parse_signature("hello -f -b abcd world", signature).unwrap(); - assert_eq!(args.len(), 2); - assert_eq!(&args[0], "hello"); - assert_eq!(&args[1], "world"); - assert!(args.has_flag("foo")); - assert_eq!(args.get_flag("bar"), Some("abcd")); - - // The signature requires at least one positional. - assert!(parse_signature("--foo", signature).is_err()); - // And at most two. - assert!(parse_signature("abc --bar baz def efg", signature).is_err()); - - let args = parse_signature(r#"abc -b "xyz 123" def"#, signature).unwrap(); - assert_eq!(args.len(), 2); - assert_eq!(&args[0], "abc"); - assert_eq!(&args[1], "def"); - assert_eq!(args.get_flag("bar"), Some("xyz 123")); - - // Unknown flags are validation errors. - assert!(parse_signature(r#"foo --quiz"#, signature).is_err()); - // Duplicated flags are parsing errors. - assert!(parse_signature(r#"--foo bar --foo"#, signature).is_err()); - assert!(parse_signature(r#"-f bar --foo"#, signature).is_err()); - - // "--" can be used to mark the end of flags. Everything after is considered a positional. - let args = parse_signature(r#"hello --bar baz -- --foo"#, signature).unwrap(); - assert_eq!(args.len(), 2); - assert_eq!(&args[0], "hello"); - assert_eq!(&args[1], "--foo"); - assert_eq!(args.get_flag("bar"), Some("baz")); - assert!(!args.has_flag("foo")); - } - - #[test] - fn raw_after() { - let signature = Signature { - positionals: (1, Some(1)), - raw_after: Some(0), - ..Signature::DEFAULT - }; - - // All quoting and escaping is treated literally in raw mode. - let args = parse_signature(r#"'\'"#, signature).unwrap(); - assert_eq!(args.len(), 1); - assert_eq!(&args[0], "'\\'"); - let args = parse_signature(r#"\''"#, signature).unwrap(); - assert_eq!(args.len(), 1); - assert_eq!(&args[0], "\\''"); - - // Leading space is trimmed. - let args = parse_signature(r#" %sh{foo}"#, signature).unwrap(); - assert_eq!(args.len(), 1); - assert_eq!(&args[0], "%sh{foo}"); - - let signature = Signature { - positionals: (1, Some(2)), - raw_after: Some(1), - ..Signature::DEFAULT - }; - - let args = parse_signature("foo", signature).unwrap(); - assert_eq!(args.len(), 1); - assert_eq!(&args[0], "foo"); - - // "--bar" is treated as a positional. - let args = parse_signature("foo --bar", signature).unwrap(); - assert_eq!(args.len(), 2); - assert_eq!(&args[0], "foo"); - assert_eq!(&args[1], "--bar"); - - let args = parse_signature("abc def ghi", signature).unwrap(); - assert_eq!(args.len(), 2); - assert_eq!(&args[0], "abc"); - assert_eq!(&args[1], "def ghi"); - - let args = parse_signature("rulers [20, 30]", signature).unwrap(); - assert_eq!(args.len(), 2); - assert_eq!(&args[0], "rulers"); - assert_eq!(&args[1], "[20, 30]"); - - let args = - parse_signature(r#"gutters ["diff"] ["diff", "diagnostics"]"#, signature).unwrap(); - assert_eq!(args.len(), 2); - assert_eq!(&args[0], "gutters"); - assert_eq!(&args[1], r#"["diff"] ["diff", "diagnostics"]"#); - } -} |