Unnamed repository; edit this file 'description' to name the repository.
Diffstat (limited to 'helix-core/src/shellwords.rs')
| -rw-r--r-- | helix-core/src/shellwords.rs | 945 |
1 files changed, 253 insertions, 692 deletions
diff --git a/helix-core/src/shellwords.rs b/helix-core/src/shellwords.rs index edfd9ad1..9d873c36 100644 --- a/helix-core/src/shellwords.rs +++ b/helix-core/src/shellwords.rs @@ -1,358 +1,6 @@ -use smartstring::{LazyCompact, SmartString}; use std::borrow::Cow; -/// A utility for parsing shell-like command lines. -/// -/// The `Shellwords` struct takes an input string and allows extracting the command and its arguments. -/// -/// # Features -/// -/// - Parses command and arguments from input strings. -/// - Supports single, double, and backtick quoted arguments. -/// - Respects backslash escaping in arguments. -/// -/// # Examples -/// -/// Basic usage: -/// -/// ``` -/// # use helix_core::shellwords::Shellwords; -/// let shellwords = Shellwords::from(":o helix-core/src/shellwords.rs"); -/// assert_eq!(":o", shellwords.command()); -/// assert_eq!("helix-core/src/shellwords.rs", shellwords.args().next().unwrap()); -/// ``` -/// -/// Empty command: -/// -/// ``` -/// # use helix_core::shellwords::Shellwords; -/// let shellwords = Shellwords::from(" "); -/// assert!(shellwords.command().is_empty()); -/// ``` -/// -/// # Iterator -/// -/// The `args` method returns a non-allocating iterator, `Args`, over the arguments of the input. -/// -/// ``` -/// # use helix_core::shellwords::Shellwords; -/// let shellwords = Shellwords::from(":o a b c"); -/// let mut args = shellwords.args(); -/// assert_eq!(Some("a"), args.next()); -/// assert_eq!(Some("b"), args.next()); -/// assert_eq!(Some("c"), args.next()); -/// assert_eq!(None, args.next()); -/// ``` -#[derive(Clone, Copy)] -pub struct Shellwords<'a> { - input: &'a str, -} - -impl<'a> From<&'a str> for Shellwords<'a> { - #[inline] - fn from(input: &'a str) -> Self { - Self { input } - } -} - -impl<'a> From<&'a String> for Shellwords<'a> { - #[inline] - fn from(input: &'a String) -> Self { - Self { input } - } -} - -impl<'a> From<&'a Cow<'a, str>> for Shellwords<'a> { - #[inline] - fn from(input: &'a Cow<str>) -> Self { - Self { input } - } -} - -impl<'a> Shellwords<'a> { - #[inline] - #[must_use] - pub fn command(&self) -> &str { - self.input - .split_once(' ') - .map_or(self.input, |(command, _)| command) - } - - #[inline] - #[must_use] - pub fn args(&self) -> Args<'a> { - let args = self.input.split_once(' ').map_or("", |(_, args)| args); - Args::parse(args) - } - - #[inline] - pub fn input(&self) -> &str { - self.input - } - - /// Checks that the input ends with a whitespace character which is not escaped. - /// - /// # Examples - /// - /// ```rust - /// # use helix_core::shellwords::Shellwords; - /// assert_eq!(Shellwords::from(" ").ends_with_whitespace(), true); - /// assert_eq!(Shellwords::from(":open ").ends_with_whitespace(), true); - /// assert_eq!(Shellwords::from(":open foo.txt ").ends_with_whitespace(), true); - /// assert_eq!(Shellwords::from(":open").ends_with_whitespace(), false); - /// assert_eq!(Shellwords::from(":open a\\ ").ends_with_whitespace(), true); - /// assert_eq!(Shellwords::from(":open a\\ b.txt").ends_with_whitespace(), false); - /// ``` - #[inline] - pub fn ends_with_whitespace(&self) -> bool { - self.input.ends_with(' ') - } -} - -/// An iterator over an input string which yields arguments. -/// -/// Splits on whitespace, but respects quoted substrings (using double quotes, single quotes, or backticks). -#[derive(Debug, Clone)] -pub struct Args<'a> { - input: &'a str, - idx: usize, - start: usize, -} - -impl<'a> Args<'a> { - #[inline] - fn parse(input: &'a str) -> Self { - Self { - input, - idx: 0, - start: 0, - } - } - - #[inline] - pub fn is_empty(&self) -> bool { - self.input.is_empty() - } - - /// Returns the args exactly as input. - /// - /// # Examples - /// ``` - /// # use helix_core::shellwords::Args; - /// let args = Args::from(r#"sed -n "s/test t/not /p""#); - /// assert_eq!(r#"sed -n "s/test t/not /p""#, args.raw()); - /// - /// let args = Args::from(r#"cat "file name with space.txt""#); - /// assert_eq!(r#"cat "file name with space.txt""#, args.raw()); - /// ``` - #[inline] - pub fn raw(&self) -> &str { - self.input - } - - /// Returns the remainder of the args exactly as input. - /// - /// # Examples - /// ``` - /// # use helix_core::shellwords::Args; - /// let mut args = Args::from(r#"sed -n "s/test t/not /p""#); - /// assert_eq!("sed", args.next().unwrap()); - /// assert_eq!(r#"-n "s/test t/not /p""#, args.rest()); - /// ``` - /// - /// Never calling `next` and using `rest` is functionally equivalent to calling `raw`. - #[inline] - pub fn rest(&self) -> &str { - &self.input[self.idx..] - } - - /// Returns a reference to the `next()` value without advancing the iterator. - /// - /// Unlike `std::iter::Peakable::peek` this does not return a double reference, `&&str` - /// but a normal `&str`. - #[inline] - #[must_use] - pub fn peek(&self) -> Option<&str> { - self.clone().next() - } - - /// Returns the total number of arguments given in a command. - /// - /// This count is aware of all parsing rules for `Args`. - #[must_use] - pub fn arg_count(&self) -> usize { - Self { - input: self.input, - idx: 0, - start: 0, - } - .fold(0, |acc, _| acc + 1) - } - - /// Convenient function to return an empty `Args`. - /// - /// When used in any iteration, it will always return `None`. - #[inline(always)] - pub const fn empty() -> Self { - Self { - input: "", - idx: 0, - start: 0, - } - } -} - -impl<'a> Iterator for Args<'a> { - type Item = &'a str; - - #[inline] - #[allow(clippy::too_many_lines)] - fn next(&mut self) -> Option<Self::Item> { - // The parser loop is split into three main blocks to handle different types of input processing: - // - // 1. Quote block: - // - Detects an unescaped quote character, either starting an in-quote scan or, if already in-quote, - // locating the closing quote to return the quoted argument. - // - Handles cases where mismatched quotes are ignored and when quotes appear as the last character. - // - // 2. Whitespace block: - // - Handles arguments separated by whitespace (space or tab), respecting quotes so quoted phrases - // remain grouped together. - // - Splits arguments by whitespace when outside of a quoted context and updates boundaries accordingly. - // - // 3. Catch-all block: - // - Handles any other character, updating the `is_escaped` status if a backslash is encountered, - // advancing the loop to the next character. - - let bytes = self.input.as_bytes(); - let mut in_quotes = false; - let mut quote = b'\0'; - let mut is_escaped = false; - - while self.idx < bytes.len() { - match bytes[self.idx] { - b'"' | b'\'' | b'`' if !is_escaped => { - if in_quotes { - // Found the proper closing quote, so can return the arg and advance the state along. - if bytes[self.idx] == quote { - let arg = Some(&self.input[self.start..self.idx]); - self.idx += 1; - self.start = self.idx; - return arg; - } - // If quote does not match the type of the opening quote, then do nothing and advance. - self.idx += 1; - } else if self.idx == bytes.len() - 1 { - // Special case for when a quote is the last input in args. - // e.g: :read "file with space.txt"" - // This preserves the quote as an arg: - // - `file with space` - // - `"` - let arg = Some(&self.input[self.idx..]); - self.idx = bytes.len(); - self.start = bytes.len(); - return arg; - } else { - // Found opening quote. - in_quotes = true; - // Kind of quote that was found. - quote = bytes[self.idx]; - - if self.start < self.idx { - // When part of the input ends in a quote, `one two" three`, this properly returns the `two` - // before advancing to the quoted arg for the next iteration: - // - `one` <- previous arg - // - `two` <- this step - // - ` three` <- next arg - let arg = Some(&self.input[self.start..self.idx]); - self.idx += 1; - self.start = self.idx; - return arg; - } - - // Advance after quote. - self.idx += 1; - // Exclude quote from arg output. - self.start = self.idx; - } - } - b' ' | b'\t' if !in_quotes => { - // Found a true whitespace separator that wasn't inside quotes. - - // Check if there is anything to return or if its just advancing over whitespace. - // `start` will only be less than `idx` when there is something to return. - if self.start < self.idx { - let arg = Some(&self.input[self.start..self.idx]); - self.idx += 1; - self.start = self.idx; - return arg; - } - - // Advance beyond the whitespace. - self.idx += 1; - - // This is where `start` will be set to the start of an arg boundary, either encountering a word - // boundary or a quote boundary. If it finds a quote, then it will be advanced again in that part - // of the code. Either way, all that remains for the check above will be to return a full arg. - self.start = self.idx; - } - _ => { - // If previous loop didn't find any backslash and was already escaped it will change to false - // as the backslash chain was broken. - // - // If the previous loop had no backslash escape, and found one this iteration, then its the start - // of an escape chain. - is_escaped = match (is_escaped, bytes[self.idx]) { - (false, b'\\') => true, // Set `is_escaped` if the current byte is a backslash - _ => false, //Reset `is_escaped` if it was true, otherwise keep `is_escaped` as false - }; - - // Advance to next `char`. - self.idx += 1; - } - } - } - - // Fallback that catches when the loop would have exited but failed to return the arg between start and the end. - if self.start < bytes.len() { - let arg = Some(&self.input[self.start..]); - self.start = bytes.len(); - return arg; - } - - // All args have been parsed. - None - } - - fn count(self) -> usize - where - Self: Sized, - { - panic!("use `arg_count` instead to get the number of arguments."); - } -} - -impl<'a> From<&'a String> for Args<'a> { - fn from(args: &'a String) -> Self { - Args::parse(args) - } -} - -impl<'a> From<&'a str> for Args<'a> { - fn from(args: &'a str) -> Self { - Args::parse(args) - } -} - -impl<'a> From<&'a Cow<'_, str>> for Args<'a> { - fn from(args: &'a Cow<str>) -> Self { - Args::parse(args) - } -} - /// Auto escape for shellwords usage. -#[inline] -#[must_use] pub fn escape(input: Cow<str>) -> Cow<str> { if !input.chars().any(|x| x.is_ascii_whitespace()) { input @@ -365,141 +13,186 @@ pub fn escape(input: Cow<str>) -> Cow<str> { buf })) } else { - Cow::Owned(format!("\"{input}\"")) + Cow::Owned(format!("\"{}\"", input)) } } -/// Unescapes a string, converting escape sequences into their literal characters. -/// -/// This function handles the following escape sequences: -/// - `\\n` is converted to `\n` (newline) -/// - `\\t` is converted to `\t` (tab) -/// - `\\u{...}` is converted to the corresponding Unicode character -/// -/// Other escape sequences, such as `\\` followed by any character not listed above, will remain unchanged. -/// -/// If input is invalid, for example if there is invalid unicode, \u{999999999}, it will return the input as is. -/// -/// # Examples -/// -/// Basic usage: -/// -/// ``` -/// # use helix_core::shellwords::unescape; -/// let unescaped = unescape("hello\\nworld"); -/// assert_eq!("hello\nworld", unescaped); -/// ``` -/// -/// Unescaping tabs: -/// -/// ``` -/// # use helix_core::shellwords::unescape; -/// let unescaped = unescape("hello\\tworld"); -/// assert_eq!("hello\tworld", unescaped); -/// ``` -/// -/// Unescaping Unicode characters: -/// -/// ``` -/// # use helix_core::shellwords::unescape; -/// let unescaped = unescape("hello\\u{1f929}world"); -/// assert_eq!("hello\u{1f929}world", unescaped); -/// assert_eq!("hello🤩world", unescaped); -/// ``` -/// -/// Handling backslashes: -/// -/// ``` -/// # use helix_core::shellwords::unescape; -/// let unescaped = unescape(r"hello\\world"); -/// assert_eq!(r"hello\\world", unescaped); -/// -/// let unescaped = unescape(r"hello\\\\world"); -/// assert_eq!(r"hello\\\\world", unescaped); -/// ``` -/// -/// # Note -/// -/// This function is opinionated, with a clear purpose of handling user input, not a general or generic unescaping utility, and does not unescape sequences like `\\'` or `\\\"`, leaving them as is. -#[inline] -#[must_use] -pub fn unescape(input: &str) -> Cow<'_, str> { - enum State { - Normal, - Escaped, - Unicode, - } +enum State { + OnWhitespace, + Unquoted, + UnquotedEscaped, + Quoted, + QuoteEscaped, + Dquoted, + DquoteEscaped, +} - let mut unescaped = String::new(); - let mut state = State::Normal; - let mut is_escaped = false; - // NOTE: Max unicode code point is U+10FFFF for a maximum of 6 chars - let mut unicode = SmartString::<LazyCompact>::new_const(); +pub struct Shellwords<'a> { + state: State, + /// Shellwords where whitespace and escapes has been resolved. + words: Vec<Cow<'a, str>>, + /// The parts of the input that are divided into shellwords. This can be + /// used to retrieve the original text for a given word by looking up the + /// same index in the Vec as the word in `words`. + parts: Vec<&'a str>, +} - for (idx, ch) in input.char_indices() { - match state { - State::Normal => match ch { - '\\' => { - if !is_escaped { - // PERF: As not every separator will be escaped, we use `String::new` as that has no initial - // allocation. If an escape is found, then we reserve capacity thats the len of the separator, - // as the new unescaped string will be at least that long. - unescaped.reserve(input.len()); - if idx > 0 { - // First time finding an escape, so all prior chars can be added to the new unescaped - // version if its not the very first char found. - unescaped.push_str(&input[0..idx]); +impl<'a> From<&'a str> for Shellwords<'a> { + fn from(input: &'a str) -> Self { + use State::*; + + let mut state = Unquoted; + let mut words = Vec::new(); + let mut parts = Vec::new(); + let mut escaped = String::with_capacity(input.len()); + + let mut part_start = 0; + let mut unescaped_start = 0; + let mut end = 0; + + for (i, c) in input.char_indices() { + state = match state { + OnWhitespace => match c { + '"' => { + end = i; + Dquoted + } + '\'' => { + end = i; + Quoted + } + '\\' => { + if cfg!(unix) { + escaped.push_str(&input[unescaped_start..i]); + unescaped_start = i + 1; + UnquotedEscaped + } else { + OnWhitespace } } - state = State::Escaped; - is_escaped = true; - } - _ => { - if is_escaped { - unescaped.push(ch); + c if c.is_ascii_whitespace() => { + end = i; + OnWhitespace } - } - }, - State::Escaped => { - match ch { - 'n' => unescaped.push('\n'), - 't' => unescaped.push('\t'), - 'u' => { - state = State::Unicode; - continue; + _ => Unquoted, + }, + Unquoted => match c { + '\\' => { + if cfg!(unix) { + escaped.push_str(&input[unescaped_start..i]); + unescaped_start = i + 1; + UnquotedEscaped + } else { + Unquoted + } } - // Uncomment if you want to handle '\\' to '\' - // '\\' => unescaped.push('\\'), - _ => { - unescaped.push('\\'); - unescaped.push(ch); + c if c.is_ascii_whitespace() => { + end = i; + OnWhitespace } - } - state = State::Normal; + _ => Unquoted, + }, + UnquotedEscaped => Unquoted, + Quoted => match c { + '\\' => { + if cfg!(unix) { + escaped.push_str(&input[unescaped_start..i]); + unescaped_start = i + 1; + QuoteEscaped + } else { + Quoted + } + } + '\'' => { + end = i; + OnWhitespace + } + _ => Quoted, + }, + QuoteEscaped => Quoted, + Dquoted => match c { + '\\' => { + if cfg!(unix) { + escaped.push_str(&input[unescaped_start..i]); + unescaped_start = i + 1; + DquoteEscaped + } else { + Dquoted + } + } + '"' => { + end = i; + OnWhitespace + } + _ => Dquoted, + }, + DquoteEscaped => Dquoted, + }; + + let c_len = c.len_utf8(); + if i == input.len() - c_len && end == 0 { + end = i + c_len; } - State::Unicode => match ch { - '{' => continue, - '}' => { - let Ok(digit) = u32::from_str_radix(&unicode, 16) else { - return input.into(); - }; - let Some(point) = char::from_u32(digit) else { - return input.into(); - }; - unescaped.push(point); - // Might be more unicode to unescape so clear for reuse. - unicode.clear(); - state = State::Normal; + + if end > 0 { + let esc_trim = escaped.trim(); + let inp = &input[unescaped_start..end]; + + if !(esc_trim.is_empty() && inp.trim().is_empty()) { + if esc_trim.is_empty() { + words.push(inp.into()); + parts.push(inp); + } else { + words.push([escaped, inp.into()].concat().into()); + parts.push(&input[part_start..end]); + escaped = "".to_string(); + } } - _ => unicode.push(ch), - }, + unescaped_start = i + 1; + part_start = i + 1; + end = 0; + } + } + + debug_assert!(words.len() == parts.len()); + + Self { + state, + words, + parts, } } +} - if is_escaped { - unescaped.into() - } else { - input.into() +impl<'a> Shellwords<'a> { + /// Checks that the input ends with a whitespace character which is not escaped. + /// + /// # Examples + /// + /// ```rust + /// use helix_core::shellwords::Shellwords; + /// assert_eq!(Shellwords::from(" ").ends_with_whitespace(), true); + /// assert_eq!(Shellwords::from(":open ").ends_with_whitespace(), true); + /// assert_eq!(Shellwords::from(":open foo.txt ").ends_with_whitespace(), true); + /// assert_eq!(Shellwords::from(":open").ends_with_whitespace(), false); + /// #[cfg(unix)] + /// assert_eq!(Shellwords::from(":open a\\ ").ends_with_whitespace(), false); + /// #[cfg(unix)] + /// assert_eq!(Shellwords::from(":open a\\ b.txt").ends_with_whitespace(), false); + /// ``` + pub fn ends_with_whitespace(&self) -> bool { + matches!(self.state, State::OnWhitespace) + } + + /// Returns the list of shellwords calculated from the input string. + pub fn words(&self) -> &[Cow<'a, str>] { + &self.words + } + + /// Returns a list of strings which correspond to [`Self::words`] but represent the original + /// text in the input string - including escape characters - without separating whitespace. + pub fn parts(&self) -> &[&'a str] { + &self.parts } } @@ -508,202 +201,114 @@ mod test { use super::*; #[test] - fn base() { + #[cfg(windows)] + fn test_normal() { let input = r#":o single_word twó wörds \three\ \"with\ escaping\\"#; let shellwords = Shellwords::from(input); - let args = vec![ - "single_word", - "twó", - "wörds", - r"\three\", - r#"\"with\"#, - r"escaping\\", + let result = shellwords.words().to_vec(); + let expected = vec![ + Cow::from(":o"), + Cow::from("single_word"), + Cow::from("twó"), + Cow::from("wörds"), + Cow::from("\\three\\"), + Cow::from("\\"), + Cow::from("with\\ escaping\\\\"), ]; - - assert_eq!(":o", shellwords.command()); - assert_eq!(args, shellwords.args().collect::<Vec<_>>()); - } - - #[test] - fn should_have_empty_args() { - let shellwords = Shellwords::from(":quit"); - assert!( - shellwords.args().is_empty(), - "args: `{}`", - shellwords.args().next().unwrap() - ); - assert!(shellwords.args().next().is_none()); - } - - #[test] - fn should_return_empty_command() { - let shellwords = Shellwords::from(" "); - assert!(shellwords.command().is_empty()); - } - - #[test] - fn should_support_unicode_args() { - assert_eq!( - Shellwords::from(":sh echo 𒀀").args().collect::<Vec<_>>(), - &["echo", "𒀀"] - ); - assert_eq!( - Shellwords::from(":sh echo 𒀀 hello world𒀀") - .args() - .collect::<Vec<_>>(), - &["echo", "𒀀", "hello", "world𒀀"] - ); - } - - #[test] - fn should_preserve_quote_if_last_argument() { - let sh = Shellwords::from(r#":read "file with space.txt"""#); - let mut args = sh.args(); - assert_eq!("file with space.txt", args.next().unwrap()); - assert_eq!(r#"""#, args.next().unwrap()); - } - - #[test] - fn should_return_rest_of_non_closed_quote_as_one_argument() { - let sh = Shellwords::from(r":rename 'should be one \'argument"); - assert_eq!(r"should be one \'argument", sh.args().next().unwrap()); - } - - #[test] - fn should_respect_escaped_quote_in_what_looks_like_non_closed_arg() { - let sh = Shellwords::from(r":rename 'should be one \\'argument"); - let mut args = sh.args(); - assert_eq!(r"should be one \\", args.next().unwrap()); - assert_eq!(r"argument", args.next().unwrap()); - } - - #[test] - fn should_split_args() { - assert_eq!(Shellwords::from(":o a").args().collect::<Vec<_>>(), &["a"]); - assert_eq!( - Shellwords::from(":o a\\ ").args().collect::<Vec<_>>(), - &["a\\"] - ); - } - - #[test] - fn should_parse_args_even_with_leading_whitespace() { - // Three spaces - assert_eq!( - Shellwords::from(":o a").args().collect::<Vec<_>>(), - &["a"] - ); + // TODO test is_owned and is_borrowed, once they get stabilized. + assert_eq!(expected, result); } #[test] - fn should_peek_next_arg_and_not_consume() { - let mut args = Shellwords::from(":o a").args(); - - assert_eq!(Some("a"), args.peek()); - assert_eq!(Some("a"), args.next()); - assert_eq!(None, args.next()); + #[cfg(unix)] + fn test_normal() { + let input = r#":o single_word twó wörds \three\ \"with\ escaping\\"#; + let shellwords = Shellwords::from(input); + let result = shellwords.words().to_vec(); + let expected = vec![ + Cow::from(":o"), + Cow::from("single_word"), + Cow::from("twó"), + Cow::from("wörds"), + Cow::from(r#"three "with escaping\"#), + ]; + // TODO test is_owned and is_borrowed, once they get stabilized. + assert_eq!(expected, result); } #[test] - fn should_parse_single_quotes_while_respecting_escapes() { + #[cfg(unix)] + fn test_quoted() { let quoted = r#":o 'single_word' 'twó wörds' '' ' ''\three\' \"with\ escaping\\' 'quote incomplete"#; let shellwords = Shellwords::from(quoted); - let result = shellwords.args().collect::<Vec<_>>(); + let result = shellwords.words().to_vec(); let expected = vec![ - "single_word", - "twó wörds", - "", - " ", - r#"\three\' \"with\ escaping\\"#, - "quote incomplete", + Cow::from(":o"), + Cow::from("single_word"), + Cow::from("twó wörds"), + Cow::from(r#"three' "with escaping\"#), + Cow::from("quote incomplete"), ]; assert_eq!(expected, result); } #[test] - fn should_parse_double_quotes_while_respecting_escapes() { + #[cfg(unix)] + fn test_dquoted() { let dquoted = r#":o "single_word" "twó wörds" "" " ""\three\' \"with\ escaping\\" "dquote incomplete"#; let shellwords = Shellwords::from(dquoted); - let result = shellwords.args().collect::<Vec<_>>(); + let result = shellwords.words().to_vec(); let expected = vec![ - "single_word", - "twó wörds", - "", - " ", - r#"\three\' \"with\ escaping\\"#, - "dquote incomplete", + Cow::from(":o"), + Cow::from("single_word"), + Cow::from("twó wörds"), + Cow::from(r#"three' "with escaping\"#), + Cow::from("dquote incomplete"), ]; assert_eq!(expected, result); } #[test] - fn should_respect_escapes_with_mixed_quotes() { + #[cfg(unix)] + fn test_mixed() { let dquoted = r#":o single_word 'twó wörds' "\three\' \"with\ escaping\\""no space before"'and after' $#%^@ "%^&(%^" ')(*&^%''a\\\\\b' '"#; let shellwords = Shellwords::from(dquoted); - let result = shellwords.args().collect::<Vec<_>>(); + let result = shellwords.words().to_vec(); let expected = vec![ - "single_word", - "twó wörds", - r#"\three\' \"with\ escaping\\"#, - "no space before", - "and after", - "$#%^@", - "%^&(%^", - r")(*&^%", - r"a\\\\\b", - // Last ' is important, as if the user input an accidental quote at the end, this should be checked in - // commands where there should only be one input and return an error rather than silently succeed. - "'", + Cow::from(":o"), + Cow::from("single_word"), + Cow::from("twó wörds"), + Cow::from("three' \"with escaping\\"), + Cow::from("no space before"), + Cow::from("and after"), + Cow::from("$#%^@"), + Cow::from("%^&(%^"), + Cow::from(")(*&^%"), + Cow::from(r#"a\\b"#), + //last ' just changes to quoted but since we dont have anything after it, it should be ignored ]; assert_eq!(expected, result); } #[test] - fn should_return_rest() { - let input = r#":set statusline.center ["file-type","file-encoding"]"#; + fn test_lists() { + let input = + r#":set statusline.center ["file-type","file-encoding"] '["list", "in", "quotes"]'"#; let shellwords = Shellwords::from(input); - let mut args = shellwords.args(); - assert_eq!(":set", shellwords.command()); - assert_eq!(Some("statusline.center"), args.next()); - assert_eq!(r#"["file-type","file-encoding"]"#, args.rest()); - } - - #[test] - fn should_return_no_args() { - let mut args = Args::parse(""); - assert!(args.next().is_none()); - assert!(args.is_empty()); - assert!(args.arg_count() == 0); - } - - #[test] - fn should_leave_escaped_quotes() { - let input = r#"\" \` \' \"with \'with \`with"#; - let result = Args::parse(input).collect::<Vec<_>>(); - assert_eq!(r#"\""#, result[0]); - assert_eq!(r"\`", result[1]); - assert_eq!(r"\'", result[2]); - assert_eq!(r#"\"with"#, result[3]); - assert_eq!(r"\'with", result[4]); - assert_eq!(r"\`with", result[5]); - } - - #[test] - fn should_leave_literal_newline_alone() { - let result = Args::parse(r"\n").collect::<Vec<_>>(); - assert_eq!(r"\n", result[0]); - } - - #[test] - fn should_leave_literal_unicode_alone() { - let result = Args::parse(r"\u{C}").collect::<Vec<_>>(); - assert_eq!(r"\u{C}", result[0]); + let result = shellwords.words().to_vec(); + let expected = vec![ + Cow::from(":set"), + Cow::from("statusline.center"), + Cow::from(r#"["file-type","file-encoding"]"#), + Cow::from(r#"["list", "in", "quotes"]"#), + ]; + assert_eq!(expected, result); } #[test] #[cfg(unix)] - fn should_escape_unix() { + fn test_escaping_unix() { assert_eq!(escape("foobar".into()), Cow::Borrowed("foobar")); assert_eq!(escape("foo bar".into()), Cow::Borrowed("foo\\ bar")); assert_eq!(escape("foo\tbar".into()), Cow::Borrowed("foo\\\tbar")); @@ -711,79 +316,35 @@ mod test { #[test] #[cfg(windows)] - fn should_escape_windows() { + fn test_escaping_windows() { assert_eq!(escape("foobar".into()), Cow::Borrowed("foobar")); assert_eq!(escape("foo bar".into()), Cow::Borrowed("\"foo bar\"")); } #[test] - fn should_unescape_newline() { - let unescaped = unescape("hello\\nworld"); - assert_eq!("hello\nworld", unescaped); - } - - #[test] - fn should_unescape_tab() { - let unescaped = unescape("hello\\tworld"); - assert_eq!("hello\tworld", unescaped); - } - - #[test] - fn should_unescape_unicode() { - let unescaped = unescape("hello\\u{1f929}world"); - assert_eq!("hello\u{1f929}world", unescaped, "char: 🤩 "); - assert_eq!("hello🤩world", unescaped); - } - - #[test] - fn should_return_original_input_due_to_bad_unicode() { - let unescaped = unescape("hello\\u{999999999}world"); - assert_eq!("hello\\u{999999999}world", unescaped); - } - - #[test] - fn should_not_unescape_slash() { - let unescaped = unescape(r"hello\\world"); - assert_eq!(r"hello\\world", unescaped); - - let unescaped = unescape(r"hello\\\\world"); - assert_eq!(r"hello\\\\world", unescaped); - } - - #[test] - fn should_not_unescape_slash_single_quote() { - let unescaped = unescape("\\'"); - assert_eq!(r"\'", unescaped); - } - - #[test] - fn should_not_unescape_slash_double_quote() { - let unescaped = unescape("\\\""); - assert_eq!(r#"\""#, unescaped); - } - - #[test] - fn should_not_change_anything() { - let unescaped = unescape("'"); - assert_eq!("'", unescaped); - let unescaped = unescape(r#"""#); - assert_eq!(r#"""#, unescaped); + #[cfg(unix)] + fn test_parts() { + assert_eq!(Shellwords::from(":o a").parts(), &[":o", "a"]); + assert_eq!(Shellwords::from(":o a\\ ").parts(), &[":o", "a\\ "]); } #[test] - fn should_only_unescape_newline_not_slash_single_quote() { - let unescaped = unescape("\\n\'"); - assert_eq!("\n'", unescaped); - let unescaped = unescape("\\n\\'"); - assert_eq!("\n\\'", unescaped); + #[cfg(windows)] + fn test_parts() { + assert_eq!(Shellwords::from(":o a").parts(), &[":o", "a"]); + assert_eq!(Shellwords::from(":o a\\ ").parts(), &[":o", "a\\"]); } #[test] - fn should_unescape_args() { - // 1f929: 🤩 - let args = Args::parse(r#"'hello\u{1f929} world' '["hello", "\u{1f929}", "world"]'"#) - .collect::<Vec<_>>(); - assert_eq!("hello\u{1f929} world", unescape(args[0])); - assert_eq!(r#"["hello", "🤩", "world"]"#, unescape(args[1])); + fn test_multibyte_at_end() { + assert_eq!(Shellwords::from("𒀀").parts(), &["𒀀"]); + assert_eq!( + Shellwords::from(":sh echo 𒀀").parts(), + &[":sh", "echo", "𒀀"] + ); + assert_eq!( + Shellwords::from(":sh echo 𒀀 hello world𒀀").parts(), + &[":sh", "echo", "𒀀", "hello", "world𒀀"] + ); } } |