helix

Unnamed repository; edit this file 'description' to name the repository.

master 24Branches 30Tags

Clone

HTTPS

SSH

Open with VS Code

Diffstat (limited to 'helix-core/src/shellwords.rs')

-rw-r--r--

helix-core/src/shellwords.rs

945

1 files changed, 692 insertions, 253 deletions

diff --git a/helix-core/src/shellwords.rs b/helix-core/src/shellwords.rs
index 9d873c36..edfd9ad1 100644
--- a/helix-core/src/shellwords.rs
+++ b/helix-core/src/shellwords.rs

@@ -1,6 +1,358 @@

+use smartstring::{LazyCompact, SmartString};

use std::borrow::Cow;

+/// A utility for parsing shell-like command lines.

+///

+/// The `Shellwords` struct takes an input string and allows extracting the command and its arguments.

+///

+/// # Features

+///

+/// - Parses command and arguments from input strings.

+/// - Supports single, double, and backtick quoted arguments.

+/// - Respects backslash escaping in arguments.

+///

+/// # Examples

+///

+/// Basic usage:

+///

+/// ```

+/// # use helix_core::shellwords::Shellwords;

+/// let shellwords = Shellwords::from(":o helix-core/src/shellwords.rs");

+/// assert_eq!(":o", shellwords.command());

+/// assert_eq!("helix-core/src/shellwords.rs", shellwords.args().next().unwrap());

+/// ```

+///

+/// Empty command:

+///

+/// ```

+/// # use helix_core::shellwords::Shellwords;

+/// let shellwords = Shellwords::from(" ");

+/// assert!(shellwords.command().is_empty());

+/// ```

+///

+/// # Iterator

+///

+/// The `args` method returns a non-allocating iterator, `Args`, over the arguments of the input.

+///

+/// ```

+/// # use helix_core::shellwords::Shellwords;

+/// let shellwords = Shellwords::from(":o a b c");

+/// let mut args = shellwords.args();

+/// assert_eq!(Some("a"), args.next());

+/// assert_eq!(Some("b"), args.next());

+/// assert_eq!(Some("c"), args.next());

+/// assert_eq!(None, args.next());

+/// ```

+#[derive(Clone, Copy)]

+pub struct Shellwords<'a> {

+ input: &'a str,

+impl<'a> From<&'a str> for Shellwords<'a> {

+ #[inline]

+ fn from(input: &'a str) -> Self {

+ Self { input }

+ }

+impl<'a> From<&'a String> for Shellwords<'a> {

+ #[inline]

+ fn from(input: &'a String) -> Self {

+ Self { input }

+ }

+impl<'a> From<&'a Cow<'a, str>> for Shellwords<'a> {

+ #[inline]

+ fn from(input: &'a Cow<str>) -> Self {

+ Self { input }

+ }

+impl<'a> Shellwords<'a> {

+ #[inline]

+ #[must_use]

+ pub fn command(&self) -> &str {

+ self.input

+ .split_once(' ')

+ .map_or(self.input, |(command, _)| command)

+ }

+ #[inline]

+ #[must_use]

+ pub fn args(&self) -> Args<'a> {

+ let args = self.input.split_once(' ').map_or("", |(_, args)| args);

+ Args::parse(args)

+ }

+ #[inline]

+ pub fn input(&self) -> &str {

+ self.input

+ }

+ /// Checks that the input ends with a space character, regardless of whether that space is preceded by a backslash.

+ ///

+ /// # Examples

+ ///

+ /// ```rust

+ /// # use helix_core::shellwords::Shellwords;

+ /// assert_eq!(Shellwords::from(" ").ends_with_whitespace(), true);

+ /// assert_eq!(Shellwords::from(":open ").ends_with_whitespace(), true);

+ /// assert_eq!(Shellwords::from(":open foo.txt ").ends_with_whitespace(), true);

+ /// assert_eq!(Shellwords::from(":open").ends_with_whitespace(), false);

+ /// assert_eq!(Shellwords::from(":open a\\ ").ends_with_whitespace(), true);

+ /// assert_eq!(Shellwords::from(":open a\\ b.txt").ends_with_whitespace(), false);

+ /// ```

+ #[inline]

+ pub fn ends_with_whitespace(&self) -> bool {

+ self.input.ends_with(' ')

+ }

+/// An iterator over an input string which yields arguments.

+///

+/// Splits on whitespace, but respects quoted substrings (using double quotes, single quotes, or backticks).

+#[derive(Debug, Clone)]

+pub struct Args<'a> {

+ input: &'a str,

+ idx: usize,

+ start: usize,

+impl<'a> Args<'a> {

+ #[inline]

+ fn parse(input: &'a str) -> Self {

+ Self {

+ input,

+ idx: 0,

+ start: 0,

+ }

+ #[inline]

+ pub fn is_empty(&self) -> bool {

+ self.input.is_empty()

+ }

+ /// Returns the args exactly as input.

+ ///

+ /// # Examples

+ /// ```

+ /// # use helix_core::shellwords::Args;

+ /// let args = Args::from(r#"sed -n "s/test t/not /p""#);

+ /// assert_eq!(r#"sed -n "s/test t/not /p""#, args.raw());

+ ///

+ /// let args = Args::from(r#"cat "file name with space.txt""#);

+ /// assert_eq!(r#"cat "file name with space.txt""#, args.raw());

+ /// ```

+ #[inline]

+ pub fn raw(&self) -> &str {

+ self.input

+ }

+ /// Returns the remainder of the args exactly as input.

+ ///

+ /// # Examples

+ /// ```

+ /// # use helix_core::shellwords::Args;

+ /// let mut args = Args::from(r#"sed -n "s/test t/not /p""#);

+ /// assert_eq!("sed", args.next().unwrap());

+ /// assert_eq!(r#"-n "s/test t/not /p""#, args.rest());

+ /// ```

+ ///

+ /// Never calling `next` and using `rest` is functionally equivalent to calling `raw`.

+ #[inline]

+ pub fn rest(&self) -> &str {

+ &self.input[self.idx..]

+ }

+ /// Returns a reference to the `next()` value without advancing the iterator.

+ ///

+ /// Unlike `std::iter::Peekable::peek` this does not return a double reference, `&&str`

+ /// but a normal `&str`.

+ #[inline]

+ #[must_use]

+ pub fn peek(&self) -> Option<&str> {

+ self.clone().next()

+ }

+ /// Returns the total number of arguments given in a command.

+ ///

+ /// This count is aware of all parsing rules for `Args`.

+ #[must_use]

+ pub fn arg_count(&self) -> usize {

+ Self {

+ input: self.input,

+ idx: 0,

+ start: 0,

+ }

+ .fold(0, |acc, _| acc + 1)

+ }

+ /// Convenient function to return an empty `Args`.

+ ///

+ /// When used in any iteration, it will always return `None`.

+ #[inline(always)]

+ pub const fn empty() -> Self {

+ Self {

+ input: "",

+ idx: 0,

+ start: 0,

+ }

+impl<'a> Iterator for Args<'a> {

+ type Item = &'a str;

+ #[inline]

+ #[allow(clippy::too_many_lines)]

+ fn next(&mut self) -> Option<Self::Item> {

+ // The parser loop is split into three main blocks to handle different types of input processing:

+ //

+ // 1. Quote block:

+ // - Detects an unescaped quote character, either starting an in-quote scan or, if already in-quote,

+ // locating the closing quote to return the quoted argument.

+ // - Handles cases where mismatched quotes are ignored and when quotes appear as the last character.

+ //

+ // 2. Whitespace block:

+ // - Handles arguments separated by whitespace (space or tab), respecting quotes so quoted phrases

+ // remain grouped together.

+ // - Splits arguments by whitespace when outside of a quoted context and updates boundaries accordingly.

+ //

+ // 3. Catch-all block:

+ // - Handles any other character, updating the `is_escaped` status if a backslash is encountered,

+ // advancing the loop to the next character.

+ let bytes = self.input.as_bytes();

+ let mut in_quotes = false;

+ let mut quote = b'\0';

+ let mut is_escaped = false;

+ while self.idx < bytes.len() {

+ match bytes[self.idx] {

+ b'"' | b'\'' | b'`' if !is_escaped => {

+ if in_quotes {

+ // Found the proper closing quote, so can return the arg and advance the state along.

+ if bytes[self.idx] == quote {

+ let arg = Some(&self.input[self.start..self.idx]);

+ self.idx += 1;

+ self.start = self.idx;

+ return arg;

+ }

+ // If quote does not match the type of the opening quote, then do nothing and advance.

+ self.idx += 1;

+ } else if self.idx == bytes.len() - 1 {

+ // Special case for when a quote is the last input in args.

+ // e.g: :read "file with space.txt""

+ // This preserves the quote as an arg:

+ // - `file with space`

+ // - `"`

+ let arg = Some(&self.input[self.idx..]);

+ self.idx = bytes.len();

+ self.start = bytes.len();

+ return arg;

+ } else {

+ // Found opening quote.

+ in_quotes = true;

+ // Kind of quote that was found.

+ quote = bytes[self.idx];

+ if self.start < self.idx {

+ // When part of the input ends in a quote, `one two" three`, this properly returns the `two`

+ // before advancing to the quoted arg for the next iteration:

+ // - `one` <- previous arg

+ // - `two` <- this step

+ // - ` three` <- next arg

+ let arg = Some(&self.input[self.start..self.idx]);

+ self.idx += 1;

+ self.start = self.idx;

+ return arg;

+ }

+ // Advance after quote.

+ self.idx += 1;

+ // Exclude quote from arg output.

+ self.start = self.idx;

+ }

+ b' ' | b'\t' if !in_quotes => {

+ // Found a true whitespace separator that wasn't inside quotes.

+ // Check if there is anything to return or if it's just advancing over whitespace.

+ // `start` will only be less than `idx` when there is something to return.

+ if self.start < self.idx {

+ let arg = Some(&self.input[self.start..self.idx]);

+ self.idx += 1;

+ self.start = self.idx;

+ return arg;

+ }

+ // Advance beyond the whitespace.

+ self.idx += 1;

+ // This is where `start` will be set to the start of an arg boundary, either encountering a word

+ // boundary or a quote boundary. If it finds a quote, then it will be advanced again in that part

+ // of the code. Either way, all that remains for the check above will be to return a full arg.

+ self.start = self.idx;

+ }

+ _ => {

+ // If previous loop didn't find any backslash and was already escaped it will change to false

+ // as the backslash chain was broken.

+ //

+ // If the previous loop had no backslash escape, and found one this iteration, then it's the start

+ // of an escape chain.

+ is_escaped = match (is_escaped, bytes[self.idx]) {

+ (false, b'\\') => true, // Set `is_escaped` if the current byte is a backslash

+ _ => false, //Reset `is_escaped` if it was true, otherwise keep `is_escaped` as false

+ };

+ // Advance to next `char`.

+ self.idx += 1;

+ }

+ // Fallback that catches when the loop would have exited but failed to return the arg between start and the end.

+ if self.start < bytes.len() {

+ let arg = Some(&self.input[self.start..]);

+ self.start = bytes.len();

+ return arg;

+ }

+ // All args have been parsed.

+ None

+ }

+ fn count(self) -> usize

+ where

+ Self: Sized,

+ {

+ panic!("use `arg_count` instead to get the number of arguments.");

+ }

+impl<'a> From<&'a String> for Args<'a> {

+ fn from(args: &'a String) -> Self {

+ Args::parse(args)

+ }

+impl<'a> From<&'a str> for Args<'a> {

+ fn from(args: &'a str) -> Self {

+ Args::parse(args)

+ }

+impl<'a> From<&'a Cow<'_, str>> for Args<'a> {

+ fn from(args: &'a Cow<str>) -> Self {

+ Args::parse(args)

+ }

/// Auto escape for shellwords usage.

+#[inline]

+#[must_use]

pub fn escape(input: Cow<str>) -> Cow<str> {

if !input.chars().any(|x| x.is_ascii_whitespace()) {

input

@@ -13,186 +365,141 @@ pub fn escape(input: Cow<str>) -> Cow<str> {

buf

}))

} else {

- Cow::Owned(format!("\"{}\"", input))

+ Cow::Owned(format!("\"{input}\""))

}

-enum State {

- OnWhitespace,

- Unquoted,

- UnquotedEscaped,

- Quoted,

- QuoteEscaped,

- Dquoted,

- DquoteEscaped,

+/// Unescapes a string, converting escape sequences into their literal characters.

+///

+/// This function handles the following escape sequences:

+/// - `\\n` is converted to `\n` (newline)

+/// - `\\t` is converted to `\t` (tab)

+/// - `\\u{...}` is converted to the corresponding Unicode character

+///

+/// Other escape sequences, such as `\\` followed by any character not listed above, will remain unchanged.

+///

+/// If input is invalid, for example if there is invalid unicode, \u{999999999}, it will return the input as is.

+///

+/// # Examples

+///

+/// Basic usage:

+///

+/// ```

+/// # use helix_core::shellwords::unescape;

+/// let unescaped = unescape("hello\\nworld");

+/// assert_eq!("hello\nworld", unescaped);

+/// ```

+///

+/// Unescaping tabs:

+///

+/// ```

+/// # use helix_core::shellwords::unescape;

+/// let unescaped = unescape("hello\\tworld");

+/// assert_eq!("hello\tworld", unescaped);

+/// ```

+///

+/// Unescaping Unicode characters:

+///

+/// ```

+/// # use helix_core::shellwords::unescape;

+/// let unescaped = unescape("hello\\u{1f929}world");

+/// assert_eq!("hello\u{1f929}world", unescaped);

+/// assert_eq!("hello🤩world", unescaped);

+/// ```

+///

+/// Handling backslashes:

+///

+/// ```

+/// # use helix_core::shellwords::unescape;

+/// let unescaped = unescape(r"hello\\world");

+/// assert_eq!(r"hello\\world", unescaped);

+///

+/// let unescaped = unescape(r"hello\\\\world");

+/// assert_eq!(r"hello\\\\world", unescaped);

+/// ```

+///

+/// # Note

+///

+/// This function is opinionated, with a clear purpose of handling user input, not a general or generic unescaping utility, and does not unescape sequences like `\\'` or `\\\"`, leaving them as is.

+#[inline]

+#[must_use]

+pub fn unescape(input: &str) -> Cow<'_, str> {

+ enum State {

+ Normal,

+ Escaped,

+ Unicode,

+ }

-pub struct Shellwords<'a> {

- state: State,

- /// Shellwords where whitespace and escapes has been resolved.

- words: Vec<Cow<'a, str>>,

- /// The parts of the input that are divided into shellwords. This can be

- /// used to retrieve the original text for a given word by looking up the

- /// same index in the Vec as the word in `words`.

- parts: Vec<&'a str>,

+ let mut unescaped = String::new();

+ let mut state = State::Normal;

+ let mut is_escaped = false;

+ // NOTE: Max unicode code point is U+10FFFF for a maximum of 6 chars

+ let mut unicode = SmartString::<LazyCompact>::new_const();

-impl<'a> From<&'a str> for Shellwords<'a> {

- fn from(input: &'a str) -> Self {

- use State::*;

- let mut state = Unquoted;

- let mut words = Vec::new();

- let mut parts = Vec::new();

- let mut escaped = String::with_capacity(input.len());

- let mut part_start = 0;

- let mut unescaped_start = 0;

- let mut end = 0;

- for (i, c) in input.char_indices() {

- state = match state {

- OnWhitespace => match c {

- '"' => {

- end = i;

- Dquoted

- }

- '\'' => {

- end = i;

- Quoted

- }

- '\\' => {

- if cfg!(unix) {

- escaped.push_str(&input[unescaped_start..i]);

- unescaped_start = i + 1;

- UnquotedEscaped

- } else {

- OnWhitespace

- }

- c if c.is_ascii_whitespace() => {

- end = i;

- OnWhitespace

- }

- _ => Unquoted,

- },

- Unquoted => match c {

- '\\' => {

- if cfg!(unix) {

- escaped.push_str(&input[unescaped_start..i]);

- unescaped_start = i + 1;

- UnquotedEscaped

- } else {

- Unquoted

- }

- c if c.is_ascii_whitespace() => {

- end = i;

- OnWhitespace

- }

- _ => Unquoted,

- },

- UnquotedEscaped => Unquoted,

- Quoted => match c {

- '\\' => {

- if cfg!(unix) {

- escaped.push_str(&input[unescaped_start..i]);

- unescaped_start = i + 1;

- QuoteEscaped

- } else {

- Quoted

+ for (idx, ch) in input.char_indices() {

+ match state {

+ State::Normal => match ch {

+ '\\' => {

+ if !is_escaped {

+ // PERF: As not every input will contain an escape, we use `String::new` as that has no initial

+ // allocation. If an escape is found, then we reserve capacity equal to the length of the input,

+ // as the new unescaped string will be no longer than that.

+ unescaped.reserve(input.len());

+ if idx > 0 {

+ // First time finding an escape, so all prior chars can be added to the new unescaped

+ // version if it's not the very first char found.

+ unescaped.push_str(&input[0..idx]);

}

- '\'' => {

- end = i;

- OnWhitespace

- }

- _ => Quoted,

- },

- QuoteEscaped => Quoted,

- Dquoted => match c {

- '\\' => {

- if cfg!(unix) {

- escaped.push_str(&input[unescaped_start..i]);

- unescaped_start = i + 1;

- DquoteEscaped

- } else {

- Dquoted

- }

+ state = State::Escaped;

+ is_escaped = true;

+ }

+ _ => {

+ if is_escaped {

+ unescaped.push(ch);

}

- '"' => {

- end = i;

- OnWhitespace

+ }

+ },

+ State::Escaped => {

+ match ch {

+ 'n' => unescaped.push('\n'),

+ 't' => unescaped.push('\t'),

+ 'u' => {

+ state = State::Unicode;

+ continue;

}

- _ => Dquoted,

- },

- DquoteEscaped => Dquoted,

- };

- let c_len = c.len_utf8();

- if i == input.len() - c_len && end == 0 {

- end = i + c_len;

- }

- if end > 0 {

- let esc_trim = escaped.trim();

- let inp = &input[unescaped_start..end];

- if !(esc_trim.is_empty() && inp.trim().is_empty()) {

- if esc_trim.is_empty() {

- words.push(inp.into());

- parts.push(inp);

- } else {

- words.push([escaped, inp.into()].concat().into());

- parts.push(&input[part_start..end]);

- escaped = "".to_string();

+ // Uncomment if you want to handle '\\' to '\'

+ // '\\' => unescaped.push('\\'),

+ _ => {

+ unescaped.push('\\');

+ unescaped.push(ch);

}

- unescaped_start = i + 1;

- part_start = i + 1;

- end = 0;

+ state = State::Normal;

}

+ State::Unicode => match ch {

+ '{' => continue,

+ '}' => {

+ let Ok(digit) = u32::from_str_radix(&unicode, 16) else {

+ return input.into();

+ };

+ let Some(point) = char::from_u32(digit) else {

+ return input.into();

+ };

+ unescaped.push(point);

+ // Might be more unicode to unescape so clear for reuse.

+ unicode.clear();

+ state = State::Normal;

+ }

+ _ => unicode.push(ch),

+ },

}

- debug_assert!(words.len() == parts.len());

- Self {

- state,

- words,

- parts,

- }

-impl<'a> Shellwords<'a> {

- /// Checks that the input ends with a whitespace character which is not escaped.

- ///

- /// # Examples

- ///

- /// ```rust

- /// use helix_core::shellwords::Shellwords;

- /// assert_eq!(Shellwords::from(" ").ends_with_whitespace(), true);

- /// assert_eq!(Shellwords::from(":open ").ends_with_whitespace(), true);

- /// assert_eq!(Shellwords::from(":open foo.txt ").ends_with_whitespace(), true);

- /// assert_eq!(Shellwords::from(":open").ends_with_whitespace(), false);

- /// #[cfg(unix)]

- /// assert_eq!(Shellwords::from(":open a\\ ").ends_with_whitespace(), false);

- /// #[cfg(unix)]

- /// assert_eq!(Shellwords::from(":open a\\ b.txt").ends_with_whitespace(), false);

- /// ```

- pub fn ends_with_whitespace(&self) -> bool {

- matches!(self.state, State::OnWhitespace)

- }

- /// Returns the list of shellwords calculated from the input string.

- pub fn words(&self) -> &[Cow<'a, str>] {

- &self.words

}

- /// Returns a list of strings which correspond to [`Self::words`] but represent the original

- /// text in the input string - including escape characters - without separating whitespace.

- pub fn parts(&self) -> &[&'a str] {

- &self.parts

+ if is_escaped {

+ unescaped.into()

+ } else {

+ input.into()

}

@@ -201,114 +508,202 @@ mod test {

use super::*;

#[test]

- #[cfg(windows)]

- fn test_normal() {

+ fn base() {

let input = r#":o single_word twó wörds \three\ \"with\ escaping\\"#;

let shellwords = Shellwords::from(input);

- let result = shellwords.words().to_vec();

- let expected = vec![

- Cow::from(":o"),

- Cow::from("single_word"),

- Cow::from("twó"),

- Cow::from("wörds"),

- Cow::from("\\three\\"),

- Cow::from("\\"),

- Cow::from("with\\ escaping\\\\"),

+ let args = vec![

+ "single_word",

+ "twó",

+ "wörds",

+ r"\three\",

+ r#"\"with\"#,

+ r"escaping\\",

];

- // TODO test is_owned and is_borrowed, once they get stabilized.

- assert_eq!(expected, result);

+ assert_eq!(":o", shellwords.command());

+ assert_eq!(args, shellwords.args().collect::<Vec<_>>());

}

#[test]

- #[cfg(unix)]

- fn test_normal() {

- let input = r#":o single_word twó wörds \three\ \"with\ escaping\\"#;

- let shellwords = Shellwords::from(input);

- let result = shellwords.words().to_vec();

- let expected = vec![

- Cow::from(":o"),

- Cow::from("single_word"),

- Cow::from("twó"),

- Cow::from("wörds"),

- Cow::from(r#"three "with escaping\"#),

- ];

- // TODO test is_owned and is_borrowed, once they get stabilized.

- assert_eq!(expected, result);

+ fn should_have_empty_args() {

+ let shellwords = Shellwords::from(":quit");

+ assert!(

+ shellwords.args().is_empty(),

+ "args: `{}`",

+ shellwords.args().next().unwrap()

+ );

+ assert!(shellwords.args().next().is_none());

}

#[test]

- #[cfg(unix)]

- fn test_quoted() {

+ fn should_return_empty_command() {

+ let shellwords = Shellwords::from(" ");

+ assert!(shellwords.command().is_empty());

+ }

+ #[test]

+ fn should_support_unicode_args() {

+ assert_eq!(

+ Shellwords::from(":sh echo 𒀀").args().collect::<Vec<_>>(),

+ &["echo", "𒀀"]

+ );

+ assert_eq!(

+ Shellwords::from(":sh echo 𒀀 hello world𒀀")

+ .args()

+ .collect::<Vec<_>>(),

+ &["echo", "𒀀", "hello", "world𒀀"]

+ );

+ }

+ #[test]

+ fn should_preserve_quote_if_last_argument() {

+ let sh = Shellwords::from(r#":read "file with space.txt"""#);

+ let mut args = sh.args();

+ assert_eq!("file with space.txt", args.next().unwrap());

+ assert_eq!(r#"""#, args.next().unwrap());

+ }

+ #[test]

+ fn should_return_rest_of_non_closed_quote_as_one_argument() {

+ let sh = Shellwords::from(r":rename 'should be one \'argument");

+ assert_eq!(r"should be one \'argument", sh.args().next().unwrap());

+ }

+ #[test]

+ fn should_respect_escaped_quote_in_what_looks_like_non_closed_arg() {

+ let sh = Shellwords::from(r":rename 'should be one \\'argument");

+ let mut args = sh.args();

+ assert_eq!(r"should be one \\", args.next().unwrap());

+ assert_eq!(r"argument", args.next().unwrap());

+ }

+ #[test]

+ fn should_split_args() {

+ assert_eq!(Shellwords::from(":o a").args().collect::<Vec<_>>(), &["a"]);

+ assert_eq!(

+ Shellwords::from(":o a\\ ").args().collect::<Vec<_>>(),

+ &["a\\"]

+ );

+ }

+ #[test]

+ fn should_parse_args_even_with_leading_whitespace() {

+ // Three spaces

+ assert_eq!(

+ Shellwords::from(":o a").args().collect::<Vec<_>>(),

+ &["a"]

+ );

+ }

+ #[test]

+ fn should_peek_next_arg_and_not_consume() {

+ let mut args = Shellwords::from(":o a").args();

+ assert_eq!(Some("a"), args.peek());

+ assert_eq!(Some("a"), args.next());

+ assert_eq!(None, args.next());

+ }

+ #[test]

+ fn should_parse_single_quotes_while_respecting_escapes() {

let quoted =

r#":o 'single_word' 'twó wörds' '' ' ''\three\' \"with\ escaping\\' 'quote incomplete"#;

let shellwords = Shellwords::from(quoted);

- let result = shellwords.words().to_vec();

+ let result = shellwords.args().collect::<Vec<_>>();

let expected = vec![

- Cow::from(":o"),

- Cow::from("single_word"),

- Cow::from("twó wörds"),

- Cow::from(r#"three' "with escaping\"#),

- Cow::from("quote incomplete"),

+ "single_word",

+ "twó wörds",

+ "",

+ " ",

+ r#"\three\' \"with\ escaping\\"#,

+ "quote incomplete",

];

assert_eq!(expected, result);

}

#[test]

- #[cfg(unix)]

- fn test_dquoted() {

+ fn should_parse_double_quotes_while_respecting_escapes() {

let dquoted = r#":o "single_word" "twó wörds" "" " ""\three\' \"with\ escaping\\" "dquote incomplete"#;

let shellwords = Shellwords::from(dquoted);

- let result = shellwords.words().to_vec();

+ let result = shellwords.args().collect::<Vec<_>>();

let expected = vec![

- Cow::from(":o"),

- Cow::from("single_word"),

- Cow::from("twó wörds"),

- Cow::from(r#"three' "with escaping\"#),

- Cow::from("dquote incomplete"),

+ "single_word",

+ "twó wörds",

+ "",

+ " ",

+ r#"\three\' \"with\ escaping\\"#,

+ "dquote incomplete",

];

assert_eq!(expected, result);

}

#[test]

- #[cfg(unix)]

- fn test_mixed() {

+ fn should_respect_escapes_with_mixed_quotes() {

let dquoted = r#":o single_word 'twó wörds' "\three\' \"with\ escaping\\""no space before"'and after' $#%^@ "%^&(%^" ')(*&^%''a\\\\\b' '"#;

let shellwords = Shellwords::from(dquoted);

- let result = shellwords.words().to_vec();

+ let result = shellwords.args().collect::<Vec<_>>();

let expected = vec![

- Cow::from(":o"),

- Cow::from("single_word"),

- Cow::from("twó wörds"),

- Cow::from("three' \"with escaping\\"),

- Cow::from("no space before"),

- Cow::from("and after"),

- Cow::from("$#%^@"),

- Cow::from("%^&(%^"),

- Cow::from(")(*&^%"),

- Cow::from(r#"a\\b"#),

- //last ' just changes to quoted but since we dont have anything after it, it should be ignored

+ "single_word",

+ "twó wörds",

+ r#"\three\' \"with\ escaping\\"#,

+ "no space before",

+ "and after",

+ "$#%^@",

+ "%^&(%^",

+ r")(*&^%",

+ r"a\\\\\b",

+ // Last ' is important, as if the user input an accidental quote at the end, this should be checked in

+ // commands where there should only be one input and return an error rather than silently succeed.

+ "'",

];

assert_eq!(expected, result);

}

#[test]

- fn test_lists() {

- let input =

- r#":set statusline.center ["file-type","file-encoding"] '["list", "in", "quotes"]'"#;

+ fn should_return_rest() {

+ let input = r#":set statusline.center ["file-type","file-encoding"]"#;

let shellwords = Shellwords::from(input);

- let result = shellwords.words().to_vec();

- let expected = vec![

- Cow::from(":set"),

- Cow::from("statusline.center"),

- Cow::from(r#"["file-type","file-encoding"]"#),

- Cow::from(r#"["list", "in", "quotes"]"#),

- ];

- assert_eq!(expected, result);

+ let mut args = shellwords.args();

+ assert_eq!(":set", shellwords.command());

+ assert_eq!(Some("statusline.center"), args.next());

+ assert_eq!(r#"["file-type","file-encoding"]"#, args.rest());

+ }

+ #[test]

+ fn should_return_no_args() {

+ let mut args = Args::parse("");

+ assert!(args.next().is_none());

+ assert!(args.is_empty());

+ assert!(args.arg_count() == 0);

+ }

+ #[test]

+ fn should_leave_escaped_quotes() {

+ let input = r#"\" \` \' \"with \'with \`with"#;

+ let result = Args::parse(input).collect::<Vec<_>>();

+ assert_eq!(r#"\""#, result[0]);

+ assert_eq!(r"\`", result[1]);

+ assert_eq!(r"\'", result[2]);

+ assert_eq!(r#"\"with"#, result[3]);

+ assert_eq!(r"\'with", result[4]);

+ assert_eq!(r"\`with", result[5]);

+ }

+ #[test]

+ fn should_leave_literal_newline_alone() {

+ let result = Args::parse(r"\n").collect::<Vec<_>>();

+ assert_eq!(r"\n", result[0]);

+ }

+ #[test]

+ fn should_leave_literal_unicode_alone() {

+ let result = Args::parse(r"\u{C}").collect::<Vec<_>>();

+ assert_eq!(r"\u{C}", result[0]);

}

#[test]

#[cfg(unix)]

- fn test_escaping_unix() {

+ fn should_escape_unix() {

assert_eq!(escape("foobar".into()), Cow::Borrowed("foobar"));

assert_eq!(escape("foo bar".into()), Cow::Borrowed("foo\\ bar"));

assert_eq!(escape("foo\tbar".into()), Cow::Borrowed("foo\\\tbar"));

@@ -316,35 +711,79 @@ mod test {

#[test]

#[cfg(windows)]

- fn test_escaping_windows() {

+ fn should_escape_windows() {

assert_eq!(escape("foobar".into()), Cow::Borrowed("foobar"));

assert_eq!(escape("foo bar".into()), Cow::Borrowed("\"foo bar\""));

}

#[test]

- #[cfg(unix)]

- fn test_parts() {

- assert_eq!(Shellwords::from(":o a").parts(), &[":o", "a"]);

- assert_eq!(Shellwords::from(":o a\\ ").parts(), &[":o", "a\\ "]);

+ fn should_unescape_newline() {

+ let unescaped = unescape("hello\\nworld");

+ assert_eq!("hello\nworld", unescaped);

}

#[test]

- #[cfg(windows)]

- fn test_parts() {

- assert_eq!(Shellwords::from(":o a").parts(), &[":o", "a"]);

- assert_eq!(Shellwords::from(":o a\\ ").parts(), &[":o", "a\\"]);

+ fn should_unescape_tab() {

+ let unescaped = unescape("hello\\tworld");

+ assert_eq!("hello\tworld", unescaped);

}

#[test]

- fn test_multibyte_at_end() {

- assert_eq!(Shellwords::from("𒀀").parts(), &["𒀀"]);

- assert_eq!(

- Shellwords::from(":sh echo 𒀀").parts(),

- &[":sh", "echo", "𒀀"]

- );

- assert_eq!(

- Shellwords::from(":sh echo 𒀀 hello world𒀀").parts(),

- &[":sh", "echo", "𒀀", "hello", "world𒀀"]

- );

+ fn should_unescape_unicode() {

+ let unescaped = unescape("hello\\u{1f929}world");

+ assert_eq!("hello\u{1f929}world", unescaped, "char: 🤩 ");

+ assert_eq!("hello🤩world", unescaped);

+ }

+ #[test]

+ fn should_return_original_input_due_to_bad_unicode() {

+ let unescaped = unescape("hello\\u{999999999}world");

+ assert_eq!("hello\\u{999999999}world", unescaped);

+ }

+ #[test]

+ fn should_not_unescape_slash() {

+ let unescaped = unescape(r"hello\\world");

+ assert_eq!(r"hello\\world", unescaped);

+ let unescaped = unescape(r"hello\\\\world");

+ assert_eq!(r"hello\\\\world", unescaped);

+ }

+ #[test]

+ fn should_not_unescape_slash_single_quote() {

+ let unescaped = unescape("\\'");

+ assert_eq!(r"\'", unescaped);

+ }

+ #[test]

+ fn should_not_unescape_slash_double_quote() {

+ let unescaped = unescape("\\\"");

+ assert_eq!(r#"\""#, unescaped);

+ }

+ #[test]

+ fn should_not_change_anything() {

+ let unescaped = unescape("'");

+ assert_eq!("'", unescaped);

+ let unescaped = unescape(r#"""#);

+ assert_eq!(r#"""#, unescaped);

+ }

+ #[test]

+ fn should_only_unescape_newline_not_slash_single_quote() {

+ let unescaped = unescape("\\n\'");

+ assert_eq!("\n'", unescaped);

+ let unescaped = unescape("\\n\\'");

+ assert_eq!("\n\\'", unescaped);

+ }

+ #[test]

+ fn should_unescape_args() {

+ // 1f929: 🤩

+ let args = Args::parse(r#"'hello\u{1f929} world' '["hello", "\u{1f929}", "world"]'"#)

+ .collect::<Vec<_>>();

+ assert_eq!("hello\u{1f929} world", unescape(args[0]));

+ assert_eq!(r#"["hello", "🤩", "world"]"#, unescape(args[1]));

}