helix

Unnamed repository; edit this file 'description' to name the repository.

master 24Branches 30Tags

Clone

HTTPS

SSH

Open with VS Code

Diffstat (limited to 'helix-core/src/shellwords.rs')

-rw-r--r--

helix-core/src/shellwords.rs

945

1 files changed, 253 insertions, 692 deletions

diff --git a/helix-core/src/shellwords.rs b/helix-core/src/shellwords.rs
index edfd9ad1..9d873c36 100644
--- a/helix-core/src/shellwords.rs
+++ b/helix-core/src/shellwords.rs

@@ -1,358 +1,6 @@

-use smartstring::{LazyCompact, SmartString};

use std::borrow::Cow;

-/// A utility for parsing shell-like command lines.

-///

-/// The `Shellwords` struct takes an input string and allows extracting the command and its arguments.

-///

-/// # Features

-///

-/// - Parses command and arguments from input strings.

-/// - Supports single, double, and backtick quoted arguments.

-/// - Respects backslash escaping in arguments.

-///

-/// # Examples

-///

-/// Basic usage:

-///

-/// ```

-/// # use helix_core::shellwords::Shellwords;

-/// let shellwords = Shellwords::from(":o helix-core/src/shellwords.rs");

-/// assert_eq!(":o", shellwords.command());

-/// assert_eq!("helix-core/src/shellwords.rs", shellwords.args().next().unwrap());

-/// ```

-///

-/// Empty command:

-///

-/// ```

-/// # use helix_core::shellwords::Shellwords;

-/// let shellwords = Shellwords::from(" ");

-/// assert!(shellwords.command().is_empty());

-/// ```

-///

-/// # Iterator

-///

-/// The `args` method returns a non-allocating iterator, `Args`, over the arguments of the input.

-///

-/// ```

-/// # use helix_core::shellwords::Shellwords;

-/// let shellwords = Shellwords::from(":o a b c");

-/// let mut args = shellwords.args();

-/// assert_eq!(Some("a"), args.next());

-/// assert_eq!(Some("b"), args.next());

-/// assert_eq!(Some("c"), args.next());

-/// assert_eq!(None, args.next());

-/// ```

-#[derive(Clone, Copy)]

-pub struct Shellwords<'a> {

- input: &'a str,

-impl<'a> From<&'a str> for Shellwords<'a> {

- #[inline]

- fn from(input: &'a str) -> Self {

- Self { input }

- }

-impl<'a> From<&'a String> for Shellwords<'a> {

- #[inline]

- fn from(input: &'a String) -> Self {

- Self { input }

- }

-impl<'a> From<&'a Cow<'a, str>> for Shellwords<'a> {

- #[inline]

- fn from(input: &'a Cow<str>) -> Self {

- Self { input }

- }

-impl<'a> Shellwords<'a> {

- #[inline]

- #[must_use]

- pub fn command(&self) -> &str {

- self.input

- .split_once(' ')

- .map_or(self.input, |(command, _)| command)

- }

- #[inline]

- #[must_use]

- pub fn args(&self) -> Args<'a> {

- let args = self.input.split_once(' ').map_or("", |(_, args)| args);

- Args::parse(args)

- }

- #[inline]

- pub fn input(&self) -> &str {

- self.input

- }

- /// Checks that the input ends with a whitespace character which is not escaped.

- ///

- /// # Examples

- ///

- /// ```rust

- /// # use helix_core::shellwords::Shellwords;

- /// assert_eq!(Shellwords::from(" ").ends_with_whitespace(), true);

- /// assert_eq!(Shellwords::from(":open ").ends_with_whitespace(), true);

- /// assert_eq!(Shellwords::from(":open foo.txt ").ends_with_whitespace(), true);

- /// assert_eq!(Shellwords::from(":open").ends_with_whitespace(), false);

- /// assert_eq!(Shellwords::from(":open a\\ ").ends_with_whitespace(), true);

- /// assert_eq!(Shellwords::from(":open a\\ b.txt").ends_with_whitespace(), false);

- /// ```

- #[inline]

- pub fn ends_with_whitespace(&self) -> bool {

- self.input.ends_with(' ')

- }

-/// An iterator over an input string which yields arguments.

-///

-/// Splits on whitespace, but respects quoted substrings (using double quotes, single quotes, or backticks).

-#[derive(Debug, Clone)]

-pub struct Args<'a> {

- input: &'a str,

- idx: usize,

- start: usize,

-impl<'a> Args<'a> {

- #[inline]

- fn parse(input: &'a str) -> Self {

- Self {

- input,

- idx: 0,

- start: 0,

- }

- #[inline]

- pub fn is_empty(&self) -> bool {

- self.input.is_empty()

- }

- /// Returns the args exactly as input.

- ///

- /// # Examples

- /// ```

- /// # use helix_core::shellwords::Args;

- /// let args = Args::from(r#"sed -n "s/test t/not /p""#);

- /// assert_eq!(r#"sed -n "s/test t/not /p""#, args.raw());

- ///

- /// let args = Args::from(r#"cat "file name with space.txt""#);

- /// assert_eq!(r#"cat "file name with space.txt""#, args.raw());

- /// ```

- #[inline]

- pub fn raw(&self) -> &str {

- self.input

- }

- /// Returns the remainder of the args exactly as input.

- ///

- /// # Examples

- /// ```

- /// # use helix_core::shellwords::Args;

- /// let mut args = Args::from(r#"sed -n "s/test t/not /p""#);

- /// assert_eq!("sed", args.next().unwrap());

- /// assert_eq!(r#"-n "s/test t/not /p""#, args.rest());

- /// ```

- ///

- /// Never calling `next` and using `rest` is functionally equivalent to calling `raw`.

- #[inline]

- pub fn rest(&self) -> &str {

- &self.input[self.idx..]

- }

- /// Returns a reference to the `next()` value without advancing the iterator.

- ///

- /// Unlike `std::iter::Peekable::peek` this does not return a double reference, `&&str`

- /// but a normal `&str`.

- #[inline]

- #[must_use]

- pub fn peek(&self) -> Option<&str> {

- self.clone().next()

- }

- /// Returns the total number of arguments given in a command.

- ///

- /// This count is aware of all parsing rules for `Args`.

- #[must_use]

- pub fn arg_count(&self) -> usize {

- Self {

- input: self.input,

- idx: 0,

- start: 0,

- }

- .fold(0, |acc, _| acc + 1)

- }

- /// Convenient function to return an empty `Args`.

- ///

- /// When used in any iteration, it will always return `None`.

- #[inline(always)]

- pub const fn empty() -> Self {

- Self {

- input: "",

- idx: 0,

- start: 0,

- }

-impl<'a> Iterator for Args<'a> {

- type Item = &'a str;

- #[inline]

- #[allow(clippy::too_many_lines)]

- fn next(&mut self) -> Option<Self::Item> {

- // The parser loop is split into three main blocks to handle different types of input processing:

- //

- // 1. Quote block:

- // - Detects an unescaped quote character, either starting an in-quote scan or, if already in-quote,

- // locating the closing quote to return the quoted argument.

- // - Handles cases where mismatched quotes are ignored and when quotes appear as the last character.

- //

- // 2. Whitespace block:

- // - Handles arguments separated by whitespace (space or tab), respecting quotes so quoted phrases

- // remain grouped together.

- // - Splits arguments by whitespace when outside of a quoted context and updates boundaries accordingly.

- //

- // 3. Catch-all block:

- // - Handles any other character, updating the `is_escaped` status if a backslash is encountered,

- // advancing the loop to the next character.

- let bytes = self.input.as_bytes();

- let mut in_quotes = false;

- let mut quote = b'\0';

- let mut is_escaped = false;

- while self.idx < bytes.len() {

- match bytes[self.idx] {

- b'"' | b'\'' | b'`' if !is_escaped => {

- if in_quotes {

- // Found the proper closing quote, so can return the arg and advance the state along.

- if bytes[self.idx] == quote {

- let arg = Some(&self.input[self.start..self.idx]);

- self.idx += 1;

- self.start = self.idx;

- return arg;

- }

- // If quote does not match the type of the opening quote, then do nothing and advance.

- self.idx += 1;

- } else if self.idx == bytes.len() - 1 {

- // Special case for when a quote is the last input in args.

- // e.g: :read "file with space.txt""

- // This preserves the quote as an arg:

- // - `file with space`

- // - `"`

- let arg = Some(&self.input[self.idx..]);

- self.idx = bytes.len();

- self.start = bytes.len();

- return arg;

- } else {

- // Found opening quote.

- in_quotes = true;

- // Kind of quote that was found.

- quote = bytes[self.idx];

- if self.start < self.idx {

- // When part of the input ends in a quote, `one two" three`, this properly returns the `two`

- // before advancing to the quoted arg for the next iteration:

- // - `one` <- previous arg

- // - `two` <- this step

- // - ` three` <- next arg

- let arg = Some(&self.input[self.start..self.idx]);

- self.idx += 1;

- self.start = self.idx;

- return arg;

- }

- // Advance after quote.

- self.idx += 1;

- // Exclude quote from arg output.

- self.start = self.idx;

- }

- b' ' | b'\t' if !in_quotes => {

- // Found a true whitespace separator that wasn't inside quotes.

- // Check if there is anything to return or if it's just advancing over whitespace.

- // `start` will only be less than `idx` when there is something to return.

- if self.start < self.idx {

- let arg = Some(&self.input[self.start..self.idx]);

- self.idx += 1;

- self.start = self.idx;

- return arg;

- }

- // Advance beyond the whitespace.

- self.idx += 1;

- // This is where `start` will be set to the start of an arg boundary, either encountering a word

- // boundary or a quote boundary. If it finds a quote, then it will be advanced again in that part

- // of the code. Either way, all that remains for the check above will be to return a full arg.

- self.start = self.idx;

- }

- _ => {

- // If previous loop didn't find any backslash and was already escaped it will change to false

- // as the backslash chain was broken.

- //

- // If the previous loop had no backslash escape, and found one this iteration, then it's the start

- // of an escape chain.

- is_escaped = match (is_escaped, bytes[self.idx]) {

- (false, b'\\') => true, // Set `is_escaped` if the current byte is a backslash

- _ => false, //Reset `is_escaped` if it was true, otherwise keep `is_escaped` as false

- };

- // Advance to next `char`.

- self.idx += 1;

- }

- // Fallback that catches when the loop would have exited but failed to return the arg between start and the end.

- if self.start < bytes.len() {

- let arg = Some(&self.input[self.start..]);

- self.start = bytes.len();

- return arg;

- }

- // All args have been parsed.

- None

- }

- fn count(self) -> usize

- where

- Self: Sized,

- {

- panic!("use `arg_count` instead to get the number of arguments.");

- }

-impl<'a> From<&'a String> for Args<'a> {

- fn from(args: &'a String) -> Self {

- Args::parse(args)

- }

-impl<'a> From<&'a str> for Args<'a> {

- fn from(args: &'a str) -> Self {

- Args::parse(args)

- }

-impl<'a> From<&'a Cow<'_, str>> for Args<'a> {

- fn from(args: &'a Cow<str>) -> Self {

- Args::parse(args)

- }

/// Auto escape for shellwords usage.

-#[inline]

-#[must_use]

pub fn escape(input: Cow<str>) -> Cow<str> {

if !input.chars().any(|x| x.is_ascii_whitespace()) {

input

@@ -365,141 +13,186 @@ pub fn escape(input: Cow<str>) -> Cow<str> {

buf

}))

} else {

- Cow::Owned(format!("\"{input}\""))

+ Cow::Owned(format!("\"{}\"", input))

}

-/// Unescapes a string, converting escape sequences into their literal characters.

-///

-/// This function handles the following escape sequences:

-/// - `\\n` is converted to `\n` (newline)

-/// - `\\t` is converted to `\t` (tab)

-/// - `\\u{...}` is converted to the corresponding Unicode character

-///

-/// Other escape sequences, such as `\\` followed by any character not listed above, will remain unchanged.

-///

-/// If input is invalid, for example if there is invalid unicode, \u{999999999}, it will return the input as is.

-///

-/// # Examples

-///

-/// Basic usage:

-///

-/// ```

-/// # use helix_core::shellwords::unescape;

-/// let unescaped = unescape("hello\\nworld");

-/// assert_eq!("hello\nworld", unescaped);

-/// ```

-///

-/// Unescaping tabs:

-///

-/// ```

-/// # use helix_core::shellwords::unescape;

-/// let unescaped = unescape("hello\\tworld");

-/// assert_eq!("hello\tworld", unescaped);

-/// ```

-///

-/// Unescaping Unicode characters:

-///

-/// ```

-/// # use helix_core::shellwords::unescape;

-/// let unescaped = unescape("hello\\u{1f929}world");

-/// assert_eq!("hello\u{1f929}world", unescaped);

-/// assert_eq!("hello🤩world", unescaped);

-/// ```

-///

-/// Handling backslashes:

-///

-/// ```

-/// # use helix_core::shellwords::unescape;

-/// let unescaped = unescape(r"hello\\world");

-/// assert_eq!(r"hello\\world", unescaped);

-///

-/// let unescaped = unescape(r"hello\\\\world");

-/// assert_eq!(r"hello\\\\world", unescaped);

-/// ```

-///

-/// # Note

-///

-/// This function is opinionated, with a clear purpose of handling user input, not a general or generic unescaping utility, and does not unescape sequences like `\\'` or `\\\"`, leaving them as is.

-#[inline]

-#[must_use]

-pub fn unescape(input: &str) -> Cow<'_, str> {

- enum State {

- Normal,

- Escaped,

- Unicode,

- }

+enum State {

+ OnWhitespace,

+ Unquoted,

+ UnquotedEscaped,

+ Quoted,

+ QuoteEscaped,

+ Dquoted,

+ DquoteEscaped,

- let mut unescaped = String::new();

- let mut state = State::Normal;

- let mut is_escaped = false;

- // NOTE: Max unicode code point is U+10FFFF for a maximum of 6 chars

- let mut unicode = SmartString::<LazyCompact>::new_const();

+pub struct Shellwords<'a> {

+ state: State,

+ /// Shellwords where whitespace and escapes have been resolved.

+ words: Vec<Cow<'a, str>>,

+ /// The parts of the input that are divided into shellwords. This can be

+ /// used to retrieve the original text for a given word by looking up the

+ /// same index in the Vec as the word in `words`.

+ parts: Vec<&'a str>,

- for (idx, ch) in input.char_indices() {

- match state {

- State::Normal => match ch {

- '\\' => {

- if !is_escaped {

- // PERF: As not every separator will be escaped, we use `String::new` as that has no initial

- // allocation. If an escape is found, then we reserve capacity thats the len of the separator,

- // as the new unescaped string will be at least that long.

- unescaped.reserve(input.len());

- if idx > 0 {

- // First time finding an escape, so all prior chars can be added to the new unescaped

- // version if its not the very first char found.

- unescaped.push_str(&input[0..idx]);

+impl<'a> From<&'a str> for Shellwords<'a> {

+ fn from(input: &'a str) -> Self {

+ use State::*;

+ let mut state = Unquoted;

+ let mut words = Vec::new();

+ let mut parts = Vec::new();

+ let mut escaped = String::with_capacity(input.len());

+ let mut part_start = 0;

+ let mut unescaped_start = 0;

+ let mut end = 0;

+ for (i, c) in input.char_indices() {

+ state = match state {

+ OnWhitespace => match c {

+ '"' => {

+ end = i;

+ Dquoted

+ }

+ '\'' => {

+ end = i;

+ Quoted

+ }

+ '\\' => {

+ if cfg!(unix) {

+ escaped.push_str(&input[unescaped_start..i]);

+ unescaped_start = i + 1;

+ UnquotedEscaped

+ } else {

+ OnWhitespace

}

- state = State::Escaped;

- is_escaped = true;

- }

- _ => {

- if is_escaped {

- unescaped.push(ch);

+ c if c.is_ascii_whitespace() => {

+ end = i;

+ OnWhitespace

}

- }

- },

- State::Escaped => {

- match ch {

- 'n' => unescaped.push('\n'),

- 't' => unescaped.push('\t'),

- 'u' => {

- state = State::Unicode;

- continue;

+ _ => Unquoted,

+ },

+ Unquoted => match c {

+ '\\' => {

+ if cfg!(unix) {

+ escaped.push_str(&input[unescaped_start..i]);

+ unescaped_start = i + 1;

+ UnquotedEscaped

+ } else {

+ Unquoted

+ }

}

- // Uncomment if you want to handle '\\' to '\'

- // '\\' => unescaped.push('\\'),

- _ => {

- unescaped.push('\\');

- unescaped.push(ch);

+ c if c.is_ascii_whitespace() => {

+ end = i;

+ OnWhitespace

}

- }

- state = State::Normal;

+ _ => Unquoted,

+ },

+ UnquotedEscaped => Unquoted,

+ Quoted => match c {

+ '\\' => {

+ if cfg!(unix) {

+ escaped.push_str(&input[unescaped_start..i]);

+ unescaped_start = i + 1;

+ QuoteEscaped

+ } else {

+ Quoted

+ }

+ '\'' => {

+ end = i;

+ OnWhitespace

+ }

+ _ => Quoted,

+ },

+ QuoteEscaped => Quoted,

+ Dquoted => match c {

+ '\\' => {

+ if cfg!(unix) {

+ escaped.push_str(&input[unescaped_start..i]);

+ unescaped_start = i + 1;

+ DquoteEscaped

+ } else {

+ Dquoted

+ }

+ '"' => {

+ end = i;

+ OnWhitespace

+ }

+ _ => Dquoted,

+ },

+ DquoteEscaped => Dquoted,

+ };

+ let c_len = c.len_utf8();

+ if i == input.len() - c_len && end == 0 {

+ end = i + c_len;

}

- State::Unicode => match ch {

- '{' => continue,

- '}' => {

- let Ok(digit) = u32::from_str_radix(&unicode, 16) else {

- return input.into();

- };

- let Some(point) = char::from_u32(digit) else {

- return input.into();

- };

- unescaped.push(point);

- // Might be more unicode to unescape so clear for reuse.

- unicode.clear();

- state = State::Normal;

+ if end > 0 {

+ let esc_trim = escaped.trim();

+ let inp = &input[unescaped_start..end];

+ if !(esc_trim.is_empty() && inp.trim().is_empty()) {

+ if esc_trim.is_empty() {

+ words.push(inp.into());

+ parts.push(inp);

+ } else {

+ words.push([escaped, inp.into()].concat().into());

+ parts.push(&input[part_start..end]);

+ escaped = "".to_string();

+ }

}

- _ => unicode.push(ch),

- },

+ unescaped_start = i + 1;

+ part_start = i + 1;

+ end = 0;

+ }

+ debug_assert!(words.len() == parts.len());

+ Self {

+ state,

+ words,

+ parts,

}

- if is_escaped {

- unescaped.into()

- } else {

- input.into()

+impl<'a> Shellwords<'a> {

+ /// Checks that the input ends with a whitespace character which is not escaped.

+ ///

+ /// # Examples

+ ///

+ /// ```rust

+ /// use helix_core::shellwords::Shellwords;

+ /// assert_eq!(Shellwords::from(" ").ends_with_whitespace(), true);

+ /// assert_eq!(Shellwords::from(":open ").ends_with_whitespace(), true);

+ /// assert_eq!(Shellwords::from(":open foo.txt ").ends_with_whitespace(), true);

+ /// assert_eq!(Shellwords::from(":open").ends_with_whitespace(), false);

+ /// #[cfg(unix)]

+ /// assert_eq!(Shellwords::from(":open a\\ ").ends_with_whitespace(), false);

+ /// #[cfg(unix)]

+ /// assert_eq!(Shellwords::from(":open a\\ b.txt").ends_with_whitespace(), false);

+ /// ```

+ pub fn ends_with_whitespace(&self) -> bool {

+ matches!(self.state, State::OnWhitespace)

+ }

+ /// Returns the list of shellwords calculated from the input string.

+ pub fn words(&self) -> &[Cow<'a, str>] {

+ &self.words

+ }

+ /// Returns a list of strings which correspond to [`Self::words`] but represent the original

+ /// text in the input string - including escape characters - without separating whitespace.

+ pub fn parts(&self) -> &[&'a str] {

+ &self.parts

}

@@ -508,202 +201,114 @@ mod test {

use super::*;

#[test]

- fn base() {

+ #[cfg(windows)]

+ fn test_normal() {

let input = r#":o single_word twó wörds \three\ \"with\ escaping\\"#;

let shellwords = Shellwords::from(input);

- let args = vec![

- "single_word",

- "twó",

- "wörds",

- r"\three\",

- r#"\"with\"#,

- r"escaping\\",

+ let result = shellwords.words().to_vec();

+ let expected = vec![

+ Cow::from(":o"),

+ Cow::from("single_word"),

+ Cow::from("twó"),

+ Cow::from("wörds"),

+ Cow::from("\\three\\"),

+ Cow::from("\\"),

+ Cow::from("with\\ escaping\\\\"),

];

- assert_eq!(":o", shellwords.command());

- assert_eq!(args, shellwords.args().collect::<Vec<_>>());

- }

- #[test]

- fn should_have_empty_args() {

- let shellwords = Shellwords::from(":quit");

- assert!(

- shellwords.args().is_empty(),

- "args: `{}`",

- shellwords.args().next().unwrap()

- );

- assert!(shellwords.args().next().is_none());

- }

- #[test]

- fn should_return_empty_command() {

- let shellwords = Shellwords::from(" ");

- assert!(shellwords.command().is_empty());

- }

- #[test]

- fn should_support_unicode_args() {

- assert_eq!(

- Shellwords::from(":sh echo 𒀀").args().collect::<Vec<_>>(),

- &["echo", "𒀀"]

- );

- assert_eq!(

- Shellwords::from(":sh echo 𒀀 hello world𒀀")

- .args()

- .collect::<Vec<_>>(),

- &["echo", "𒀀", "hello", "world𒀀"]

- );

- }

- #[test]

- fn should_preserve_quote_if_last_argument() {

- let sh = Shellwords::from(r#":read "file with space.txt"""#);

- let mut args = sh.args();

- assert_eq!("file with space.txt", args.next().unwrap());

- assert_eq!(r#"""#, args.next().unwrap());

- }

- #[test]

- fn should_return_rest_of_non_closed_quote_as_one_argument() {

- let sh = Shellwords::from(r":rename 'should be one \'argument");

- assert_eq!(r"should be one \'argument", sh.args().next().unwrap());

- }

- #[test]

- fn should_respect_escaped_quote_in_what_looks_like_non_closed_arg() {

- let sh = Shellwords::from(r":rename 'should be one \\'argument");

- let mut args = sh.args();

- assert_eq!(r"should be one \\", args.next().unwrap());

- assert_eq!(r"argument", args.next().unwrap());

- }

- #[test]

- fn should_split_args() {

- assert_eq!(Shellwords::from(":o a").args().collect::<Vec<_>>(), &["a"]);

- assert_eq!(

- Shellwords::from(":o a\\ ").args().collect::<Vec<_>>(),

- &["a\\"]

- );

- }

- #[test]

- fn should_parse_args_even_with_leading_whitespace() {

- // Three spaces

- assert_eq!(

- Shellwords::from(":o a").args().collect::<Vec<_>>(),

- &["a"]

- );

+ // TODO test is_owned and is_borrowed, once they get stabilized.

+ assert_eq!(expected, result);

}

#[test]

- fn should_peek_next_arg_and_not_consume() {

- let mut args = Shellwords::from(":o a").args();

- assert_eq!(Some("a"), args.peek());

- assert_eq!(Some("a"), args.next());

- assert_eq!(None, args.next());

+ #[cfg(unix)]

+ fn test_normal() {

+ let input = r#":o single_word twó wörds \three\ \"with\ escaping\\"#;

+ let shellwords = Shellwords::from(input);

+ let result = shellwords.words().to_vec();

+ let expected = vec![

+ Cow::from(":o"),

+ Cow::from("single_word"),

+ Cow::from("twó"),

+ Cow::from("wörds"),

+ Cow::from(r#"three "with escaping\"#),

+ ];

+ // TODO test is_owned and is_borrowed, once they get stabilized.

+ assert_eq!(expected, result);

}

#[test]

- fn should_parse_single_quotes_while_respecting_escapes() {

+ #[cfg(unix)]

+ fn test_quoted() {

let quoted =

r#":o 'single_word' 'twó wörds' '' ' ''\three\' \"with\ escaping\\' 'quote incomplete"#;

let shellwords = Shellwords::from(quoted);

- let result = shellwords.args().collect::<Vec<_>>();

+ let result = shellwords.words().to_vec();

let expected = vec![

- "single_word",

- "twó wörds",

- "",

- " ",

- r#"\three\' \"with\ escaping\\"#,

- "quote incomplete",

+ Cow::from(":o"),

+ Cow::from("single_word"),

+ Cow::from("twó wörds"),

+ Cow::from(r#"three' "with escaping\"#),

+ Cow::from("quote incomplete"),

];

assert_eq!(expected, result);

}

#[test]

- fn should_parse_double_quotes_while_respecting_escapes() {

+ #[cfg(unix)]

+ fn test_dquoted() {

let dquoted = r#":o "single_word" "twó wörds" "" " ""\three\' \"with\ escaping\\" "dquote incomplete"#;

let shellwords = Shellwords::from(dquoted);

- let result = shellwords.args().collect::<Vec<_>>();

+ let result = shellwords.words().to_vec();

let expected = vec![

- "single_word",

- "twó wörds",

- "",

- " ",

- r#"\three\' \"with\ escaping\\"#,

- "dquote incomplete",

+ Cow::from(":o"),

+ Cow::from("single_word"),

+ Cow::from("twó wörds"),

+ Cow::from(r#"three' "with escaping\"#),

+ Cow::from("dquote incomplete"),

];

assert_eq!(expected, result);

}

#[test]

- fn should_respect_escapes_with_mixed_quotes() {

+ #[cfg(unix)]

+ fn test_mixed() {

let dquoted = r#":o single_word 'twó wörds' "\three\' \"with\ escaping\\""no space before"'and after' $#%^@ "%^&(%^" ')(*&^%''a\\\\\b' '"#;

let shellwords = Shellwords::from(dquoted);

- let result = shellwords.args().collect::<Vec<_>>();

+ let result = shellwords.words().to_vec();

let expected = vec![

- "single_word",

- "twó wörds",

- r#"\three\' \"with\ escaping\\"#,

- "no space before",

- "and after",

- "$#%^@",

- "%^&(%^",

- r")(*&^%",

- r"a\\\\\b",

- // Last ' is important, as if the user input an accidental quote at the end, this should be checked in

- // commands where there should only be one input and return an error rather than silently succeed.

- "'",

+ Cow::from(":o"),

+ Cow::from("single_word"),

+ Cow::from("twó wörds"),

+ Cow::from("three' \"with escaping\\"),

+ Cow::from("no space before"),

+ Cow::from("and after"),

+ Cow::from("$#%^@"),

+ Cow::from("%^&(%^"),

+ Cow::from(")(*&^%"),

+ Cow::from(r#"a\\b"#),

+ // The last ' just switches to the quoted state, but since we don't have anything after it, it should be ignored.

];

assert_eq!(expected, result);

}

#[test]

- fn should_return_rest() {

- let input = r#":set statusline.center ["file-type","file-encoding"]"#;

+ fn test_lists() {

+ let input =

+ r#":set statusline.center ["file-type","file-encoding"] '["list", "in", "quotes"]'"#;

let shellwords = Shellwords::from(input);

- let mut args = shellwords.args();

- assert_eq!(":set", shellwords.command());

- assert_eq!(Some("statusline.center"), args.next());

- assert_eq!(r#"["file-type","file-encoding"]"#, args.rest());

- }

- #[test]

- fn should_return_no_args() {

- let mut args = Args::parse("");

- assert!(args.next().is_none());

- assert!(args.is_empty());

- assert!(args.arg_count() == 0);

- }

- #[test]

- fn should_leave_escaped_quotes() {

- let input = r#"\" \` \' \"with \'with \`with"#;

- let result = Args::parse(input).collect::<Vec<_>>();

- assert_eq!(r#"\""#, result[0]);

- assert_eq!(r"\`", result[1]);

- assert_eq!(r"\'", result[2]);

- assert_eq!(r#"\"with"#, result[3]);

- assert_eq!(r"\'with", result[4]);

- assert_eq!(r"\`with", result[5]);

- }

- #[test]

- fn should_leave_literal_newline_alone() {

- let result = Args::parse(r"\n").collect::<Vec<_>>();

- assert_eq!(r"\n", result[0]);

- }

- #[test]

- fn should_leave_literal_unicode_alone() {

- let result = Args::parse(r"\u{C}").collect::<Vec<_>>();

- assert_eq!(r"\u{C}", result[0]);

+ let result = shellwords.words().to_vec();

+ let expected = vec![

+ Cow::from(":set"),

+ Cow::from("statusline.center"),

+ Cow::from(r#"["file-type","file-encoding"]"#),

+ Cow::from(r#"["list", "in", "quotes"]"#),

+ ];

+ assert_eq!(expected, result);

}

#[test]

#[cfg(unix)]

- fn should_escape_unix() {

+ fn test_escaping_unix() {

assert_eq!(escape("foobar".into()), Cow::Borrowed("foobar"));

assert_eq!(escape("foo bar".into()), Cow::Borrowed("foo\\ bar"));

assert_eq!(escape("foo\tbar".into()), Cow::Borrowed("foo\\\tbar"));

@@ -711,79 +316,35 @@ mod test {

#[test]

#[cfg(windows)]

- fn should_escape_windows() {

+ fn test_escaping_windows() {

assert_eq!(escape("foobar".into()), Cow::Borrowed("foobar"));

assert_eq!(escape("foo bar".into()), Cow::Borrowed("\"foo bar\""));

}

#[test]

- fn should_unescape_newline() {

- let unescaped = unescape("hello\\nworld");

- assert_eq!("hello\nworld", unescaped);

- }

- #[test]

- fn should_unescape_tab() {

- let unescaped = unescape("hello\\tworld");

- assert_eq!("hello\tworld", unescaped);

- }

- #[test]

- fn should_unescape_unicode() {

- let unescaped = unescape("hello\\u{1f929}world");

- assert_eq!("hello\u{1f929}world", unescaped, "char: 🤩 ");

- assert_eq!("hello🤩world", unescaped);

- }

- #[test]

- fn should_return_original_input_due_to_bad_unicode() {

- let unescaped = unescape("hello\\u{999999999}world");

- assert_eq!("hello\\u{999999999}world", unescaped);

- }

- #[test]

- fn should_not_unescape_slash() {

- let unescaped = unescape(r"hello\\world");

- assert_eq!(r"hello\\world", unescaped);

- let unescaped = unescape(r"hello\\\\world");

- assert_eq!(r"hello\\\\world", unescaped);

- }

- #[test]

- fn should_not_unescape_slash_single_quote() {

- let unescaped = unescape("\\'");

- assert_eq!(r"\'", unescaped);

- }

- #[test]

- fn should_not_unescape_slash_double_quote() {

- let unescaped = unescape("\\\"");

- assert_eq!(r#"\""#, unescaped);

- }

- #[test]

- fn should_not_change_anything() {

- let unescaped = unescape("'");

- assert_eq!("'", unescaped);

- let unescaped = unescape(r#"""#);

- assert_eq!(r#"""#, unescaped);

+ #[cfg(unix)]

+ fn test_parts() {

+ assert_eq!(Shellwords::from(":o a").parts(), &[":o", "a"]);

+ assert_eq!(Shellwords::from(":o a\\ ").parts(), &[":o", "a\\ "]);

}

#[test]

- fn should_only_unescape_newline_not_slash_single_quote() {

- let unescaped = unescape("\\n\'");

- assert_eq!("\n'", unescaped);

- let unescaped = unescape("\\n\\'");

- assert_eq!("\n\\'", unescaped);

+ #[cfg(windows)]

+ fn test_parts() {

+ assert_eq!(Shellwords::from(":o a").parts(), &[":o", "a"]);

+ assert_eq!(Shellwords::from(":o a\\ ").parts(), &[":o", "a\\"]);

}

#[test]

- fn should_unescape_args() {

- // 1f929: 🤩

- let args = Args::parse(r#"'hello\u{1f929} world' '["hello", "\u{1f929}", "world"]'"#)

- .collect::<Vec<_>>();

- assert_eq!("hello\u{1f929} world", unescape(args[0]));

- assert_eq!(r#"["hello", "🤩", "world"]"#, unescape(args[1]));

+ fn test_multibyte_at_end() {

+ assert_eq!(Shellwords::from("𒀀").parts(), &["𒀀"]);

+ assert_eq!(

+ Shellwords::from(":sh echo 𒀀").parts(),

+ &[":sh", "echo", "𒀀"]

+ );

+ assert_eq!(

+ Shellwords::from(":sh echo 𒀀 hello world𒀀").parts(),

+ &[":sh", "echo", "𒀀", "hello", "world𒀀"]

+ );

}