Unnamed repository; edit this file 'description' to name the repository.
Diffstat (limited to 'helix-core/src/shellwords.rs')
-rw-r--r--helix-core/src/shellwords.rs945
1 files changed, 692 insertions, 253 deletions
diff --git a/helix-core/src/shellwords.rs b/helix-core/src/shellwords.rs
index 9d873c36..edfd9ad1 100644
--- a/helix-core/src/shellwords.rs
+++ b/helix-core/src/shellwords.rs
@@ -1,6 +1,358 @@
+use smartstring::{LazyCompact, SmartString};
use std::borrow::Cow;
+/// A utility for parsing shell-like command lines.
+///
+/// The `Shellwords` struct takes an input string and allows extracting the command and its arguments.
+///
+/// The command is everything before the first space of the input; everything after that space is
+/// handed to [`Args`] for argument parsing. The input is only borrowed, never copied.
+///
+/// # Features
+///
+/// - Parses command and arguments from input strings.
+/// - Supports single, double, and backtick quoted arguments.
+/// - Respects backslash escaping in arguments.
+///
+/// # Examples
+///
+/// Basic usage:
+///
+/// ```
+/// # use helix_core::shellwords::Shellwords;
+/// let shellwords = Shellwords::from(":o helix-core/src/shellwords.rs");
+/// assert_eq!(":o", shellwords.command());
+/// assert_eq!("helix-core/src/shellwords.rs", shellwords.args().next().unwrap());
+/// ```
+///
+/// Empty command:
+///
+/// ```
+/// # use helix_core::shellwords::Shellwords;
+/// let shellwords = Shellwords::from(" ");
+/// assert!(shellwords.command().is_empty());
+/// ```
+///
+/// # Iterator
+///
+/// The `args` method returns a non-allocating iterator, `Args`, over the arguments of the input.
+///
+/// ```
+/// # use helix_core::shellwords::Shellwords;
+/// let shellwords = Shellwords::from(":o a b c");
+/// let mut args = shellwords.args();
+/// assert_eq!(Some("a"), args.next());
+/// assert_eq!(Some("b"), args.next());
+/// assert_eq!(Some("c"), args.next());
+/// assert_eq!(None, args.next());
+/// ```
+#[derive(Clone, Copy)]
+pub struct Shellwords<'a> {
+ /// The full command line, command included, exactly as it was given.
+ input: &'a str,
+}
+
+/// Wraps a string slice. Nothing is parsed here; the slice is only stored.
+impl<'a> From<&'a str> for Shellwords<'a> {
+ #[inline]
+ fn from(input: &'a str) -> Self {
+ Self { input }
+ }
+}
+
+/// Wraps a borrowed `String`. Nothing is parsed here; the borrow is only stored.
+impl<'a> From<&'a String> for Shellwords<'a> {
+ #[inline]
+ fn from(input: &'a String) -> Self {
+ Self { input }
+ }
+}
+
+/// Wraps a borrowed `Cow<str>`. Nothing is parsed here; the borrow is only stored.
+impl<'a> From<&'a Cow<'a, str>> for Shellwords<'a> {
+ #[inline]
+ fn from(input: &'a Cow<str>) -> Self {
+ Self { input }
+ }
+}
+
+impl<'a> Shellwords<'a> {
+ /// Returns the command: the part of the input before the first space.
+ ///
+ /// When the input contains no space, the whole input is returned.
+ #[inline]
+ #[must_use]
+ pub fn command(&self) -> &str {
+ self.input
+ .split_once(' ')
+ .map_or(self.input, |(command, _)| command)
+ }
+
+ /// Returns an iterator over the arguments, i.e. everything after the first space of the input.
+ ///
+ /// When the input contains no space, the iterator is built over an empty string and yields nothing.
+ #[inline]
+ #[must_use]
+ pub fn args(&self) -> Args<'a> {
+ let args = self.input.split_once(' ').map_or("", |(_, args)| args);
+ Args::parse(args)
+ }
+
+ /// Returns the complete input, command included, exactly as it was given.
+ #[inline]
+ pub fn input(&self) -> &str {
+ self.input
+ }
+
+ /// Checks whether the input ends with a space character.
+ ///
+ /// Escaping is not taken into account: a trailing space preceded by a backslash still counts, as
+ /// the `":open a\\ "` example below shows. Only the space character `' '` is checked.
+ ///
+ /// # Examples
+ ///
+ /// ```rust
+ /// # use helix_core::shellwords::Shellwords;
+ /// assert_eq!(Shellwords::from(" ").ends_with_whitespace(), true);
+ /// assert_eq!(Shellwords::from(":open ").ends_with_whitespace(), true);
+ /// assert_eq!(Shellwords::from(":open foo.txt ").ends_with_whitespace(), true);
+ /// assert_eq!(Shellwords::from(":open").ends_with_whitespace(), false);
+ /// assert_eq!(Shellwords::from(":open a\\ ").ends_with_whitespace(), true);
+ /// assert_eq!(Shellwords::from(":open a\\ b.txt").ends_with_whitespace(), false);
+ /// ```
+ #[inline]
+ pub fn ends_with_whitespace(&self) -> bool {
+ self.input.ends_with(' ')
+ }
+}
+
+/// An iterator over an input string which yields arguments.
+///
+/// Splits on whitespace, but respects quoted substrings (using double quotes, single quotes, or backticks).
+#[derive(Debug, Clone)]
+pub struct Args<'a> {
+ /// The argument text being parsed.
+ input: &'a str,
+ /// Byte offset of the scanning cursor into `input`.
+ idx: usize,
+ /// Byte offset at which the argument currently being scanned begins.
+ start: usize,
+}
+
+impl<'a> Args<'a> {
+ /// Creates an `Args` positioned at the start of `input`. No scanning happens until `next` is called.
+ #[inline]
+ fn parse(input: &'a str) -> Self {
+ Self {
+ input,
+ idx: 0,
+ start: 0,
+ }
+ }
+
+ /// Returns `true` when the underlying input string is empty.
+ ///
+ /// Only the raw input is inspected, not the iteration state: an input made up solely of
+ /// whitespace is reported as non-empty even though iterating over it yields no arguments.
+ #[inline]
+ pub fn is_empty(&self) -> bool {
+ self.input.is_empty()
+ }
+
+ /// Returns the args exactly as input.
+ ///
+ /// # Examples
+ /// ```
+ /// # use helix_core::shellwords::Args;
+ /// let args = Args::from(r#"sed -n "s/test t/not /p""#);
+ /// assert_eq!(r#"sed -n "s/test t/not /p""#, args.raw());
+ ///
+ /// let args = Args::from(r#"cat "file name with space.txt""#);
+ /// assert_eq!(r#"cat "file name with space.txt""#, args.raw());
+ /// ```
+ #[inline]
+ pub fn raw(&self) -> &str {
+ self.input
+ }
+
+ /// Returns the remainder of the args exactly as input.
+ ///
+ /// # Examples
+ /// ```
+ /// # use helix_core::shellwords::Args;
+ /// let mut args = Args::from(r#"sed -n "s/test t/not /p""#);
+ /// assert_eq!("sed", args.next().unwrap());
+ /// assert_eq!(r#"-n "s/test t/not /p""#, args.rest());
+ /// ```
+ ///
+ /// Never calling `next` and using `rest` is functionally equivalent to calling `raw`.
+ #[inline]
+ pub fn rest(&self) -> &str {
+ &self.input[self.idx..]
+ }
+
+ /// Returns a reference to the `next()` value without advancing the iterator.
+ ///
+ /// Unlike `std::iter::Peekable::peek` this does not return a double reference, `&&str`
+ /// but a normal `&str`.
+ ///
+ /// This works by cloning the iterator state and advancing the clone.
+ #[inline]
+ #[must_use]
+ pub fn peek(&self) -> Option<&str> {
+ self.clone().next()
+ }
+
+ /// Returns the total number of arguments given in a command.
+ ///
+ /// This count is aware of all parsing rules for `Args`.
+ ///
+ /// Counting restarts from the beginning of the input, so the result does not depend on how many
+ /// arguments have already been consumed from this iterator.
+ #[must_use]
+ pub fn arg_count(&self) -> usize {
+ Self {
+ input: self.input,
+ idx: 0,
+ start: 0,
+ }
+ .fold(0, |acc, _| acc + 1)
+ }
+
+ /// Convenient function to return an empty `Args`.
+ ///
+ /// When used in any iteration, it will always return `None`.
+ #[inline(always)]
+ pub const fn empty() -> Self {
+ Self {
+ input: "",
+ idx: 0,
+ start: 0,
+ }
+ }
+}
+
+impl<'a> Iterator for Args<'a> {
+ type Item = &'a str;
+
+ /// Scans forward from the current position and returns the next argument, or `None` once the
+ /// input is exhausted.
+ ///
+ /// Arguments are returned as raw slices of the input: the quotes surrounding a quoted argument
+ /// are excluded, but backslashes and escape sequences are left untouched.
+ #[inline]
+ #[allow(clippy::too_many_lines)]
+ fn next(&mut self) -> Option<Self::Item> {
+ // The parser loop is split into three main blocks to handle different types of input processing:
+ //
+ // 1. Quote block:
+ // - Detects an unescaped quote character, either starting an in-quote scan or, if already in-quote,
+ // locating the closing quote to return the quoted argument.
+ // - Handles cases where mismatched quotes are ignored and when quotes appear as the last character.
+ //
+ // 2. Whitespace block:
+ // - Handles arguments separated by whitespace (space or tab), respecting quotes so quoted phrases
+ // remain grouped together.
+ // - Splits arguments by whitespace when outside of a quoted context and updates boundaries accordingly.
+ //
+ // 3. Catch-all block:
+ // - Handles any other character, updating the `is_escaped` status if a backslash is encountered,
+ // advancing the loop to the next character.
+
+ let bytes = self.input.as_bytes();
+ // Quote and escape tracking is local to a single call: every call to `next` starts outside of quotes.
+ let mut in_quotes = false;
+ let mut quote = b'\0';
+ let mut is_escaped = false;
+
+ while self.idx < bytes.len() {
+ match bytes[self.idx] {
+ b'"' | b'\'' | b'`' if !is_escaped => {
+ if in_quotes {
+ // Found the proper closing quote, so can return the arg and advance the state along.
+ if bytes[self.idx] == quote {
+ let arg = Some(&self.input[self.start..self.idx]);
+ self.idx += 1;
+ self.start = self.idx;
+ return arg;
+ }
+ // If quote does not match the type of the opening quote, then do nothing and advance.
+ self.idx += 1;
+ } else if self.idx == bytes.len() - 1 {
+ // Special case for when a quote is the last input in args.
+ // e.g: :read "file with space.txt""
+ // This preserves the quote as an arg:
+ // - `file with space.txt`
+ // - `"`
+ let arg = Some(&self.input[self.idx..]);
+ self.idx = bytes.len();
+ self.start = bytes.len();
+ return arg;
+ } else {
+ // Found opening quote.
+ in_quotes = true;
+ // Kind of quote that was found.
+ quote = bytes[self.idx];
+
+ if self.start < self.idx {
+ // When part of the input ends in a quote, `one two" three`, this properly returns the `two`
+ // before advancing past the quote for the next iteration:
+ // - `one` <- previous arg
+ // - `two` <- this step
+ // - `three` <- next arg (the quote state is local to each call, so the text after
+ // the quote is split on whitespace as usual)
+ let arg = Some(&self.input[self.start..self.idx]);
+ self.idx += 1;
+ self.start = self.idx;
+ return arg;
+ }
+
+ // Advance after quote.
+ self.idx += 1;
+ // Exclude quote from arg output.
+ self.start = self.idx;
+ }
+ }
+ b' ' | b'\t' if !in_quotes => {
+ // Found a true whitespace separator that wasn't inside quotes.
+
+ // Check if there is anything to return or if it is just advancing over whitespace.
+ // `start` will only be less than `idx` when there is something to return.
+ if self.start < self.idx {
+ let arg = Some(&self.input[self.start..self.idx]);
+ self.idx += 1;
+ self.start = self.idx;
+ return arg;
+ }
+
+ // Advance beyond the whitespace.
+ self.idx += 1;
+
+ // This is where `start` will be set to the start of an arg boundary, either encountering a word
+ // boundary or a quote boundary. If it finds a quote, then it will be advanced again in that part
+ // of the code. Either way, all that remains for the check above will be to return a full arg.
+ self.start = self.idx;
+ }
+ _ => {
+ // If the previous iteration was already escaped, the flag changes back to false here, as the
+ // backslash chain was broken (this also covers an escaped quote or a second backslash).
+ //
+ // If the previous iteration was not escaped and this byte is a backslash, then it is the start
+ // of an escape chain.
+ is_escaped = match (is_escaped, bytes[self.idx]) {
+ (false, b'\\') => true, // Set `is_escaped` if the current byte is a backslash
+ _ => false, // Reset `is_escaped` if it was true, otherwise keep `is_escaped` as false
+ };
+
+ // Advance to the next byte.
+ self.idx += 1;
+ }
+ }
+ }
+
+ // Fallback that catches when the loop would have exited but failed to return the arg between start and the end.
+ if self.start < bytes.len() {
+ let arg = Some(&self.input[self.start..]);
+ self.start = bytes.len();
+ return arg;
+ }
+
+ // All args have been parsed.
+ None
+ }
+
+ /// Not supported: always panics.
+ ///
+ /// # Panics
+ ///
+ /// Panics unconditionally; use `arg_count` to get the number of arguments.
+ // NOTE(review): overriding `Iterator::count` to panic means any generic code calling `.count()` on an
+ // `Args` aborts at runtime — confirm this is intended.
+ fn count(self) -> usize
+ where
+ Self: Sized,
+ {
+ panic!("use `arg_count` instead to get the number of arguments.");
+ }
+}
+
+/// Builds an `Args` over a borrowed `String`; scanning only happens as the iterator is advanced.
+impl<'a> From<&'a String> for Args<'a> {
+ fn from(args: &'a String) -> Self {
+ Args::parse(args)
+ }
+}
+
+/// Builds an `Args` over a string slice; scanning only happens as the iterator is advanced.
+impl<'a> From<&'a str> for Args<'a> {
+ fn from(args: &'a str) -> Self {
+ Args::parse(args)
+ }
+}
+
+/// Builds an `Args` over a borrowed `Cow<str>`; scanning only happens as the iterator is advanced.
+impl<'a> From<&'a Cow<'_, str>> for Args<'a> {
+ fn from(args: &'a Cow<str>) -> Self {
+ Args::parse(args)
+ }
+}
+
/// Auto escape for shellwords usage.
+#[inline]
+#[must_use]
pub fn escape(input: Cow<str>) -> Cow<str> {
if !input.chars().any(|x| x.is_ascii_whitespace()) {
input
@@ -13,186 +365,141 @@ pub fn escape(input: Cow<str>) -> Cow<str> {
buf
}))
} else {
- Cow::Owned(format!("\"{}\"", input))
+ Cow::Owned(format!("\"{input}\""))
}
}
-enum State {
- OnWhitespace,
- Unquoted,
- UnquotedEscaped,
- Quoted,
- QuoteEscaped,
- Dquoted,
- DquoteEscaped,
-}
+/// Unescapes a string, converting escape sequences into their literal characters.
+///
+/// This function handles the following escape sequences:
+/// - `\\n` is converted to `\n` (newline)
+/// - `\\t` is converted to `\t` (tab)
+/// - `\\u{...}` is converted to the corresponding Unicode character
+///
+/// Other escape sequences, such as `\\` followed by any character not listed above, will remain unchanged.
+///
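+/// If a `\u{...}` sequence does not decode to a valid Unicode character, for example `\u{999999999}`, the input is returned as is.
+///
+/// NOTE(review): this fallback only triggers once the closing `}` is reached; a `\u` sequence that is never closed has its characters dropped from the result.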
+///
+/// # Examples
+///
+/// Basic usage:
+///
+/// ```
+/// # use helix_core::shellwords::unescape;
+/// let unescaped = unescape("hello\\nworld");
+/// assert_eq!("hello\nworld", unescaped);
+/// ```
+///
+/// Unescaping tabs:
+///
+/// ```
+/// # use helix_core::shellwords::unescape;
+/// let unescaped = unescape("hello\\tworld");
+/// assert_eq!("hello\tworld", unescaped);
+/// ```
+///
+/// Unescaping Unicode characters:
+///
+/// ```
+/// # use helix_core::shellwords::unescape;
+/// let unescaped = unescape("hello\\u{1f929}world");
+/// assert_eq!("hello\u{1f929}world", unescaped);
+/// assert_eq!("hello🤩world", unescaped);
+/// ```
+///
+/// Handling backslashes:
+///
+/// ```
+/// # use helix_core::shellwords::unescape;
+/// let unescaped = unescape(r"hello\\world");
+/// assert_eq!(r"hello\\world", unescaped);
+///
+/// let unescaped = unescape(r"hello\\\\world");
+/// assert_eq!(r"hello\\\\world", unescaped);
+/// ```
+///
+/// # Note
+///
+/// This function is opinionated, with a clear purpose of handling user input, not a general or generic unescaping utility, and does not unescape sequences like `\\'` or `\\\"`, leaving them as is.
+#[inline]
+#[must_use]
+pub fn unescape(input: &str) -> Cow<'_, str> {
+ enum State {
+ Normal,
+ Escaped,
+ Unicode,
+ }
-pub struct Shellwords<'a> {
- state: State,
- /// Shellwords where whitespace and escapes has been resolved.
- words: Vec<Cow<'a, str>>,
- /// The parts of the input that are divided into shellwords. This can be
- /// used to retrieve the original text for a given word by looking up the
- /// same index in the Vec as the word in `words`.
- parts: Vec<&'a str>,
-}
+ let mut unescaped = String::new();
+ let mut state = State::Normal;
+ let mut is_escaped = false;
+ // NOTE: Max unicode code point is U+10FFFF for a maximum of 6 chars
+ let mut unicode = SmartString::<LazyCompact>::new_const();
-impl<'a> From<&'a str> for Shellwords<'a> {
- fn from(input: &'a str) -> Self {
- use State::*;
-
- let mut state = Unquoted;
- let mut words = Vec::new();
- let mut parts = Vec::new();
- let mut escaped = String::with_capacity(input.len());
-
- let mut part_start = 0;
- let mut unescaped_start = 0;
- let mut end = 0;
-
- for (i, c) in input.char_indices() {
- state = match state {
- OnWhitespace => match c {
- '"' => {
- end = i;
- Dquoted
- }
- '\'' => {
- end = i;
- Quoted
- }
- '\\' => {
- if cfg!(unix) {
- escaped.push_str(&input[unescaped_start..i]);
- unescaped_start = i + 1;
- UnquotedEscaped
- } else {
- OnWhitespace
- }
- }
- c if c.is_ascii_whitespace() => {
- end = i;
- OnWhitespace
- }
- _ => Unquoted,
- },
- Unquoted => match c {
- '\\' => {
- if cfg!(unix) {
- escaped.push_str(&input[unescaped_start..i]);
- unescaped_start = i + 1;
- UnquotedEscaped
- } else {
- Unquoted
- }
- }
- c if c.is_ascii_whitespace() => {
- end = i;
- OnWhitespace
- }
- _ => Unquoted,
- },
- UnquotedEscaped => Unquoted,
- Quoted => match c {
- '\\' => {
- if cfg!(unix) {
- escaped.push_str(&input[unescaped_start..i]);
- unescaped_start = i + 1;
- QuoteEscaped
- } else {
- Quoted
+ for (idx, ch) in input.char_indices() {
+ match state {
+ State::Normal => match ch {
+ '\\' => {
+ if !is_escaped {
+ // PERF: As not every input contains an escape, we start from `String::new`, which has no initial
+ // allocation. Once an escape is found, we reserve capacity equal to the length of the input,
+ // which is an upper bound on the length of the unescaped string.
+ unescaped.reserve(input.len());
+ if idx > 0 {
+ // First time finding an escape, so all prior chars can be added to the new unescaped
+ // version, unless the escape is the very first char of the input.
+ unescaped.push_str(&input[0..idx]);
}
}
- '\'' => {
- end = i;
- OnWhitespace
- }
- _ => Quoted,
- },
- QuoteEscaped => Quoted,
- Dquoted => match c {
- '\\' => {
- if cfg!(unix) {
- escaped.push_str(&input[unescaped_start..i]);
- unescaped_start = i + 1;
- DquoteEscaped
- } else {
- Dquoted
- }
+ state = State::Escaped;
+ is_escaped = true;
+ }
+ _ => {
+ if is_escaped {
+ unescaped.push(ch);
}
- '"' => {
- end = i;
- OnWhitespace
+ }
+ },
+ State::Escaped => {
+ match ch {
+ 'n' => unescaped.push('\n'),
+ 't' => unescaped.push('\t'),
+ 'u' => {
+ state = State::Unicode;
+ continue;
}
- _ => Dquoted,
- },
- DquoteEscaped => Dquoted,
- };
-
- let c_len = c.len_utf8();
- if i == input.len() - c_len && end == 0 {
- end = i + c_len;
- }
-
- if end > 0 {
- let esc_trim = escaped.trim();
- let inp = &input[unescaped_start..end];
-
- if !(esc_trim.is_empty() && inp.trim().is_empty()) {
- if esc_trim.is_empty() {
- words.push(inp.into());
- parts.push(inp);
- } else {
- words.push([escaped, inp.into()].concat().into());
- parts.push(&input[part_start..end]);
- escaped = "".to_string();
+ // Uncomment if you want to handle '\\' to '\'
+ // '\\' => unescaped.push('\\'),
+ _ => {
+ unescaped.push('\\');
+ unescaped.push(ch);
}
}
- unescaped_start = i + 1;
- part_start = i + 1;
- end = 0;
+ state = State::Normal;
}
+ State::Unicode => match ch {
+ '{' => continue,
+ '}' => {
+ let Ok(digit) = u32::from_str_radix(&unicode, 16) else {
+ return input.into();
+ };
+ let Some(point) = char::from_u32(digit) else {
+ return input.into();
+ };
+ unescaped.push(point);
+ // Might be more unicode to unescape so clear for reuse.
+ unicode.clear();
+ state = State::Normal;
+ }
+ _ => unicode.push(ch),
+ },
}
-
- debug_assert!(words.len() == parts.len());
-
- Self {
- state,
- words,
- parts,
- }
- }
-}
-
-impl<'a> Shellwords<'a> {
- /// Checks that the input ends with a whitespace character which is not escaped.
- ///
- /// # Examples
- ///
- /// ```rust
- /// use helix_core::shellwords::Shellwords;
- /// assert_eq!(Shellwords::from(" ").ends_with_whitespace(), true);
- /// assert_eq!(Shellwords::from(":open ").ends_with_whitespace(), true);
- /// assert_eq!(Shellwords::from(":open foo.txt ").ends_with_whitespace(), true);
- /// assert_eq!(Shellwords::from(":open").ends_with_whitespace(), false);
- /// #[cfg(unix)]
- /// assert_eq!(Shellwords::from(":open a\\ ").ends_with_whitespace(), false);
- /// #[cfg(unix)]
- /// assert_eq!(Shellwords::from(":open a\\ b.txt").ends_with_whitespace(), false);
- /// ```
- pub fn ends_with_whitespace(&self) -> bool {
- matches!(self.state, State::OnWhitespace)
- }
-
- /// Returns the list of shellwords calculated from the input string.
- pub fn words(&self) -> &[Cow<'a, str>] {
- &self.words
}
- /// Returns a list of strings which correspond to [`Self::words`] but represent the original
- /// text in the input string - including escape characters - without separating whitespace.
- pub fn parts(&self) -> &[&'a str] {
- &self.parts
+ if is_escaped {
+ unescaped.into()
+ } else {
+ input.into()
}
}
@@ -201,114 +508,202 @@ mod test {
use super::*;
#[test]
- #[cfg(windows)]
- fn test_normal() {
+ fn base() {
let input = r#":o single_word twó wörds \three\ \"with\ escaping\\"#;
let shellwords = Shellwords::from(input);
- let result = shellwords.words().to_vec();
- let expected = vec![
- Cow::from(":o"),
- Cow::from("single_word"),
- Cow::from("twó"),
- Cow::from("wörds"),
- Cow::from("\\three\\"),
- Cow::from("\\"),
- Cow::from("with\\ escaping\\\\"),
+ let args = vec![
+ "single_word",
+ "twó",
+ "wörds",
+ r"\three\",
+ r#"\"with\"#,
+ r"escaping\\",
];
- // TODO test is_owned and is_borrowed, once they get stabilized.
- assert_eq!(expected, result);
+
+ assert_eq!(":o", shellwords.command());
+ assert_eq!(args, shellwords.args().collect::<Vec<_>>());
}
#[test]
- #[cfg(unix)]
- fn test_normal() {
- let input = r#":o single_word twó wörds \three\ \"with\ escaping\\"#;
- let shellwords = Shellwords::from(input);
- let result = shellwords.words().to_vec();
- let expected = vec![
- Cow::from(":o"),
- Cow::from("single_word"),
- Cow::from("twó"),
- Cow::from("wörds"),
- Cow::from(r#"three "with escaping\"#),
- ];
- // TODO test is_owned and is_borrowed, once they get stabilized.
- assert_eq!(expected, result);
+ fn should_have_empty_args() {
+ let shellwords = Shellwords::from(":quit");
+ assert!(
+ shellwords.args().is_empty(),
+ "args: `{}`",
+ shellwords.args().next().unwrap()
+ );
+ assert!(shellwords.args().next().is_none());
}
#[test]
- #[cfg(unix)]
- fn test_quoted() {
+ fn should_return_empty_command() {
+ let shellwords = Shellwords::from(" ");
+ assert!(shellwords.command().is_empty());
+ }
+
+ #[test]
+ fn should_support_unicode_args() {
+ assert_eq!(
+ Shellwords::from(":sh echo 𒀀").args().collect::<Vec<_>>(),
+ &["echo", "𒀀"]
+ );
+ assert_eq!(
+ Shellwords::from(":sh echo 𒀀 hello world𒀀")
+ .args()
+ .collect::<Vec<_>>(),
+ &["echo", "𒀀", "hello", "world𒀀"]
+ );
+ }
+
+ #[test]
+ fn should_preserve_quote_if_last_argument() {
+ let sh = Shellwords::from(r#":read "file with space.txt"""#);
+ let mut args = sh.args();
+ assert_eq!("file with space.txt", args.next().unwrap());
+ assert_eq!(r#"""#, args.next().unwrap());
+ }
+
+ #[test]
+ fn should_return_rest_of_non_closed_quote_as_one_argument() {
+ let sh = Shellwords::from(r":rename 'should be one \'argument");
+ assert_eq!(r"should be one \'argument", sh.args().next().unwrap());
+ }
+
+ #[test]
+ fn should_respect_escaped_quote_in_what_looks_like_non_closed_arg() {
+ let sh = Shellwords::from(r":rename 'should be one \\'argument");
+ let mut args = sh.args();
+ assert_eq!(r"should be one \\", args.next().unwrap());
+ assert_eq!(r"argument", args.next().unwrap());
+ }
+
+ #[test]
+ fn should_split_args() {
+ assert_eq!(Shellwords::from(":o a").args().collect::<Vec<_>>(), &["a"]);
+ assert_eq!(
+ Shellwords::from(":o a\\ ").args().collect::<Vec<_>>(),
+ &["a\\"]
+ );
+ }
+
+ #[test]
+ fn should_parse_args_even_with_leading_whitespace() {
+ // Three spaces
+ assert_eq!(
+ Shellwords::from(":o a").args().collect::<Vec<_>>(),
+ &["a"]
+ );
+ }
+
+ #[test]
+ fn should_peek_next_arg_and_not_consume() {
+ let mut args = Shellwords::from(":o a").args();
+
+ assert_eq!(Some("a"), args.peek());
+ assert_eq!(Some("a"), args.next());
+ assert_eq!(None, args.next());
+ }
+
+ #[test]
+ fn should_parse_single_quotes_while_respecting_escapes() {
let quoted =
r#":o 'single_word' 'twó wörds' '' ' ''\three\' \"with\ escaping\\' 'quote incomplete"#;
let shellwords = Shellwords::from(quoted);
- let result = shellwords.words().to_vec();
+ let result = shellwords.args().collect::<Vec<_>>();
let expected = vec![
- Cow::from(":o"),
- Cow::from("single_word"),
- Cow::from("twó wörds"),
- Cow::from(r#"three' "with escaping\"#),
- Cow::from("quote incomplete"),
+ "single_word",
+ "twó wörds",
+ "",
+ " ",
+ r#"\three\' \"with\ escaping\\"#,
+ "quote incomplete",
];
assert_eq!(expected, result);
}
#[test]
- #[cfg(unix)]
- fn test_dquoted() {
+ fn should_parse_double_quotes_while_respecting_escapes() {
let dquoted = r#":o "single_word" "twó wörds" "" " ""\three\' \"with\ escaping\\" "dquote incomplete"#;
let shellwords = Shellwords::from(dquoted);
- let result = shellwords.words().to_vec();
+ let result = shellwords.args().collect::<Vec<_>>();
let expected = vec![
- Cow::from(":o"),
- Cow::from("single_word"),
- Cow::from("twó wörds"),
- Cow::from(r#"three' "with escaping\"#),
- Cow::from("dquote incomplete"),
+ "single_word",
+ "twó wörds",
+ "",
+ " ",
+ r#"\three\' \"with\ escaping\\"#,
+ "dquote incomplete",
];
assert_eq!(expected, result);
}
#[test]
- #[cfg(unix)]
- fn test_mixed() {
+ fn should_respect_escapes_with_mixed_quotes() {
let dquoted = r#":o single_word 'twó wörds' "\three\' \"with\ escaping\\""no space before"'and after' $#%^@ "%^&(%^" ')(*&^%''a\\\\\b' '"#;
let shellwords = Shellwords::from(dquoted);
- let result = shellwords.words().to_vec();
+ let result = shellwords.args().collect::<Vec<_>>();
let expected = vec![
- Cow::from(":o"),
- Cow::from("single_word"),
- Cow::from("twó wörds"),
- Cow::from("three' \"with escaping\\"),
- Cow::from("no space before"),
- Cow::from("and after"),
- Cow::from("$#%^@"),
- Cow::from("%^&(%^"),
- Cow::from(")(*&^%"),
- Cow::from(r#"a\\b"#),
- //last ' just changes to quoted but since we dont have anything after it, it should be ignored
+ "single_word",
+ "twó wörds",
+ r#"\three\' \"with\ escaping\\"#,
+ "no space before",
+ "and after",
+ "$#%^@",
+ "%^&(%^",
+ r")(*&^%",
+ r"a\\\\\b",
+ // Last ' is important, as if the user input an accidental quote at the end, this should be checked in
+ // commands where there should only be one input and return an error rather than silently succeed.
+ "'",
];
assert_eq!(expected, result);
}
#[test]
- fn test_lists() {
- let input =
- r#":set statusline.center ["file-type","file-encoding"] '["list", "in", "quotes"]'"#;
+ fn should_return_rest() {
+ let input = r#":set statusline.center ["file-type","file-encoding"]"#;
let shellwords = Shellwords::from(input);
- let result = shellwords.words().to_vec();
- let expected = vec![
- Cow::from(":set"),
- Cow::from("statusline.center"),
- Cow::from(r#"["file-type","file-encoding"]"#),
- Cow::from(r#"["list", "in", "quotes"]"#),
- ];
- assert_eq!(expected, result);
+ let mut args = shellwords.args();
+ assert_eq!(":set", shellwords.command());
+ assert_eq!(Some("statusline.center"), args.next());
+ assert_eq!(r#"["file-type","file-encoding"]"#, args.rest());
+ }
+
+ #[test]
+ fn should_return_no_args() {
+ let mut args = Args::parse("");
+ assert!(args.next().is_none());
+ assert!(args.is_empty());
+ assert!(args.arg_count() == 0);
+ }
+
+ #[test]
+ fn should_leave_escaped_quotes() {
+ let input = r#"\" \` \' \"with \'with \`with"#;
+ let result = Args::parse(input).collect::<Vec<_>>();
+ assert_eq!(r#"\""#, result[0]);
+ assert_eq!(r"\`", result[1]);
+ assert_eq!(r"\'", result[2]);
+ assert_eq!(r#"\"with"#, result[3]);
+ assert_eq!(r"\'with", result[4]);
+ assert_eq!(r"\`with", result[5]);
+ }
+
+ #[test]
+ fn should_leave_literal_newline_alone() {
+ let result = Args::parse(r"\n").collect::<Vec<_>>();
+ assert_eq!(r"\n", result[0]);
+ }
+
+ #[test]
+ fn should_leave_literal_unicode_alone() {
+ let result = Args::parse(r"\u{C}").collect::<Vec<_>>();
+ assert_eq!(r"\u{C}", result[0]);
}
#[test]
#[cfg(unix)]
- fn test_escaping_unix() {
+ fn should_escape_unix() {
assert_eq!(escape("foobar".into()), Cow::Borrowed("foobar"));
assert_eq!(escape("foo bar".into()), Cow::Borrowed("foo\\ bar"));
assert_eq!(escape("foo\tbar".into()), Cow::Borrowed("foo\\\tbar"));
@@ -316,35 +711,79 @@ mod test {
#[test]
#[cfg(windows)]
- fn test_escaping_windows() {
+ fn should_escape_windows() {
assert_eq!(escape("foobar".into()), Cow::Borrowed("foobar"));
assert_eq!(escape("foo bar".into()), Cow::Borrowed("\"foo bar\""));
}
#[test]
- #[cfg(unix)]
- fn test_parts() {
- assert_eq!(Shellwords::from(":o a").parts(), &[":o", "a"]);
- assert_eq!(Shellwords::from(":o a\\ ").parts(), &[":o", "a\\ "]);
+ fn should_unescape_newline() {
+ let unescaped = unescape("hello\\nworld");
+ assert_eq!("hello\nworld", unescaped);
}
#[test]
- #[cfg(windows)]
- fn test_parts() {
- assert_eq!(Shellwords::from(":o a").parts(), &[":o", "a"]);
- assert_eq!(Shellwords::from(":o a\\ ").parts(), &[":o", "a\\"]);
+ fn should_unescape_tab() {
+ let unescaped = unescape("hello\\tworld");
+ assert_eq!("hello\tworld", unescaped);
}
#[test]
- fn test_multibyte_at_end() {
- assert_eq!(Shellwords::from("𒀀").parts(), &["𒀀"]);
- assert_eq!(
- Shellwords::from(":sh echo 𒀀").parts(),
- &[":sh", "echo", "𒀀"]
- );
- assert_eq!(
- Shellwords::from(":sh echo 𒀀 hello world𒀀").parts(),
- &[":sh", "echo", "𒀀", "hello", "world𒀀"]
- );
+ fn should_unescape_unicode() {
+ let unescaped = unescape("hello\\u{1f929}world");
+ assert_eq!("hello\u{1f929}world", unescaped, "char: 🤩 ");
+ assert_eq!("hello🤩world", unescaped);
+ }
+
+ #[test]
+ fn should_return_original_input_due_to_bad_unicode() {
+ let unescaped = unescape("hello\\u{999999999}world");
+ assert_eq!("hello\\u{999999999}world", unescaped);
+ }
+
+ #[test]
+ fn should_not_unescape_slash() {
+ let unescaped = unescape(r"hello\\world");
+ assert_eq!(r"hello\\world", unescaped);
+
+ let unescaped = unescape(r"hello\\\\world");
+ assert_eq!(r"hello\\\\world", unescaped);
+ }
+
+ #[test]
+ fn should_not_unescape_slash_single_quote() {
+ let unescaped = unescape("\\'");
+ assert_eq!(r"\'", unescaped);
+ }
+
+ #[test]
+ fn should_not_unescape_slash_double_quote() {
+ let unescaped = unescape("\\\"");
+ assert_eq!(r#"\""#, unescaped);
+ }
+
+ #[test]
+ fn should_not_change_anything() {
+ let unescaped = unescape("'");
+ assert_eq!("'", unescaped);
+ let unescaped = unescape(r#"""#);
+ assert_eq!(r#"""#, unescaped);
+ }
+
+ #[test]
+ fn should_only_unescape_newline_not_slash_single_quote() {
+ let unescaped = unescape("\\n\'");
+ assert_eq!("\n'", unescaped);
+ let unescaped = unescape("\\n\\'");
+ assert_eq!("\n\\'", unescaped);
+ }
+
+ #[test]
+ fn should_unescape_args() {
+ // 1f929: 🤩
+ let args = Args::parse(r#"'hello\u{1f929} world' '["hello", "\u{1f929}", "world"]'"#)
+ .collect::<Vec<_>>();
+ assert_eq!("hello\u{1f929} world", unescape(args[0]));
+ assert_eq!(r#"["hello", "🤩", "world"]"#, unescape(args[1]));
}
}