Unnamed repository; edit this file 'description' to name the repository.
Diffstat (limited to 'helix-core/src/shellwords.rs')
| -rw-r--r-- | helix-core/src/shellwords.rs | 350 |
1 files changed, 350 insertions, 0 deletions
diff --git a/helix-core/src/shellwords.rs b/helix-core/src/shellwords.rs new file mode 100644 index 00000000..9d873c36 --- /dev/null +++ b/helix-core/src/shellwords.rs @@ -0,0 +1,350 @@ +use std::borrow::Cow; + +/// Auto escape for shellwords usage. +pub fn escape(input: Cow<str>) -> Cow<str> { + if !input.chars().any(|x| x.is_ascii_whitespace()) { + input + } else if cfg!(unix) { + Cow::Owned(input.chars().fold(String::new(), |mut buf, c| { + if c.is_ascii_whitespace() { + buf.push('\\'); + } + buf.push(c); + buf + })) + } else { + Cow::Owned(format!("\"{}\"", input)) + } +} + +enum State { + OnWhitespace, + Unquoted, + UnquotedEscaped, + Quoted, + QuoteEscaped, + Dquoted, + DquoteEscaped, +} + +pub struct Shellwords<'a> { + state: State, + /// Shellwords where whitespace and escapes has been resolved. + words: Vec<Cow<'a, str>>, + /// The parts of the input that are divided into shellwords. This can be + /// used to retrieve the original text for a given word by looking up the + /// same index in the Vec as the word in `words`. + parts: Vec<&'a str>, +} + +impl<'a> From<&'a str> for Shellwords<'a> { + fn from(input: &'a str) -> Self { + use State::*; + + let mut state = Unquoted; + let mut words = Vec::new(); + let mut parts = Vec::new(); + let mut escaped = String::with_capacity(input.len()); + + let mut part_start = 0; + let mut unescaped_start = 0; + let mut end = 0; + + for (i, c) in input.char_indices() { + state = match state { + OnWhitespace => match c { + '"' => { + end = i; + Dquoted + } + '\'' => { + end = i; + Quoted + } + '\\' => { + if cfg!(unix) { + escaped.push_str(&input[unescaped_start..i]); + unescaped_start = i + 1; + UnquotedEscaped + } else { + OnWhitespace + } + } + c if c.is_ascii_whitespace() => { + end = i; + OnWhitespace + } + _ => Unquoted, + }, + Unquoted => match c { + '\\' => { + if cfg!(unix) { + escaped.push_str(&input[unescaped_start..i]); + unescaped_start = i + 1; + UnquotedEscaped + } else { + Unquoted + } + } + c if c.is_ascii_whitespace() => { + end = i; + OnWhitespace + } + _ => Unquoted, + }, + UnquotedEscaped => Unquoted, + Quoted => match c { + '\\' => { + if cfg!(unix) { + escaped.push_str(&input[unescaped_start..i]); + unescaped_start = i + 1; + QuoteEscaped + } else { + Quoted + } + } + '\'' => { + end = i; + OnWhitespace + } + _ => Quoted, + }, + QuoteEscaped => Quoted, + Dquoted => match c { + '\\' => { + if cfg!(unix) { + escaped.push_str(&input[unescaped_start..i]); + unescaped_start = i + 1; + DquoteEscaped + } else { + Dquoted + } + } + '"' => { + end = i; + OnWhitespace + } + _ => Dquoted, + }, + DquoteEscaped => Dquoted, + }; + + let c_len = c.len_utf8(); + if i == input.len() - c_len && end == 0 { + end = i + c_len; + } + + if end > 0 { + let esc_trim = escaped.trim(); + let inp = &input[unescaped_start..end]; + + if !(esc_trim.is_empty() && inp.trim().is_empty()) { + if esc_trim.is_empty() { + words.push(inp.into()); + parts.push(inp); + } else { + words.push([escaped, inp.into()].concat().into()); + parts.push(&input[part_start..end]); + escaped = "".to_string(); + } + } + unescaped_start = i + 1; + part_start = i + 1; + end = 0; + } + } + + debug_assert!(words.len() == parts.len()); + + Self { + state, + words, + parts, + } + } +} + +impl<'a> Shellwords<'a> { + /// Checks that the input ends with a whitespace character which is not escaped. + /// + /// # Examples + /// + /// ```rust + /// use helix_core::shellwords::Shellwords; + /// assert_eq!(Shellwords::from(" ").ends_with_whitespace(), true); + /// assert_eq!(Shellwords::from(":open ").ends_with_whitespace(), true); + /// assert_eq!(Shellwords::from(":open foo.txt ").ends_with_whitespace(), true); + /// assert_eq!(Shellwords::from(":open").ends_with_whitespace(), false); + /// #[cfg(unix)] + /// assert_eq!(Shellwords::from(":open a\\ ").ends_with_whitespace(), false); + /// #[cfg(unix)] + /// assert_eq!(Shellwords::from(":open a\\ b.txt").ends_with_whitespace(), false); + /// ``` + pub fn ends_with_whitespace(&self) -> bool { + matches!(self.state, State::OnWhitespace) + } + + /// Returns the list of shellwords calculated from the input string. + pub fn words(&self) -> &[Cow<'a, str>] { + &self.words + } + + /// Returns a list of strings which correspond to [`Self::words`] but represent the original + /// text in the input string - including escape characters - without separating whitespace. + pub fn parts(&self) -> &[&'a str] { + &self.parts + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + #[cfg(windows)] + fn test_normal() { + let input = r#":o single_word twó wörds \three\ \"with\ escaping\\"#; + let shellwords = Shellwords::from(input); + let result = shellwords.words().to_vec(); + let expected = vec![ + Cow::from(":o"), + Cow::from("single_word"), + Cow::from("twó"), + Cow::from("wörds"), + Cow::from("\\three\\"), + Cow::from("\\"), + Cow::from("with\\ escaping\\\\"), + ]; + // TODO test is_owned and is_borrowed, once they get stabilized. + assert_eq!(expected, result); + } + + #[test] + #[cfg(unix)] + fn test_normal() { + let input = r#":o single_word twó wörds \three\ \"with\ escaping\\"#; + let shellwords = Shellwords::from(input); + let result = shellwords.words().to_vec(); + let expected = vec![ + Cow::from(":o"), + Cow::from("single_word"), + Cow::from("twó"), + Cow::from("wörds"), + Cow::from(r#"three "with escaping\"#), + ]; + // TODO test is_owned and is_borrowed, once they get stabilized. + assert_eq!(expected, result); + } + + #[test] + #[cfg(unix)] + fn test_quoted() { + let quoted = + r#":o 'single_word' 'twó wörds' '' ' ''\three\' \"with\ escaping\\' 'quote incomplete"#; + let shellwords = Shellwords::from(quoted); + let result = shellwords.words().to_vec(); + let expected = vec![ + Cow::from(":o"), + Cow::from("single_word"), + Cow::from("twó wörds"), + Cow::from(r#"three' "with escaping\"#), + Cow::from("quote incomplete"), + ]; + assert_eq!(expected, result); + } + + #[test] + #[cfg(unix)] + fn test_dquoted() { + let dquoted = r#":o "single_word" "twó wörds" "" " ""\three\' \"with\ escaping\\" "dquote incomplete"#; + let shellwords = Shellwords::from(dquoted); + let result = shellwords.words().to_vec(); + let expected = vec![ + Cow::from(":o"), + Cow::from("single_word"), + Cow::from("twó wörds"), + Cow::from(r#"three' "with escaping\"#), + Cow::from("dquote incomplete"), + ]; + assert_eq!(expected, result); + } + + #[test] + #[cfg(unix)] + fn test_mixed() { + let dquoted = r#":o single_word 'twó wörds' "\three\' \"with\ escaping\\""no space before"'and after' $#%^@ "%^&(%^" ')(*&^%''a\\\\\b' '"#; + let shellwords = Shellwords::from(dquoted); + let result = shellwords.words().to_vec(); + let expected = vec![ + Cow::from(":o"), + Cow::from("single_word"), + Cow::from("twó wörds"), + Cow::from("three' \"with escaping\\"), + Cow::from("no space before"), + Cow::from("and after"), + Cow::from("$#%^@"), + Cow::from("%^&(%^"), + Cow::from(")(*&^%"), + Cow::from(r#"a\\b"#), + //last ' just changes to quoted but since we dont have anything after it, it should be ignored + ]; + assert_eq!(expected, result); + } + + #[test] + fn test_lists() { + let input = + r#":set statusline.center ["file-type","file-encoding"] '["list", "in", "quotes"]'"#; + let shellwords = Shellwords::from(input); + let result = shellwords.words().to_vec(); + let expected = vec![ + Cow::from(":set"), + Cow::from("statusline.center"), + Cow::from(r#"["file-type","file-encoding"]"#), + Cow::from(r#"["list", "in", "quotes"]"#), + ]; + assert_eq!(expected, result); + } + + #[test] + #[cfg(unix)] + fn test_escaping_unix() { + assert_eq!(escape("foobar".into()), Cow::Borrowed("foobar")); + assert_eq!(escape("foo bar".into()), Cow::Borrowed("foo\\ bar")); + assert_eq!(escape("foo\tbar".into()), Cow::Borrowed("foo\\\tbar")); + } + + #[test] + #[cfg(windows)] + fn test_escaping_windows() { + assert_eq!(escape("foobar".into()), Cow::Borrowed("foobar")); + assert_eq!(escape("foo bar".into()), Cow::Borrowed("\"foo bar\"")); + } + + #[test] + #[cfg(unix)] + fn test_parts() { + assert_eq!(Shellwords::from(":o a").parts(), &[":o", "a"]); + assert_eq!(Shellwords::from(":o a\\ ").parts(), &[":o", "a\\ "]); + } + + #[test] + #[cfg(windows)] + fn test_parts() { + assert_eq!(Shellwords::from(":o a").parts(), &[":o", "a"]); + assert_eq!(Shellwords::from(":o a\\ ").parts(), &[":o", "a\\"]); + } + + #[test] + fn test_multibyte_at_end() { + assert_eq!(Shellwords::from("𒀀").parts(), &["𒀀"]); + assert_eq!( + Shellwords::from(":sh echo 𒀀").parts(), + &[":sh", "echo", "𒀀"] + ); + assert_eq!( + Shellwords::from(":sh echo 𒀀 hello world𒀀").parts(), + &[":sh", "echo", "𒀀", "hello", "world𒀀"] + ); + } +} |