Unnamed repository; edit this file 'description' to name the repository.
Diffstat (limited to 'helix-core/src/shellwords.rs')
-rw-r--r--helix-core/src/shellwords.rs350
1 files changed, 350 insertions, 0 deletions
diff --git a/helix-core/src/shellwords.rs b/helix-core/src/shellwords.rs
new file mode 100644
index 00000000..9d873c36
--- /dev/null
+++ b/helix-core/src/shellwords.rs
@@ -0,0 +1,350 @@
+use std::borrow::Cow;
+
+/// Escapes ASCII whitespace in `input` for shellwords usage.
+///
+/// Input that contains no ASCII whitespace is handed back untouched. Otherwise the result is
+/// platform dependent: on Unix each ASCII whitespace character gets a backslash put in front
+/// of it, elsewhere the whole input is wrapped in a pair of double quotes.
+pub fn escape(input: Cow<str>) -> Cow<str> {
+    let has_whitespace = input.chars().any(|ch| ch.is_ascii_whitespace());
+    if !has_whitespace {
+        return input;
+    }
+
+    if !cfg!(unix) {
+        return Cow::Owned(format!("\"{}\"", input));
+    }
+
+    let mut escaped = String::new();
+    for ch in input.chars() {
+        if ch.is_ascii_whitespace() {
+            escaped.push('\\');
+        }
+        escaped.push(ch);
+    }
+    Cow::Owned(escaped)
+}
+
+/// Parser state used while splitting an input string into shellwords.
+enum State {
+    /// The last character was unquoted whitespace or a closing quote.
+    OnWhitespace,
+    /// Inside a word that is not enclosed in quotes. Parsing starts in this state.
+    Unquoted,
+    /// A backslash was just seen outside of quotes; the next character returns to `Unquoted`.
+    UnquotedEscaped,
+    /// Inside a single-quoted (`'`) word.
+    Quoted,
+    /// A backslash was just seen inside single quotes; the next character returns to `Quoted`.
+    QuoteEscaped,
+    /// Inside a double-quoted (`"`) word.
+    Dquoted,
+    /// A backslash was just seen inside double quotes; the next character returns to `Dquoted`.
+    DquoteEscaped,
+}
+
+/// An input string split into shell-like words, created with `Shellwords::from`.
+pub struct Shellwords<'a> {
+    /// State the parser was in after the last character of the input.
+    state: State,
+    /// Shellwords where whitespace and escapes have been resolved.
+    words: Vec<Cow<'a, str>>,
+    /// The parts of the input that are divided into shellwords. This can be
+    /// used to retrieve the original text for a given word by looking up the
+    /// same index in the Vec as the word in `words`.
+    parts: Vec<&'a str>,
+}
+
+impl<'a> From<&'a str> for Shellwords<'a> {
+    /// Splits `input` into shellwords in a single pass over its characters.
+    ///
+    /// * Unquoted ASCII whitespace separates words.
+    /// * A `'` or `"` that follows whitespace or a closing quote opens a quoted word, which
+    ///   runs up to the matching quote character (or the end of the input). A quote met in
+    ///   the middle of an unquoted word is kept as a literal character. Parsing starts in the
+    ///   unquoted state, so this also applies to a quote that is the first character of
+    ///   `input`.
+    /// * On Unix a backslash is dropped and the character after it is taken literally, inside
+    ///   and outside of quotes. On other platforms backslashes are not treated as escapes.
+    /// * Candidates that are empty or consist only of whitespace (for example `''`) are
+    ///   dropped.
+    ///
+    /// Words are borrowed from `input` where possible and only allocated when text from
+    /// before an escape has to be joined with the text after it. `parts` always receives the
+    /// original text of the word, escape characters included.
+    fn from(input: &'a str) -> Self {
+        use State::*;
+
+        let mut state = Unquoted;
+        let mut words = Vec::new();
+        let mut parts = Vec::new();
+        // Text of the current word that precedes its most recent escape, backslashes removed.
+        let mut escaped = String::with_capacity(input.len());
+
+        // Byte offset at which the original text of the current word starts.
+        let mut part_start = 0;
+        // Byte offset from which the current word has not been copied into `escaped` yet.
+        let mut unescaped_start = 0;
+
+        for (i, c) in input.char_indices() {
+            // Byte offset at which the current word ends, if `c` terminates one. An `Option`
+            // is used because offset 0 is a valid boundary (input starting with whitespace).
+            let mut end = None;
+
+            state = match state {
+                OnWhitespace => match c {
+                    '"' => {
+                        end = Some(i);
+                        Dquoted
+                    }
+                    '\'' => {
+                        end = Some(i);
+                        Quoted
+                    }
+                    '\\' => {
+                        if cfg!(unix) {
+                            escaped.push_str(&input[unescaped_start..i]);
+                            unescaped_start = i + 1;
+                            UnquotedEscaped
+                        } else {
+                            OnWhitespace
+                        }
+                    }
+                    c if c.is_ascii_whitespace() => {
+                        end = Some(i);
+                        OnWhitespace
+                    }
+                    _ => Unquoted,
+                },
+                Unquoted => match c {
+                    '\\' => {
+                        if cfg!(unix) {
+                            escaped.push_str(&input[unescaped_start..i]);
+                            unescaped_start = i + 1;
+                            UnquotedEscaped
+                        } else {
+                            Unquoted
+                        }
+                    }
+                    c if c.is_ascii_whitespace() => {
+                        end = Some(i);
+                        OnWhitespace
+                    }
+                    _ => Unquoted,
+                },
+                UnquotedEscaped => Unquoted,
+                Quoted => match c {
+                    '\\' => {
+                        if cfg!(unix) {
+                            escaped.push_str(&input[unescaped_start..i]);
+                            unescaped_start = i + 1;
+                            QuoteEscaped
+                        } else {
+                            Quoted
+                        }
+                    }
+                    '\'' => {
+                        end = Some(i);
+                        OnWhitespace
+                    }
+                    _ => Quoted,
+                },
+                QuoteEscaped => Quoted,
+                Dquoted => match c {
+                    '\\' => {
+                        if cfg!(unix) {
+                            escaped.push_str(&input[unescaped_start..i]);
+                            unescaped_start = i + 1;
+                            DquoteEscaped
+                        } else {
+                            Dquoted
+                        }
+                    }
+                    '"' => {
+                        end = Some(i);
+                        OnWhitespace
+                    }
+                    _ => Dquoted,
+                },
+                DquoteEscaped => Dquoted,
+            };
+
+            // Offset of the character after `c`; equals `input.len()` on the last character.
+            let next = i + c.len_utf8();
+            if end.is_none() && next == input.len() {
+                // The input ends inside a word: the word runs up to the end of the input.
+                end = Some(next);
+            }
+
+            if let Some(end) = end {
+                let inp = &input[unescaped_start..end];
+
+                if !(escaped.trim().is_empty() && inp.trim().is_empty()) {
+                    // Always record the original text, so that `parts` keeps the escape
+                    // characters even when the word starts with an escape.
+                    parts.push(&input[part_start..end]);
+                    if escaped.is_empty() {
+                        words.push(Cow::Borrowed(inp));
+                    } else {
+                        words.push(Cow::Owned([escaped.as_str(), inp].concat()));
+                    }
+                }
+                // Never carry text of a finished (or dropped) word over into the next one.
+                escaped.clear();
+                unescaped_start = next;
+                part_start = next;
+            }
+        }
+
+        debug_assert_eq!(words.len(), parts.len());
+
+        Self {
+            state,
+            words,
+            parts,
+        }
+    }
+}
+
+impl<'a> Shellwords<'a> {
+    /// Tells whether the input ended on a whitespace character that is not escaped.
+    ///
+    /// The answer is taken from the state the parser finished in, so it is also `true`
+    /// when the last character of the input was a closing quote.
+    ///
+    /// # Examples
+    ///
+    /// ```rust
+    /// use helix_core::shellwords::Shellwords;
+    /// assert_eq!(Shellwords::from(" ").ends_with_whitespace(), true);
+    /// assert_eq!(Shellwords::from(":open ").ends_with_whitespace(), true);
+    /// assert_eq!(Shellwords::from(":open foo.txt ").ends_with_whitespace(), true);
+    /// assert_eq!(Shellwords::from(":open").ends_with_whitespace(), false);
+    /// #[cfg(unix)]
+    /// assert_eq!(Shellwords::from(":open a\\ ").ends_with_whitespace(), false);
+    /// #[cfg(unix)]
+    /// assert_eq!(Shellwords::from(":open a\\ b.txt").ends_with_whitespace(), false);
+    /// ```
+    pub fn ends_with_whitespace(&self) -> bool {
+        let final_state = &self.state;
+        matches!(final_state, State::OnWhitespace)
+    }
+
+    /// The shellwords parsed from the input string, with escapes and quotes resolved.
+    pub fn words(&self) -> &[Cow<'a, str>] {
+        self.words.as_slice()
+    }
+
+    /// The original input text of every entry of [`Self::words`], at the same index. Escape
+    /// characters are still present; the whitespace separating the words is not.
+    pub fn parts(&self) -> &[&'a str] {
+        self.parts.as_slice()
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    /// Without escape handling (Windows) backslashes stay in the words.
+    #[test]
+    #[cfg(windows)]
+    fn test_normal() {
+        let parsed = Shellwords::from(r#":o single_word twó wörds \three\ \"with\ escaping\\"#);
+        // TODO test is_owned and is_borrowed, once they get stabilized.
+        assert_eq!(
+            parsed.words(),
+            [
+                ":o",
+                "single_word",
+                "twó",
+                "wörds",
+                "\\three\\",
+                "\\",
+                "with\\ escaping\\\\",
+            ]
+        );
+    }
+
+    /// With escape handling (Unix) backslashes are resolved and can join words.
+    #[test]
+    #[cfg(unix)]
+    fn test_normal() {
+        let parsed = Shellwords::from(r#":o single_word twó wörds \three\ \"with\ escaping\\"#);
+        // TODO test is_owned and is_borrowed, once they get stabilized.
+        assert_eq!(
+            parsed.words(),
+            [
+                ":o",
+                "single_word",
+                "twó",
+                "wörds",
+                r#"three "with escaping\"#,
+            ]
+        );
+    }
+
+    /// Single-quoted words, including empty, whitespace-only and unterminated ones.
+    #[test]
+    #[cfg(unix)]
+    fn test_quoted() {
+        let parsed = Shellwords::from(
+            r#":o 'single_word' 'twó wörds' '' ' ''\three\' \"with\ escaping\\' 'quote incomplete"#,
+        );
+        assert_eq!(
+            parsed.words(),
+            [
+                ":o",
+                "single_word",
+                "twó wörds",
+                r#"three' "with escaping\"#,
+                "quote incomplete",
+            ]
+        );
+    }
+
+    /// Double-quoted words, including empty, whitespace-only and unterminated ones.
+    #[test]
+    #[cfg(unix)]
+    fn test_dquoted() {
+        let parsed = Shellwords::from(
+            r#":o "single_word" "twó wörds" "" " ""\three\' \"with\ escaping\\" "dquote incomplete"#,
+        );
+        assert_eq!(
+            parsed.words(),
+            [
+                ":o",
+                "single_word",
+                "twó wörds",
+                r#"three' "with escaping\"#,
+                "dquote incomplete",
+            ]
+        );
+    }
+
+    /// Unquoted, single-quoted and double-quoted words in one input.
+    #[test]
+    #[cfg(unix)]
+    fn test_mixed() {
+        let parsed = Shellwords::from(
+            r#":o single_word 'twó wörds' "\three\' \"with\ escaping\\""no space before"'and after' $#%^@ "%^&(%^" ')(*&^%''a\\\\\b' '"#,
+        );
+        assert_eq!(
+            parsed.words(),
+            [
+                ":o",
+                "single_word",
+                "twó wörds",
+                "three' \"with escaping\\",
+                "no space before",
+                "and after",
+                "$#%^@",
+                "%^&(%^",
+                ")(*&^%",
+                r#"a\\b"#,
+                // The last ' only switches to the quoted state; nothing follows it, so it
+                // does not produce a word.
+            ]
+        );
+    }
+
+    /// Quotes inside an unquoted word are literal; a quoted list stays one word.
+    #[test]
+    fn test_lists() {
+        let parsed = Shellwords::from(
+            r#":set statusline.center ["file-type","file-encoding"] '["list", "in", "quotes"]'"#,
+        );
+        assert_eq!(
+            parsed.words(),
+            [
+                ":set",
+                "statusline.center",
+                r#"["file-type","file-encoding"]"#,
+                r#"["list", "in", "quotes"]"#,
+            ]
+        );
+    }
+
+    #[test]
+    #[cfg(unix)]
+    fn test_escaping_unix() {
+        let cases = [
+            ("foobar", "foobar"),
+            ("foo bar", "foo\\ bar"),
+            ("foo\tbar", "foo\\\tbar"),
+        ];
+        for (raw, want) in cases {
+            assert_eq!(escape(Cow::from(raw)), want);
+        }
+    }
+
+    #[test]
+    #[cfg(windows)]
+    fn test_escaping_windows() {
+        let cases = [("foobar", "foobar"), ("foo bar", "\"foo bar\"")];
+        for (raw, want) in cases {
+            assert_eq!(escape(Cow::from(raw)), want);
+        }
+    }
+
+    #[test]
+    #[cfg(unix)]
+    fn test_parts() {
+        assert_eq!(Shellwords::from(":o a").parts(), [":o", "a"]);
+        assert_eq!(Shellwords::from(":o a\\ ").parts(), [":o", "a\\ "]);
+    }
+
+    #[test]
+    #[cfg(windows)]
+    fn test_parts() {
+        assert_eq!(Shellwords::from(":o a").parts(), [":o", "a"]);
+        assert_eq!(Shellwords::from(":o a\\ ").parts(), [":o", "a\\"]);
+    }
+
+    /// The end-of-input check has to account for the byte length of the last character.
+    #[test]
+    fn test_multibyte_at_end() {
+        assert_eq!(Shellwords::from("𒀀").parts(), ["𒀀"]);
+        assert_eq!(
+            Shellwords::from(":sh echo 𒀀").parts(),
+            [":sh", "echo", "𒀀"]
+        );
+        assert_eq!(
+            Shellwords::from(":sh echo 𒀀 hello world𒀀").parts(),
+            [":sh", "echo", "𒀀", "hello", "world𒀀"]
+        );
+    }
+}