Unnamed repository; edit this file 'description' to name the repository.
Replace `url::Url` with a String wrapper
Michael Davis 2024-12-21
parent 652e316 · commit b84c9a8
-rw-r--r--helix-lsp-types/Cargo.toml2
-rw-r--r--helix-lsp-types/README.md4
-rw-r--r--helix-lsp-types/src/lib.rs156
3 files changed, 146 insertions, 16 deletions
diff --git a/helix-lsp-types/Cargo.toml b/helix-lsp-types/Cargo.toml
index 308edad6..535cefba 100644
--- a/helix-lsp-types/Cargo.toml
+++ b/helix-lsp-types/Cargo.toml
@@ -25,7 +25,7 @@ bitflags = "2.6.0"
serde = { version = "1.0.216", features = ["derive"] }
serde_json = "1.0.133"
serde_repr = "0.1"
-url = {version = "2.5.4", features = ["serde"]}
+percent-encoding.workspace = true
[features]
default = []
diff --git a/helix-lsp-types/README.md b/helix-lsp-types/README.md
index 01803be1..de19757f 100644
--- a/helix-lsp-types/README.md
+++ b/helix-lsp-types/README.md
@@ -1,3 +1,5 @@
# Helix's `lsp-types`
-This is a fork of the [`lsp-types`](https://crates.io/crates/lsp-types) crate ([`gluon-lang/lsp-types`](https://github.com/gluon-lang/lsp-types)) taken at version v0.95.1 (commit [3e6daee](https://github.com/gluon-lang/lsp-types/commit/3e6daee771d14db4094a554b8d03e29c310dfcbe)). This fork focuses usability improvements that make the types easier to work with for the Helix codebase. For example the URL type - the `uri` crate at this version of `lsp-types` - will be replaced with a wrapper around a string.
+This is a fork of the [`lsp-types`](https://crates.io/crates/lsp-types) crate ([`gluon-lang/lsp-types`](https://github.com/gluon-lang/lsp-types)) taken at version v0.95.1 (commit [3e6daee](https://github.com/gluon-lang/lsp-types/commit/3e6daee771d14db4094a554b8d03e29c310dfcbe)). This fork focuses on usability improvements that make the types easier to work with for the Helix codebase.
+
+The URL type has been replaced with a newtype wrapper of a `String`. The `lsp-types` crate at the forked version used [`url::Url`](https://docs.rs/url/2.5.0/url/struct.Url.html) which provides conveniences for using URLs according to [the WHATWG URL spec](https://url.spec.whatwg.org). Helix supports a subset of valid URLs, namely the `file://` scheme, so a wrapper around a normal `String` is sufficient. Plus the LSP spec requires URLs to be in [RFC3986](https://tools.ietf.org/html/rfc3986) format instead.
diff --git a/helix-lsp-types/src/lib.rs b/helix-lsp-types/src/lib.rs
index 41c483f4..5024f6e7 100644
--- a/helix-lsp-types/src/lib.rs
+++ b/helix-lsp-types/src/lib.rs
@@ -3,27 +3,155 @@
Language Server Protocol types for Rust.
Based on: <https://microsoft.github.io/language-server-protocol/specification>
-
-This library uses the URL crate for parsing URIs. Note that there is
-some confusion on the meaning of URLs vs URIs:
-<http://stackoverflow.com/a/28865728/393898>. According to that
-information, on the classical sense of "URLs", "URLs" are a subset of
-URIs, But on the modern/new meaning of URLs, they are the same as
-URIs. The important take-away aspect is that the URL crate should be
-able to parse any URI, such as `urn:isbn:0451450523`.
-
-
*/
#![allow(non_upper_case_globals)]
#![forbid(unsafe_code)]
use bitflags::bitflags;
-use std::{collections::HashMap, fmt::Debug};
+use std::{collections::HashMap, fmt::Debug, path::Path};
use serde::{de, de::Error as Error_, Deserialize, Serialize};
use serde_json::Value;
-pub use url::Url;
+
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Deserialize, Serialize)]
+pub struct Url(String);
+
+// <https://datatracker.ietf.org/doc/html/rfc3986#section-2.2>, also see
+// <https://github.com/microsoft/vscode-uri/blob/6dec22d7dcc6c63c30343d3a8d56050d0078cb6a/src/uri.ts#L454-L477>
+const RESERVED: &percent_encoding::AsciiSet = &percent_encoding::CONTROLS
+ // GEN_DELIMS
+ .add(b':')
+ .add(b'/')
+ .add(b'?')
+ .add(b'#')
+ .add(b'[')
+ .add(b']')
+ .add(b'@')
+ // SUB_DELIMS
+ .add(b'!')
+ .add(b'$')
+ .add(b'&')
+ .add(b'\'')
+ .add(b'(')
+ .add(b')')
+ .add(b'*')
+ .add(b'+')
+ .add(b',')
+ .add(b';')
+ .add(b'=');
+
+impl Url {
+ #[allow(clippy::result_unit_err)]
+ #[cfg(any(unix, target_os = "redox", target_os = "wasi"))]
+ pub fn from_file_path<P: AsRef<Path>>(path: P) -> Result<Self, ()> {
+ #[cfg(any(unix, target_os = "redox"))]
+ use std::os::unix::prelude::OsStrExt;
+ #[cfg(target_os = "wasi")]
+ use std::os::wasi::prelude::OsStrExt;
+
+ let mut serialization = String::from("file://");
+ // skip the root component
+ for component in path.as_ref().components().skip(1) {
+ serialization.push('/');
+ serialization.extend(percent_encoding::percent_encode(
+ component.as_os_str().as_bytes(),
+ RESERVED,
+ ));
+ }
+ if &serialization == "file://" {
+ // An URL's path must not be empty.
+ serialization.push('/');
+ }
+ Ok(Self(serialization))
+ }
+
+ #[allow(clippy::result_unit_err)]
+ #[cfg(windows)]
+ pub fn from_file_path<P: AsRef<Path>>(path: P) -> Result<Self, ()> {
+ from_file_path_windows(path.as_ref())
+ }
+
+ #[allow(clippy::result_unit_err)]
+ #[cfg_attr(not(windows), allow(dead_code))]
+ fn from_file_path_windows(path: &Path) -> Result<Self, ()> {
+ use std::path::{Component, Prefix};
+
+ fn is_windows_drive_letter(segment: &str) -> bool {
+ segment.len() == 2
+ && (segment.as_bytes()[0] as char).is_ascii_alphabetic()
+ && matches!(segment.as_bytes()[1], b':' | b'|')
+ }
+
+ assert!(path.is_absolute());
+ let mut serialization = String::from("file://");
+ let mut components = path.components();
+ let host_start = serialization.len() + 1;
+
+ match components.next() {
+ Some(Component::Prefix(ref p)) => match p.kind() {
+ Prefix::Disk(letter) | Prefix::VerbatimDisk(letter) => {
+ serialization.push('/');
+ serialization.push(letter as char);
+ serialization.push(':');
+ }
+ // TODO: Prefix::UNC | Prefix::VerbatimUNC
+ _ => todo!("support UNC drives"),
+ },
+ _ => unreachable!("absolute windows paths must start with a prefix"),
+ }
+
+ let mut path_only_has_prefix = true;
+ for component in components {
+ if component == Component::RootDir {
+ continue;
+ }
+
+ path_only_has_prefix = false;
+
+ serialization.push('/');
+ serialization.extend(percent_encoding::percent_encode(
+ component.as_os_str().as_encoded_bytes(),
+ RESERVED,
+ ));
+ }
+
+ if serialization.len() > host_start
+ && is_windows_drive_letter(&serialization[host_start..])
+ && path_only_has_prefix
+ {
+ serialization.push('/');
+ }
+
+ Ok(Self(serialization))
+ }
+
+ #[allow(clippy::result_unit_err)]
+ pub fn from_directory_path<P: AsRef<Path>>(path: P) -> Result<Self, ()> {
+ let Self(mut serialization) = Self::from_file_path(path)?;
+ if !serialization.ends_with('/') {
+ serialization.push('/');
+ }
+ Ok(Self(serialization))
+ }
+
+ /// Returns the serialized representation of the URL as a `&str`
+ pub fn as_str(&self) -> &str {
+ &self.0
+ }
+
+ /// Consumes the URL, converting into a `String`.
+ /// Note that the string is the serialized representation of the URL.
+ pub fn into_string(self) -> String {
+ self.0
+ }
+}
+
+impl From<&str> for Url {
+ fn from(value: &str) -> Self {
+ Self(value.to_string())
+ }
+}
// Large enough to contain any enumeration name defined in this crate
type PascalCaseBuf = [u8; 32];
@@ -2843,14 +2971,14 @@ mod tests {
test_serialization(
&WorkspaceEdit {
changes: Some(
- vec![(Url::parse("file://test").unwrap(), vec![])]
+ vec![(Url::from("file://test"), vec![])]
.into_iter()
.collect(),
),
document_changes: None,
..Default::default()
},
- r#"{"changes":{"file://test/":[]}}"#,
+ r#"{"changes":{"file://test":[]}}"#,
);
}