diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index a114a272a..2bf71d3ce 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -49,7 +49,7 @@ jobs:
         uses: actions-rs/cargo@v1
         with:
           command: test
-          args: --features "url/serde"
+          args: --features "url/serde,url/expose_internals"
       # The #[debugger_visualizer] attribute is currently gated behind an unstable feature flag.
       # In order to test the visualizers for the url crate, they have to be tested on a nightly build.
       - name: Run debugger_visualizer tests
diff --git a/url/Cargo.toml b/url/Cargo.toml
index 1747e4a97..1eaf15526 100644
--- a/url/Cargo.toml
+++ b/url/Cargo.toml
@@ -38,6 +38,8 @@ default = ["idna"]
 # UNSTABLE FEATURES (requires Rust nightly)
 # Enable to use the #[debugger_visualizer] attribute.
 debugger_visualizer = []
+# Expose internal offsets of the URL.
+expose_internals = []
 
 [[bench]]
 name = "parse_url"
diff --git a/url/src/quirks.rs b/url/src/quirks.rs
index 3ecb3c24d..3a5106402 100644
--- a/url/src/quirks.rs
+++ b/url/src/quirks.rs
@@ -14,6 +14,59 @@
 use crate::parser::{default_port, Context, Input, Parser, SchemeType};
 use crate::{Host, ParseError, Position, Url};
 
+/// Internal components / offsets of a URL.
+///
+/// ```text
+/// https://user:pass@example.com:1234/foo/bar?baz#quux
+///      |      |     |          |^^^^|       |   |
+///      |      |     |          |  | |       |   `----- fragment_start
+///      |      |     |          |  | |       `--------- query_start
+///      |      |     |          |  | `----------------- path_start
+///      |      |     |          |  `------------------- port
+///      |      |     |          `---------------------- host_end
+///      |      |     `--------------------------------- host_start
+///      |      `--------------------------------------- username_end
+///      `---------------------------------------------- scheme_end
+/// ```
+#[derive(Copy, Clone, Debug)]
+#[cfg(feature = "expose_internals")]
+pub struct InternalComponents {
+    /// Offset of the `:` that terminates the scheme.
+    pub scheme_end: u32,
+    /// Offset just past the username; equals `host_start` when there is no userinfo.
+    pub username_end: u32,
+    /// Offset of the first character of the host.
+    pub host_start: u32,
+    /// Offset just past the last character of the host.
+    pub host_end: u32,
+    /// Port number, if the URL stores one.
+    pub port: Option<u16>,
+    /// Offset of the first character of the path.
+    pub path_start: u32,
+    /// Offset of the `?` that introduces the query, if there is one.
+    pub query_start: Option<u32>,
+    /// Offset of the `#` that introduces the fragment, if there is one.
+    pub fragment_start: Option<u32>,
+}
+
+/// Returns a copy of the component offsets that `url` stores internally.
+///
+/// This can be useful for implementing efficient serialization
+/// for the URL.
+#[cfg(feature = "expose_internals")]
+pub fn internal_components(url: &Url) -> InternalComponents {
+    InternalComponents {
+        scheme_end: url.scheme_end,
+        username_end: url.username_end,
+        host_start: url.host_start,
+        host_end: url.host_end,
+        port: url.port,
+        path_start: url.path_start,
+        query_start: url.query_start,
+        fragment_start: url.fragment_start,
+    }
+}
+
 /// https://url.spec.whatwg.org/#dom-url-domaintoascii
 pub fn domain_to_ascii(domain: &str) -> String {
     match Host::parse(domain) {
diff --git a/url/tests/unit.rs b/url/tests/unit.rs
index b0dbd57f9..b0fd5a3c1 100644
--- a/url/tests/unit.rs
+++ b/url/tests/unit.rs
@@ -731,6 +731,34 @@ fn test_domain_encoding_quirks() {
     }
 }
 
+#[cfg(feature = "expose_internals")]
+#[test]
+fn test_expose_internals() {
+    use url::quirks::internal_components;
+    use url::quirks::InternalComponents;
+
+    let url = Url::parse("https://example.com/path/file.ext?key=val&key2=val2#fragment").unwrap();
+    let InternalComponents {
+        scheme_end,
+        username_end,
+        host_start,
+        host_end,
+        port,
+        path_start,
+        query_start,
+        fragment_start,
+    } = internal_components(&url);
+
+    assert_eq!(scheme_end, 5);
+    assert_eq!(username_end, 8);
+    assert_eq!(host_start, 8);
+    assert_eq!(host_end, 19);
+    assert_eq!(port, None);
+    assert_eq!(path_start, 19);
+    assert_eq!(query_start, Some(33));
+    assert_eq!(fragment_start, Some(51));
+}
+
 #[test]
 fn test_windows_unc_path() {
     if !cfg!(windows) {