Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use web_sys::Url for IDN conversion on wasm32-unknown-unknown #887

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
4 changes: 4 additions & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,11 @@ jobs:
- uses: dtolnay/rust-toolchain@stable
with:
targets: wasm32-unknown-unknown
- name: Install wasm-pack
run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh
- run: cargo build --target wasm32-unknown-unknown
- run: cd url && wasm-pack test --headless --chrome
- run: cd url && wasm-pack test --headless --firefox

Lint:
runs-on: ubuntu-latest
Expand Down
11 changes: 10 additions & 1 deletion url/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,21 @@ serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
bencher = "0.1"

[target.'cfg(all(target_arch = "wasm32", target_os = "unknown"))'.dev-dependencies]
wasm-bindgen-test = "0.3"
web-sys = { version = "0.3.65", features = ["Navigator", "Window"] }

[dependencies]
form_urlencoded = { version = "1.2.1", path = "../form_urlencoded" }
idna = { version = "0.5.0", path = "../idna" }
percent-encoding = { version = "2.3.1", path = "../percent_encoding" }
serde = { version = "1.0", optional = true, features = ["derive"] }

[target.'cfg(not(all(target_arch = "wasm32", target_os = "unknown")))'.dependencies]
idna = { version = "0.5.0", path = "../idna" }

[target.'cfg(all(target_arch = "wasm32", target_os = "unknown"))'.dependencies]
web-sys = { version = "0.3.65", features = ["Url"] }

[features]
default = []
# Enable to use the #[debugger_visualizer] attribute. This feature requires Rust >= 1.71.
Expand Down
81 changes: 57 additions & 24 deletions url/src/host.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,29 +89,7 @@ impl Host<String> {
return Err(ParseError::EmptyHost);
}

let is_invalid_domain_char = |c| {
matches!(
c,
'\0'..='\u{001F}'
| ' '
| '#'
| '%'
| '/'
| ':'
| '<'
| '>'
| '?'
| '@'
| '['
| '\\'
| ']'
| '^'
| '\u{007F}'
| '|'
)
};

if domain.find(is_invalid_domain_char).is_some() {
if domain.find(Self::is_invalid_domain_char).is_some() {
Err(ParseError::InvalidDomainCharacter)
} else if ends_in_a_number(&domain) {
let address = parse_ipv4addr(&domain)?;
Expand Down Expand Up @@ -161,10 +139,65 @@ impl Host<String> {
}
}

/// Converts an internationalized domain name to its ASCII form using the
/// [idna] crate.
///
/// Any error reported by [idna] is turned into a [`ParseError`] through the
/// `From` conversion between the two error types.
#[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))]
fn domain_to_ascii(domain: &str) -> Result<String, ParseError> {
    let ascii = idna::domain_to_ascii(domain)?;
    Ok(ascii)
}

/// Placeholder hostname carried by the scratch [web_sys::Url] before the real
/// domain is assigned. Reading it back after `set_hostname` means the
/// hostname was not replaced.
#[cfg(all(target_arch = "wasm32", target_os = "unknown"))]
const SENTINEL_HOSTNAME: &str = "url-host-web-sys-sentinel";
/// URL used to construct the scratch [web_sys::Url]; its host must be
/// `SENTINEL_HOSTNAME` (checked with a `debug_assert!` at the use site).
#[cfg(all(target_arch = "wasm32", target_os = "unknown"))]
const SENTINEL_URL: &str = "http://url-host-web-sys-sentinel";

/// Converts an internationalized domain name to its ASCII form by delegating
/// to the host environment's [web_sys::Url].
///
/// A scratch URL is created with a sentinel hostname, `domain` is assigned as
/// its hostname, and the hostname is read back as the result.
///
/// # Errors
///
/// * [`ParseError::InvalidDomainCharacter`] if `domain` contains a character
///   rejected by `is_invalid_domain_char`.
/// * [`ParseError::IdnaError`] if the scratch URL cannot be created, or if the
///   hostname read back is empty or is still the sentinel.
#[cfg(all(target_arch = "wasm32", target_os = "unknown"))]
fn domain_to_ascii(domain: &str) -> Result<String, ParseError> {
    debug_assert!(Self::SENTINEL_URL.ends_with(Self::SENTINEL_HOSTNAME));
    // Url throws an error on empty hostnames, so hand them back unchanged.
    if domain.is_empty() {
        return Ok(String::new());
    }
    // Url returns strange results for invalid domain chars; reject them first.
    if domain.contains(Self::is_invalid_domain_char) {
        return Err(ParseError::InvalidDomainCharacter);
    }
    // The sentinel is itself a valid, all-lowercase ASCII domain. Without this
    // check it would be indistinguishable from "set_hostname did nothing" below
    // and would be rejected as an IDNA error.
    // NOTE(review): non-ASCII spellings that the browser normalizes to the
    // sentinel are still rejected below.
    if domain.eq_ignore_ascii_case(Self::SENTINEL_HOSTNAME) {
        return Ok(Self::SENTINEL_HOSTNAME.to_owned());
    }

    // Create a new Url with a sentinel value.
    let scratch = web_sys::Url::new(Self::SENTINEL_URL).map_err(|_| ParseError::IdnaError)?;
    debug_assert_eq!(scratch.hostname(), Self::SENTINEL_HOSTNAME);
    // Whenever set_hostname fails, it doesn't update the Url, so the sentinel
    // (or an empty hostname) coming back is treated as a failed conversion.
    scratch.set_hostname(domain);
    let hostname = scratch.hostname();
    if hostname.is_empty() || hostname.eq_ignore_ascii_case(Self::SENTINEL_HOSTNAME) {
        // It's probably invalid
        Err(ParseError::IdnaError)
    } else {
        Ok(hostname)
    }
}

/// Reports whether `c` is one of the characters that are rejected when they
/// appear in a domain.
///
/// The rejected set is the ASCII control characters (U+0000..=U+001F and
/// U+007F), the space, and the punctuation
/// `#`, `%`, `/`, `:`, `<`, `>`, `?`, `@`, `[`, `\`, `]`, `^` and `|`.
fn is_invalid_domain_char(c: char) -> bool {
    // `is_ascii_control` covers exactly U+0000..=U+001F plus U+007F (DEL).
    if c.is_ascii_control() {
        return true;
    }
    matches!(
        c,
        ' ' | '#' | '%' | '/' | ':' | '<' | '>' | '?' | '@' | '[' | '\\' | ']' | '^' | '|'
    )
}
}

impl<S: AsRef<str>> fmt::Display for Host<S> {
Expand Down
6 changes: 5 additions & 1 deletion url/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -154,9 +154,12 @@ use std::borrow::Borrow;
use std::cmp;
use std::fmt::{self, Write};
use std::hash;
#[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))]
use std::io;
use std::mem;
use std::net::{IpAddr, SocketAddr, ToSocketAddrs};
use std::net::IpAddr;
#[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))]
use std::net::{SocketAddr, ToSocketAddrs};
use std::ops::{Range, RangeFrom, RangeTo};
use std::path::{Path, PathBuf};
use std::str;
Expand Down Expand Up @@ -1252,6 +1255,7 @@ impl Url {
/// })
/// }
/// ```
#[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))]
pub fn socket_addrs(
&self,
default_port_number: impl Fn() -> Option<u16>,
Expand Down
10 changes: 9 additions & 1 deletion url/src/origin.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,15 @@ impl Origin {
}
}

/// <https://html.spec.whatwg.org/multipage/#unicode-serialisation-of-an-origin>
#[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))]
#[deprecated]
/// [The Unicode serialization of an origin][0].
///
/// This [has been removed from the standard][1] because it was never widely
/// adopted, and was difficult to use.
///
/// [0]: https://html.spec.whatwg.org/multipage/#unicode-serialisation-of-an-origin
/// [1]: https://github.com/whatwg/html/pull/2689
pub fn unicode_serialization(&self) -> String {
match *self {
Origin::Opaque(_) => "null".to_owned(),
Expand Down
1 change: 1 addition & 0 deletions url/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ simple_enum_error! {
Overflow => "URLs more than 4 GB are not supported",
}

#[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))]
impl From<::idna::Errors> for ParseError {
fn from(_: ::idna::Errors) -> ParseError {
ParseError::IdnaError
Expand Down
19 changes: 17 additions & 2 deletions url/src/quirks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,15 +57,30 @@ pub fn internal_components(url: &Url) -> InternalComponents {
}
}

/// Converts a domain name to its ASCII (punycode) form.
///
/// Returns an empty string when `domain` fails to parse as a host, or parses
/// to something other than a domain.
///
/// This corresponds to <https://url.spec.whatwg.org/#dom-url-domaintoascii>,
/// a feature that was never implemented by browsers and
/// [has been removed from the URL spec][0].
///
/// [0]: https://github.com/whatwg/url/issues/63
#[deprecated]
pub fn domain_to_ascii(domain: &str) -> String {
    if let Ok(Host::Domain(ascii)) = Host::parse(domain) {
        ascii
    } else {
        String::new()
    }
}

/// https://url.spec.whatwg.org/#dom-url-domaintounicode
#[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))]
#[deprecated]
/// Converts a domain name to its Unicode form.
///
/// This feature was never implemented by browsers, and
/// [has been removed from the URL spec][0].
///
/// This feature is not available on `wasm32-unknown-unknown` targets.
///
/// [0]: https://github.com/whatwg/url/issues/63
pub fn domain_to_unicode(domain: &str) -> String {
match Host::parse(domain) {
Ok(Host::Domain(ref domain)) => {
Expand Down
14 changes: 14 additions & 0 deletions url/tests/expected_failures_chromium.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
<http://\u{ff05}\u{ff14}\u{ff11}.com> against <http://other.com/>
<http://%ef%bc%85%ef%bc%94%ef%bc%91.com> against <http://other.com/>
<http://!\"$&\'()*+,-.;=_`{}~/>
<wss://!\"$&\'()*+,-.;=_`{}~/>
<https://example.com/> set host to <xn-->
<https://example.com/> set hostname to <xn-->
<http://a.b.c.xn--pokxncvks>
<http://10.0.0.xn--pokxncvks>
<http://a.b.c.XN--pokxncvks>
<http://a.b.c.Xn--pokxncvks>
<http://10.0.0.XN--pokxncvks>
<http://10.0.0.xN--pokxncvks>
<file://xn--/p>
<https://xn--/>
2 changes: 2 additions & 0 deletions url/tests/expected_failures_firefox.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
<http://!\"$&\'()*+,-.;=_`{}~/>
<wss://!\"$&\'()*+,-.;=_`{}~/>
2 changes: 2 additions & 0 deletions url/tests/expected_failures_safari.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
<http://\u{ff05}\u{ff14}\u{ff11}.com> against <http://other.com/>
<http://%ef%bc%85%ef%bc%94%ef%bc%91.com> against <http://other.com/>
55 changes: 35 additions & 20 deletions url/tests/unit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,16 @@
use std::borrow::Cow;
use std::cell::{Cell, RefCell};
use std::net::{Ipv4Addr, Ipv6Addr};
#[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))]
use std::path::{Path, PathBuf};
use url::{form_urlencoded, Host, Origin, Url};

// https://rustwasm.github.io/wasm-bindgen/wasm-bindgen-test/usage.html
#[cfg(all(target_arch = "wasm32", target_os = "unknown"))]
use wasm_bindgen_test::{wasm_bindgen_test as test, wasm_bindgen_test_configure};
#[cfg(all(target_arch = "wasm32", target_os = "unknown"))]
wasm_bindgen_test_configure!(run_in_browser);

#[test]
fn size() {
use std::mem::size_of;
Expand Down Expand Up @@ -117,6 +124,7 @@ fn test_set_empty_query() {
assert_eq!(base.as_str(), "moz://example.com/path");
}

#[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))]
macro_rules! assert_from_file_path {
($path: expr) => {
assert_from_file_path!($path, $path)
Expand All @@ -130,6 +138,7 @@ macro_rules! assert_from_file_path {
}

#[test]
#[cfg(any(unix, windows))]
fn new_file_paths() {
if cfg!(unix) {
assert_eq!(Url::from_file_path(Path::new("relative")), Err(()));
Expand Down Expand Up @@ -162,28 +171,28 @@ fn new_path_bad_utf8() {
}

/// Exercises Windows-specific conversions between file paths and `file:` URLs.
///
/// The whole test is compiled only on Windows via `#[cfg(windows)]`, so no
/// runtime `cfg!(windows)` guard is needed inside the body.
#[test]
#[cfg(windows)]
fn new_path_windows_fun() {
    assert_from_file_path!(r"C:\foo\bar", "/C:/foo/bar");
    assert_from_file_path!("C:\\foo\\ba\0r", "/C:/foo/ba%00r");

    // Invalid UTF-8
    assert!(Url::parse("file:///C:/foo/ba%80r")
        .unwrap()
        .to_file_path()
        .is_err());

    // test windows canonicalized path
    let path = PathBuf::from(r"\\?\C:\foo\bar");
    assert!(Url::from_file_path(path).is_ok());

    // Percent-encoded drive letter
    let url = Url::parse("file:///C%3A/foo/bar").unwrap();
    assert_eq!(url.to_file_path(), Ok(PathBuf::from(r"C:\foo\bar")));
}

#[test]
#[cfg(any(unix, windows))]
fn new_directory_paths() {
if cfg!(unix) {
assert_eq!(Url::from_directory_path(Path::new("relative")), Err(()));
Expand Down Expand Up @@ -439,6 +448,7 @@ fn issue_61() {
}

#[test]
#[cfg(any(unix, target_os = "redox", target_os = "wasi"))]
#[cfg(not(windows))]
/// https://github.com/servo/rust-url/issues/197
fn issue_197() {
Expand Down Expand Up @@ -587,6 +597,8 @@ fn test_origin_opaque() {
}

#[test]
#[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))]
#[allow(deprecated)]
fn test_origin_unicode_serialization() {
let data = [
("http://😅.com", "http://😅.com"),
Expand Down Expand Up @@ -623,6 +635,7 @@ fn test_origin_unicode_serialization() {
}

#[test]
#[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))]
fn test_socket_addrs() {
use std::net::ToSocketAddrs;

Expand Down Expand Up @@ -759,8 +772,11 @@ fn test_set_href() {
}

#[test]
#[allow(deprecated)]
fn test_domain_encoding_quirks() {
use url::quirks::{domain_to_ascii, domain_to_unicode};
use url::quirks::domain_to_ascii;
#[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))]
use url::quirks::domain_to_unicode;

let data = [
("http://example.com", "", ""),
Expand All @@ -771,6 +787,7 @@ fn test_domain_encoding_quirks() {

for url in &data {
assert_eq!(domain_to_ascii(url.0), url.1);
#[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))]
assert_eq!(domain_to_unicode(url.0), url.2);
}
}
Expand Down Expand Up @@ -804,11 +821,8 @@ fn test_expose_internals() {
}

#[test]
#[cfg(windows)]
fn test_windows_unc_path() {
if !cfg!(windows) {
return;
}

let url = Url::from_file_path(Path::new(r"\\host\share\path\file.txt")).unwrap();
assert_eq!(url.as_str(), "file://host/share/path/file.txt");

Expand Down Expand Up @@ -928,6 +942,7 @@ fn test_url_from_file_path() {
}

/// https://github.com/servo/rust-url/issues/505
#[cfg(any(unix, target_os = "redox", target_os = "wasi"))]
#[cfg(not(windows))]
#[test]
fn test_url_from_file_path() {
Expand Down