Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Additional percent-encode sets #837

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
10 changes: 5 additions & 5 deletions idna/src/uts46.rs
Expand Up @@ -475,7 +475,7 @@ impl Idna {
errors
}

/// http://www.unicode.org/reports/tr46/#ToASCII
/// <http://www.unicode.org/reports/tr46/#ToASCII>
#[allow(clippy::wrong_self_convention)]
pub fn to_ascii(&mut self, domain: &str, out: &mut String) -> Result<(), Errors> {
let mut errors = self.to_ascii_inner(domain, out);
Expand All @@ -497,7 +497,7 @@ impl Idna {
errors.into()
}

/// http://www.unicode.org/reports/tr46/#ToUnicode
/// <http://www.unicode.org/reports/tr46/#ToUnicode>
#[allow(clippy::wrong_self_convention)]
pub fn to_unicode(&mut self, domain: &str, out: &mut String) -> Result<(), Errors> {
if is_simple(domain) {
Expand All @@ -518,7 +518,7 @@ pub struct Config {
use_idna_2008_rules: bool,
}

/// The defaults are that of https://url.spec.whatwg.org/#idna
/// The defaults are that of <https://url.spec.whatwg.org/#idna>
impl Default for Config {
fn default() -> Self {
Config {
Expand Down Expand Up @@ -566,14 +566,14 @@ impl Config {
self
}

/// http://www.unicode.org/reports/tr46/#ToASCII
/// <http://www.unicode.org/reports/tr46/#ToASCII>
pub fn to_ascii(self, domain: &str) -> Result<String, Errors> {
    // Reserve room for at least the input length before converting.
    let mut ascii = String::with_capacity(domain.len());
    // A one-shot codec is built from this configuration; any conversion
    // error is returned to the caller as-is.
    Idna::new(self).to_ascii(domain, &mut ascii)?;
    Ok(ascii)
}

/// http://www.unicode.org/reports/tr46/#ToUnicode
/// <http://www.unicode.org/reports/tr46/#ToUnicode>
pub fn to_unicode(self, domain: &str) -> (String, Result<(), Errors>) {
let mut codec = Idna::new(self);
let mut out = String::with_capacity(domain.len());
Expand Down
119 changes: 119 additions & 0 deletions percent_encoding/src/lib.rs
Expand Up @@ -116,6 +116,125 @@ pub const CONTROLS: &AsciiSet = &AsciiSet {
],
};

/// The fragment percent-encode set.
///
/// The C0 control percent-encode set ([`CONTROLS`]) extended with
/// U+0020 SPACE, U+0022 (`"`), U+003C (`<`), U+003E (`>`), and U+0060 (`` ` ``).
///
/// <https://url.spec.whatwg.org/#fragment-percent-encode-set>
pub const FRAGMENT: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'<').add(b'>').add(b'`');

/// The query percent-encode set.
///
/// The C0 control percent-encode set ([`CONTROLS`]) extended with
/// U+0020 SPACE, U+0022 (`"`), U+0023 (`#`), U+003C (`<`), and U+003E (`>`).
///
/// <https://url.spec.whatwg.org/#query-percent-encode-set>
pub const QUERY: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'#').add(b'<').add(b'>');

/// The special-query percent-encode set.
///
/// The query percent-encode set ([`QUERY`]) extended with U+0027 (`'`).
///
/// <https://url.spec.whatwg.org/#special-query-percent-encode-set>
pub const SPECIAL_QUERY: &AsciiSet = &QUERY.add(b'\'');

/// The path percent-encode set.
///
/// The query percent-encode set ([`QUERY`]) extended with
/// U+003F (`?`), U+0060 (`` ` ``), U+007B (`{`), and U+007D (`}`).
///
/// <https://url.spec.whatwg.org/#path-percent-encode-set>
pub const PATH: &AsciiSet = &QUERY.add(b'?').add(b'`').add(b'{').add(b'}');

/// The userinfo percent-encode set.
///
/// The path percent-encode set ([`PATH`]) extended with U+002F (`/`), U+003A (`:`),
/// U+003B (`;`), U+003D (`=`), U+0040 (`@`), U+005B (`[`) to U+005E (`^`), inclusive,
/// and U+007C (`|`).
///
/// <https://url.spec.whatwg.org/#userinfo-percent-encode-set>
pub const USERINFO: &AsciiSet = &PATH
// U+002F (/)
.add(b'/')
// U+003A (:)
.add(b':')
// U+003B (;)
.add(b';')
// U+003D (=)
.add(b'=')
// U+0040 (@)
.add(b'@')
// U+005B ([) -- first byte of the inclusive U+005B..=U+005E range
.add(b'[')
// U+005C (\)
.add(b'\\')
// U+005D (])
.add(b']')
// U+005E (^) -- last byte of the inclusive U+005B..=U+005E range
.add(b'^')
// U+007C (|)
.add(b'|');

/// The component percent-encode set.
///
/// The userinfo percent-encode set ([`USERINFO`]) extended with
/// U+0024 (`$`) to U+0026 (`&`), inclusive, U+002B (`+`), and U+002C (`,`).
/// The inclusive range covers `$`, `%`, and `&`.
///
/// <https://url.spec.whatwg.org/#component-percent-encode-set>
pub const COMPONENT: &AsciiSet = &USERINFO.add(b'$').add(b'%').add(b'&').add(b'+').add(b',');

/// The `application/x-www-form-urlencoded` percent-encode set.
///
/// The component percent-encode set ([`COMPONENT`]) extended with U+0021 (`!`),
/// U+0027 (`'`) to U+0029 RIGHT PARENTHESIS, inclusive, and U+007E (`~`).
/// The inclusive range covers `'`, `(`, and `)`.
///
/// <https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set>
pub const FORM: &AsciiSet = &COMPONENT
    .add(b'!')
    .add(b'\'')
    .add(b'(')
    .add(b')')
    .add(b'~');

macro_rules! static_assert {
($( $bool: expr, )+) => {
fn _static_assert() {
Expand Down
6 changes: 2 additions & 4 deletions url/src/lib.rs
Expand Up @@ -146,10 +146,8 @@ pub use form_urlencoded;
extern crate serde;

use crate::host::HostInternal;
use crate::parser::{
to_u32, Context, Parser, SchemeType, PATH_SEGMENT, SPECIAL_PATH_SEGMENT, USERINFO,
};
use percent_encoding::{percent_decode, percent_encode, utf8_percent_encode};
use crate::parser::{to_u32, Context, Parser, SchemeType, PATH_SEGMENT, SPECIAL_PATH_SEGMENT};
use percent_encoding::{percent_decode, percent_encode, utf8_percent_encode, USERINFO};
use std::borrow::Borrow;
use std::cmp;
use std::fmt::{self, Write};
Expand Down
23 changes: 3 additions & 20 deletions url/src/parser.rs
Expand Up @@ -13,26 +13,9 @@ use std::str;
use crate::host::{Host, HostInternal};
use crate::Url;
use form_urlencoded::EncodingOverride;
use percent_encoding::{percent_encode, utf8_percent_encode, AsciiSet, CONTROLS};

/// The fragment percent-encode set: `CONTROLS` plus space, `"`, `<`, `>`, and `` ` ``.
///
/// <https://url.spec.whatwg.org/#fragment-percent-encode-set>
const FRAGMENT: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'<').add(b'>').add(b'`');

/// The path percent-encode set: `FRAGMENT` plus `#`, `?`, `{`, and `}`.
///
/// <https://url.spec.whatwg.org/#path-percent-encode-set>
const PATH: &AsciiSet = &FRAGMENT.add(b'#').add(b'?').add(b'{').add(b'}');

/// The userinfo percent-encode set: `PATH` plus `/`, `:`, `;`, `=`, `@`,
/// `[`, `\`, `]`, `^`, and `|`.
///
/// <https://url.spec.whatwg.org/#userinfo-percent-encode-set>
pub(crate) const USERINFO: &AsciiSet = &PATH
.add(b'/')
.add(b':')
.add(b';')
.add(b'=')
.add(b'@')
// `[`, `\`, `]`, `^` are the contiguous bytes 0x5B..=0x5E.
.add(b'[')
.add(b'\\')
.add(b']')
.add(b'^')
.add(b'|');
use percent_encoding::{
percent_encode, utf8_percent_encode, AsciiSet, CONTROLS, FRAGMENT, PATH, USERINFO,
};

/// The `PATH` percent-encode set extended with `/` and `%`.
// NOTE(review): presumably applied when encoding a single path segment, so a
// literal `/` or `%` in the input cannot be mistaken for a separator or an
// existing escape -- confirm against the callers in the parser.
pub(crate) const PATH_SEGMENT: &AsciiSet = &PATH.add(b'/').add(b'%');

Expand Down
48 changes: 24 additions & 24 deletions url/src/quirks.rs
Expand Up @@ -6,7 +6,7 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.

//! Getters and setters for URL components implemented per https://url.spec.whatwg.org/#api
//! Getters and setters for URL components implemented per <https://url.spec.whatwg.org/#api>
//!
//! Unless you need to be interoperable with web browsers,
//! you probably want to use `Url` method instead.
Expand Down Expand Up @@ -57,15 +57,15 @@ pub fn internal_components(url: &Url) -> InternalComponents {
}
}

/// https://url.spec.whatwg.org/#dom-url-domaintoascii
/// <https://url.spec.whatwg.org/#dom-url-domaintoascii>
pub fn domain_to_ascii(domain: &str) -> String {
    // Only a successful parse that yields a domain host produces output;
    // parse failures and any other host kind collapse to the empty string.
    if let Ok(Host::Domain(ascii)) = Host::parse(domain) {
        ascii
    } else {
        String::new()
    }
}

/// https://url.spec.whatwg.org/#dom-url-domaintounicode
/// <https://url.spec.whatwg.org/#dom-url-domaintounicode>
pub fn domain_to_unicode(domain: &str) -> String {
match Host::parse(domain) {
Ok(Host::Domain(ref domain)) => {
Expand All @@ -76,29 +76,29 @@ pub fn domain_to_unicode(domain: &str) -> String {
}
}

/// Getter for https://url.spec.whatwg.org/#dom-url-href
/// Getter for <https://url.spec.whatwg.org/#dom-url-href>
pub fn href(url: &Url) -> &str {
// Thin pass-through: the href is whatever `Url::as_str` returns.
url.as_str()
}

/// Setter for https://url.spec.whatwg.org/#dom-url-href
/// Setter for <https://url.spec.whatwg.org/#dom-url-href>
pub fn set_href(url: &mut Url, value: &str) -> Result<(), ParseError> {
    // Parse first: on failure the error is returned and `url` is left untouched.
    let parsed = Url::parse(value)?;
    *url = parsed;
    Ok(())
}

/// Getter for https://url.spec.whatwg.org/#dom-url-origin
/// Getter for <https://url.spec.whatwg.org/#dom-url-origin>
pub fn origin(url: &Url) -> String {
    // Compute the origin, then return its ASCII serialization.
    let origin = url.origin();
    origin.ascii_serialization()
}

/// Getter for https://url.spec.whatwg.org/#dom-url-protocol
/// Getter for <https://url.spec.whatwg.org/#dom-url-protocol>
#[inline]
pub fn protocol(url: &Url) -> &str {
    // The protocol is the leading slice of the serialization that covers the
    // scheme plus the `:` that follows it.
    let end = url.scheme().len() + ":".len();
    &url.as_str()[..end]
}

/// Setter for https://url.spec.whatwg.org/#dom-url-protocol
/// Setter for <https://url.spec.whatwg.org/#dom-url-protocol>
#[allow(clippy::result_unit_err)]
pub fn set_protocol(url: &mut Url, mut new_protocol: &str) -> Result<(), ()> {
// The scheme state in the spec ignores everything after the first `:`,
Expand All @@ -109,25 +109,25 @@ pub fn set_protocol(url: &mut Url, mut new_protocol: &str) -> Result<(), ()> {
url.set_scheme(new_protocol)
}

/// Getter for https://url.spec.whatwg.org/#dom-url-username
/// Getter for <https://url.spec.whatwg.org/#dom-url-username>
#[inline]
pub fn username(url: &Url) -> &str {
// Thin pass-through to `Url::username`; nothing quirks-specific is added here.
url.username()
}

/// Setter for https://url.spec.whatwg.org/#dom-url-username
/// Setter for <https://url.spec.whatwg.org/#dom-url-username>
// The unit error type is part of the existing public signature, hence the lint allowance.
#[allow(clippy::result_unit_err)]
pub fn set_username(url: &mut Url, new_username: &str) -> Result<(), ()> {
// Thin pass-through: the result of `Url::set_username` is returned unchanged.
url.set_username(new_username)
}

/// Getter for https://url.spec.whatwg.org/#dom-url-password
/// Getter for <https://url.spec.whatwg.org/#dom-url-password>
#[inline]
pub fn password(url: &Url) -> &str {
    // A URL without a password is reported as the empty string.
    match url.password() {
        Some(password) => password,
        None => "",
    }
}

/// Setter for https://url.spec.whatwg.org/#dom-url-password
/// Setter for <https://url.spec.whatwg.org/#dom-url-password>
#[allow(clippy::result_unit_err)]
pub fn set_password(url: &mut Url, new_password: &str) -> Result<(), ()> {
url.set_password(if new_password.is_empty() {
Expand All @@ -137,13 +137,13 @@ pub fn set_password(url: &mut Url, new_password: &str) -> Result<(), ()> {
})
}

/// Getter for https://url.spec.whatwg.org/#dom-url-host
/// Getter for <https://url.spec.whatwg.org/#dom-url-host>
#[inline]
pub fn host(url: &Url) -> &str {
    // Slice from the start of the host through the end of the port.
    let span = Position::BeforeHost..Position::AfterPort;
    &url[span]
}

/// Setter for https://url.spec.whatwg.org/#dom-url-host
/// Setter for <https://url.spec.whatwg.org/#dom-url-host>
#[allow(clippy::result_unit_err)]
pub fn set_host(url: &mut Url, new_host: &str) -> Result<(), ()> {
// If context object’s url’s cannot-be-a-base-URL flag is set, then return.
Expand Down Expand Up @@ -190,13 +190,13 @@ pub fn set_host(url: &mut Url, new_host: &str) -> Result<(), ()> {
Ok(())
}

/// Getter for https://url.spec.whatwg.org/#dom-url-hostname
/// Getter for <https://url.spec.whatwg.org/#dom-url-hostname>
#[inline]
pub fn hostname(url: &Url) -> &str {
    // A URL without a host is reported as the empty string.
    match url.host_str() {
        Some(host) => host,
        None => "",
    }
}

/// Setter for https://url.spec.whatwg.org/#dom-url-hostname
/// Setter for <https://url.spec.whatwg.org/#dom-url-hostname>
#[allow(clippy::result_unit_err)]
pub fn set_hostname(url: &mut Url, new_hostname: &str) -> Result<(), ()> {
if url.cannot_be_a_base() {
Expand Down Expand Up @@ -232,13 +232,13 @@ pub fn set_hostname(url: &mut Url, new_hostname: &str) -> Result<(), ()> {
}
}

/// Getter for https://url.spec.whatwg.org/#dom-url-port
/// Getter for <https://url.spec.whatwg.org/#dom-url-port>
#[inline]
pub fn port(url: &Url) -> &str {
    // Slice of the serialization between the port's start and end positions.
    let span = Position::BeforePort..Position::AfterPort;
    &url[span]
}

/// Setter for https://url.spec.whatwg.org/#dom-url-port
/// Setter for <https://url.spec.whatwg.org/#dom-url-port>
#[allow(clippy::result_unit_err)]
pub fn set_port(url: &mut Url, new_port: &str) -> Result<(), ()> {
let result;
Expand All @@ -262,13 +262,13 @@ pub fn set_port(url: &mut Url, new_port: &str) -> Result<(), ()> {
}
}

/// Getter for https://url.spec.whatwg.org/#dom-url-pathname
/// Getter for <https://url.spec.whatwg.org/#dom-url-pathname>
#[inline]
pub fn pathname(url: &Url) -> &str {
// Thin pass-through: the pathname is whatever `Url::path` returns.
url.path()
}

/// Setter for https://url.spec.whatwg.org/#dom-url-pathname
/// Setter for <https://url.spec.whatwg.org/#dom-url-pathname>
pub fn set_pathname(url: &mut Url, new_pathname: &str) {
if url.cannot_be_a_base() {
return;
Expand All @@ -286,12 +286,12 @@ pub fn set_pathname(url: &mut Url, new_pathname: &str) {
}
}

/// Getter for https://url.spec.whatwg.org/#dom-url-search
/// Getter for <https://url.spec.whatwg.org/#dom-url-search>
pub fn search(url: &Url) -> &str {
    // Take everything between the end of the path and the end of the query,
    // then hand it to the file-local `trim` helper.
    // NOTE(review): `trim` is defined outside this view; presumably it maps a
    // lone delimiter to the empty string -- confirm.
    let raw = &url[Position::AfterPath..Position::AfterQuery];
    trim(raw)
}

/// Setter for https://url.spec.whatwg.org/#dom-url-search
/// Setter for <https://url.spec.whatwg.org/#dom-url-search>
pub fn set_search(url: &mut Url, new_search: &str) {
url.set_query(match new_search {
"" => None,
Expand All @@ -300,12 +300,12 @@ pub fn set_search(url: &mut Url, new_search: &str) {
})
}

/// Getter for https://url.spec.whatwg.org/#dom-url-hash
/// Getter for <https://url.spec.whatwg.org/#dom-url-hash>
pub fn hash(url: &Url) -> &str {
    // Take everything after the query, then hand it to the file-local `trim` helper.
    // NOTE(review): `trim` is defined outside this view; presumably it maps a
    // lone delimiter to the empty string -- confirm.
    let raw = &url[Position::AfterQuery..];
    trim(raw)
}

/// Setter for https://url.spec.whatwg.org/#dom-url-hash
/// Setter for <https://url.spec.whatwg.org/#dom-url-hash>
pub fn set_hash(url: &mut Url, new_hash: &str) {
url.set_fragment(match new_hash {
// If the given value is the empty string,
Expand Down