From 7f1bd6ce1c2fde599a757302a843a60e714c5f72 Mon Sep 17 00:00:00 2001
From: Simon Sapin <simon.sapin@exyr.org>
Date: Mon, 15 Jul 2019 21:19:21 +0200
Subject: [PATCH 1/2] percent-encoding: make sets be values of one type,
 instead of types that implement a trait

Fix https://github.com/servo/rust-url/issues/388
---
 Cargo.toml                  |   2 +-
 data-url/src/lib.rs         |   6 +-
 percent_encoding/Cargo.toml |   3 +-
 percent_encoding/lib.rs     | 348 ++++++++++++++++++------------------
 src/form_urlencoded.rs      |   6 +-
 src/host.rs                 |   4 +-
 src/lib.rs                  |  28 ++-
 src/parser.rs               | 113 ++++++++----
 tests/data.rs               |   2 +-
 tests/unit.rs               |  38 +---
 10 files changed, 277 insertions(+), 273 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 4c69f8c85..2c755ccc6 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -39,7 +39,7 @@ bencher = "0.1"
 [dependencies]
 idna = { version = "0.2.0", path = "./idna" }
 matches = "0.1"
-percent-encoding = { version = "1.0.0", path = "./percent_encoding" }
+percent-encoding = { version = "2.0.0", path = "./percent_encoding" }
 serde = {version = "1.0", optional = true}
 
 [[bench]]
diff --git a/data-url/src/lib.rs b/data-url/src/lib.rs
index f6f023dae..bc0ee1961 100644
--- a/data-url/src/lib.rs
+++ b/data-url/src/lib.rs
@@ -103,7 +103,7 @@ impl<'a> FragmentIdentifier<'a> {
             match byte {
                 // Ignore ASCII tabs or newlines like the URL parser would
                 b'\t' | b'\n' | b'\r' => continue,
-                // Fragment encode set
+                // https://url.spec.whatwg.org/#fragment-percent-encode-set
                 b'\0'...b' ' | b'"' | b'<' | b'>' | b'`' | b'\x7F'...b'\xFF' => {
                     percent_encode(byte, &mut string)
                 }
@@ -182,10 +182,10 @@ fn parse_header(from_colon_to_comma: &str) -> (mime::Mime, bool) {
             // Ignore ASCII tabs or newlines like the URL parser would
             b'\t' | b'\n' | b'\r' => continue,
 
-            // C0 encode set
+            // https://url.spec.whatwg.org/#c0-control-percent-encode-set
             b'\0'...b'\x1F' | b'\x7F'...b'\xFF' => percent_encode(byte, &mut string),
 
-            // Bytes other than the C0 encode set that are percent-encoded
+            // Bytes other than the C0 percent-encode set that are percent-encoded
             // by the URL parser in the query state.
             // '#' is also in that list but cannot occur here
             // since it indicates the start of the URL’s fragment.
diff --git a/percent_encoding/Cargo.toml b/percent_encoding/Cargo.toml
index 4aad9c858..a737e333c 100644
--- a/percent_encoding/Cargo.toml
+++ b/percent_encoding/Cargo.toml
@@ -1,12 +1,11 @@
 [package]
 name = "percent-encoding"
-version = "1.0.2"
+version = "2.0.0"
 authors = ["The rust-url developers"]
 description = "Percent encoding and decoding"
 repository = "https://github.com/servo/rust-url/"
 license = "MIT/Apache-2.0"
 
 [lib]
-doctest = false
 test = false
 path = "lib.rs"
diff --git a/percent_encoding/lib.rs b/percent_encoding/lib.rs
index a5c2987a2..170674aa8 100644
--- a/percent_encoding/lib.rs
+++ b/percent_encoding/lib.rs
@@ -6,30 +6,35 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
-//! URLs use special chacters to indicate the parts of the request.  For example, a forward slash
-//! indicates a path.  In order for that character to exist outside of a path separator, that
-//! character would need to be encoded.
+//! URLs use special characters to indicate the parts of the request.
+//! For example, a `?` question mark marks the end of a path and the start of a query string.
+//! In order for that character to exist inside a path, it needs to be encoded differently.
 //!
-//! Percent encoding replaces reserved characters with the `%` escape character followed by hexidecimal
-//! ASCII representaton.  For non-ASCII character that are percent encoded, a UTF-8 byte sequence
-//! becomes percent encoded.  A simple example can be seen when the space literal is replaced with
-//! `%20`.
+//! Percent encoding replaces reserved characters with the `%` escape character
+//! followed by a byte value as two hexadecimal digits.
+//! For example, an ASCII space is replaced with `%20`.
 //!
-//! Percent encoding is further complicated by the fact that different parts of an URL have
-//! different encoding requirements.  In order to support the variety of encoding requirements,
-//! `url::percent_encoding` includes different *encode sets*.
-//! See [URL Standard](https://url.spec.whatwg.org/#percent-encoded-bytes) for details.
+//! When encoding, the set of characters that can (and should, for readability) be left alone
+//! depends on the context.
+//! The `?` question mark mentioned above is not a separator when used literally
+//! inside of a query string, and therefore does not need to be encoded.
+//! The [`AsciiSet`] parameter of [`percent_encode`] and [`utf8_percent_encode`]
+//! lets callers configure this.
 //!
-//! This module provides some `*_ENCODE_SET` constants.
-//! If a different set is required, it can be created with
-//! the [`define_encode_set!`](../macro.define_encode_set!.html) macro.
+//! This crate deliberately does not provide many different sets.
+//! Users should consider in what context the encoded string will be used,
+//! read relevant specifications, and define their own set.
+//! This is done by using the `add` method of an existing set.
 //!
 //! # Examples
 //!
 //! ```
-//! use url::percent_encoding::{utf8_percent_encode, DEFAULT_ENCODE_SET};
+//! use percent_encoding::{utf8_percent_encode, AsciiSet, CONTROLS};
 //!
-//! assert_eq!(utf8_percent_encode("foo bar?", DEFAULT_ENCODE_SET).to_string(), "foo%20bar%3F");
+//! /// https://url.spec.whatwg.org/#fragment-percent-encode-set
+//! const FRAGMENT: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'<').add(b'>').add(b'`');
+//!
+//! assert_eq!(utf8_percent_encode("foo <bar>", FRAGMENT).to_string(), "foo%20%3Cbar%3E");
 //! ```
 
 use std::borrow::Cow;
@@ -37,137 +42,127 @@ use std::fmt;
 use std::slice;
 use std::str;
 
-/// Represents a set of characters / bytes that should be percent-encoded.
-///
-/// See [encode sets specification](http://url.spec.whatwg.org/#simple-encode-set).
-///
-/// Different characters need to be encoded in different parts of an URL.
-/// For example, a literal `?` question mark in an URL’s path would indicate
-/// the start of the query string.
-/// A question mark meant to be part of the path therefore needs to be percent-encoded.
-/// In the query string however, a question mark does not have any special meaning
-/// and does not need to be percent-encoded.
+/// Represents a set of characters or bytes in the ASCII range.
 ///
-/// A few sets are defined in this module.
-/// Use the [`define_encode_set!`](../macro.define_encode_set!.html) macro to define different ones.
-pub trait EncodeSet: Clone {
-    /// Called with UTF-8 bytes rather than code points.
-    /// Should return true for all non-ASCII bytes.
-    fn contains(&self, byte: u8) -> bool;
-}
-
-/// Define a new struct
-/// that implements the [`EncodeSet`](percent_encoding/trait.EncodeSet.html) trait,
-/// for use in [`percent_decode()`](percent_encoding/fn.percent_encode.html)
-/// and related functions.
+/// This is used in [`percent_encode`] and [`utf8_percent_encode`].
+/// This is similar to [percent-encode sets](https://url.spec.whatwg.org/#percent-encoded-bytes).
 ///
-/// Parameters are characters to include in the set in addition to those of the base set.
-/// See [encode sets specification](http://url.spec.whatwg.org/#simple-encode-set).
+/// Use the `add` method of an existing set to define a new set. For example:
 ///
-/// Example
-/// =======
+/// ```
+/// use percent_encoding::{AsciiSet, CONTROLS};
 ///
-/// ```rust
-/// #[macro_use] extern crate percent_encoding;
-/// use percent_encoding::{utf8_percent_encode, SIMPLE_ENCODE_SET};
-/// define_encode_set! {
-///     /// This encode set is used in the URL parser for query strings.
-///     pub QUERY_ENCODE_SET = [SIMPLE_ENCODE_SET] | {' ', '"', '#', '<', '>'}
-/// }
-/// # fn main() {
-/// assert_eq!(utf8_percent_encode("foo bar", QUERY_ENCODE_SET).collect::<String>(), "foo%20bar");
-/// # }
+/// /// https://url.spec.whatwg.org/#fragment-percent-encode-set
+/// const FRAGMENT: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'<').add(b'>').add(b'`');
 /// ```
-#[macro_export]
-macro_rules! define_encode_set {
-    ($(#[$attr: meta])* pub $name: ident = [$base_set: expr] | {$($ch: pat),*}) => {
-        $(#[$attr])*
-        #[derive(Copy, Clone, Debug)]
-        #[allow(non_camel_case_types)]
-        pub struct $name;
-
-        impl $crate::EncodeSet for $name {
-            #[inline]
-            fn contains(&self, byte: u8) -> bool {
-                match byte as char {
-                    $(
-                        $ch => true,
-                    )*
-                    _ => $base_set.contains(byte)
-                }
-            }
-        }
-    }
+pub struct AsciiSet {
+    mask: [Chunk; ASCII_RANGE_LEN / BITS_PER_CHUNK],
 }
 
-/// This encode set is used for the path of cannot-be-a-base URLs.
-///
-/// All ASCII charcters less than hexidecimal 20 and greater than 7E are encoded.  This includes
-/// special charcters such as line feed, carriage return, NULL, etc.
-#[derive(Copy, Clone, Debug)]
-#[allow(non_camel_case_types)]
-pub struct SIMPLE_ENCODE_SET;
-
-impl EncodeSet for SIMPLE_ENCODE_SET {
-    #[inline]
-    fn contains(&self, byte: u8) -> bool {
-        byte < 0x20 || byte > 0x7E
+type Chunk = u32;
+
+const ASCII_RANGE_LEN: usize = 0x80;
+
+const BITS_PER_CHUNK: usize = 8 * std::mem::size_of::<Chunk>();
+
+impl AsciiSet {
+    /// Called with UTF-8 bytes rather than code points.
+    /// Not used for non-ASCII bytes.
+    const fn contains(&self, byte: u8) -> bool {
+        let chunk = self.mask[byte as usize / BITS_PER_CHUNK];
+        let mask = 1 << (byte as usize % BITS_PER_CHUNK);
+        (chunk & mask) != 0
     }
-}
 
-define_encode_set! {
-    /// This encode set is used in the URL parser for query strings.
-    ///
-    /// Aside from special chacters defined in the [`SIMPLE_ENCODE_SET`](struct.SIMPLE_ENCODE_SET.html),
-    /// space, double quote ("), hash (#), and inequality qualifiers (<), (>) are encoded.
-    pub QUERY_ENCODE_SET = [SIMPLE_ENCODE_SET] | {' ', '"', '#', '<', '>'}
-}
+    fn should_percent_encode(&self, byte: u8) -> bool {
+        !byte.is_ascii() || self.contains(byte)
+    }
 
-define_encode_set! {
-    /// This encode set is used for path components.
-    ///
-    /// Aside from special chacters defined in the [`SIMPLE_ENCODE_SET`](struct.SIMPLE_ENCODE_SET.html),
-    /// space, double quote ("), hash (#), inequality qualifiers (<), (>), backtick (`),
-    /// question mark (?), and curly brackets ({), (}) are encoded.
-    pub DEFAULT_ENCODE_SET = [QUERY_ENCODE_SET] | {'`', '?', '{', '}'}
+    pub const fn add(&self, byte: u8) -> Self {
+        let mut mask = self.mask;
+        mask[byte as usize / BITS_PER_CHUNK] |= 1 << (byte as usize % BITS_PER_CHUNK);
+        AsciiSet { mask }
+    }
 }
 
-define_encode_set! {
-    /// This encode set is used for on '/'-separated path segment
-    ///
-    /// Aside from special chacters defined in the [`SIMPLE_ENCODE_SET`](struct.SIMPLE_ENCODE_SET.html),
-    /// space, double quote ("), hash (#), inequality qualifiers (<), (>), backtick (`),
-    /// question mark (?), and curly brackets ({), (}), percent sign (%), forward slash (/) are
-    /// encoded.
-    ///
-    /// # Note
-    ///
-    /// For [special URLs](https://url.spec.whatwg.org/#is-special), the backslash (\) character should
-    /// additionally be escaped, but that is *not* included in this encode set.
-    pub PATH_SEGMENT_ENCODE_SET = [DEFAULT_ENCODE_SET] | {'%', '/'}
-}
+/// The set of 0x00 to 0x1F (C0 controls), and 0x7F (DEL).
+///
+/// Note that this includes the newline and tab characters, but not the space 0x20.
+///
+/// <https://url.spec.whatwg.org/#c0-control-percent-encode-set>
+pub const CONTROLS: &AsciiSet = &AsciiSet {
+    mask: [
+        !0_u32, // C0: 0x00 to 0x1F (32 bits set)
+        0,
+        0,
+        1 << (0x7F_u32 % 32), // DEL: 0x7F (one bit set)
+    ],
+};
 
-define_encode_set! {
-    /// This encode set is used for username and password.
-    ///
-    /// Aside from special chacters defined in the [`SIMPLE_ENCODE_SET`](struct.SIMPLE_ENCODE_SET.html),
-    /// space, double quote ("), hash (#), inequality qualifiers (<), (>), backtick (`),
-    /// question mark (?), and curly brackets ({), (}), forward slash (/), colon (:), semi-colon (;),
-    /// equality (=), at (@), backslash (\\), square brackets ([), (]), caret (\^), and pipe (|) are
-    /// encoded.
-    pub USERINFO_ENCODE_SET = [DEFAULT_ENCODE_SET] | {
-        '/', ':', ';', '=', '@', '[', '\\', ']', '^', '|'
+macro_rules! static_assert {
+    ($( $bool: expr, )+) => {
+        fn _static_assert() {
+            $(
+                let _ = std::mem::transmute::<[u8; $bool as usize], u8>;
+            )+
+        }
     }
 }
 
-/// Return the percent-encoding of the given bytes.
+static_assert! {
+    CONTROLS.contains(0x00),
+    CONTROLS.contains(0x1F),
+    !CONTROLS.contains(0x20),
+    !CONTROLS.contains(0x7E),
+    CONTROLS.contains(0x7F),
+}
+
+/// Everything that is not an ASCII letter or digit.
+///
+/// This is probably more eager than necessary in any context.
+pub const NON_ALPHANUMERIC: &AsciiSet = &CONTROLS
+    .add(b' ')
+    .add(b'!')
+    .add(b'"')
+    .add(b'#')
+    .add(b'$')
+    .add(b'%')
+    .add(b'&')
+    .add(b'\'')
+    .add(b'(')
+    .add(b')')
+    .add(b'*')
+    .add(b'+')
+    .add(b',')
+    .add(b'-')
+    .add(b'.')
+    .add(b'/')
+    .add(b':')
+    .add(b';')
+    .add(b'<')
+    .add(b'=')
+    .add(b'>')
+    .add(b'?')
+    .add(b'@')
+    .add(b'[')
+    .add(b'\\')
+    .add(b']')
+    .add(b'^')
+    .add(b'_')
+    .add(b'`')
+    .add(b'{')
+    .add(b'|')
+    .add(b'}')
+    .add(b'~');
+
+/// Return the percent-encoding of the given byte.
 ///
-/// This is unconditional, unlike `percent_encode()` which uses an encode set.
+/// This is unconditional, unlike `percent_encode()` which has an `AsciiSet` parameter.
 ///
 /// # Examples
 ///
 /// ```
-/// use url::percent_encoding::percent_encode_byte;
+/// use percent_encoding::percent_encode_byte;
 ///
 /// assert_eq!("foo bar".bytes().map(percent_encode_byte).collect::<String>(),
 ///            "%66%6F%6F%20%62%61%72");
@@ -194,74 +189,69 @@ pub fn percent_encode_byte(byte: u8) -> &'static str {
       "[index..index + 3]
 }
 
-/// Percent-encode the given bytes with the given encode set.
+/// Percent-encode the given bytes with the given set.
 ///
-/// The encode set define which bytes (in addition to non-ASCII and controls)
-/// need to be percent-encoded.
-/// The choice of this set depends on context.
-/// For example, `?` needs to be encoded in an URL path but not in a query string.
+/// Non-ASCII bytes and bytes in `ascii_set` are encoded.
 ///
-/// The return value is an iterator of `&str` slices (so it has a `.collect::<String>()` method)
-/// that also implements `Display` and `Into<Cow<str>>`.
-/// The latter returns `Cow::Borrowed` when none of the bytes in `input`
-/// are in the given encode set.
+/// The return type:
+///
+/// * Implements `Iterator<Item = &str>` and therefore has a `.collect::<String>()` method,
+/// * Implements `Display` and therefore has a `.to_string()` method,
+/// * Implements `Into<Cow<str>>` borrowing `input` when none of its bytes are encoded.
 ///
 /// # Examples
 ///
 /// ```
-/// use url::percent_encoding::{percent_encode, DEFAULT_ENCODE_SET};
+/// use percent_encoding::{percent_encode, NON_ALPHANUMERIC};
 ///
-/// assert_eq!(percent_encode(b"foo bar?", DEFAULT_ENCODE_SET).to_string(), "foo%20bar%3F");
+/// assert_eq!(percent_encode(b"foo bar?", NON_ALPHANUMERIC).to_string(), "foo%20bar%3F");
 /// ```
 #[inline]
-pub fn percent_encode<E: EncodeSet>(input: &[u8], encode_set: E) -> PercentEncode<E> {
+pub fn percent_encode<'a>(input: &'a [u8], ascii_set: &'static AsciiSet) -> PercentEncode<'a> {
     PercentEncode {
         bytes: input,
-        encode_set: encode_set,
+        ascii_set,
     }
 }
 
 /// Percent-encode the UTF-8 encoding of the given string.
 ///
-/// See `percent_encode()` for how to use the return value.
+/// See [`percent_encode`] regarding the return type.
 ///
 /// # Examples
 ///
 /// ```
-/// use url::percent_encoding::{utf8_percent_encode, DEFAULT_ENCODE_SET};
+/// use percent_encoding::{utf8_percent_encode, NON_ALPHANUMERIC};
 ///
-/// assert_eq!(utf8_percent_encode("foo bar?", DEFAULT_ENCODE_SET).to_string(), "foo%20bar%3F");
+/// assert_eq!(utf8_percent_encode("foo bar?", NON_ALPHANUMERIC).to_string(), "foo%20bar%3F");
 /// ```
 #[inline]
-pub fn utf8_percent_encode<E: EncodeSet>(input: &str, encode_set: E) -> PercentEncode<E> {
-    percent_encode(input.as_bytes(), encode_set)
+pub fn utf8_percent_encode<'a>(input: &'a str, ascii_set: &'static AsciiSet) -> PercentEncode<'a> {
+    percent_encode(input.as_bytes(), ascii_set)
 }
 
-/// The return type of `percent_encode()` and `utf8_percent_encode()`.
-#[derive(Clone, Debug)]
-pub struct PercentEncode<'a, E: EncodeSet> {
+/// The return type of [`percent_encode`] and [`utf8_percent_encode`].
+#[derive(Clone)]
+pub struct PercentEncode<'a> {
     bytes: &'a [u8],
-    encode_set: E,
+    ascii_set: &'static AsciiSet,
 }
 
-impl<'a, E: EncodeSet> Iterator for PercentEncode<'a, E> {
+impl<'a> Iterator for PercentEncode<'a> {
     type Item = &'a str;
 
     fn next(&mut self) -> Option<&'a str> {
         if let Some((&first_byte, remaining)) = self.bytes.split_first() {
-            if self.encode_set.contains(first_byte) {
+            if self.ascii_set.should_percent_encode(first_byte) {
                 self.bytes = remaining;
                 Some(percent_encode_byte(first_byte))
             } else {
-                assert!(first_byte.is_ascii());
                 for (i, &byte) in remaining.iter().enumerate() {
-                    if self.encode_set.contains(byte) {
+                    if self.ascii_set.should_percent_encode(byte) {
                         // 1 for first_byte + i for previous iterations of this loop
                         let (unchanged_slice, remaining) = self.bytes.split_at(1 + i);
                         self.bytes = remaining;
                         return Some(unsafe { str::from_utf8_unchecked(unchanged_slice) });
-                    } else {
-                        assert!(byte.is_ascii());
                     }
                 }
                 let unchanged_slice = self.bytes;
@@ -282,7 +272,7 @@ impl<'a, E: EncodeSet> Iterator for PercentEncode<'a, E> {
     }
 }
 
-impl<'a, E: EncodeSet> fmt::Display for PercentEncode<'a, E> {
+impl<'a> fmt::Display for PercentEncode<'a> {
     fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
         for c in (*self).clone() {
             formatter.write_str(c)?
@@ -291,8 +281,8 @@ impl<'a, E: EncodeSet> fmt::Display for PercentEncode<'a, E> {
     }
 }
 
-impl<'a, E: EncodeSet> From<PercentEncode<'a, E>> for Cow<'a, str> {
-    fn from(mut iter: PercentEncode<'a, E>) -> Self {
+impl<'a> From<PercentEncode<'a>> for Cow<'a, str> {
+    fn from(mut iter: PercentEncode<'a>) -> Self {
         match iter.next() {
             None => "".into(),
             Some(first) => match iter.next() {
@@ -308,19 +298,33 @@ impl<'a, E: EncodeSet> From<PercentEncode<'a, E>> for Cow<'a, str> {
     }
 }
 
+/// Percent-decode the given string.
+///
+/// <https://url.spec.whatwg.org/#string-percent-decode>
+///
+/// See [`percent_decode`] regarding the return type.
+#[inline]
+pub fn percent_decode_str(input: &str) -> PercentDecode {
+    percent_decode(input.as_bytes())
+}
+
 /// Percent-decode the given bytes.
 ///
-/// The return value is an iterator of decoded `u8` bytes
-/// that also implements `Into<Cow<u8>>`
-/// (which returns `Cow::Borrowed` when `input` contains no percent-encoded sequence)
-/// and has `decode_utf8()` and `decode_utf8_lossy()` methods.
+/// <https://url.spec.whatwg.org/#percent-decode>
+///
+/// Any sequence of `%` followed by two hexadecimal digits is decoded.
+/// The return type:
+///
+/// * Implements `Into<Cow<[u8]>>` borrowing `input` when it contains no percent-encoded sequence,
+/// * Implements `Iterator<Item = u8>` and therefore has a `.collect::<Vec<u8>>()` method,
+/// * Has `decode_utf8()` and `decode_utf8_lossy()` methods.
 ///
 /// # Examples
 ///
 /// ```
-/// use url::percent_encoding::percent_decode;
+/// use percent_encoding::percent_decode;
 ///
-/// assert_eq!(percent_decode(b"foo%20bar%3F").decode_utf8().unwrap(), "foo bar?");
+/// assert_eq!(percent_decode(b"foo%20bar%3f").decode_utf8().unwrap(), "foo bar?");
 /// ```
 #[inline]
 pub fn percent_decode(input: &[u8]) -> PercentDecode {
@@ -329,22 +333,18 @@ pub fn percent_decode(input: &[u8]) -> PercentDecode {
     }
 }
 
-/// The return type of `percent_decode()`.
+/// The return type of [`percent_decode`].
 #[derive(Clone, Debug)]
 pub struct PercentDecode<'a> {
     bytes: slice::Iter<'a, u8>,
 }
 
 fn after_percent_sign(iter: &mut slice::Iter<u8>) -> Option<u8> {
-    let initial_iter = iter.clone();
-    let h = iter.next().and_then(|&b| (b as char).to_digit(16));
-    let l = iter.next().and_then(|&b| (b as char).to_digit(16));
-    if let (Some(h), Some(l)) = (h, l) {
-        Some(h as u8 * 0x10 + l as u8)
-    } else {
-        *iter = initial_iter;
-        None
-    }
+    let mut cloned_iter = iter.clone();
+    let h = char::from(*cloned_iter.next()?).to_digit(16)?;
+    let l = char::from(*cloned_iter.next()?).to_digit(16)?;
+    *iter = cloned_iter;
+    Some(h as u8 * 0x10 + l as u8)
 }
 
 impl<'a> Iterator for PercentDecode<'a> {
@@ -377,7 +377,7 @@ impl<'a> From<PercentDecode<'a>> for Cow<'a, [u8]> {
 
 impl<'a> PercentDecode<'a> {
     /// If the percent-decoding is different from the input, return it as a new bytes vector.
-    pub fn if_any(&self) -> Option<Vec<u8>> {
+    fn if_any(&self) -> Option<Vec<u8>> {
         let mut bytes_iter = self.bytes.clone();
         while bytes_iter.any(|&b| b == b'%') {
             if let Some(decoded_byte) = after_percent_sign(&mut bytes_iter) {
diff --git a/src/form_urlencoded.rs b/src/form_urlencoded.rs
index 176ffb750..bdf1f9fbd 100644
--- a/src/form_urlencoded.rs
+++ b/src/form_urlencoded.rs
@@ -59,9 +59,9 @@ impl<'a> Iterator for Parse<'a> {
 
 fn decode(input: &[u8]) -> Cow<str> {
     let replaced = replace_plus(input);
-    decode_utf8_lossy(match percent_decode(&replaced).if_any() {
-        Some(vec) => Cow::Owned(vec),
-        None => replaced,
+    decode_utf8_lossy(match percent_decode(&replaced).into() {
+        Cow::Owned(vec) => Cow::Owned(vec),
+        Cow::Borrowed(_) => replaced,
     })
 }
 
diff --git a/src/host.rs b/src/host.rs
index 6aa820911..ea66139c3 100644
--- a/src/host.rs
+++ b/src/host.rs
@@ -8,7 +8,7 @@
 
 use idna;
 use parser::{ParseError, ParseResult};
-use percent_encoding::{percent_decode, utf8_percent_encode, SIMPLE_ENCODE_SET};
+use percent_encoding::{percent_decode, utf8_percent_encode, CONTROLS};
 use std::cmp;
 use std::fmt::{self, Formatter};
 use std::net::{Ipv4Addr, Ipv6Addr};
@@ -207,7 +207,7 @@ impl Host<String> {
         {
             return Err(ParseError::InvalidDomainCharacter);
         }
-        let s = utf8_percent_encode(input, SIMPLE_ENCODE_SET).to_string();
+        let s = utf8_percent_encode(input, CONTROLS).to_string();
         Ok(Host::Domain(s))
     }
 }
diff --git a/src/lib.rs b/src/lib.rs
index 4cbac60ea..92777e592 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -110,17 +110,13 @@ assert_eq!(css_url.as_str(), "http://servo.github.io/rust-url/main.css");
 #[macro_use]
 extern crate matches;
 extern crate idna;
+extern crate percent_encoding;
 #[cfg(feature = "serde")]
 extern crate serde;
-#[macro_use]
-extern crate percent_encoding;
 
 use host::HostInternal;
-use parser::{to_u32, Context, Parser, SchemeType};
-use percent_encoding::{
-    percent_decode, percent_encode, utf8_percent_encode, PATH_SEGMENT_ENCODE_SET,
-    USERINFO_ENCODE_SET,
-};
+use parser::{to_u32, Context, Parser, SchemeType, PATH_SEGMENT, USERINFO};
+use percent_encoding::{percent_decode, percent_encode, utf8_percent_encode};
 use std::borrow::Borrow;
 use std::cmp;
 #[cfg(feature = "serde")]
@@ -1229,8 +1225,11 @@ impl Url {
         if let Some(input) = query {
             self.query_start = Some(to_u32(self.serialization.len()).unwrap());
             self.serialization.push('?');
+            let scheme_type = SchemeType::from(self.scheme());
             let scheme_end = self.scheme_end;
-            self.mutate(|parser| parser.parse_query(scheme_end, parser::Input::new(input)));
+            self.mutate(|parser| {
+                parser.parse_query(scheme_type, scheme_end, parser::Input::new(input))
+            });
         }
 
         self.restore_already_parsed_fragment(fragment);
@@ -1729,7 +1728,7 @@ impl Url {
             self.serialization.truncate(self.username_end as usize);
             self.serialization.push(':');
             self.serialization
-                .extend(utf8_percent_encode(password, USERINFO_ENCODE_SET));
+                .extend(utf8_percent_encode(password, USERINFO));
             self.serialization.push('@');
 
             let old_host_start = self.host_start;
@@ -1824,7 +1823,7 @@ impl Url {
         let after_username = self.slice(self.username_end..).to_owned();
         self.serialization.truncate(username_start as usize);
         self.serialization
-            .extend(utf8_percent_encode(username, USERINFO_ENCODE_SET));
+            .extend(utf8_percent_encode(username, USERINFO));
 
         let mut removed_bytes = self.username_end;
         self.username_end = to_u32(self.serialization.len()).unwrap();
@@ -2307,7 +2306,7 @@ fn path_to_file_url_segments(
         serialization.push('/');
         serialization.extend(percent_encode(
             component.as_os_str().as_bytes(),
-            PATH_SEGMENT_ENCODE_SET,
+            PATH_SEGMENT,
         ));
     }
     if empty {
@@ -2355,7 +2354,7 @@ fn path_to_file_url_segments_windows(
                 host_internal = host.into();
                 serialization.push('/');
                 let share = share.to_str().ok_or(())?;
-                serialization.extend(percent_encode(share.as_bytes(), PATH_SEGMENT_ENCODE_SET));
+                serialization.extend(percent_encode(share.as_bytes(), PATH_SEGMENT));
             }
             _ => return Err(()),
         },
@@ -2370,10 +2369,7 @@ fn path_to_file_url_segments_windows(
         // FIXME: somehow work with non-unicode?
         let component = component.as_os_str().to_str().ok_or(())?;
         serialization.push('/');
-        serialization.extend(percent_encode(
-            component.as_bytes(),
-            PATH_SEGMENT_ENCODE_SET,
-        ));
+        serialization.extend(percent_encode(component.as_bytes(), PATH_SEGMENT));
     }
     Ok((host_end, host_internal))
 }
diff --git a/src/parser.rs b/src/parser.rs
index 7a6eaad4f..96906f94a 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -11,18 +11,38 @@ use std::fmt::{self, Formatter, Write};
 use std::str;
 
 use host::{Host, HostInternal};
-use percent_encoding::{
-    percent_encode, utf8_percent_encode, DEFAULT_ENCODE_SET, PATH_SEGMENT_ENCODE_SET,
-    QUERY_ENCODE_SET, SIMPLE_ENCODE_SET, USERINFO_ENCODE_SET,
-};
+use percent_encoding::{percent_encode, utf8_percent_encode, AsciiSet, CONTROLS};
 use query_encoding::EncodingOverride;
 use Url;
 
-define_encode_set! {
-    // The backslash (\) character is treated as a path separator in special URLs
-    // so it needs to be additionally escaped in that case.
-    pub SPECIAL_PATH_SEGMENT_ENCODE_SET = [PATH_SEGMENT_ENCODE_SET] | {'\\'}
-}
+/// https://url.spec.whatwg.org/#fragment-percent-encode-set
+const FRAGMENT: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'<').add(b'>').add(b'`');
+
+/// https://url.spec.whatwg.org/#path-percent-encode-set
+const PATH: &AsciiSet = &FRAGMENT.add(b'#').add(b'?').add(b'{').add(b'}');
+
+/// https://url.spec.whatwg.org/#userinfo-percent-encode-set
+pub(crate) const USERINFO: &AsciiSet = &PATH
+    .add(b'/')
+    .add(b':')
+    .add(b';')
+    .add(b'=')
+    .add(b'@')
+    .add(b'[')
+    .add(b'\\')
+    .add(b']')
+    .add(b'^')
+    .add(b'|');
+
+pub(crate) const PATH_SEGMENT: &AsciiSet = &PATH.add(b'/').add(b'%');
+
+// The backslash (\) character is treated as a path separator in special URLs
+// so it needs to be additionally escaped in that case.
+pub(crate) const SPECIAL_PATH_SEGMENT: &AsciiSet = &PATH_SEGMENT.add(b'\\');
+
+// https://url.spec.whatwg.org/#query-state
+const QUERY: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'#').add(b'<').add(b'>');
+const SPECIAL_QUERY: &AsciiSet = &QUERY.add(b'\'');
 
 pub type ParseResult<T> = Result<T, ParseError>;
 
@@ -327,7 +347,7 @@ impl<'a> Parser<'a> {
             } else {
                 let scheme_type = SchemeType::from(base_url.scheme());
                 if scheme_type.is_file() {
-                    self.parse_file(input, Some(base_url))
+                    self.parse_file(input, scheme_type, Some(base_url))
                 } else {
                     self.parse_relative(input, scheme_type, base_url)
                 }
@@ -379,7 +399,7 @@ impl<'a> Parser<'a> {
                     }
                 });
                 self.serialization.clear();
-                self.parse_file(input, base_file_url)
+                self.parse_file(input, scheme_type, base_file_url)
             }
             SchemeType::SpecialNotFile => {
                 // special relative or authority state
@@ -434,6 +454,7 @@ impl<'a> Parser<'a> {
             self.parse_cannot_be_a_base_path(input)
         };
         self.with_query_and_fragment(
+            scheme_type,
             scheme_end,
             username_end,
             host_start,
@@ -445,7 +466,12 @@ impl<'a> Parser<'a> {
         )
     }
 
-    fn parse_file(mut self, input: Input, mut base_file_url: Option<&Url>) -> ParseResult<Url> {
+    fn parse_file(
+        mut self,
+        input: Input,
+        scheme_type: SchemeType,
+        mut base_file_url: Option<&Url>,
+    ) -> ParseResult<Url> {
         use SyntaxViolation::Backslash;
         // file state
         debug_assert!(self.serialization.is_empty());
@@ -491,7 +517,7 @@ impl<'a> Parser<'a> {
                     };
                     self.serialization.push_str(before_query);
                     let (query_start, fragment_start) =
-                        self.parse_query_and_fragment(base_url.scheme_end, input)?;
+                        self.parse_query_and_fragment(scheme_type, base_url.scheme_end, input)?;
                     Ok(Url {
                         serialization: self.serialization,
                         query_start: query_start,
@@ -503,7 +529,7 @@ impl<'a> Parser<'a> {
                     let scheme_end = "file".len() as u32;
                     let path_start = "file://".len() as u32;
                     let (query_start, fragment_start) =
-                        self.parse_query_and_fragment(scheme_end, input)?;
+                        self.parse_query_and_fragment(scheme_type, scheme_end, input)?;
                     Ok(Url {
                         serialization: self.serialization,
                         scheme_end: scheme_end,
@@ -572,7 +598,7 @@ impl<'a> Parser<'a> {
                         host = HostInternal::None;
                     }
                     let (query_start, fragment_start) =
-                        self.parse_query_and_fragment(scheme_end, remaining)?;
+                        self.parse_query_and_fragment(scheme_type, scheme_end, remaining)?;
                     Ok(Url {
                         serialization: self.serialization,
                         scheme_end: scheme_end,
@@ -604,7 +630,7 @@ impl<'a> Parser<'a> {
                         input_after_first_char,
                     );
                     let (query_start, fragment_start) =
-                        self.parse_query_and_fragment(scheme_end, remaining)?;
+                        self.parse_query_and_fragment(scheme_type, scheme_end, remaining)?;
                     let path_start = path_start as u32;
                     Ok(Url {
                         serialization: self.serialization,
@@ -638,6 +664,7 @@ impl<'a> Parser<'a> {
                         input,
                     );
                     self.with_query_and_fragment(
+                        SchemeType::File,
                         base_url.scheme_end,
                         base_url.username_end,
                         base_url.host_start,
@@ -654,7 +681,7 @@ impl<'a> Parser<'a> {
                     let remaining =
                         self.parse_path(SchemeType::File, &mut false, path_start, input);
                     let (query_start, fragment_start) =
-                        self.parse_query_and_fragment(scheme_end, remaining)?;
+                        self.parse_query_and_fragment(SchemeType::File, scheme_end, remaining)?;
                     let path_start = path_start as u32;
                     Ok(Url {
                         serialization: self.serialization,
@@ -704,7 +731,7 @@ impl<'a> Parser<'a> {
                 };
                 self.serialization.push_str(before_query);
                 let (query_start, fragment_start) =
-                    self.parse_query_and_fragment(base_url.scheme_end, input)?;
+                    self.parse_query_and_fragment(scheme_type, base_url.scheme_end, input)?;
                 Ok(Url {
                     serialization: self.serialization,
                     query_start: query_start,
@@ -740,6 +767,7 @@ impl<'a> Parser<'a> {
                     input_after_first_char,
                 );
                 self.with_query_and_fragment(
+                    scheme_type,
                     base_url.scheme_end,
                     base_url.username_end,
                     base_url.host_start,
@@ -761,6 +789,7 @@ impl<'a> Parser<'a> {
                 let remaining =
                     self.parse_path(scheme_type, &mut true, base_url.path_start as usize, input);
                 self.with_query_and_fragment(
+                    scheme_type,
                     base_url.scheme_end,
                     base_url.username_end,
                     base_url.host_start,
@@ -792,6 +821,7 @@ impl<'a> Parser<'a> {
         let path_start = to_u32(self.serialization.len())?;
         let remaining = self.parse_path_start(scheme_type, &mut true, remaining);
         self.with_query_and_fragment(
+            scheme_type,
             scheme_end,
             username_end,
             host_start,
@@ -854,7 +884,7 @@ impl<'a> Parser<'a> {
                 }
                 self.check_url_code_point(c, &input);
                 self.serialization
-                    .extend(utf8_percent_encode(utf8_c, USERINFO_ENCODE_SET));
+                    .extend(utf8_percent_encode(utf8_c, USERINFO));
             }
         }
         let username_end = match username_end {
@@ -1082,17 +1112,14 @@ impl<'a> Parser<'a> {
                         self.check_url_code_point(c, &input);
                         if self.context == Context::PathSegmentSetter {
                             if scheme_type.is_special() {
-                                self.serialization.extend(utf8_percent_encode(
-                                    utf8_c,
-                                    SPECIAL_PATH_SEGMENT_ENCODE_SET,
-                                ));
+                                self.serialization
+                                    .extend(utf8_percent_encode(utf8_c, SPECIAL_PATH_SEGMENT));
                             } else {
                                 self.serialization
-                                    .extend(utf8_percent_encode(utf8_c, PATH_SEGMENT_ENCODE_SET));
+                                    .extend(utf8_percent_encode(utf8_c, PATH_SEGMENT));
                             }
                         } else {
-                            self.serialization
-                                .extend(utf8_percent_encode(utf8_c, DEFAULT_ENCODE_SET));
+                            self.serialization.extend(utf8_percent_encode(utf8_c, PATH));
                         }
                     }
                 }
@@ -1161,7 +1188,7 @@ impl<'a> Parser<'a> {
                 Some((c, utf8_c)) => {
                     self.check_url_code_point(c, &input);
                     self.serialization
-                        .extend(utf8_percent_encode(utf8_c, SIMPLE_ENCODE_SET));
+                        .extend(utf8_percent_encode(utf8_c, CONTROLS));
                 }
                 None => return input,
             }
@@ -1170,6 +1197,7 @@ impl<'a> Parser<'a> {
 
     fn with_query_and_fragment(
         mut self,
+        scheme_type: SchemeType,
         scheme_end: u32,
         username_end: u32,
         host_start: u32,
@@ -1179,7 +1207,8 @@ impl<'a> Parser<'a> {
         path_start: u32,
         remaining: Input,
     ) -> ParseResult<Url> {
-        let (query_start, fragment_start) = self.parse_query_and_fragment(scheme_end, remaining)?;
+        let (query_start, fragment_start) =
+            self.parse_query_and_fragment(scheme_type, scheme_end, remaining)?;
         Ok(Url {
             serialization: self.serialization,
             scheme_end: scheme_end,
@@ -1197,6 +1226,7 @@ impl<'a> Parser<'a> {
     /// Return (query_start, fragment_start)
     fn parse_query_and_fragment(
         &mut self,
+        scheme_type: SchemeType,
         scheme_end: u32,
         mut input: Input,
     ) -> ParseResult<(Option<u32>, Option<u32>)> {
@@ -1206,7 +1236,7 @@ impl<'a> Parser<'a> {
             Some('?') => {
                 query_start = Some(to_u32(self.serialization.len())?);
                 self.serialization.push('?');
-                let remaining = self.parse_query(scheme_end, input);
+                let remaining = self.parse_query(scheme_type, scheme_end, input);
                 if let Some(remaining) = remaining {
                     input = remaining
                 } else {
@@ -1223,7 +1253,12 @@ impl<'a> Parser<'a> {
         Ok((query_start, Some(fragment_start)))
     }
 
-    pub fn parse_query<'i>(&mut self, scheme_end: u32, mut input: Input<'i>) -> Option<Input<'i>> {
+    pub fn parse_query<'i>(
+        &mut self,
+        scheme_type: SchemeType,
+        scheme_end: u32,
+        mut input: Input<'i>,
+    ) -> Option<Input<'i>> {
         let mut query = String::new(); // FIXME: use a streaming decoder instead
         let mut remaining = None;
         while let Some(c) = input.next() {
@@ -1241,8 +1276,12 @@ impl<'a> Parser<'a> {
             _ => None,
         };
         let query_bytes = ::query_encoding::encode(encoding, &query);
-        self.serialization
-            .extend(percent_encode(&query_bytes, QUERY_ENCODE_SET));
+        let set = if scheme_type.is_special() {
+            SPECIAL_QUERY
+        } else {
+            QUERY
+        };
+        self.serialization.extend(percent_encode(&query_bytes, set));
         remaining
     }
 
@@ -1272,8 +1311,14 @@ impl<'a> Parser<'a> {
                 self.log_violation(SyntaxViolation::NullInFragment)
             } else {
                 self.check_url_code_point(c, &input);
-                self.serialization
-                    .extend(utf8_percent_encode(utf8_c, SIMPLE_ENCODE_SET));
+                self.serialization.extend(utf8_percent_encode(
+                    utf8_c,
+                    // FIXME: this should be the FRAGMENT set, as defined in the
+                    // spec as of 2019-07-17, but tests fail when it is used here,
+                    // likely because the tests are out of date.
+                    // See https://github.com/servo/rust-url/issues/290
+                    CONTROLS,
+                ));
             }
         }
     }
diff --git a/tests/data.rs b/tests/data.rs
index 1981814e6..b462ec2fd 100644
--- a/tests/data.rs
+++ b/tests/data.rs
@@ -47,7 +47,7 @@ fn run_parsing(input: &str, base: &str, expected: Result<ExpectedAttributes, ()>
             let got = $got;
             assert!(
                 expected == got,
-                "{:?} != {} {:?} for URL {:?}",
+                "\n{:?}\n!= {}\n{:?}\nfor URL {:?}\n",
                 got,
                 stringify!($expected),
                 expected,
diff --git a/tests/unit.rs b/tests/unit.rs
index 9f3764911..d5e81986a 100644
--- a/tests/unit.rs
+++ b/tests/unit.rs
@@ -8,9 +8,8 @@
 
 //! Unit tests
 
-extern crate url;
-#[macro_use]
 extern crate percent_encoding;
+extern crate url;
 
 use std::borrow::Cow;
 use std::cell::{Cell, RefCell};
@@ -429,41 +428,6 @@ fn test_leading_dots() {
     assert_eq!(Url::parse("file://./foo").unwrap().domain(), Some("."));
 }
 
-// This is testing that the macro produces buildable code when invoked
-// inside both a module and a function
-#[test]
-fn define_encode_set_scopes() {
-    use percent_encoding::{utf8_percent_encode, SIMPLE_ENCODE_SET};
-
-    define_encode_set! {
-        /// This encode set is used in the URL parser for query strings.
-        pub QUERY_ENCODE_SET = [SIMPLE_ENCODE_SET] | {' ', '"', '#', '<', '>'}
-    }
-
-    assert_eq!(
-        utf8_percent_encode("foo bar", QUERY_ENCODE_SET).collect::<String>(),
-        "foo%20bar"
-    );
-
-    mod m {
-        use percent_encoding::{utf8_percent_encode, SIMPLE_ENCODE_SET};
-
-        define_encode_set! {
-            /// This encode set is used in the URL parser for query strings.
-            pub QUERY_ENCODE_SET = [SIMPLE_ENCODE_SET] | {' ', '"', '#', '<', '>'}
-        }
-
-        pub fn test() {
-            assert_eq!(
-                utf8_percent_encode("foo bar", QUERY_ENCODE_SET).collect::<String>(),
-                "foo%20bar"
-            );
-        }
-    }
-
-    m::test();
-}
-
 #[test]
 /// https://github.com/servo/rust-url/issues/302
 fn test_origin_hash() {

From a1fe49eeaa2fd4c762498e104039884e04740571 Mon Sep 17 00:00:00 2001
From: Simon Sapin <simon.sapin@exyr.org>
Date: Wed, 17 Jul 2019 17:37:33 +0200
Subject: [PATCH 2/2] Local variables in const fn require Rust 1.33

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index f3417d793..ccbeb75be 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -3,7 +3,7 @@ script: cargo test --all-features --all
 
 jobs:
   include:
-  - rust: 1.30.0
+  - rust: 1.33.0
   - rust: stable
   - rust: beta
   - rust: nightly