Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add IPv4 variant to Host and remove custom Ipv6 address type #135

Merged
merged 1 commit into from Nov 15, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 2 additions & 2 deletions Cargo.toml
@@ -1,7 +1,7 @@
[package]

name = "url"
version = "0.2.37"
version = "0.3.0"
authors = [ "Simon Sapin <simon.sapin@exyr.org>" ]

description = "URL library for Rust, based on the WHATWG URL Standard"
Expand All @@ -17,7 +17,7 @@ serde_serialization = ["serde"]
heap_size = ["heapsize", "heapsize_plugin"]

[dependencies.heapsize]
version = "0.1.1"
version = "0.1.3"
optional = true

[dependencies.heapsize_plugin]
Expand Down
258 changes: 30 additions & 228 deletions src/host.rs
Expand Up @@ -7,37 +7,23 @@
// except according to those terms.

use std::ascii::AsciiExt;
use std::cmp;
use std::fmt::{self, Formatter};
use std::net::{Ipv4Addr, Ipv6Addr};
use parser::{ParseResult, ParseError};
use percent_encoding::{from_hex, percent_decode};
use percent_encoding::{percent_decode};


/// The host name of an URL.
#[derive(PartialEq, Eq, Clone, Debug, Hash, PartialOrd, Ord)]
#[cfg_attr(feature="heap_size", derive(HeapSizeOf))]
pub enum Host {
/// A (DNS) domain name or an IPv4 address.
///
/// FIXME: IPv4 probably should be a separate variant.
/// See https://www.w3.org/Bugs/Public/show_bug.cgi?id=26431
/// A (DNS) domain name.
Domain(String),

/// An IPv6 address, represented inside `[...]` square brackets
/// so that `:` colon characters in the address are not ambiguous
/// with the port number delimiter.
Ipv6(Ipv6Address),
}


/// A 128 bit IPv6 address
#[derive(Clone, Eq, PartialEq, Copy, Debug, Hash, PartialOrd, Ord)]
pub struct Ipv6Address {
pub pieces: [u16; 8]
/// An IPv4 address.
V4(Ipv4Addr),
/// An IPv6 address.
V6(Ipv6Addr),
}
#[cfg(feature="heap_size")]
known_heap_size!(0, Ipv6Address);


impl Host {
/// Parse a host: either an IPv6 address in [] square brackets, an IPv4 address, or a domain.
Expand All @@ -51,22 +37,30 @@ impl Host {
Err(ParseError::EmptyHost)
} else if input.starts_with("[") {
if input.ends_with("]") {
Ipv6Address::parse(&input[1..input.len() - 1]).map(Host::Ipv6)
if let Ok(addr) = input[1..input.len() - 1].parse() {
Ok(Host::V6(addr))
} else {
Err(ParseError::InvalidIpv6Address)
}
} else {
Err(ParseError::InvalidIpv6Address)
}
} else {
let decoded = percent_decode(input.as_bytes());
let domain = String::from_utf8_lossy(&decoded);
// TODO: Remove this check and use IDNA "domain to ASCII"
if !domain.is_ascii() {
Err(ParseError::NonAsciiDomainsNotSupportedYet)
} else if domain.find(&[
'\0', '\t', '\n', '\r', ' ', '#', '%', '/', ':', '?', '@', '[', '\\', ']'
][..]).is_some() {
Err(ParseError::InvalidDomainCharacter)
if let Ok(addr) = input.parse() {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@pyfisch @Ms2ger have you verified that Ipv4Addr::parse and Ipv6Addr::parse match https://url.spec.whatwg.org/#host-parsing in every edge case? For example: leading zeros, hex/octal components in v4, …

Ok(Host::V4(addr))
} else {
Ok(Host::Domain(domain.to_ascii_lowercase()))
let decoded = percent_decode(input.as_bytes());
let domain = String::from_utf8_lossy(&decoded);
// TODO: Remove this check and use IDNA "domain to ASCII"
if !domain.is_ascii() {
Err(ParseError::NonAsciiDomainsNotSupportedYet)
} else if domain.find(&[
'\0', '\t', '\n', '\r', ' ', '#', '%', '/', ':', '?', '@', '[', '\\', ']'
][..]).is_some() {
Err(ParseError::InvalidDomainCharacter)
} else {
Ok(Host::Domain(domain.to_ascii_lowercase()))
}
}
}
}
Expand All @@ -81,203 +75,11 @@ impl Host {


impl fmt::Display for Host {
fn fmt(&self, formatter: &mut Formatter) -> fmt::Result {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
match *self {
Host::Domain(ref domain) => domain.fmt(formatter),
Host::Ipv6(ref address) => {
try!(formatter.write_str("["));
try!(address.fmt(formatter));
formatter.write_str("]")
}
}
}
}


impl Ipv6Address {
    /// Parse an IPv6 address, without the [] square brackets.
    ///
    /// Accepts up to eight `:`-separated hexadecimal pieces, at most one `::`
    /// compression, and an optional trailing dotted-decimal IPv4 address
    /// that fills the last two pieces.
    ///
    /// # Errors
    ///
    /// Returns `ParseError::InvalidIpv6Address` for any malformed input,
    /// including inputs shorter than two bytes.
    pub fn parse(input: &str) -> ParseResult<Ipv6Address> {
        let input = input.as_bytes();
        let len = input.len();
        let mut is_ip_v4 = false;
        let mut pieces = [0u16; 8];
        let mut piece_pointer = 0;
        let mut compress_pointer = None;
        let mut i = 0;

        if len < 2 {
            return Err(ParseError::InvalidIpv6Address)
        }

        // A leading ':' is only valid as the first half of a leading "::".
        if input[0] == b':' {
            if input[1] != b':' {
                return Err(ParseError::InvalidIpv6Address)
            }
            i = 2;
            piece_pointer = 1;
            compress_pointer = Some(1);
        }

        while i < len {
            if piece_pointer == 8 {
                return Err(ParseError::InvalidIpv6Address)
            }
            if input[i] == b':' {
                // Second ':' in a row: this is the "::" marker, allowed once.
                if compress_pointer.is_some() {
                    return Err(ParseError::InvalidIpv6Address)
                }
                i += 1;
                piece_pointer += 1;
                compress_pointer = Some(piece_pointer);
                continue
            }
            // Read up to four hexadecimal digits for this piece.
            let start = i;
            let end = cmp::min(len, start + 4);
            let mut value = 0u16;
            while i < end {
                match from_hex(input[i]) {
                    Some(digit) => {
                        value = value * 0x10 + digit as u16;
                        i += 1;
                    },
                    None => break
                }
            }
            if i < len {
                match input[i] {
                    b'.' => {
                        if i == start {
                            return Err(ParseError::InvalidIpv6Address)
                        }
                        // The digits just read were the first IPv4 octet:
                        // rewind and re-read them as decimal below.
                        i = start;
                        is_ip_v4 = true;
                    },
                    b':' => {
                        i += 1;
                        // A single trailing ':' is not allowed.
                        if i == len {
                            return Err(ParseError::InvalidIpv6Address)
                        }
                    },
                    _ => return Err(ParseError::InvalidIpv6Address)
                }
            }
            if is_ip_v4 {
                break
            }
            pieces[piece_pointer] = value;
            piece_pointer += 1;
        }

        if is_ip_v4 {
            // The IPv4 suffix occupies two pieces, so at most six may precede it.
            if piece_pointer > 6 {
                return Err(ParseError::InvalidIpv6Address)
            }
            let mut numbers_seen = 0;
            while i < len {
                // Every octet after the first must be introduced by a '.',
                // and there may be no more than four octets.
                if numbers_seen > 0 {
                    if input[i] == b'.' && numbers_seen < 4 {
                        i += 1;
                    } else {
                        return Err(ParseError::InvalidIpv6Address)
                    }
                }
                let octet_start = i;
                let mut value = 0u16;
                while i < len {
                    let digit = match (input[i] as char).to_digit(10) {
                        Some(digit) => digit as u16,
                        None => break
                    };
                    // A multi-digit octet must not start with '0'.
                    if i > octet_start && value == 0 {
                        return Err(ParseError::InvalidIpv6Address)
                    }
                    value = value * 10 + digit;
                    if value > 255 {
                        return Err(ParseError::InvalidIpv6Address)
                    }
                    i += 1;
                }
                // An octet needs at least one digit.
                if i == octet_start {
                    return Err(ParseError::InvalidIpv6Address)
                }
                // Two octets are packed into each 16-bit piece, high byte first.
                pieces[piece_pointer] = pieces[piece_pointer] * 0x100 + value;
                numbers_seen += 1;
                if numbers_seen == 2 || numbers_seen == 4 {
                    piece_pointer += 1;
                }
            }
            if numbers_seen != 4 {
                return Err(ParseError::InvalidIpv6Address)
            }
        }

        match compress_pointer {
            Some(compress_pointer) => {
                // Move the pieces that followed "::" to the end of the array.
                // `swap` (rather than copy-then-zero) keeps the value intact
                // when source and destination are the same slot.
                let mut swaps = piece_pointer - compress_pointer;
                piece_pointer = 7;
                while swaps > 0 {
                    pieces.swap(piece_pointer, compress_pointer + swaps - 1);
                    swaps -= 1;
                    piece_pointer -= 1;
                }
            }
            None => if piece_pointer != 8 {
                return Err(ParseError::InvalidIpv6Address)
            }
        }
        Ok(Ipv6Address { pieces: pieces })
    }

    /// Serialize the IPv6 address to a string (same text as its `Display` output).
    pub fn serialize(&self) -> String {
        self.to_string()
    }
}


impl fmt::Display for Ipv6Address {
    /// Write the address as `:`-separated lowercase hexadecimal pieces,
    /// replacing the longest run of two or more zero pieces with `::`.
    fn fmt(&self, formatter: &mut Formatter) -> fmt::Result {
        let (run_start, run_end) = longest_zero_sequence(&self.pieces);
        // "::" must not be used to shorten a single zero piece (RFC 5952,
        // section 4.2.2), so a run shorter than two pieces is not compressed.
        // -1 never matches a piece index, which disables compression.
        let (compress_start, compress_end) = if run_end - run_start >= 2 {
            (run_start, run_end)
        } else {
            (-1, -1)
        };
        let mut i = 0;
        while i < 8 {
            if i == compress_start {
                try!(formatter.write_str(":"));
                // At the very start there is no preceding piece to have
                // written the first ':' of the "::" pair.
                if i == 0 {
                    try!(formatter.write_str(":"));
                }
                if compress_end < 8 {
                    // Skip over the zero run and continue with the next piece.
                    i = compress_end;
                } else {
                    // The zero run extends to the end of the address.
                    break;
                }
            }
            try!(write!(formatter, "{:x}", self.pieces[i as usize]));
            if i < 7 {
                try!(formatter.write_str(":"));
            }
            i += 1;
        }
        Ok(())
    }
}


fn longest_zero_sequence(pieces: &[u16; 8]) -> (isize, isize) {
let mut longest = -1;
let mut longest_length = -1;
let mut start = -1;
macro_rules! finish_sequence(
($end: expr) => {
if start >= 0 {
let length = $end - start;
if length > longest_length {
longest = start;
longest_length = length;
}
}
};
);
for i in 0..8 {
if pieces[i as usize] == 0 {
if start < 0 {
start = i;
}
} else {
finish_sequence!(i);
start = -1;
Host::Domain(ref domain) => domain.fmt(f),
Host::V4(ref addr) => addr.fmt(f),
Host::V6(ref addr) => write!(f, "[{}]", addr),
}
}
finish_sequence!(8);
(longest, longest + longest_length)
}
3 changes: 1 addition & 2 deletions src/lib.rs
Expand Up @@ -143,7 +143,7 @@ use std::cmp::Ordering;
#[cfg(feature="serde_serialization")]
use std::str::FromStr;

pub use host::{Host, Ipv6Address};
pub use host::Host;
pub use parser::{ErrorHandler, ParseResult, ParseError};

use percent_encoding::{percent_encode, lossy_utf8_percent_decode, DEFAULT_ENCODE_SET};
Expand Down Expand Up @@ -1140,4 +1140,3 @@ fn file_url_path_to_pathbuf_windows(path: &[String]) -> Result<PathBuf, ()> {
"to_file_path() failed to produce an absolute Path");
Ok(path)
}

12 changes: 12 additions & 0 deletions src/tests.rs
Expand Up @@ -8,6 +8,7 @@


use std::char;
use std::net::{Ipv4Addr, Ipv6Addr};
use super::{UrlParser, Url, SchemeData, RelativeSchemeData, Host};


Expand Down Expand Up @@ -347,3 +348,14 @@ fn relative_scheme_data_equality() {
let b: Url = url("http://foo.com/");
check_eq(&a, &b);
}

#[test]
fn host() {
    // One input per `Host` variant: a domain name, a dotted-quad IPv4
    // address, and a bracketed IPv6 address.
    let domain = Host::parse("www.mozilla.org").unwrap();
    let v4 = Host::parse("1.35.33.49").unwrap();
    let v6 = Host::parse("[2001:0db8:85a3:08d3:1319:8a2e:0370:7344]").unwrap();

    let expected_domain = Host::Domain("www.mozilla.org".to_owned());
    assert_eq!(domain, expected_domain);

    let expected_v4 = Host::V4(Ipv4Addr::new(1, 35, 33, 49));
    assert_eq!(v4, expected_v4);

    let expected_v6 = Host::V6(Ipv6Addr::new(
        0x2001, 0x0db8, 0x85a3, 0x08d3, 0x1319, 0x8a2e, 0x0370, 0x7344,
    ));
    assert_eq!(v6, expected_v6);
}