From ecc7bb79bcd2bb484377e9b377e40b492c1bfd0d Mon Sep 17 00:00:00 2001 From: Pyfisch Date: Sat, 14 Nov 2015 10:42:53 +0100 Subject: [PATCH] Add IPv4 variant to Host and remove custom IPv6 address type The WHATWG URL Standard was changed to describe IPv4 address parsing. Before this change, no distinction was made between domains and IPv4 addresses. This change also removes the custom IPv6 address type and uses the IP address types provided by std instead. This is a breaking change. Version bumped to 0.3.0. --- Cargo.toml | 4 +- src/host.rs | 258 ++++++--------------------------------------------- src/lib.rs | 3 +- src/tests.rs | 12 +++ 4 files changed, 45 insertions(+), 232 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 38c7e3c5a..4a5834f7d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "url" -version = "0.2.37" +version = "0.3.0" authors = [ "Simon Sapin " ] description = "URL library for Rust, based on the WHATWG URL Standard" @@ -17,7 +17,7 @@ serde_serialization = ["serde"] heap_size = ["heapsize", "heapsize_plugin"] [dependencies.heapsize] -version = "0.1.1" +version = "0.1.3" optional = true [dependencies.heapsize_plugin] diff --git a/src/host.rs b/src/host.rs index c65aa6d33..51555bbf0 100644 --- a/src/host.rs +++ b/src/host.rs @@ -7,37 +7,23 @@ // except according to those terms. use std::ascii::AsciiExt; -use std::cmp; use std::fmt::{self, Formatter}; +use std::net::{Ipv4Addr, Ipv6Addr}; use parser::{ParseResult, ParseError}; -use percent_encoding::{from_hex, percent_decode}; +use percent_encoding::{percent_decode}; /// The host name of an URL. #[derive(PartialEq, Eq, Clone, Debug, Hash, PartialOrd, Ord)] #[cfg_attr(feature="heap_size", derive(HeapSizeOf))] pub enum Host { - /// A (DNS) domain name or an IPv4 address. - /// - /// FIXME: IPv4 probably should be a separate variant. - /// See https://www.w3.org/Bugs/Public/show_bug.cgi?id=26431 + /// A (DNS) domain name. 
Domain(String), - - /// An IPv6 address, represented inside `[...]` square brackets - /// so that `:` colon characters in the address are not ambiguous - /// with the port number delimiter. - Ipv6(Ipv6Address), -} - - -/// A 128 bit IPv6 address -#[derive(Clone, Eq, PartialEq, Copy, Debug, Hash, PartialOrd, Ord)] -pub struct Ipv6Address { - pub pieces: [u16; 8] + /// An IPv4 address. + V4(Ipv4Addr), + /// An IPv6 address. + V6(Ipv6Addr), } -#[cfg(feature="heap_size")] -known_heap_size!(0, Ipv6Address); - impl Host { /// Parse a host: either an IPv6 address in [] square brackets, or a domain. @@ -51,22 +37,30 @@ impl Host { Err(ParseError::EmptyHost) } else if input.starts_with("[") { if input.ends_with("]") { - Ipv6Address::parse(&input[1..input.len() - 1]).map(Host::Ipv6) + if let Ok(addr) = input[1..input.len() - 1].parse() { + Ok(Host::V6(addr)) + } else { + Err(ParseError::InvalidIpv6Address) + } } else { Err(ParseError::InvalidIpv6Address) } } else { - let decoded = percent_decode(input.as_bytes()); - let domain = String::from_utf8_lossy(&decoded); - // TODO: Remove this check and use IDNA "domain to ASCII" - if !domain.is_ascii() { - Err(ParseError::NonAsciiDomainsNotSupportedYet) - } else if domain.find(&[ - '\0', '\t', '\n', '\r', ' ', '#', '%', '/', ':', '?', '@', '[', '\\', ']' - ][..]).is_some() { - Err(ParseError::InvalidDomainCharacter) + if let Ok(addr) = input.parse() { + Ok(Host::V4(addr)) } else { - Ok(Host::Domain(domain.to_ascii_lowercase())) + let decoded = percent_decode(input.as_bytes()); + let domain = String::from_utf8_lossy(&decoded); + // TODO: Remove this check and use IDNA "domain to ASCII" + if !domain.is_ascii() { + Err(ParseError::NonAsciiDomainsNotSupportedYet) + } else if domain.find(&[ + '\0', '\t', '\n', '\r', ' ', '#', '%', '/', ':', '?', '@', '[', '\\', ']' + ][..]).is_some() { + Err(ParseError::InvalidDomainCharacter) + } else { + Ok(Host::Domain(domain.to_ascii_lowercase())) + } } } } @@ -81,203 +75,11 @@ impl Host { impl 
fmt::Display for Host { - fn fmt(&self, formatter: &mut Formatter) -> fmt::Result { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { match *self { - Host::Domain(ref domain) => domain.fmt(formatter), - Host::Ipv6(ref address) => { - try!(formatter.write_str("[")); - try!(address.fmt(formatter)); - formatter.write_str("]") - } - } - } -} - - -impl Ipv6Address { - /// Parse an IPv6 address, without the [] square brackets. - pub fn parse(input: &str) -> ParseResult { - let input = input.as_bytes(); - let len = input.len(); - let mut is_ip_v4 = false; - let mut pieces = [0, 0, 0, 0, 0, 0, 0, 0]; - let mut piece_pointer = 0; - let mut compress_pointer = None; - let mut i = 0; - - if len < 2 { - return Err(ParseError::InvalidIpv6Address) - } - - if input[0] == b':' { - if input[1] != b':' { - return Err(ParseError::InvalidIpv6Address) - } - i = 2; - piece_pointer = 1; - compress_pointer = Some(1); - } - - while i < len { - if piece_pointer == 8 { - return Err(ParseError::InvalidIpv6Address) - } - if input[i] == b':' { - if compress_pointer.is_some() { - return Err(ParseError::InvalidIpv6Address) - } - i += 1; - piece_pointer += 1; - compress_pointer = Some(piece_pointer); - continue - } - let start = i; - let end = cmp::min(len, start + 4); - let mut value = 0u16; - while i < end { - match from_hex(input[i]) { - Some(digit) => { - value = value * 0x10 + digit as u16; - i += 1; - }, - None => break - } - } - if i < len { - match input[i] { - b'.' 
=> { - if i == start { - return Err(ParseError::InvalidIpv6Address) - } - i = start; - is_ip_v4 = true; - }, - b':' => { - i += 1; - if i == len { - return Err(ParseError::InvalidIpv6Address) - } - }, - _ => return Err(ParseError::InvalidIpv6Address) - } - } - if is_ip_v4 { - break - } - pieces[piece_pointer] = value; - piece_pointer += 1; - } - - if is_ip_v4 { - if piece_pointer > 6 { - return Err(ParseError::InvalidIpv6Address) - } - let mut dots_seen = 0; - while i < len { - // FIXME: https://github.com/whatwg/url/commit/1c22aa119c354e0020117e02571cec53f7c01064 - let mut value = 0u16; - while i < len { - let digit = match input[i] { - c @ b'0' ... b'9' => c - b'0', - _ => break - }; - value = value * 10 + digit as u16; - if value == 0 || value > 255 { - return Err(ParseError::InvalidIpv6Address) - } - } - if dots_seen < 3 && !(i < len && input[i] == b'.') { - return Err(ParseError::InvalidIpv6Address) - } - pieces[piece_pointer] = pieces[piece_pointer] * 0x100 + value; - if dots_seen == 0 || dots_seen == 2 { - piece_pointer += 1; - } - i += 1; - if dots_seen == 3 && i < len { - return Err(ParseError::InvalidIpv6Address) - } - dots_seen += 1; - } - } - - match compress_pointer { - Some(compress_pointer) => { - let mut swaps = piece_pointer - compress_pointer; - piece_pointer = 7; - while swaps > 0 { - pieces[piece_pointer] = pieces[compress_pointer + swaps - 1]; - pieces[compress_pointer + swaps - 1] = 0; - swaps -= 1; - piece_pointer -= 1; - } - } - _ => if piece_pointer != 8 { - return Err(ParseError::InvalidIpv6Address) - } - } - Ok(Ipv6Address { pieces: pieces }) - } - - /// Serialize the IPv6 address to a string. 
- pub fn serialize(&self) -> String { - self.to_string() - } -} - - -impl fmt::Display for Ipv6Address { - fn fmt(&self, formatter: &mut Formatter) -> fmt::Result { - let (compress_start, compress_end) = longest_zero_sequence(&self.pieces); - let mut i = 0; - while i < 8 { - if i == compress_start { - try!(formatter.write_str(":")); - if i == 0 { - try!(formatter.write_str(":")); - } - if compress_end < 8 { - i = compress_end; - } else { - break; - } - } - try!(write!(formatter, "{:x}", self.pieces[i as usize])); - if i < 7 { - try!(formatter.write_str(":")); - } - i += 1; - } - Ok(()) - } -} - - -fn longest_zero_sequence(pieces: &[u16; 8]) -> (isize, isize) { - let mut longest = -1; - let mut longest_length = -1; - let mut start = -1; - macro_rules! finish_sequence( - ($end: expr) => { - if start >= 0 { - let length = $end - start; - if length > longest_length { - longest = start; - longest_length = length; - } - } - }; - ); - for i in 0..8 { - if pieces[i as usize] == 0 { - if start < 0 { - start = i; - } - } else { - finish_sequence!(i); - start = -1; + Host::Domain(ref domain) => domain.fmt(f), + Host::V4(ref addr) => addr.fmt(f), + Host::V6(ref addr) => write!(f, "[{}]", addr), } } - finish_sequence!(8); - (longest, longest + longest_length) } diff --git a/src/lib.rs b/src/lib.rs index 08d32ef55..d46810857 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -143,7 +143,7 @@ use std::cmp::Ordering; #[cfg(feature="serde_serialization")] use std::str::FromStr; -pub use host::{Host, Ipv6Address}; +pub use host::Host; pub use parser::{ErrorHandler, ParseResult, ParseError}; use percent_encoding::{percent_encode, lossy_utf8_percent_decode, DEFAULT_ENCODE_SET}; @@ -1140,4 +1140,3 @@ fn file_url_path_to_pathbuf_windows(path: &[String]) -> Result { "to_file_path() failed to produce an absolute Path"); Ok(path) } - diff --git a/src/tests.rs b/src/tests.rs index e25500f5c..b49ed29b7 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -8,6 +8,7 @@ use std::char; +use 
std::net::{Ipv4Addr, Ipv6Addr}; use super::{UrlParser, Url, SchemeData, RelativeSchemeData, Host}; @@ -347,3 +348,14 @@ fn relative_scheme_data_equality() { let b: Url = url("http://foo.com/"); check_eq(&a, &b); } + +#[test] +fn host() { + let a = Host::parse("www.mozilla.org").unwrap(); + let b = Host::parse("1.35.33.49").unwrap(); + let c = Host::parse("[2001:0db8:85a3:08d3:1319:8a2e:0370:7344]").unwrap(); + assert_eq!(a, Host::Domain("www.mozilla.org".to_owned())); + assert_eq!(b, Host::V4(Ipv4Addr::new(1, 35, 33, 49))); + assert_eq!(c, Host::V6(Ipv6Addr::new(0x2001, 0x0db8, 0x85a3, 0x08d3, + 0x1319, 0x8a2e, 0x0370, 0x7344))); +}