diff --git a/Cargo.toml b/Cargo.toml index 38c7e3c5..4a5834f7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "url" -version = "0.2.37" +version = "0.3.0" authors = [ "Simon Sapin " ] description = "URL library for Rust, based on the WHATWG URL Standard" @@ -17,7 +17,7 @@ serde_serialization = ["serde"] heap_size = ["heapsize", "heapsize_plugin"] [dependencies.heapsize] -version = "0.1.1" +version = "0.1.3" optional = true [dependencies.heapsize_plugin] diff --git a/src/host.rs b/src/host.rs index c65aa6d3..51555bbf 100644 --- a/src/host.rs +++ b/src/host.rs @@ -7,37 +7,23 @@ // except according to those terms. use std::ascii::AsciiExt; -use std::cmp; use std::fmt::{self, Formatter}; +use std::net::{Ipv4Addr, Ipv6Addr}; use parser::{ParseResult, ParseError}; -use percent_encoding::{from_hex, percent_decode}; +use percent_encoding::{percent_decode}; /// The host name of an URL. #[derive(PartialEq, Eq, Clone, Debug, Hash, PartialOrd, Ord)] #[cfg_attr(feature="heap_size", derive(HeapSizeOf))] pub enum Host { - /// A (DNS) domain name or an IPv4 address. - /// - /// FIXME: IPv4 probably should be a separate variant. - /// See https://www.w3.org/Bugs/Public/show_bug.cgi?id=26431 + /// A (DNS) domain name. Domain(String), - - /// An IPv6 address, represented inside `[...]` square brackets - /// so that `:` colon characters in the address are not ambiguous - /// with the port number delimiter. - Ipv6(Ipv6Address), -} - - -/// A 128 bit IPv6 address -#[derive(Clone, Eq, PartialEq, Copy, Debug, Hash, PartialOrd, Ord)] -pub struct Ipv6Address { - pub pieces: [u16; 8] + /// An IPv4 address. + V4(Ipv4Addr), + /// An IPv6 address. + V6(Ipv6Addr), } -#[cfg(feature="heap_size")] -known_heap_size!(0, Ipv6Address); - impl Host { /// Parse a host: either an IPv6 address in [] square brackets, or a domain. @@ -51,22 +37,30 @@ impl Host { Err(ParseError::EmptyHost) } else if input.starts_with("[") { if input.ends_with("]") { - Ipv6Address::parse(&input[1..input.len() - 1]).map(Host::Ipv6) + if let Ok(addr) = input[1..input.len() - 1].parse() { + Ok(Host::V6(addr)) + } else { + Err(ParseError::InvalidIpv6Address) + } } else { Err(ParseError::InvalidIpv6Address) } } else { - let decoded = percent_decode(input.as_bytes()); - let domain = String::from_utf8_lossy(&decoded); - // TODO: Remove this check and use IDNA "domain to ASCII" - if !domain.is_ascii() { - Err(ParseError::NonAsciiDomainsNotSupportedYet) - } else if domain.find(&[ - '\0', '\t', '\n', '\r', ' ', '#', '%', '/', ':', '?', '@', '[', '\\', ']' - ][..]).is_some() { - Err(ParseError::InvalidDomainCharacter) + if let Ok(addr) = input.parse() { + Ok(Host::V4(addr)) } else { - Ok(Host::Domain(domain.to_ascii_lowercase())) + let decoded = percent_decode(input.as_bytes()); + let domain = String::from_utf8_lossy(&decoded); + // TODO: Remove this check and use IDNA "domain to ASCII" + if !domain.is_ascii() { + Err(ParseError::NonAsciiDomainsNotSupportedYet) + } else if domain.find(&[ + '\0', '\t', '\n', '\r', ' ', '#', '%', '/', ':', '?', '@', '[', '\\', ']' + ][..]).is_some() { + Err(ParseError::InvalidDomainCharacter) + } else { + Ok(Host::Domain(domain.to_ascii_lowercase())) + } } } } @@ -81,203 +75,11 @@ impl Host { impl fmt::Display for Host { - fn fmt(&self, formatter: &mut Formatter) -> fmt::Result { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { match *self { - Host::Domain(ref domain) => domain.fmt(formatter), - Host::Ipv6(ref address) => { - try!(formatter.write_str("[")); - try!(address.fmt(formatter)); - formatter.write_str("]") - } - } - } -} - - -impl Ipv6Address { - /// Parse an IPv6 address, without the [] square brackets. - pub fn parse(input: &str) -> ParseResult { - let input = input.as_bytes(); - let len = input.len(); - let mut is_ip_v4 = false; - let mut pieces = [0, 0, 0, 0, 0, 0, 0, 0]; - let mut piece_pointer = 0; - let mut compress_pointer = None; - let mut i = 0; - - if len < 2 { - return Err(ParseError::InvalidIpv6Address) - } - - if input[0] == b':' { - if input[1] != b':' { - return Err(ParseError::InvalidIpv6Address) - } - i = 2; - piece_pointer = 1; - compress_pointer = Some(1); - } - - while i < len { - if piece_pointer == 8 { - return Err(ParseError::InvalidIpv6Address) - } - if input[i] == b':' { - if compress_pointer.is_some() { - return Err(ParseError::InvalidIpv6Address) - } - i += 1; - piece_pointer += 1; - compress_pointer = Some(piece_pointer); - continue - } - let start = i; - let end = cmp::min(len, start + 4); - let mut value = 0u16; - while i < end { - match from_hex(input[i]) { - Some(digit) => { - value = value * 0x10 + digit as u16; - i += 1; - }, - None => break - } - } - if i < len { - match input[i] { - b'.' => { - if i == start { - return Err(ParseError::InvalidIpv6Address) - } - i = start; - is_ip_v4 = true; - }, - b':' => { - i += 1; - if i == len { - return Err(ParseError::InvalidIpv6Address) - } - }, - _ => return Err(ParseError::InvalidIpv6Address) - } - } - if is_ip_v4 { - break - } - pieces[piece_pointer] = value; - piece_pointer += 1; - } - - if is_ip_v4 { - if piece_pointer > 6 { - return Err(ParseError::InvalidIpv6Address) - } - let mut dots_seen = 0; - while i < len { - // FIXME: https://github.com/whatwg/url/commit/1c22aa119c354e0020117e02571cec53f7c01064 - let mut value = 0u16; - while i < len { - let digit = match input[i] { - c @ b'0' ... b'9' => c - b'0', - _ => break - }; - value = value * 10 + digit as u16; - if value == 0 || value > 255 { - return Err(ParseError::InvalidIpv6Address) - } - } - if dots_seen < 3 && !(i < len && input[i] == b'.') { - return Err(ParseError::InvalidIpv6Address) - } - pieces[piece_pointer] = pieces[piece_pointer] * 0x100 + value; - if dots_seen == 0 || dots_seen == 2 { - piece_pointer += 1; - } - i += 1; - if dots_seen == 3 && i < len { - return Err(ParseError::InvalidIpv6Address) - } - dots_seen += 1; - } - } - - match compress_pointer { - Some(compress_pointer) => { - let mut swaps = piece_pointer - compress_pointer; - piece_pointer = 7; - while swaps > 0 { - pieces[piece_pointer] = pieces[compress_pointer + swaps - 1]; - pieces[compress_pointer + swaps - 1] = 0; - swaps -= 1; - piece_pointer -= 1; - } - } - _ => if piece_pointer != 8 { - return Err(ParseError::InvalidIpv6Address) - } - } - Ok(Ipv6Address { pieces: pieces }) - } - - /// Serialize the IPv6 address to a string. - pub fn serialize(&self) -> String { - self.to_string() - } -} - - -impl fmt::Display for Ipv6Address { - fn fmt(&self, formatter: &mut Formatter) -> fmt::Result { - let (compress_start, compress_end) = longest_zero_sequence(&self.pieces); - let mut i = 0; - while i < 8 { - if i == compress_start { - try!(formatter.write_str(":")); - if i == 0 { - try!(formatter.write_str(":")); - } - if compress_end < 8 { - i = compress_end; - } else { - break; - } - } - try!(write!(formatter, "{:x}", self.pieces[i as usize])); - if i < 7 { - try!(formatter.write_str(":")); - } - i += 1; - } - Ok(()) - } -} - - -fn longest_zero_sequence(pieces: &[u16; 8]) -> (isize, isize) { - let mut longest = -1; - let mut longest_length = -1; - let mut start = -1; - macro_rules! finish_sequence( - ($end: expr) => { - if start >= 0 { - let length = $end - start; - if length > longest_length { - longest = start; - longest_length = length; - } - } - }; - ); - for i in 0..8 { - if pieces[i as usize] == 0 { - if start < 0 { - start = i; - } - } else { - finish_sequence!(i); - start = -1; + Host::Domain(ref domain) => domain.fmt(f), + Host::V4(ref addr) => addr.fmt(f), + Host::V6(ref addr) => write!(f, "[{}]", addr), } } - finish_sequence!(8); - (longest, longest + longest_length) } diff --git a/src/lib.rs b/src/lib.rs index 08d32ef5..d4681085 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -143,7 +143,7 @@ use std::cmp::Ordering; #[cfg(feature="serde_serialization")] use std::str::FromStr; -pub use host::{Host, Ipv6Address}; +pub use host::Host; pub use parser::{ErrorHandler, ParseResult, ParseError}; use percent_encoding::{percent_encode, lossy_utf8_percent_decode, DEFAULT_ENCODE_SET}; @@ -1140,4 +1140,3 @@ fn file_url_path_to_pathbuf_windows(path: &[String]) -> Result { "to_file_path() failed to produce an absolute Path"); Ok(path) } - diff --git a/src/tests.rs b/src/tests.rs index e25500f5..b49ed29b 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -8,6 +8,7 @@ use std::char; +use std::net::{Ipv4Addr, Ipv6Addr}; use super::{UrlParser, Url, SchemeData, RelativeSchemeData, Host}; @@ -347,3 +348,14 @@ fn relative_scheme_data_equality() { let b: Url = url("http://foo.com/"); check_eq(&a, &b); } + +#[test] +fn host() { + let a = Host::parse("www.mozilla.org").unwrap(); + let b = Host::parse("1.35.33.49").unwrap(); + let c = Host::parse("[2001:0db8:85a3:08d3:1319:8a2e:0370:7344]").unwrap(); + assert_eq!(a, Host::Domain("www.mozilla.org".to_owned())); + assert_eq!(b, Host::V4(Ipv4Addr::new(1, 35, 33, 49))); + assert_eq!(c, Host::V6(Ipv6Addr::new(0x2001, 0x0db8, 0x85a3, 0x08d3, + 0x1319, 0x8a2e, 0x0370, 0x7344))); +}