From 56f8eb2a5b62a1d2280a728f10027d987d241157 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 16 Feb 2016 15:45:51 +0100 Subject: [PATCH 1/2] Make some details of the idna module private. --- make_idna_table.py | 5 +---- src/idna.rs | 8 +++++--- src/idna_mapping.rs | 5 +---- src/lib.rs | 1 - 4 files changed, 7 insertions(+), 12 deletions(-) diff --git a/make_idna_table.py b/make_idna_table.py index bd1bc811..5700d680 100644 --- a/make_idna_table.py +++ b/make_idna_table.py @@ -22,10 +22,7 @@ // Generated by make_idna_table.py -use idna::Mapping::*; -use idna::Range; - -pub static TABLE: &'static [Range] = &[ +static TABLE: &'static [Range] = &[ ''') txt = open("IdnaMappingTable.txt") diff --git a/src/idna.rs b/src/idna.rs index 9fe95d18..19b9e446 100644 --- a/src/idna.rs +++ b/src/idna.rs @@ -2,15 +2,17 @@ //! //! https://url.spec.whatwg.org/#idna -use idna_mapping::TABLE; +use self::Mapping::*; use punycode; use std::ascii::AsciiExt; use unicode_normalization::UnicodeNormalization; use unicode_normalization::char::is_combining_mark; use unicode_bidi::{BidiClass, bidi_class}; +include!("idna_mapping.rs"); + #[derive(Debug)] -pub enum Mapping { +enum Mapping { Valid, Ignored, Mapped(&'static str), @@ -20,7 +22,7 @@ pub enum Mapping { DisallowedStd3Mapped(&'static str), } -pub struct Range { +struct Range { pub from: char, pub to: char, pub mapping: Mapping, diff --git a/src/idna_mapping.rs b/src/idna_mapping.rs index 45d61326..eb57dfb2 100644 --- a/src/idna_mapping.rs +++ b/src/idna_mapping.rs @@ -8,10 +8,7 @@ // Generated by make_idna_table.py -use idna::Mapping::*; -use idna::Range; - -pub static TABLE: &'static [Range] = &[ +static TABLE: &'static [Range] = &[ Range { from: '\0', to: ',', mapping: DisallowedStd3Valid }, Range { from: '-', to: '.', mapping: Valid }, diff --git a/src/lib.rs b/src/lib.rs index be81b139..6a066791 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -173,7 +173,6 @@ pub mod form_urlencoded; pub mod punycode; pub mod format; pub mod idna; -mod idna_mapping; /// The parsed representation of an absolute URL. #[derive(PartialEq, Eq, Clone, Debug, Hash, PartialOrd, Ord)] From 5916b5e85c40edae1551c7b1bd1993e95f40c44c Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 16 Feb 2016 17:08:09 +0100 Subject: [PATCH 2/2] Add idna::domain_to_unicode --- src/idna.rs | 84 +++++++++++++++++++++++++++++++++-------------------- 1 file changed, 53 insertions(+), 31 deletions(-) diff --git a/src/idna.rs b/src/idna.rs index 19b9e446..e0efdb39 100644 --- a/src/idna.rs +++ b/src/idna.rs @@ -45,7 +45,7 @@ fn find_char(codepoint: char) -> &'static Mapping { &TABLE[min].mapping } -fn map_char(codepoint: char, flags: Uts46Flags, output: &mut String) -> Result<(), Error> { +fn map_char(codepoint: char, flags: Uts46Flags, output: &mut String, errors: &mut Vec) { match *find_char(codepoint) { Mapping::Valid => output.push(codepoint), Mapping::Ignored => {}, @@ -57,23 +57,23 @@ fn map_char(codepoint: char, flags: Uts46Flags, output: &mut String) -> Result<( output.push(codepoint) } } - Mapping::Disallowed => return Err(Error::DissallowedCharacter), + Mapping::Disallowed => { + errors.push(Error::DissallowedCharacter); + output.push(codepoint); + } Mapping::DisallowedStd3Valid => { if flags.use_std3_ascii_rules { - return Err(Error::DissallowedByStd3AsciiRules); - } else { - output.push(codepoint) + errors.push(Error::DissallowedByStd3AsciiRules); } + output.push(codepoint) } Mapping::DisallowedStd3Mapped(mapping) => { if flags.use_std3_ascii_rules { - return Err(Error::DissallowedMappedInStd3); - } else { - output.push_str(mapping) + errors.push(Error::DissallowedMappedInStd3); } + output.push_str(mapping) } } - Ok(()) } // http://tools.ietf.org/html/rfc5893#section-2 @@ -185,9 +185,9 @@ fn passes_bidi(label: &str, transitional_processing: bool) -> bool { } /// http://www.unicode.org/reports/tr46/#Validity_Criteria -fn validate(label: &str, flags: Uts46Flags) -> Result<(), Error> { +fn validate(label: &str, flags: Uts46Flags, errors: &mut Vec) { if label.nfc().ne(label.chars()) { - return Err(Error::ValidityCriteria); + errors.push(Error::ValidityCriteria); } // Can not contain '.' since the input is from .split('.') @@ -207,17 +207,15 @@ fn validate(label: &str, flags: Uts46Flags) -> Result<(), Error> { }) || !passes_bidi(label, flags.transitional_processing) { - Err(Error::ValidityCriteria) - } else { - Ok(()) + errors.push(Error::ValidityCriteria) } } /// http://www.unicode.org/reports/tr46/#Processing -fn uts46_processing(domain: &str, flags: Uts46Flags) -> Result { +fn uts46_processing(domain: &str, flags: Uts46Flags, errors: &mut Vec) -> String { let mut mapped = String::new(); for c in domain.chars() { - try!(map_char(c, flags, &mut mapped)) + map_char(c, flags, &mut mapped, errors) } let normalized: String = mapped.nfc().collect(); let mut validated = String::new(); @@ -227,21 +225,19 @@ fn uts46_processing(domain: &str, flags: Uts46Flags) -> Result { } if label.starts_with("xn--") { match punycode::decode_to_string(&label["xn--".len()..]) { - Some(label) => { - try!(validate(&label, Uts46Flags { - transitional_processing: false, - ..flags - })); - validated.push_str(&label) + Some(decoded_label) => { + let flags = Uts46Flags { transitional_processing: false, ..flags }; + validate(&decoded_label, flags, errors); + validated.push_str(&decoded_label) } - None => return Err(Error::PunycodeError), + None => errors.push(Error::PunycodeError) } } else { - try!(validate(label, flags)); + validate(label, flags, errors); validated.push_str(label) } } - Ok(validated) + validated } #[derive(Copy, Clone)] @@ -262,9 +258,10 @@ pub enum Error { } /// http://www.unicode.org/reports/tr46/#ToASCII -pub fn uts46_to_ascii(domain: &str, flags: Uts46Flags) -> Result { +pub fn uts46_to_ascii(domain: &str, flags: Uts46Flags) -> Result> { + let mut errors = Vec::new(); let mut result = String::new(); - for label in try!(uts46_processing(domain, flags)).split('.') { + for label in uts46_processing(domain, flags, &mut errors).split('.') { if result.len() > 0 { result.push('.'); } @@ -276,7 +273,7 @@ pub fn uts46_to_ascii(domain: &str, flags: Uts46Flags) -> Result result.push_str("xn--"); result.push_str(&x); }, - None => return Err(Error::PunycodeError) + None => errors.push(Error::PunycodeError) } } } @@ -285,17 +282,42 @@ pub fn uts46_to_ascii(domain: &str, flags: Uts46Flags) -> Result let domain = if result.ends_with(".") { &result[..result.len()-1] } else { &*result }; if domain.len() < 1 || domain.len() > 253 || domain.split('.').any(|label| label.len() < 1 || label.len() > 63) { - return Err(Error::TooLongForDns) + errors.push(Error::TooLongForDns) } } - Ok(result) + if errors.is_empty() { + Ok(result) + } else { + Err(errors) + } } /// https://url.spec.whatwg.org/#concept-domain-to-ascii -pub fn domain_to_ascii(domain: &str) -> Result { +pub fn domain_to_ascii(domain: &str) -> Result> { uts46_to_ascii(domain, Uts46Flags { use_std3_ascii_rules: false, transitional_processing: true, // XXX: switch when Firefox does verify_dns_length: false, }) } + +/// http://www.unicode.org/reports/tr46/#ToUnicode +/// +/// Only `use_std3_ascii_rules` is used in `flags`. +pub fn uts46_to_unicode(domain: &str, mut flags: Uts46Flags) -> (String, Vec) { + flags.transitional_processing = false; + let mut errors = Vec::new(); + let domain = uts46_processing(domain, flags, &mut errors); + (domain, errors) +} + +/// https://url.spec.whatwg.org/#concept-domain-to-unicode +pub fn domain_to_unicode(domain: &str) -> (String, Vec) { + uts46_to_unicode(domain, Uts46Flags { + use_std3_ascii_rules: false, + + // Unused: + transitional_processing: true, + verify_dns_length: false, + }) +}