From 101c330cb06b3e6de7f30509b4237421b94e3f28 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 3 Jan 2014 23:54:23 +0000 Subject: [PATCH 01/69] Add a regression test for a tinycss2 bug. --- component_value_list.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/component_value_list.json b/component_value_list.json index 01c9cbb1..5abe2453 100644 --- a/component_value_list.json +++ b/component_value_list.json @@ -80,7 +80,7 @@ ["at-keyword", "0media\uFFFD"] ], -"#red0 #-Red #--red #-\\-red #0red #-0red #_Red #.red #rêd #\\.red\\", [ +"#red0 #-Red #--red #-\\-red #0red #-0red #_Red #.red #rêd #êrd #\\.red\\", [ ["hash", "red0", "id"], " ", ["hash", "-Red", "id"], " ", ["hash", "--red", "unrestricted"], " ", @@ -90,6 +90,7 @@ ["hash", "_Red", "id"], " ", "#", ".", ["ident", "red"], " ", ["hash", "rêd", "id"], " ", + ["hash", "êrd", "id"], " ", ["hash", ".red\uFFFD", "id"] ], From 95ad783414ce5c5ad53d447239032fe55570579b Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 21 Apr 2014 14:51:10 +0100 Subject: [PATCH 02/69] Add more regression tests for tinycss2 bugs. --- declaration_list.json | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/declaration_list.json b/declaration_list.json index 4866c703..fbd6ef6d 100644 --- a/declaration_list.json +++ b/declaration_list.json @@ -7,6 +7,16 @@ ["declaration", "c", [["ident", "d"], " ", ["number", "42", 42, "integer"]], true] ], +"z;a:b", [ + ["error", "invalid"], + ["declaration", "a", [["ident", "b"]], false] +], + +"z:x!;a:b", [ + ["error", "invalid"], + ["declaration", "a", [["ident", "b"]], false] +], + "@import 'foo.css'; a:b; @import 'bar.css'", [ ["at-rule", "import", [" ", ["string", "foo.css"]], null], ["declaration", "a", [["ident", "b"]], false], From 691160f3bce1034cddd5425dd902a4732110636d Mon Sep 17 00:00:00 2001 From: Ezequiel Rodriguez Date: Mon, 21 Jul 2014 13:10:41 -0700 Subject: [PATCH 03/69] Identifiers may now begin with "--". 
See bullet point two of: http://dev.w3.org/csswg/css-syntax/#changes-CR-20140220 --- component_value_list.json | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/component_value_list.json b/component_value_list.json index 5abe2453..5048248a 100644 --- a/component_value_list.json +++ b/component_value_list.json @@ -25,7 +25,7 @@ "red0 -red --red -\\-red\\ blue 0red -0red \u0000red _Red .red rêd r\\êd \u007F\u0080\u0081", [ ["ident", "red0"], " ", ["ident", "-red"], " ", - "-", ["ident", "-red"], " ", + ["ident", "--red"], " ", ["ident", "--red blue"], " ", ["dimension", "0", 0, "integer", "red"], " ", ["dimension", "-0", 0, "integer", "red"], " ", @@ -54,7 +54,7 @@ "rgba0() -rgba() --rgba() -\\-rgba() 0rgba() -0rgba() _rgba() .rgba() rgbâ() \\30rgba() rgba () @rgba() #rgba()", [ ["function", "rgba0"], " ", ["function", "-rgba"], " ", - "-", ["function", "-rgba"], " ", + ["function", "--rgba"], " ", ["function", "--rgba"], " ", ["dimension", "0", 0, "integer", "rgba"], ["()"], " ", ["dimension", "-0", 0, "integer", "rgba"], ["()"], " ", @@ -70,7 +70,7 @@ "@media0 @-Media @--media @-\\-media @0media @-0media @_media @.media @medİa @\\30 media\\", [ ["at-keyword", "media0"], " ", ["at-keyword", "-Media"], " ", - "@", "-", ["ident", "-media"], " ", + ["at-keyword", "--media"], " ", ["at-keyword", "--media"], " ", "@", ["dimension", "0", 0, "integer", "media"], " ", "@", ["dimension", "-0", 0, "integer", "media"], " ", @@ -83,7 +83,7 @@ "#red0 #-Red #--red #-\\-red #0red #-0red #_Red #.red #rêd #êrd #\\.red\\", [ ["hash", "red0", "id"], " ", ["hash", "-Red", "id"], " ", - ["hash", "--red", "unrestricted"], " ", + ["hash", "--red", "id"], " ", ["hash", "--red", "id"], " ", ["hash", "0red", "unrestricted"], " ", ["hash", "-0red", "unrestricted"], " ", @@ -308,7 +308,7 @@ "12red0 12.0-red 12--red 12-\\-red 120red 12-0red 12\u0000red 12_Red 12.red 12rêd", [ ["dimension", "12", 12, "integer", "red0"], " ", ["dimension", "12.0", 12, "number", 
"-red"], " ", - ["number", "12", 12, "integer"], "-", ["ident", "-red"], " ", + ["dimension", "12", 12, "integer", "--red"], " ", ["dimension", "12", 12, "integer", "--red"], " ", ["dimension", "120", 120, "integer", "red"], " ", ["number", "12", 12, "integer"], ["dimension", "-0", 0, "integer", "red"], " ", @@ -392,7 +392,7 @@ ], "~=|=^=$=*=|| |/**/| ~/**/=", [ - "~=", "|=", "^=", "$=", "*=", "||", "", + "~=", "|=", "^=", "$=", "*=", "||", "") { tokenizer.position += 3; CDC + } else if is_ident_start(tokenizer) { + consume_ident_like(tokenizer) } else { tokenizer.position += 1; Delim(c) @@ -320,7 +320,7 @@ fn is_ident_start(tokenizer: &mut Tokenizer) -> bool { !tokenizer.is_eof() && match tokenizer.current_char() { 'a'...'z' | 'A'...'Z' | '_' | '\0' => true, '-' => tokenizer.position + 1 < tokenizer.length && match tokenizer.char_at(1) { - 'a'...'z' | 'A'...'Z' | '_' | '\0' => true, + 'a'...'z' | 'A'...'Z' | '-' | '_' | '\0' => true, '\\' => !tokenizer.has_newline_at(1), c => c > '\x7F', // Non-ASCII }, From 41fa928b7df1acc05b16af5f4ec3edb229274957 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 23 Dec 2014 17:15:30 +0100 Subject: [PATCH 15/69] Rewrite ALL THE THINGS! The tokenizer is now fully incremental, rather than yielding a block/function at a time. The convention is now to take `&mut Parser` as input for parsing, and only consume as necessary. 
--- src/ast.rs | 73 +---- src/color.rs | 103 +++--- src/from_bytes.rs | 30 ++ src/lib.rs | 20 +- src/nth.rs | 158 ++++----- src/parser.rs | 601 ++++++++++++++++++++++------------ src/rules_and_declarations.rs | 284 ++++++++++++++++ src/serializer.rs | 300 ++++++----------- src/tests.rs | 460 +++++++++++++++----------- src/tokenizer.rs | 188 ++++++----- 10 files changed, 1301 insertions(+), 916 deletions(-) create mode 100644 src/rules_and_declarations.rs diff --git a/src/ast.rs b/src/ast.rs index a0244fa2..edb99022 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -3,8 +3,6 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ use std::fmt; -use std::slice; -use std::vec; #[deriving(PartialEq, Show)] @@ -22,18 +20,18 @@ pub struct SourceLocation { } -pub type Node = (ComponentValue, SourceLocation); // TODO this is not a good name +pub type Node = (Token, SourceLocation); // TODO this is not a good name #[deriving(PartialEq, Show)] -pub enum ComponentValue { +pub enum Token { // Preserved tokens. Ident(String), AtKeyword(String), Hash(String), IDHash(String), // Hash that is a valid ID selector. 
QuotedString(String), - URL(String), + Url(String), Delim(char), Number(NumericValue), Percentage(NumericValue), @@ -53,15 +51,15 @@ pub enum ComponentValue { CDC, // --> // Function - Function(String, Vec), // name, arguments + Function(String), // name // Simple block - ParenthesisBlock(Vec), // (…) - SquareBracketBlock(Vec), // […] - CurlyBracketBlock(Vec), // {…} + ParenthesisBlock, // (…) + SquareBracketBlock, // […] + CurlyBracketBlock, // {…} // These are always invalid - BadURL, + BadUrl, BadString, CloseParenthesis, // ) CloseSquareBracket, // ] @@ -73,14 +71,14 @@ pub enum ComponentValue { pub struct Declaration { pub location: SourceLocation, pub name: String, - pub value: Vec, + pub value: Vec, pub important: bool, } #[deriving(PartialEq)] pub struct QualifiedRule { pub location: SourceLocation, - pub prelude: Vec, + pub prelude: Vec, pub block: Vec, } @@ -88,7 +86,7 @@ pub struct QualifiedRule { pub struct AtRule { pub location: SourceLocation, pub name: String, - pub prelude: Vec, + pub prelude: Vec, pub block: Option>, } @@ -125,52 +123,3 @@ impl fmt::Show for SyntaxError { write!(f, "{}:{} {}", self.location.line, self.location.column, self.reason) } } - - -pub trait SkipWhitespaceIterable<'a> { - fn skip_whitespace(self) -> SkipWhitespaceIterator<'a>; -} - -impl<'a> SkipWhitespaceIterable<'a> for &'a [ComponentValue] { - fn skip_whitespace(self) -> SkipWhitespaceIterator<'a> { - SkipWhitespaceIterator{ iter_with_whitespace: self.iter() } - } -} - -#[deriving(Clone)] -pub struct SkipWhitespaceIterator<'a> { - pub iter_with_whitespace: slice::Items<'a, ComponentValue>, -} - -impl<'a> Iterator<&'a ComponentValue> for SkipWhitespaceIterator<'a> { - fn next(&mut self) -> Option<&'a ComponentValue> { - for component_value in self.iter_with_whitespace { - if component_value != &ComponentValue::WhiteSpace { return Some(component_value) } - } - None - } -} - - -pub trait MoveSkipWhitespaceIterable { - fn move_skip_whitespace(self) -> 
MoveSkipWhitespaceIterator; -} - -impl MoveSkipWhitespaceIterable for Vec { - fn move_skip_whitespace(self) -> MoveSkipWhitespaceIterator { - MoveSkipWhitespaceIterator{ iter_with_whitespace: self.into_iter() } - } -} - -pub struct MoveSkipWhitespaceIterator { - iter_with_whitespace: vec::MoveItems, -} - -impl Iterator for MoveSkipWhitespaceIterator { - fn next(&mut self) -> Option { - for component_value in self.iter_with_whitespace { - if component_value != ComponentValue::WhiteSpace { return Some(component_value) } - } - None - } -} diff --git a/src/color.rs b/src/color.rs index 83208b6d..ed58c130 100644 --- a/src/color.rs +++ b/src/color.rs @@ -8,9 +8,7 @@ use std::num::{Float, FloatMath}; use text_writer::{mod, TextWriter}; -use ast::{ComponentValue, SkipWhitespaceIterable}; -use ast::ComponentValue::{Number, Percentage, Function, Ident, Hash, IDHash, Comma}; -use serializer::ToCss; +use super::{Token, Parser, ToCss}; #[deriving(Clone, PartialEq)] @@ -66,13 +64,18 @@ impl fmt::Show for Color { /// Return `Err(())` on invalid or unsupported value (not a color). 
impl Color { - pub fn parse(component_value: &ComponentValue) -> Result { - match *component_value { - Hash(ref value) | IDHash(ref value) => parse_color_hash(value.as_slice()), - Ident(ref value) => parse_color_keyword(value.as_slice()), - Function(ref name, ref arguments) - => parse_color_function(name.as_slice(), arguments.as_slice()), - _ => Err(()) + pub fn parse(input: &mut Parser) -> Result { + match try!(input.next()) { + Token::Hash(ref value) | Token::IDHash(ref value) => { + parse_color_hash(value.as_slice()) + } + Token::Ident(ref value) => parse_color_keyword(value.as_slice()), + Token::Function(ref name) => { + input.parse_nested_block().parse_entirely(|arguments| { + parse_color_function(name.as_slice(), arguments) + }) + } + token => input.unexpected(token) } } } @@ -279,70 +282,43 @@ fn parse_color_hash(value: &str) -> Result { #[inline] -fn parse_color_function(name: &str, arguments: &[ComponentValue]) - -> Result { - let lower_name = name.to_ascii_lower(); - let lower_name = lower_name.as_slice(); - +fn parse_color_function(name: &str, arguments: &mut Parser) -> Result { let (is_rgb, has_alpha) = - if "rgba" == lower_name { (true, true) } - else if "rgb" == lower_name { (true, false) } - else if "hsl" == lower_name { (false, false) } - else if "hsla" == lower_name { (false, true) } + if name.eq_ignore_ascii_case("rgba") { (true, true) } + else if name.eq_ignore_ascii_case("rgb") { (true, false) } + else if name.eq_ignore_ascii_case("hsl") { (false, false) } + else if name.eq_ignore_ascii_case("hsla") { (false, true) } else { return Err(()) }; - let mut iter = arguments.skip_whitespace(); - macro_rules! expect_comma( - () => ( match iter.next() { Some(&Comma) => {}, _ => { return Err(()) } } ); - ) - macro_rules! expect_percentage( - () => ( match iter.next() { - Some(&Percentage(ref v)) => v.value, - _ => return Err(()), - }); - ) - macro_rules! 
expect_integer( - () => ( match iter.next() { - Some(&Number(ref v)) if v.int_value.is_some() => v.value, - _ => return Err(()), - }); - ) - macro_rules! expect_number( - () => ( match iter.next() { - Some(&Number(ref v)) => v.value, - _ => return Err(()), - }); - ) - let red: f32; let green: f32; let blue: f32; if is_rgb { // Either integers or percentages, but all the same type. - match iter.next() { - Some(&Number(ref v)) if v.int_value.is_some() => { + match try!(arguments.next()) { + Token::Number(ref v) if v.int_value.is_some() => { red = (v.value / 255.) as f32; - expect_comma!(); - green = (expect_integer!() / 255.) as f32; - expect_comma!(); - blue = (expect_integer!() / 255.) as f32; + try!(arguments.expect_comma()); + green = try!(arguments.expect_integer()) as f32 / 255.; + try!(arguments.expect_comma()); + blue = try!(arguments.expect_integer()) as f32 / 255.; } - Some(&Percentage(ref v)) => { + Token::Percentage(ref v) => { red = (v.value / 100.) as f32; - expect_comma!(); - green = (expect_percentage!() / 100.) as f32; - expect_comma!(); - blue = (expect_percentage!() / 100.) as f32; + try!(arguments.expect_comma()); + green = (try!(arguments.expect_percentage()) / 100.) as f32; + try!(arguments.expect_comma()); + blue = (try!(arguments.expect_percentage()) / 100.) 
as f32; } _ => return Err(()) }; } else { - let hue = expect_number!() / 360.; + let hue = try!(arguments.expect_number()) / 360.; let hue = hue - hue.floor(); - expect_comma!(); - let saturation = (expect_percentage!() / 100.).max(0.).min(1.); - expect_comma!(); - let lightness = (expect_percentage!() / 100.).max(0.).min(1.); + try!(arguments.expect_comma()); + let saturation = (try!(arguments.expect_percentage()) / 100.).max(0.).min(1.); + try!(arguments.expect_comma()); + let lightness = (try!(arguments.expect_percentage()) / 100.).max(0.).min(1.); // http://www.w3.org/TR/css3-color/#hsl-color fn hue_to_rgb(m1: f64, m2: f64, mut h: f64) -> f64 { @@ -363,14 +339,11 @@ fn parse_color_function(name: &str, arguments: &[ComponentValue]) } let alpha = if has_alpha { - expect_comma!(); - (expect_number!()).max(0.).min(1.) as f32 + try!(arguments.expect_comma()); + (try!(arguments.expect_number())).max(0.).min(1.) as f32 } else { 1. }; - if iter.next().is_none() { - Ok(Color::RGBA(RGBA { red: red, green: green, blue: blue, alpha: alpha })) - } else { - Err(()) - } + try!(arguments.expect_exhausted()); + Ok(Color::RGBA(RGBA { red: red, green: green, blue: blue, alpha: alpha })) } diff --git a/src/from_bytes.rs b/src/from_bytes.rs index fd142f9f..9e474bef 100644 --- a/src/from_bytes.rs +++ b/src/from_bytes.rs @@ -8,6 +8,8 @@ use encoding::label::encoding_from_whatwg_label; use encoding::all::UTF_8; use encoding::{EncodingRef, DecoderTrap, decode}; +use super::{Parser, RuleListParser, QualifiedRuleParser, AtRuleParser}; + /// Determine the character encoding of a CSS stylesheet and decode it. /// @@ -65,3 +67,31 @@ fn decode_replace(input: &[u8], fallback_encoding: EncodingRef)-> (String, Encod let (result, used_encoding) = decode(input, DecoderTrap::Replace, fallback_encoding); (result.unwrap(), used_encoding) } + + +/// Parse stylesheet from bytes. +/// +/// * `css_bytes`: A byte string. 
+/// * `protocol_encoding`: The encoding label, if any, defined by HTTP or equivalent protocol. +/// (e.g. via the `charset` parameter of the `Content-Type` header.) +/// * `environment_encoding`: An optional `Encoding` object for the [environment encoding] +/// (http://www.w3.org/TR/css-syntax/#environment-encoding), if any. +/// +/// Returns a 2-tuple of a `Iterator>` +/// and the `Encoding` object that was used. +pub fn parse_stylesheet_rules_from_bytes( + css_bytes: &[u8], + protocol_encoding_label: Option<&str>, + environment_encoding: Option, + rules_parser: P, + parse: |EncodingRef, RuleListParser| -> T) + -> T + where P: QualifiedRuleParser + AtRuleParser { + let (css_unicode, encoding) = decode_stylesheet_bytes( + css_bytes, protocol_encoding_label, environment_encoding); + // FIXME: Remove option dance when unboxed closures permit. + let mut rules_parser = Some(rules_parser); + Parser::parse_str(css_unicode.as_slice(), |input| { + parse(encoding, RuleListParser::new_for_stylesheet(input, rules_parser.take().unwrap())) + }) +} diff --git a/src/lib.rs b/src/lib.rs index 7a34b24c..74b2f310 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,7 +5,7 @@ #![crate_name = "cssparser"] #![crate_type = "rlib"] -#![feature(globs, macro_rules)] +#![feature(globs, macro_rules, if_let, while_let, unsafe_destructor)] extern crate encoding; extern crate text_writer; @@ -16,18 +16,18 @@ extern crate test; #[cfg(test)] extern crate serialize; -pub use color::{parse_color_keyword}; -pub use tokenizer::{tokenize, Tokenizer}; -pub use parser::{parse_stylesheet_rules, StylesheetParser, - parse_rule_list, RuleListParser, - parse_declaration_list, DeclarationListParser, - parse_one_rule, parse_one_declaration, parse_one_component_value}; -pub use from_bytes::decode_stylesheet_bytes; -pub use color::{RGBA, Color}; +pub use tokenizer::{Tokenizer, Token, NumericValue}; +pub use rules_and_declarations::{Priority, parse_important}; +pub use rules_and_declarations::{DeclarationParser, 
DeclarationListParser, parse_one_declaration}; +pub use rules_and_declarations::{RuleListParser, parse_one_rule}; +pub use rules_and_declarations::{AtRulePrelude, QualifiedRuleParser, AtRuleParser}; +pub use from_bytes::{decode_stylesheet_bytes, parse_stylesheet_rules_from_bytes}; +pub use color::{RGBA, Color, parse_color_keyword}; pub use nth::parse_nth; pub use serializer::{ToCss, CssStringWriter, serialize_identifier, serialize_string}; +pub use parser::{Parser, Delimiter, Delimiters}; -pub mod ast; +mod rules_and_declarations; mod tokenizer; mod parser; mod from_bytes; diff --git a/src/nth.rs b/src/nth.rs index 9b9fd8e9..dd1bca82 100644 --- a/src/nth.rs +++ b/src/nth.rs @@ -4,124 +4,100 @@ use std::ascii::AsciiExt; -use ast::{ComponentValue, NumericValue, SkipWhitespaceIterator, SkipWhitespaceIterable}; -use ast::ComponentValue::{Number, Dimension, Ident, Delim}; +use super::{Token, NumericValue, Parser}; /// Parse the *An+B* notation, as found in the `:nth-child()` selector. -/// The input is typically the arguments of a function component value. +/// The input is typically the arguments of a function, +/// in which case the caller needs to check if the arguments’ parser is exhausted. /// Return `Ok((A, B))`, or `Err(())` for a syntax error. 
-pub fn parse_nth(input: &[ComponentValue]) -> Result<(i32, i32), ()> { - let iter = &mut input.skip_whitespace(); - match iter.next() { - Some(&Number(ref value)) => match value.int_value { - Some(b) => parse_end(iter, 0, b as i32), - _ => Err(()), - }, - Some(&Dimension(ref value, ref unit)) => match value.int_value { - Some(a) => { - let unit = unit.as_slice().to_ascii_lower(); - let unit = unit.as_slice(); - match unit { - "n" => parse_b(iter, a as i32), - "n-" => parse_signless_b(iter, a as i32, -1), - _ => match parse_n_dash_digits(unit) { - Some(b) => parse_end(iter, a as i32, b), - _ => Err(()) - }, - } - }, - _ => Err(()), - }, - Some(&Ident(ref value)) => { - let ident = value.as_slice().to_ascii_lower(); - let ident = ident.as_slice(); - match ident { - "even" => parse_end(iter, 2, 0), - "odd" => parse_end(iter, 2, 1), - "n" => parse_b(iter, 1), - "-n" => parse_b(iter, -1), - "n-" => parse_signless_b(iter, 1, -1), - "-n-" => parse_signless_b(iter, -1, -1), - _ if ident.starts_with("-") => match parse_n_dash_digits(ident.slice_from(1)) { - Some(b) => parse_end(iter, -1, b), - _ => Err(()) - }, - _ => match parse_n_dash_digits(ident) { - Some(b) => parse_end(iter, 1, b), - _ => Err(()) - }, +pub fn parse_nth(input: &mut Parser) -> Result<(i32, i32), ()> { + match try!(input.next()) { + Token::Number(value) => Ok((0, try!(value.int_value.ok_or(())) as i32)), + Token::Dimension(value, unit) => { + let a = try!(value.int_value.ok_or(())) as i32; + if unit.eq_ignore_ascii_case("n") { + parse_b(input, a) + } else if unit.eq_ignore_ascii_case("n-") { + parse_signless_b(input, a, -1) + } else { + Ok((a, try!(parse_n_dash_digits(unit.as_slice())))) } - }, - Some(&Delim('+')) => match iter.iter_with_whitespace.next() { - Some(&Ident(ref value)) => { - let ident = value.as_slice().to_ascii_lower(); - let ident = ident.as_slice(); - match ident { - "n" => parse_b(iter, 1), - "n-" => parse_signless_b(iter, 1, -1), - _ => match parse_n_dash_digits(ident) { - Some(b) => 
parse_end(iter, 1, b), - _ => Err(()) - }, + } + Token::Ident(value) => { + if value.eq_ignore_ascii_case("even") { + Ok((2, 0)) + } else if value.eq_ignore_ascii_case("odd") { + Ok((2, 1)) + } else if value.eq_ignore_ascii_case("n") { + parse_b(input, 1) + } else if value.eq_ignore_ascii_case("-n") { + parse_b(input, -1) + } else if value.eq_ignore_ascii_case("n-") { + parse_signless_b(input, 1, -1) + } else if value.eq_ignore_ascii_case("-n-") { + parse_signless_b(input, -1, -1) + } else if value.starts_with("-") { + Ok((-1, try!(parse_n_dash_digits(value.slice_from(1))))) + } else { + Ok((1, try!(parse_n_dash_digits(value.as_slice())))) + } + } + Token::Delim('+') => match try!(input.next_including_whitespace()) { + Token::Ident(value) => { + if value.eq_ignore_ascii_case("n") { + parse_b(input, 1) + } else if value.eq_ignore_ascii_case("n-") { + parse_signless_b(input, 1, -1) + } else { + Ok((1, try!(parse_n_dash_digits(value.as_slice())))) } - }, + } _ => Err(()) }, - _ => Err(()) + token => input.unexpected(token) } } -type Nth = Result<(i32, i32), ()>; -type Iter<'a> = SkipWhitespaceIterator<'a>; - -fn parse_b(iter: &mut Iter, a: i32) -> Nth { - match iter.next() { - None => Ok((a, 0)), - Some(&Delim('+')) => parse_signless_b(iter, a, 1), - Some(&Delim('-')) => parse_signless_b(iter, a, -1), - Some(&Number(ref value)) => match value.int_value { - Some(b) if has_sign(value) => parse_end(iter, a, b as i32), - _ => Err(()), - }, - _ => Err(()) +fn parse_b(input: &mut Parser, a: i32) -> Result<(i32, i32), ()> { + match input.next() { + Ok(Token::Delim('+')) => parse_signless_b(input, a, 1), + Ok(Token::Delim('-')) => parse_signless_b(input, a, -1), + Ok(Token::Number(ref value)) if has_sign(value) => { + Ok((a, try!(value.int_value.ok_or(())) as i32)) + } + token => { + input.push_back_result(token); + Ok((a, 0)) + } } } -fn parse_signless_b(iter: &mut Iter, a: i32, b_sign: i32) -> Nth { - match iter.next() { - Some(&Number(ref value)) => match value.int_value { 
- Some(b) if !has_sign(value) => parse_end(iter, a, b_sign * (b as i32)), - _ => Err(()), - }, +fn parse_signless_b(input: &mut Parser, a: i32, b_sign: i32) -> Result<(i32, i32), ()> { + match try!(input.next()) { + Token::Number(ref value) if !has_sign(value) => { + Ok((a, b_sign * (try!(value.int_value.ok_or(())) as i32))) + } _ => Err(()) } } -fn parse_end(iter: &mut Iter, a: i32, b: i32) -> Nth { - match iter.next() { - None => Ok((a, b)), - Some(_) => Err(()), - } -} - -fn parse_n_dash_digits(string: &str) -> Option { +fn parse_n_dash_digits(string: &str) -> Result { if string.len() >= 3 - && string.starts_with("n-") + && string.slice_to(2).eq_ignore_ascii_case("n-") && string.slice_from(2).chars().all(|c| match c { '0'...'9' => true, _ => false }) { - let result = from_str(string.slice_from(1)); // Include the minus sign - assert!(result.is_some()); - result + Ok(from_str(string.slice_from(1)).unwrap()) // Include the minus sign + } else { + Err(()) } - else { None } } #[inline] fn has_sign(value: &NumericValue) -> bool { - match value.representation.as_bytes()[0] as char { - '+' | '-' => true, + match value.representation.as_bytes()[0] { + b'+' | b'-' => true, _ => false } } diff --git a/src/parser.rs b/src/parser.rs index ecf187b3..d9919581 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -2,274 +2,459 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -// http://dev.w3.org/csswg/css-syntax/#parsing +use super::{Token, NumericValue, Tokenizer}; -/// The input to these functions needs to implement `Iterator<(ComponentValue, SourceLocation)>`. -/// The input is consumed to avoid doing a lot of copying. 
-/// A conforming input can be obtained: -/// -/// * From a string in CSS syntax, with tokenize() -/// * From a ~[(ComponentValue, SourceLocation)] vector -/// (as found in "nested" component values such as CurlyBracketBlock), -/// with v.consume_iter() +pub struct Parser<'i: 't, 't> { + tokenizer: Option<&'t mut Tokenizer<'i>>, + parent_state: Option<&'t mut ParserState>, + state: ParserState, +} -use std::iter::Iterator; -use std::ascii::AsciiExt; +struct ParserState { + nested_blocks: NestedBlockList, + /// For block/function parsers that need to stop at the matching `)`, `]`, or `}` + at_start_of: Option, + /// For block/function parsers that need to stop at the matching `)`, `]`, or `}` + stop_at_end_of: Option, + /// For parsers from `parse_until` + stop_before: Delimiters, +} -use ast::*; -use ast::ComponentValue::{WhiteSpace, CDC, CDO, Ident, Delim, AtKeyword, - Semicolon, Colon, CurlyBracketBlock}; +#[deriving(Copy, PartialEq, Eq, Show)] +enum BlockType { + Parenthesis, + SquareBracket, + CurlyBracket, +} -pub struct StylesheetParser{ iter: T } -pub struct RuleListParser{ iter: T } -pub struct DeclarationListParser{ iter: T } -/// Parse top-level of a CSS stylesheet. -/// Return a `Iterator>` -#[inline] -pub fn parse_stylesheet_rules>(iter: T) -> StylesheetParser { - StylesheetParser{ iter: iter } +impl BlockType { + fn opening(token: &Token) -> Option { + match *token { + Token::Function(_) | + Token::ParenthesisBlock => Some(BlockType::Parenthesis), + Token::SquareBracketBlock => Some(BlockType::SquareBracket), + Token::CurlyBracketBlock => Some(BlockType::CurlyBracket), + _ => None + } + } + + fn closing(token: &Token) -> Option { + match *token { + Token::CloseParenthesis => Some(BlockType::Parenthesis), + Token::CloseSquareBracket => Some(BlockType::SquareBracket), + Token::CloseCurlyBracket => Some(BlockType::CurlyBracket), + _ => None + } + } } -/// Parse a non-top level list of rules eg. the content of an @media rule. 
-/// Return a `Iterator>` -#[inline] -pub fn parse_rule_list>(iter: T) -> RuleListParser { - RuleListParser{ iter: iter } -} +type NestedBlockList = Option>; +struct NestedBlockListItem { + next: NestedBlockList, + block_type: BlockType, +} -/// Parse a list of declarations and at-rules, -/// like @page in CSS 2.1, all declaration lists in level 3 -/// Return a `Iterator>` -#[inline] -pub fn parse_declaration_list>(iter: T) -> DeclarationListParser { - DeclarationListParser{ iter: iter } +trait NestedBlockListExt { + fn push(&mut self, block_type: BlockType); + fn consume(self, tokenizer: &mut Tokenizer) -> bool; } +impl NestedBlockListExt for NestedBlockList { + fn push(&mut self, block_type: BlockType) { + debug_assert!(self.is_none()); + *self = Some(box NestedBlockListItem { + next: self.take(), + block_type: block_type, + }) + } -/// Parse a single rule. -/// Used eg. for CSSRuleList.insertRule() -pub fn parse_one_rule>(iter: T) -> Result { - let mut parser = RuleListParser{ iter: iter }; - match parser.next() { - None => error(START_LOCATION, ErrorReason::EmptyInput), - Some(result) => { - if result.is_err() { result } - else { match next_non_whitespace(&mut parser.iter) { - None => result, - Some((_component_value, location)) => error(location, ErrorReason::ExtraInput), - }} + /// Return value indicates whether the end of the input was reached. + fn consume(self, tokenizer: &mut Tokenizer) -> bool { + if let Some(box NestedBlockListItem { block_type, next }) = self { + // Recursion first: the inner-most item of the list + // is for the inner-most nested block. + next.consume(tokenizer) || consume_until_end_of_block(block_type, tokenizer) + } else { + false } } } -/// Parse a single declaration (not an at-rule) -/// Used eg. 
in @supports -pub fn parse_one_declaration>(mut iter: T) -> Result { - match next_non_whitespace(&mut iter) { - None => error(START_LOCATION, ErrorReason::EmptyInput), - Some((component_value, location)) => { - let result = parse_declaration(&mut iter, component_value, location); - if result.is_err() { result } - else { match next_non_whitespace(&mut iter) { - None => result, - Some((_component_value, location)) => error(location, ErrorReason::ExtraInput), - }} +#[unsafe_destructor] // FIXME What does this mean? +impl<'i, 't> Drop for Parser<'i, 't> { + fn drop(&mut self) { + if self.tokenizer.is_none() { + // We’ve already reached the end of our delimited input: + // nothing to inform the parent of. + debug_assert!(self.state.nested_blocks.is_none()); + return + } + if let Some(parent_state) = self.parent_state.take() { + // Inform our parent parser of what they need to consume. + debug_assert!(parent_state.nested_blocks.is_none()); + parent_state.nested_blocks = self.state.nested_blocks.take(); + if let Some(block_type) = self.state.at_start_of { + parent_state.nested_blocks.push(block_type) + } + // Don’t propagate stop_at_end_of back for delimited parsers: + if self.state.stop_before.is_none() { + debug_assert!(parent_state.at_start_of.is_none()); + parent_state.at_start_of = self.state.stop_at_end_of; + } } } } -/// Parse a single component value. -/// Used eg. 
in attr(foo, color) -pub fn parse_one_component_value>(mut iter: T) - -> Result { - match next_non_whitespace(&mut iter) { - None => error(START_LOCATION, ErrorReason::EmptyInput), - Some((component_value, _location)) => { - match next_non_whitespace(&mut iter) { - None => Ok(component_value), - Some((_component_value, location)) => error(location, ErrorReason::ExtraInput), - } +#[deriving(Copy, PartialEq, Eq, Show)] +pub struct Delimiters { + bits: u8, +} + +#[allow(non_upper_case_globals, non_snake_case)] +pub mod Delimiter { + use super::Delimiters; + + pub const None: Delimiters = Delimiters { bits: 0 }; + /// `{` + pub const CurlyBracketBlock: Delimiters = Delimiters { bits: 1 << 1 }; + /// `;` + pub const Semicolon: Delimiters = Delimiters { bits: 1 << 2 }; + /// `!` + pub const Bang: Delimiters = Delimiters { bits: 1 << 3 }; + /// `,` + pub const Comma: Delimiters = Delimiters { bits: 1 << 4 }; +} + +impl BitOr for Delimiters { + fn bitor(&self, other: &Delimiters) -> Delimiters { + Delimiters { bits: self.bits | other.bits } + } +} + +impl Delimiters { + fn contains(&self, other: Delimiters) -> bool { + (self.bits & other.bits) != 0 + } + + fn is_none(&self) -> bool { + self.bits == 0 + } + + fn from_token(token: &Token) -> Delimiters { + match *token { + Token::Semicolon => Delimiter::Semicolon, + Token::Comma => Delimiter::Comma, + Token::Delim('!') => Delimiter::Bang, + Token::CurlyBracketBlock => Delimiter::CurlyBracketBlock, + _ => Delimiter::None, } } } +impl<'i, 't> Parser<'i, 't> { + #[inline] + pub fn new(tokenizer: &'t mut Tokenizer<'i>) -> Parser<'i, 't> { + Parser { + tokenizer: Some(tokenizer), + parent_state: None, + state: ParserState { + nested_blocks: None, + at_start_of: None, + stop_at_end_of: None, + stop_before: Delimiter::None, + }, + } + } -// *********** End of public API *********** + #[inline] + pub fn parse_str(input: &str, parse: |&mut Parser| -> T) -> T { + parse(&mut Parser::new(&mut Tokenizer::new(input.as_slice()))) + } + 
#[inline] + pub fn is_exhausted(&mut self) -> bool { + self.peek().is_err() + } -// Work around "error: cannot borrow `*iter` as mutable more than once at a time" -// when using a normal for loop. -macro_rules! for_iter( - ($iter: ident, $pattern: pat, $loop_body: expr) => ( + #[inline] + pub fn expect_exhausted(&mut self) -> Result<(), ()> { + if self.is_exhausted() { + Ok(()) + } else { + Err(()) + } + } + + #[inline] + pub fn peek(&mut self) -> Result<&Token, ()> { + // Consume whatever needs to be consumed (e.g. open blocks). + let token = try!(self.next()); + self.push_back(token); + + self.tokenizer().peek() + } + + #[inline] + pub fn push_back(&mut self, token: Token) { + if BlockType::opening(&token) == self.state.at_start_of { + self.state.at_start_of = None; + } + self.tokenizer.as_mut().expect( + "Can not use Parser::push_back after the end of the input was reached." + ).push_back(token) + } + + #[inline] + pub fn push_back_result(&mut self, token_result: Result) { + if let Ok(token) = token_result { + self.push_back(token) + } + } + + #[inline] + pub fn unexpected(&mut self, token: Token) -> Result { + self.push_back(token); + Err(()) + } + + #[inline] + fn tokenizer<'a>(&'a mut self) -> &'a mut Tokenizer<'i> { + &mut **self.tokenizer.as_mut().unwrap() + } + + pub fn next(&mut self) -> Result { loop { - match $iter.next() { None => break, Some($pattern) => $loop_body } - } - ); -) - - -impl> Iterator> for StylesheetParser { - fn next(&mut self) -> Option> { - let iter = &mut self.iter; - for_iter!(iter, (component_value, location), { - match component_value { - WhiteSpace | CDO | CDC => (), - AtKeyword(name) => return Some(Ok( - Rule::AtRule(parse_at_rule(iter, name, location)))), - _ => return Some(match parse_qualified_rule(iter, component_value, location) { - Ok(rule) => Ok(Rule::QualifiedRule(rule)), - Err(e) => Err(e), - }), + match self.next_including_whitespace() { + Ok(Token::WhiteSpace) => {}, + result => return result } - }) - None + } } -} - 
-impl> Iterator> for RuleListParser { - fn next(&mut self) -> Option> { - let iter = &mut self.iter; - for_iter!(iter, (component_value, location), { - match component_value { - WhiteSpace => (), - AtKeyword(name) => return Some(Ok( - Rule::AtRule(parse_at_rule(iter, name, location)))), - _ => return Some(match parse_qualified_rule(iter, component_value, location) { - Ok(rule) => Ok(Rule::QualifiedRule(rule)), - Err(e) => Err(e), - }), + pub fn next_including_whitespace(&mut self) -> Result { + if self.tokenizer.is_none() { + return Err(()) + } + if self.state.nested_blocks.take().consume(self.tokenizer()) { + self.tokenizer = None; + return Err(()) + } + if let Some(block_type) = self.state.at_start_of.take() { + if consume_until_end_of_block(block_type, self.tokenizer()) { + self.tokenizer = None; + return Err(()) } - }) - None + } + match self.tokenizer().next() { + Err(()) => { + self.tokenizer = None; + Err(()) + }, + Ok(token) => { + if self.state.stop_before.contains(Delimiters::from_token(&token)) { + self.tokenizer.take().unwrap().push_back(token); + return Err(()) + } + if self.state.stop_at_end_of.is_some() && + BlockType::closing(&token) == self.state.stop_at_end_of { + self.tokenizer = None; + return Err(()) + } + if let Some(block_type) = BlockType::opening(&token) { + self.state.at_start_of = Some(block_type); + } + Ok(token) + } + } } -} + #[inline] + pub fn err_consume_until_after(&mut self, stop_after: Delimiters) -> Result { + self.consume_until_after(stop_after); + Err(()) + } -impl> Iterator> -for DeclarationListParser { - fn next(&mut self) -> Option> { - let iter = &mut self.iter; - for_iter!(iter, (component_value, location), { - match component_value { - WhiteSpace | Semicolon => (), - AtKeyword(name) => return Some(Ok( - DeclarationListItem::AtRule(parse_at_rule(iter, name, location)))), - _ => return Some(match parse_declaration(iter, component_value, location) { - Ok(declaration) => Ok(DeclarationListItem::Declaration(declaration)), - 
Err(e) => { - // Find the end of the declaration - for (v, _) in *iter { if v == Semicolon { break } } - Err(e) - } - }), + pub fn consume_until_after(&mut self, stop_after: Delimiters) { + if self.tokenizer.is_none() { + return + } + // FIXME: have a special-purpose tokenizer method for this that does less work. + while let Ok(token) = self.tokenizer().next() { + if stop_after.contains(Delimiters::from_token(&token)) { + return } - }) - None + if self.state.stop_before.contains(Delimiters::from_token(&token)) { + self.tokenizer.take().unwrap().push_back(token); + return + } + if self.state.stop_at_end_of.is_some() && + BlockType::closing(&token) == self.state.stop_at_end_of { + self.tokenizer = None; + return + } + if let Some(block_type) = BlockType::opening(&token) { + if consume_until_end_of_block(block_type, self.tokenizer()) { + self.tokenizer = None; + return + } + } + } + self.tokenizer = None; } -} + // FIXME: Take an unboxed `FnOnce` closure. + #[inline] + pub fn parse_entirely(&mut self, parse: |&mut Parser| -> Result) + -> Result { + let result = parse(self); + try!(self.expect_exhausted()); + result + } -fn parse_at_rule>(iter: &mut T, name: String, location: SourceLocation) - -> AtRule { - let mut prelude = Vec::new(); - let mut block = None; - for_iter!(iter, (component_value, _location), { - match component_value { - CurlyBracketBlock(content) => { block = Some(content); break }, - Semicolon => break, - component_value => prelude.push(component_value), + #[inline] + pub fn parse_nested_block<'a>(&'a mut self) -> Parser<'i, 'a> { + if let Some(block_type) = self.state.at_start_of.take() { + Parser { + // Unwrap here should never fail + // because `self.tokenizer` is only ever set to `None` + // when `self.state` is also `None`. 
+ tokenizer: Some(&mut **self.tokenizer.as_mut().unwrap()), + state: ParserState { + nested_blocks: self.state.nested_blocks.take(), + at_start_of: None, + stop_at_end_of: Some(block_type), + stop_before: Delimiter::None, + }, + parent_state: Some(&mut self.state), + } + } else { + panic!("\ + parse_block can only be called when a Function, \ + ParenthesisBlock, SquareBracketBlock, or CurlyBracketBlock \ + token was just consumed.\ + "); } - }) - AtRule {location: location, name: name, prelude: prelude, block: block} -} + } + #[inline] + pub fn parse_until_before<'a>(&'a mut self, stop_before: Delimiters) -> Parser<'i, 'a> { + if self.state.stop_before != Delimiter::None { + panic!("`parse_until_before` cannot be used on a `Parser` \ + that is itself from `parse_until_before`") + } + Parser { + tokenizer: self.tokenizer.as_mut().map(|t| &mut **t), + state: ParserState { + nested_blocks: self.state.nested_blocks.take(), + at_start_of: self.state.at_start_of.take(), + stop_at_end_of: self.state.stop_at_end_of, + stop_before: stop_before, + }, + parent_state: Some(&mut self.state), + } + } -fn parse_qualified_rule>(iter: &mut T, first: ComponentValue, - location: SourceLocation) - -> Result { - match first { - CurlyBracketBlock(content) - => return Ok(QualifiedRule { location: location, prelude: Vec::new(), block: content }), - _ => (), + #[inline] + pub fn expect_ident(&mut self) -> Result { + match try!(self.next()) { + Token::Ident(value) => Ok(value), + token => self.unexpected(token) + } } - let mut prelude = vec!(first); - for_iter!(iter, (component_value, _location), { - match component_value { - CurlyBracketBlock(content) - => return Ok(QualifiedRule {location: location, prelude: prelude, block: content}), - component_value => prelude.push(component_value), + + #[inline] + pub fn expect_quoted_string(&mut self) -> Result { + match try!(self.next()) { + Token::QuotedString(value) => Ok(value), + token => self.unexpected(token) } - }) - error(location, 
ErrorReason::MissingQualifiedRuleBlock) -} + } + #[inline] + pub fn expect_url(&mut self) -> Result { + match try!(self.next()) { + Token::Url(value) => Ok(value), + token => self.unexpected(token) + } + } -fn parse_declaration>(iter: &mut T, first: ComponentValue, - location: SourceLocation) - -> Result { - let name = match first { - Ident(name) => name, - _ => return error(location, ErrorReason::InvalidDeclarationSyntax) - }; - match next_non_whitespace(iter) { - Some((Colon, _)) => (), - _ => return error(location, ErrorReason::InvalidDeclarationSyntax), - } - let mut value = Vec::new(); - let mut important = false; - for_iter!(iter, (component_value, _location), { - match component_value { - Semicolon => break, - Delim('!') => if parse_declaration_important(iter) { - important = true; - break - } else { - return error(location, ErrorReason::InvalidBangImportantSyntax) - }, - component_value => value.push(component_value), + #[inline] + pub fn expect_number(&mut self) -> Result { + match try!(self.next()) { + Token::Number(NumericValue { value, .. }) => Ok(value), + token => self.unexpected(token) } - }) - Ok(Declaration{location: location, name: name, value: value, important: important}) -} + } + #[inline] + pub fn expect_integer(&mut self) -> Result { + match try!(self.next()) { + Token::Number(NumericValue { int_value, .. }) => int_value.ok_or(()), + token => self.unexpected(token) + } + } -#[inline] -fn parse_declaration_important>(iter: &mut T) -> bool { - let ident_value = match next_non_whitespace(iter) { - Some((Ident(value), _)) => value, - _ => return false, - }; - if !ident_value.as_slice().eq_ignore_ascii_case("important") { return false } - match next_non_whitespace(iter) { - Some((Semicolon, _)) => true, - None => true, - _ => false + #[inline] + pub fn expect_percentage(&mut self) -> Result { + match try!(self.next()) { + Token::Percentage(NumericValue { value, .. 
}) => Ok(value), + token => self.unexpected(token) + } } -} + #[inline] + pub fn expect_colon(&mut self) -> Result<(), ()> { + match try!(self.next()) { + Token::Colon => Ok(()), + token => self.unexpected(token) + } + } -#[inline] -fn next_non_whitespace>(iter: &mut T) -> Option { - for (component_value, location) in *iter { - if component_value != WhiteSpace { return Some((component_value, location)) } + #[inline] + pub fn expect_semicolon(&mut self) -> Result<(), ()> { + match try!(self.next()) { + Token::Semicolon => Ok(()), + token => self.unexpected(token) + } } - None -} + #[inline] + pub fn expect_comma(&mut self) -> Result<(), ()> { + match try!(self.next()) { + Token::Comma => Ok(()), + token => self.unexpected(token) + } + } -#[inline] -fn error(location: SourceLocation, reason: ErrorReason) -> Result { - Err(SyntaxError{location: location, reason: reason}) + #[inline] + pub fn expect_curly_bracke_block(&mut self) -> Result<(), ()> { + match try!(self.next()) { + Token::CurlyBracketBlock => Ok(()), + token => self.unexpected(token) + } + } } -// When parsing one thing on an empty input -static START_LOCATION: SourceLocation = SourceLocation{ line: 1, column: 1 }; +/// Return value indicates whether the end of the input was reached. +fn consume_until_end_of_block(block_type: BlockType, tokenizer: &mut Tokenizer) -> bool { + // FIXME: have a special-purpose tokenizer method for this that does less work. + while let Ok(ref token) = tokenizer.next() { + if BlockType::closing(token) == Some(block_type) { + return false + } + if let Some(block_type) = BlockType::opening(token) { + if consume_until_end_of_block(block_type, tokenizer) { + return true + } + } + } + true +} diff --git a/src/rules_and_declarations.rs b/src/rules_and_declarations.rs new file mode 100644 index 00000000..2dc3a115 --- /dev/null +++ b/src/rules_and_declarations.rs @@ -0,0 +1,284 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// http://dev.w3.org/csswg/css-syntax/#parsing + +use std::ascii::AsciiExt; +use super::{Token, Parser, Delimiter}; + + +#[deriving(Copy, Eq, PartialEq)] +pub enum Priority { + Normal, + Important, +} + + +pub fn parse_important(input: &mut Parser) -> Result { + match input.next() { + Ok(Token::Delim('!')) => { + match try!(input.next()) { + Token::Ident(ref value) if value.eq_ignore_ascii_case("important") => { + Ok(Priority::Important) + } + token => input.unexpected(token) + } + } + token => { + input.push_back_result(token); + Ok(Priority::Normal) + } + } +} + + +pub enum AtRulePrelude { + WithoutBlock(R), + WithBlock(P), + OptionalBlock(P), +} + + +// FIXME: Use associated types +pub trait DeclarationParser { + fn parse_value(&mut self, name: &str, input: &mut Parser) -> Result { + let _ = name; + let _ = input; + Err(()) + } +} + +pub trait AtRuleParser { + fn parse_prelude(&mut self, name: &str, input: &mut Parser) + -> Result, ()> { + let _ = name; + let _ = input; + Err(()) + } + + fn parse_block(&mut self, prelude: P, input: &mut Parser) -> Result { + let _ = prelude; + let _ = input; + Err(()) + } + + /// An `OptionalBlock` prelude was followed by `;` + fn rule_without_block(&mut self, prelude: P) -> Result { + let _ = prelude; + Err(()) + } +} + +pub trait QualifiedRuleParser { + fn parse_prelude(&mut self, input: &mut Parser) -> Result { + let _ = input; + Err(()) + } + + fn parse_block(&mut self, prelude: P, input: &mut Parser) -> Result { + let _ = prelude; + let _ = input; + Err(()) + } +} + + +pub struct DeclarationListParser<'i: 't, 't: 'a, 'a, AP, I, P> +where P: DeclarationParser + AtRuleParser { + input: &'a mut Parser<'i, 't>, + parser: P, +} + + +impl<'i, 't, 'a, AP, I, P> DeclarationListParser<'i, 't, 'a, AP, I, P> +where P: DeclarationParser + AtRuleParser { + pub fn new(input: &'a mut Parser<'i, 't>, parser: P) + -> 
DeclarationListParser<'i, 't, 'a, AP, I, P> { + DeclarationListParser { + input: input, + parser: parser, + } + } +} + +impl<'i, 't, 'a, AP, I, P> Iterator> +for DeclarationListParser<'i, 't, 'a, AP, I, P> +where P: DeclarationParser + AtRuleParser { + fn next(&mut self) -> Option> { + loop { + match self.input.next() { + Ok(Token::Semicolon) => {} + Ok(Token::Ident(name)) => { + return Some(parse_declaration(name, self.input, &mut self.parser)) + } + Ok(Token::AtKeyword(name)) => { + return Some(parse_at_rule(name, self.input, &mut self.parser)) + } + Ok(_) => { + return Some(self.input.err_consume_until_after(Delimiter::Semicolon)) + } + Err(()) => return None, + } + } + } +} + + +pub struct RuleListParser<'i: 't, 't: 'a, 'a, R, QP, AP, P> +where P: QualifiedRuleParser + AtRuleParser { + input: &'a mut Parser<'i, 't>, + parser: P, + is_stylesheet: bool, +} + + +impl<'i: 't, 't: 'a, 'a, R, QP, AP, P> RuleListParser<'i, 't, 'a, R, QP, AP, P> +where P: QualifiedRuleParser + AtRuleParser { + pub fn new_for_stylesheet(input: &'a mut Parser<'i, 't>, parser: P) + -> RuleListParser<'i, 't, 'a, R, QP, AP, P> { + RuleListParser { + input: input, + parser: parser, + is_stylesheet: true, + } + } + + pub fn new_for_nested_rule(input: &'a mut Parser<'i, 't>, parser: P) + -> RuleListParser<'i, 't, 'a, R, QP, AP, P> { + RuleListParser { + input: input, + parser: parser, + is_stylesheet: false, + } + } +} + + + +impl<'i, 't, 'a, R, QP, AP, P> Iterator> +for RuleListParser<'i, 't, 'a, R, QP, AP, P> +where P: QualifiedRuleParser + AtRuleParser { + fn next(&mut self) -> Option> { + loop { + match self.input.next() { + Ok(Token::CDO) | Ok(Token::CDC) if self.is_stylesheet => {} + Ok(Token::AtKeyword(name)) => { + return Some(parse_at_rule(name, self.input, &mut self.parser)) + } + Ok(token) => { + self.input.push_back(token); + return Some(parse_qualified_rule(self.input, &mut self.parser)) + } + Err(()) => return None, + } + } + } +} + +pub fn parse_one_declaration(input: &mut 
Parser, parser: &mut P) + -> Result + where P: DeclarationParser { + input.parse_entirely(|input| { + let name = try!(input.expect_ident()); + try!(input.expect_colon()); + parser.parse_value(name.as_slice(), input) + }) +} + + +pub fn parse_one_rule(input: &mut Parser, parser: &mut P) + -> Result + where P: QualifiedRuleParser + AtRuleParser { + input.parse_entirely(|input| { + match try!(input.next()) { + Token::AtKeyword(name) => { + parse_at_rule(name, input, parser) + } + token => { + input.push_back(token); + parse_qualified_rule(input, parser) + } + } + }) +} + + +fn parse_declaration(name: String, input: &mut Parser, parser: &mut P) + -> Result + where P: DeclarationParser { + let result = input.parse_until_before(Delimiter::Semicolon).parse_entirely(|input| { + try!(input.expect_colon()); + parser.parse_value(name.as_slice(), input) + }); + match input.next() { + Ok(Token::Semicolon) | Err(()) => result, + _ => input.err_consume_until_after(Delimiter::Semicolon) + } +} + + +fn parse_at_rule(name: String, input: &mut Parser, parser: &mut P) + -> Result + where P: AtRuleParser { + let delimiters = Delimiter::Semicolon | Delimiter::CurlyBracketBlock; + let result = try!(input.parse_until_before(delimiters).parse_entirely(|input| { + parser.parse_prelude(name.as_slice(), input) + }).or_else(|()| input.err_consume_until_after(delimiters))); + match result { + AtRulePrelude::WithoutBlock(rule) => { + match input.next() { + Ok(Token::Semicolon) | Err(()) => Ok(rule), + _ => input.err_consume_until_after(delimiters) + } + } + AtRulePrelude::WithBlock(prelude) => { + match input.next() { + Ok(Token::CurlyBracketBlock) => { + // FIXME: Make parse_entirely take `FnOnce` + // and remove this Option dance. 
+ let mut prelude = Some(prelude); + input.parse_nested_block().parse_entirely(|input| { + parser.parse_block(prelude.take().unwrap(), input) + }) + } + _ => input.err_consume_until_after(delimiters) + } + } + AtRulePrelude::OptionalBlock(prelude) => { + match input.next() { + Ok(Token::Semicolon) | Err(()) => parser.rule_without_block(prelude), + Ok(Token::CurlyBracketBlock) => { + // FIXME: Make parse_entirely take `FnOnce` + // and remove this Option dance. + let mut prelude = Some(prelude); + input.parse_nested_block().parse_entirely(|input| { + parser.parse_block(prelude.take().unwrap(), input) + }) + } + _ => input.err_consume_until_after(delimiters) + } + } + } +} + + +fn parse_qualified_rule(input: &mut Parser, parser: &mut P) + -> Result + where P: QualifiedRuleParser { + let prelude = try!(input.parse_until_before(Delimiter::CurlyBracketBlock) + .parse_entirely(|input| { + parser.parse_prelude(input) + }).or_else(|()| input.err_consume_until_after(Delimiter::CurlyBracketBlock))); + match input.next() { + Ok(Token::CurlyBracketBlock) => { + // FIXME: Make parse_entirely take `FnOnce` + // and remove this Option dance. + let mut prelude = Some(prelude); + input.parse_nested_block().parse_entirely(|input| { + parser.parse_block(prelude.take().unwrap(), input) + }) + } + _ => input.err_consume_until_after(Delimiter::CurlyBracketBlock) + } +} diff --git a/src/serializer.rs b/src/serializer.rs index 87b3ef6c..37234c39 100644 --- a/src/serializer.rs +++ b/src/serializer.rs @@ -6,8 +6,7 @@ use std::fmt; use text_writer::{mod, TextWriter}; -use ast::*; -use ast::ComponentValue::*; +use super::Token; pub trait ToCss for Sized? { @@ -43,38 +42,38 @@ pub trait ToCss for Sized? 
{ } -impl ToCss for ComponentValue { +impl ToCss for Token { fn to_css(&self, dest: &mut W) -> text_writer::Result where W: TextWriter { - match self { - &Ident(ref value) => try!(serialize_identifier(value.as_slice(), dest)), - &AtKeyword(ref value) => { + match *self { + Token::Ident(ref value) => try!(serialize_identifier(value.as_slice(), dest)), + Token::AtKeyword(ref value) => { try!(dest.write_char('@')); try!(serialize_identifier(value.as_slice(), dest)); }, - &Hash(ref value) => { + Token::Hash(ref value) => { try!(dest.write_char('#')); for c in value.as_slice().chars() { try!(serialize_char(c, dest, /* is_identifier_start = */ false)); } }, - &IDHash(ref value) => { + Token::IDHash(ref value) => { try!(dest.write_char('#')); try!(serialize_identifier(value.as_slice(), dest)); } - &QuotedString(ref value) => try!(serialize_string(value.as_slice(), dest)), - &URL(ref value) => { + Token::QuotedString(ref value) => try!(serialize_string(value.as_slice(), dest)), + Token::Url(ref value) => { try!(dest.write_str("url(")); try!(serialize_string(value.as_slice(), dest)); try!(dest.write_char(')')); }, - &Delim(value) => try!(dest.write_char(value)), + Token::Delim(value) => try!(dest.write_char(value)), - &Number(ref value) => try!(dest.write_str(value.representation.as_slice())), - &Percentage(ref value) => { + Token::Number(ref value) => try!(dest.write_str(value.representation.as_slice())), + Token::Percentage(ref value) => { try!(dest.write_str(value.representation.as_slice())); try!(dest.write_char('%')); }, - &Dimension(ref value, ref unit) => { + Token::Dimension(ref value, ref unit) => { try!(dest.write_str(value.representation.as_slice())); // Disambiguate with scientific notation. 
let unit = unit.as_slice(); @@ -88,53 +87,39 @@ impl ToCss for ComponentValue { } }, - &UnicodeRange(start, end) => { + Token::UnicodeRange(start, end) => { try!(dest.write_str(format!("U+{:X}", start).as_slice())); if end != start { try!(dest.write_str(format!("-{:X}", end).as_slice())); } } - &WhiteSpace => try!(dest.write_char(' ')), - &Colon => try!(dest.write_char(':')), - &Semicolon => try!(dest.write_char(';')), - &Comma => try!(dest.write_char(',')), - &IncludeMatch => try!(dest.write_str("~=")), - &DashMatch => try!(dest.write_str("|=")), - &PrefixMatch => try!(dest.write_str("^=")), - &SuffixMatch => try!(dest.write_str("$=")), - &SubstringMatch => try!(dest.write_str("*=")), - &Column => try!(dest.write_str("||")), - &CDO => try!(dest.write_str("")), - - &Function(ref name, ref arguments) => { + Token::WhiteSpace => try!(dest.write_char(' ')), + Token::Colon => try!(dest.write_char(':')), + Token::Semicolon => try!(dest.write_char(';')), + Token::Comma => try!(dest.write_char(',')), + Token::IncludeMatch => try!(dest.write_str("~=")), + Token::DashMatch => try!(dest.write_str("|=")), + Token::PrefixMatch => try!(dest.write_str("^=")), + Token::SuffixMatch => try!(dest.write_str("$=")), + Token::SubstringMatch => try!(dest.write_str("*=")), + Token::Column => try!(dest.write_str("||")), + Token::CDO => try!(dest.write_str("")), + + Token::Function(ref name) => { try!(serialize_identifier(name.as_slice(), dest)); try!(dest.write_char('(')); - try!(arguments.to_css(dest)); - try!(dest.write_char(')')); - }, - &ParenthesisBlock(ref content) => { - try!(dest.write_char('(')); - try!(content.to_css(dest)); - try!(dest.write_char(')')); - }, - &SquareBracketBlock(ref content) => { - try!(dest.write_char('[')); - try!(content.to_css(dest)); - try!(dest.write_char(']')); }, - &CurlyBracketBlock(ref content) => { - try!(dest.write_char('{')); - try!(content.to_css(dest)); - try!(dest.write_char('}')); - }, - - &BadURL => try!(dest.write_str("url()")), - &BadString 
=> try!(dest.write_str("\"\n")), - &CloseParenthesis => try!(dest.write_char(')')), - &CloseSquareBracket => try!(dest.write_char(']')), - &CloseCurlyBracket => try!(dest.write_char('}')), + Token::ParenthesisBlock => try!(dest.write_char('(')), + Token::SquareBracketBlock => try!(dest.write_char('[')), + Token::CurlyBracketBlock => try!(dest.write_char('{')), + + Token::BadUrl => try!(dest.write_str("url()")), + Token::BadString => try!(dest.write_str("\"\n")), + Token::CloseParenthesis => try!(dest.write_char(')')), + Token::CloseSquareBracket => try!(dest.write_char(']')), + Token::CloseCurlyBracket => try!(dest.write_char('}')), } Ok(()) } @@ -234,161 +219,68 @@ impl<'a, W> TextWriter for CssStringWriter<'a, W> where W: TextWriter { } -impl<'a> ToCss for [ComponentValue] { - fn to_css(&self, dest: &mut W) -> text_writer::Result where W: TextWriter { - component_values_to_css(self.iter(), dest) - } -} - -impl<'a> ToCss for [Node] { - fn to_css(&self, dest: &mut W) -> text_writer::Result where W: TextWriter { - let component_values = self.iter().map(|n| match n { &(ref c, _) => c }); - component_values_to_css(component_values, dest) - } -} - -fn component_values_to_css<'a, I, W>(mut iter: I, dest: &mut W) -> text_writer::Result -where I: Iterator<&'a ComponentValue>, W: TextWriter { - let mut previous = match iter.next() { - None => return Ok(()), - Some(first) => { try!(first.to_css(dest)); first } - }; - macro_rules! matches( - ($value:expr, $($pattern:pat)|+) => ( - match $value { $($pattern)|+ => true, _ => false } - ); - ) - // This does not borrow-check: for component_value in iter { - loop { match iter.next() { None => break, Some(component_value) => { - let (a, b) = (previous, component_value); - if ( - matches!(*a, Ident(..) | AtKeyword(..) | Hash(..) | IDHash(..) | - Dimension(..) | Delim('#') | Delim('-') | Number(..)) && - matches!(*b, Ident(..) | Function(..) | URL(..) | BadURL(..) | - Number(..) | Percentage(..) | Dimension(..) 
| UnicodeRange(..)) - ) || ( - matches!(*a, Ident(..)) && - matches!(*b, ParenthesisBlock(..)) - ) || ( - matches!(*a, Ident(..) | AtKeyword(..) | Hash(..) | IDHash(..) | Dimension(..)) && - matches!(*b, Delim('-') | CDC) - ) || ( - matches!(*a, Delim('#') | Delim('-') | Number(..) | Delim('@')) && - matches!(*b, Ident(..) | Function(..) | URL(..) | BadURL(..)) - ) || ( - matches!(*a, Delim('@')) && - matches!(*b, Ident(..) | Function(..) | URL(..) | BadURL(..) | - UnicodeRange(..) | Delim('-')) - ) || ( - matches!(*a, UnicodeRange(..) | Delim('.') | Delim('+')) && - matches!(*b, Number(..) | Percentage(..) | Dimension(..)) - ) || ( - matches!(*a, UnicodeRange(..)) && - matches!(*b, Ident(..) | Function(..) | Delim('?')) - ) || (match (a, b) { (&Delim(a), &Delim(b)) => matches!((a, b), - ('#', '-') | - ('$', '=') | - ('*', '=') | - ('^', '=') | - ('~', '=') | - ('|', '=') | - ('|', '|') | - ('/', '*') - ), _ => false }) { - try!(dest.write_str("/**/")); - } - // Skip whitespace when '\n' was previously written at the previous iteration. 
- if !matches!((previous, component_value), (&Delim('\\'), &WhiteSpace)) { - try!(component_value.to_css(dest)); - } - if component_value == &Delim('\\') { - try!(dest.write_char('\n')); - } - previous = component_value; - }}} - Ok(()) -} - - -impl ToCss for Declaration { - fn to_css(&self, dest: &mut W) -> text_writer::Result where W: TextWriter { - try!(dest.write_str(self.name.as_slice())); - try!(dest.write_char(':')); - try!(self.value.to_css(dest)); - Ok(()) - } -} - - -impl ToCss for [Declaration] { - fn to_css(&self, dest: &mut W) -> text_writer::Result where W: TextWriter { - for declaration in self.iter() { - try!(declaration.to_css(dest)); - try!(dest.write_char(';')); - } - Ok(()) - } -} - - -impl ToCss for QualifiedRule { - fn to_css(&self, dest: &mut W) -> text_writer::Result where W: TextWriter { - try!(self.prelude.to_css(dest)); - try!(dest.write_char('{')); - try!(self.block.to_css(dest)); - try!(dest.write_char('}')); - Ok(()) - } -} - - -impl ToCss for AtRule { +impl<'a> ToCss for [Token] { fn to_css(&self, dest: &mut W) -> text_writer::Result where W: TextWriter { - try!(dest.write_char('@')); - try!(dest.write_str(self.name.as_slice())); - try!(self.prelude.to_css(dest)); - match self.block { - Some(ref block) => { - try!(dest.write_char('{')); - try!(block.to_css(dest)); - try!(dest.write_char('}')); + use Token::*; + + let mut iter = self.iter(); + let mut previous = match iter.next() { + None => return Ok(()), + Some(first) => { try!(first.to_css(dest)); first } + }; + macro_rules! matches( + ($value:expr, $($pattern:pat)|+) => ( + match $value { $($pattern)|+ => true, _ => false } + ); + ) + // This does not borrow-check: for component_value in iter { + loop { match iter.next() { None => break, Some(component_value) => { + let (a, b) = (previous, component_value); + if ( + matches!(*a, Ident(..) | AtKeyword(..) | Hash(..) | IDHash(..) | + Dimension(..) | Delim('#') | Delim('-') | Number(..)) && + matches!(*b, Ident(..) | Function(..) 
| Url(..) | BadUrl(..) | + Number(..) | Percentage(..) | Dimension(..) | UnicodeRange(..)) + ) || ( + matches!(*a, Ident(..)) && + matches!(*b, ParenthesisBlock(..)) + ) || ( + matches!(*a, Ident(..) | AtKeyword(..) | Hash(..) | IDHash(..) | Dimension(..)) && + matches!(*b, Delim('-') | CDC) + ) || ( + matches!(*a, Delim('#') | Delim('-') | Number(..) | Delim('@')) && + matches!(*b, Ident(..) | Function(..) | Url(..) | BadUrl(..)) + ) || ( + matches!(*a, Delim('@')) && + matches!(*b, Ident(..) | Function(..) | Url(..) | BadUrl(..) | + UnicodeRange(..) | Delim('-')) + ) || ( + matches!(*a, UnicodeRange(..) | Delim('.') | Delim('+')) && + matches!(*b, Number(..) | Percentage(..) | Dimension(..)) + ) || ( + matches!(*a, UnicodeRange(..)) && + matches!(*b, Ident(..) | Function(..) | Delim('?')) + ) || (match (a, b) { (&Delim(a), &Delim(b)) => matches!((a, b), + ('#', '-') | + ('$', '=') | + ('*', '=') | + ('^', '=') | + ('~', '=') | + ('|', '=') | + ('|', '|') | + ('/', '*') + ), _ => false }) { + try!(dest.write_str("/**/")); } - None => try!(dest.write_char(';')) - } - Ok(()) - } -} - - -impl ToCss for DeclarationListItem { - fn to_css(&self, dest: &mut W) -> text_writer::Result where W: TextWriter { - match self { - &DeclarationListItem::Declaration(ref declaration) => declaration.to_css(dest), - &DeclarationListItem::AtRule(ref at_rule) => at_rule.to_css(dest), - } - } -} - - -impl ToCss for [DeclarationListItem] { - fn to_css(&self, dest: &mut W) -> text_writer::Result where W: TextWriter { - for item in self.iter() { - try!(item.to_css(dest)); - match item { - &DeclarationListItem::AtRule(_) => {} - &DeclarationListItem::Declaration(_) => try!(dest.write_char(';')) + // Skip whitespace when '\n' was previously written at the previous iteration. 
+ if !matches!((previous, component_value), (&Delim('\\'), &WhiteSpace)) { + try!(component_value.to_css(dest)); } - } + if component_value == &Delim('\\') { + try!(dest.write_char('\n')); + } + previous = component_value; + }}} Ok(()) } } - - -impl ToCss for Rule { - fn to_css(&self, dest: &mut W) -> text_writer::Result where W: TextWriter { - match self { - &Rule::QualifiedRule(ref rule) => rule.to_css(dest), - &Rule::AtRule(ref rule) => rule.to_css(dest), - } - } -} diff --git a/src/tests.rs b/src/tests.rs index 32afc088..395194b5 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -2,25 +2,25 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -use std::io; -use std::io::{File, Command, Writer, TempDir, IoResult}; +use std::io::{mod, File, Command, Writer, TempDir, IoResult}; use std::num::Float; -use serialize::{json}; -use serialize::json::ToJson; +use std::mem; +use serialize::json::{mod, Json, ToJson}; use test; use encoding::label::encoding_from_whatwg_label; -use super::*; -use ast::*; -use ast::ComponentValue::*; +use super::{Tokenizer, Parser, Token, NumericValue, + DeclarationListParser, DeclarationParser, RuleListParser, + AtRulePrelude, AtRuleParser, QualifiedRuleParser, Priority, + parse_one_declaration, parse_one_rule, parse_important, + parse_stylesheet_rules_from_bytes, + Color, RGBA, parse_nth, ToCss}; -macro_rules! JString { - ($e: expr) => { json::String($e.to_string()) } -} macro_rules! 
JList { - ($($e: expr),*) => { json::List(vec!( $($e),* )) } + ($($e: expr,)*) => { JList![ $( $e ),* ] }; + ($($e: expr),*) => { json::List(vec![ $( $e.to_json() ),* ]) }; } @@ -62,16 +62,68 @@ fn almost_equals(a: &json::Json, b: &json::Json) -> bool { (&json::Boolean(a), &json::Boolean(b)) => a == b, (&json::String(ref a), &json::String(ref b)) => a == b, - (&json::List(ref a), &json::List(ref b)) - => a.iter().zip(b.iter()).all(|(ref a, ref b)| almost_equals(*a, *b)), + (&json::List(ref a), &json::List(ref b)) => { + a.len() == b.len() && + a.iter().zip(b.iter()).all(|(ref a, ref b)| almost_equals(*a, *b)) + }, (&json::Object(_), &json::Object(_)) => panic!("Not implemented"), (&json::Null, &json::Null) => true, _ => false, } } +fn normalize(json: &mut Json) { + match *json { + Json::List(ref mut list) => { + match find_url(list.as_mut_slice()) { + Some(Ok(url)) => *list = vec!["url".to_json(), Json::String(url)], + Some(Err(())) => *list = vec!["error".to_json(), "bad-url".to_json()], + None => { + for item in list.iter_mut() { + normalize(item) + } + } + } + } + Json::String(ref mut s) => { + if s.as_slice() == "extra-input" || s.as_slice() == "empty" { + *s = "invalid".into_string() + } + } + _ => {} + } +} -fn assert_json_eq(results: json::Json, expected: json::Json, message: String) { +fn find_url(list: &mut [Json]) -> Option> { + if let [Json::String(ref a1), Json::String(ref a2), ..] = list.as_mut_slice() { + if !(a1.as_slice() == "function" && a2.as_slice() == "url") { + return None + } + } else { + return None + }; + let args = list.slice_from_mut(2); + + let args = if !args.is_empty() && args[0] == " ".to_json() { + args.slice_from_mut(1) + } else { + args.as_mut_slice() + }; + + if let [Json::List(ref mut arg1), ref rest..] 
= args.as_mut_slice() { + if let [Json::String(ref a11), Json::String(ref mut a12)] = arg1.as_mut_slice() { + if a11.as_slice() == "string" && rest.iter().all(|a| a == &" ".to_json()) { + return Some(Ok(mem::replace(a12, String::new()))) + } + } + } + + Some(Err(())) +} + + +fn assert_json_eq(results: json::Json, mut expected: json::Json, message: String) { + normalize(&mut expected); if !almost_equals(&results, &expected) { print_json_diff(&results, &expected).unwrap(); panic!(message) @@ -98,11 +150,12 @@ fn run_raw_json_tests(json_data: &str, run: |json::Json, json::Json|) { } -fn run_json_tests(json_data: &str, parse: |input: &str| -> T) { +fn run_json_tests(json_data: &str, parse: |input: &mut Parser| -> Json) { run_raw_json_tests(json_data, |input, expected| { match input { json::String(input) => { - let result = parse(input.as_slice()).to_json(); + // FIXME: Use Parser::parse_str when unboxed closures permit. + let result = parse(&mut Parser::new(&mut Tokenizer::new(input.as_slice()))); assert_json_eq(result, expected, input); }, _ => panic!("Unexpected JSON") @@ -114,7 +167,7 @@ fn run_json_tests(json_data: &str, parse: |input: &str| -> T) { #[test] fn component_value_list() { run_json_tests(include_str!("css-parsing-tests/component_value_list.json"), |input| { - tokenize(input).map(|(c, _)| c).collect::>() + Json::List(component_values_to_json(input)) }); } @@ -122,7 +175,9 @@ fn component_value_list() { #[test] fn one_component_value() { run_json_tests(include_str!("css-parsing-tests/one_component_value.json"), |input| { - parse_one_component_value(tokenize(input)) + input.parse_entirely(|input| { + Ok(one_component_value_to_json(try!(input.next()), input)) + }).unwrap_or(JList!["error", "invalid"]) }); } @@ -130,7 +185,9 @@ fn one_component_value() { #[test] fn declaration_list() { run_json_tests(include_str!("css-parsing-tests/declaration_list.json"), |input| { - parse_declaration_list(tokenize(input)).collect::>>() + 
Json::List(DeclarationListParser::new(input, JsonParser).map(|result| { + result.unwrap_or(JList!["error", "invalid"]) + }).collect()) }); } @@ -138,7 +195,7 @@ fn declaration_list() { #[test] fn one_declaration() { run_json_tests(include_str!("css-parsing-tests/one_declaration.json"), |input| { - parse_one_declaration(tokenize(input)) + parse_one_declaration(input, &mut JsonParser).unwrap_or(JList!["error", "invalid"]) }); } @@ -146,7 +203,9 @@ fn one_declaration() { #[test] fn rule_list() { run_json_tests(include_str!("css-parsing-tests/rule_list.json"), |input| { - parse_rule_list(tokenize(input)).collect::>>() + Json::List(RuleListParser::new_for_nested_rule(input, JsonParser).map(|result| { + result.unwrap_or(JList!["error", "invalid"]) + }).collect()) }); } @@ -154,7 +213,9 @@ fn rule_list() { #[test] fn stylesheet() { run_json_tests(include_str!("css-parsing-tests/stylesheet.json"), |input| { - parse_stylesheet_rules(tokenize(input)).collect::>>() + Json::List(RuleListParser::new_for_stylesheet(input, JsonParser).map(|result| { + result.unwrap_or(JList!["error", "invalid"]) + }).collect()) }); } @@ -162,7 +223,7 @@ fn stylesheet() { #[test] fn one_rule() { run_json_tests(include_str!("css-parsing-tests/one_rule.json"), |input| { - parse_one_rule(tokenize(input)) + parse_one_rule(input, &mut JsonParser).unwrap_or(JList!["error", "invalid"]) }); } @@ -170,7 +231,7 @@ fn one_rule() { #[test] fn stylesheet_from_bytes() { run_raw_json_tests(include_str!("css-parsing-tests/stylesheet_bytes.json"), - |input, expected| { + |input, expected| { let map = match input { json::Object(map) => map, _ => panic!("Unexpected JSON") @@ -185,11 +246,16 @@ fn stylesheet_from_bytes() { let environment_encoding = get_string(&map, &"environment_encoding".to_string()) .and_then(encoding_from_whatwg_label); - let (css_unicode, used_encoding) = decode_stylesheet_bytes( - css.as_slice(), protocol_encoding_label, environment_encoding); - let mut rules = 
parse_stylesheet_rules(tokenize(css_unicode.as_slice())); - - (rules.collect::>>(), used_encoding.name().to_string()).to_json() + parse_stylesheet_rules_from_bytes( + css.as_slice(), protocol_encoding_label, environment_encoding, + JsonParser, |encoding, rules| { + Json::List(vec![ + Json::List(rules.map(|result| { + result.unwrap_or(JList!["error", "invalid"]) + }).collect()), + encoding.name().to_json() + ]) + }) }; assert_json_eq(result, expected, json::Object(map).to_string()); }); @@ -205,25 +271,22 @@ fn stylesheet_from_bytes() { } -fn run_color_tests(json_data: &str, to_json: |result: Option| -> json::Json) { +fn run_color_tests(json_data: &str, to_json: |result: Result| -> json::Json) { run_json_tests(json_data, |input| { - match parse_one_component_value(tokenize(input)) { - Ok(component_value) => to_json(Color::parse(&component_value).ok()), - Err(_reason) => json::Null, - } + to_json(input.parse_entirely(Color::parse)) }); } #[test] fn color3() { - run_color_tests(include_str!("css-parsing-tests/color3.json"), |c| c.to_json()) + run_color_tests(include_str!("css-parsing-tests/color3.json"), |c| c.ok().to_json()) } #[test] fn color3_hsl() { - run_color_tests(include_str!("css-parsing-tests/color3_hsl.json"), |c| c.to_json()) + run_color_tests(include_str!("css-parsing-tests/color3_hsl.json"), |c| c.ok().to_json()) } @@ -232,10 +295,10 @@ fn color3_hsl() { fn color3_keywords() { run_color_tests(include_str!("css-parsing-tests/color3_keywords.json"), |c| { match c { - Some(Color::RGBA(RGBA { red: r, green: g, blue: b, alpha: a })) - => vec!(r * 255., g * 255., b * 255., a).to_json(), - Some(Color::CurrentColor) => JString!("currentColor"), - None => json::Null, + Ok(Color::RGBA(RGBA { red: r, green: g, blue: b, alpha: a })) + => [r * 255., g * 255., b * 255., a].to_json(), + Ok(Color::CurrentColor) => "currentColor".to_json(), + Err(()) => json::Null, } }); } @@ -243,29 +306,32 @@ fn color3_keywords() { #[bench] fn bench_color_lookup_red(b: &mut 
test::Bencher) { - let ident = parse_one_component_value(tokenize("red")).unwrap(); - b.iter(|| assert!(Color::parse(&ident).is_ok())); + b.iter(|| { + assert!(Parser::parse_str("red", Color::parse).is_ok()) + }); } #[bench] fn bench_color_lookup_lightgoldenrodyellow(b: &mut test::Bencher) { - let ident = parse_one_component_value(tokenize("lightgoldenrodyellow")).unwrap(); - b.iter(|| assert!(Color::parse(&ident).is_ok())); + b.iter(|| { + assert!(Parser::parse_str("lightgoldenrodyellow", Color::parse).is_ok()) + }); } #[bench] fn bench_color_lookup_fail(b: &mut test::Bencher) { - let ident = parse_one_component_value(tokenize("lightgoldenrodyellowbazinga")).unwrap(); - b.iter(|| assert!(Color::parse(&ident).is_err())); + b.iter(|| { + assert!(Parser::parse_str("lightgoldenrodyellowbazinga", Color::parse).is_err()) + }); } #[test] fn nth() { run_json_tests(include_str!("css-parsing-tests/An+B.json"), |input| { - parse_nth(tokenize(input).map(|(c, _)| c).collect::>().as_slice()).ok() + input.parse_entirely(parse_nth).ok().to_json() }); } @@ -273,9 +339,25 @@ fn nth() { #[test] fn serializer() { run_json_tests(include_str!("css-parsing-tests/component_value_list.json"), |input| { - let component_values = tokenize(input).map(|(c, _)| c).collect::>(); - let serialized = component_values.to_css_string(); - tokenize(serialized.as_slice()).map(|(c, _)| c).collect::>() + fn flatten(input: &mut Parser, tokens: &mut Vec) { + while let Ok(token) = input.next_including_whitespace() { + let closing_token = match token { + Token::Function(_) | Token::ParenthesisBlock => Some(Token::CloseParenthesis), + Token::SquareBracketBlock => Some(Token::CloseSquareBracket), + Token::CurlyBracketBlock => Some(Token::CloseCurlyBracket), + _ => None + }; + tokens.push(token); + if let Some(closing_token) = closing_token { + flatten(&mut input.parse_nested_block(), tokens); + tokens.push(closing_token); + } + } + } + let mut tokens = vec![]; + flatten(input, &mut tokens); + let serialized =
tokens.to_css_string(); + Json::List(Parser::parse_str(serialized.as_slice(), component_values_to_json)) }); } @@ -283,204 +365,182 @@ fn serializer() { #[test] fn serialize_current_color() { let c = Color::CurrentColor; - assert!(format!("{}", c).as_slice() == "currentColor"); + assert!(c.to_css_string().as_slice() == "currentColor"); } #[test] fn serialize_rgb_full_alpha() { let c = Color::RGBA(RGBA { red: 1.0, green: 0.9, blue: 0.8, alpha: 1.0 }); - assert!(format!("{}", c).as_slice() == "rgb(255, 230, 204)"); + assert!(c.to_css_string().as_slice() == "rgb(255, 230, 204)"); } #[test] fn serialize_rgba() { let c = Color::RGBA(RGBA { red: 0.1, green: 0.2, blue: 0.3, alpha: 0.5 }); - assert!(format!("{}", c).as_slice() == "rgba(26, 51, 77, 0.5)"); + assert!(c.to_css_string().as_slice() == "rgba(26, 51, 77, 0.5)"); } -impl ToJson for Result { +impl ToJson for Color { fn to_json(&self) -> json::Json { match *self { - Ok(ref a) => a.to_json(), - Err(ref b) => b.to_json(), + Color::RGBA(RGBA { red, green, blue, alpha }) => { + [red, green, blue, alpha].to_json() + }, + Color::CurrentColor => "currentColor".to_json(), } } } -impl ToJson for Result { - fn to_json(&self) -> json::Json { - match *self { - Ok(ref a) => a.to_json(), - Err(ref b) => b.to_json(), +struct JsonParser; + + +impl DeclarationParser for JsonParser { + fn parse_value(&mut self, name: &str, input: &mut Parser) -> Result { + let mut value = vec![]; + let mut important = false; + while let Ok(mut token) = input.next_including_whitespace() { + if token == Token::Delim('!') { + input.push_back(token); + match parse_important(input) { + Ok(Priority::Important) => { + if input.is_exhausted() { + important = true; + break + } + // Hack to deal with css-parsing-tests assuming that + // `!important` in the middle of a declaration value is OK. 
+ // This can never happen per spec + // (even CSS Variables forbid top-level `!`) + value.push("!".to_json()); + token = Token::Ident("important".into_string()); + } + // More hacks + Ok(Priority::Normal) => { + token = input.next_including_whitespace().unwrap(); + assert!(token == Token::Delim('!')); + } + Err(()) => token = Token::Delim('!') + } + } + value.push(one_component_value_to_json(token, input)); } + Ok(JList![ + "declaration", + name, + value, + important, + ]) } } - -impl ToJson for Result { - fn to_json(&self) -> json::Json { - match *self { - Ok(ref a) => a.to_json(), - Err(ref b) => b.to_json(), - } +impl AtRuleParser, Json> for JsonParser { + fn parse_prelude(&mut self, name: &str, input: &mut Parser) + -> Result, Json>, ()> { + Ok(AtRulePrelude::OptionalBlock(vec![ + "at-rule".to_json(), + name.to_json(), + Json::List(component_values_to_json(input)), + ])) } -} - -impl ToJson for Result { - fn to_json(&self) -> json::Json { - match *self { - Ok(ref a) => a.to_json(), - Err(ref b) => b.to_json(), - } + fn parse_block(&mut self, mut prelude: Vec, input: &mut Parser) -> Result { + prelude.push(Json::List(component_values_to_json(input))); + Ok(Json::List(prelude)) } -} - -impl ToJson for SyntaxError { - fn to_json(&self) -> json::Json { - json::List(vec!(JString!("error"), JString!(match self.reason { - ErrorReason::EmptyInput => "empty", - ErrorReason::ExtraInput => "extra-input", - _ => "invalid", - }))) + fn rule_without_block(&mut self, mut prelude: Vec) -> Result { + prelude.push(Json::Null); + Ok(Json::List(prelude)) } } - -impl ToJson for Color { - fn to_json(&self) -> json::Json { - match *self { - Color::RGBA(RGBA { red: r, green: g, blue: b, alpha: a }) => vec!(r, g, b, a).to_json(), - Color::CurrentColor => JString!("currentColor"), - } +impl QualifiedRuleParser, Json> for JsonParser { + fn parse_prelude(&mut self, input: &mut Parser) -> Result, ()> { + Ok(component_values_to_json(input)) } -} - -impl ToJson for Rule { - fn 
to_json(&self) -> json::Json { - match *self { - Rule::QualifiedRule(ref rule) => rule.to_json(), - Rule::AtRule(ref rule) => rule.to_json(), - } + fn parse_block(&mut self, prelude: Vec, input: &mut Parser) -> Result { + Ok(JList![ + "qualified rule", + prelude, + component_values_to_json(input), + ]) } } - -impl ToJson for DeclarationListItem { - fn to_json(&self) -> json::Json { - match *self { - DeclarationListItem::Declaration(ref declaration) => declaration.to_json(), - DeclarationListItem::AtRule(ref at_rule) => at_rule.to_json(), - } +fn component_values_to_json(input: &mut Parser) -> Vec { + let mut values = vec![]; + while let Ok(token) = input.next_including_whitespace() { + values.push(one_component_value_to_json(token, input)); } + values } -fn list_to_json(list: &Vec<(ComponentValue, SourceLocation)>) -> Vec { - list.iter().map(|tuple| { - match *tuple { - (ref c, _) => c.to_json() - } - }).collect() -} - - -impl ToJson for AtRule { - fn to_json(&self) -> json::Json { - match *self { - AtRule{ ref name, ref prelude, ref block, ..} - => json::List(vec!(JString!("at-rule"), name.to_json(), - prelude.to_json(), block.as_ref().map(list_to_json).to_json())) +fn one_component_value_to_json(token: Token, input: &mut Parser) -> Json { + fn numeric(value: NumericValue) -> Vec { + match value { + NumericValue{representation: r, value: v, int_value: i} + => vec![r.to_json(), v.to_json(), + match i { Some(_) => "integer", None => "number" }.to_json()] } } -} - -impl ToJson for QualifiedRule { - fn to_json(&self) -> json::Json { - match *self { - QualifiedRule{ ref prelude, ref block, ..} - => json::List(vec!(JString!("qualified rule"), - prelude.to_json(), json::List(list_to_json(block)))) + match token { + Token::Ident(value) => JList!["ident", value], + Token::AtKeyword(value) => JList!["at-keyword", value], + Token::Hash(value) => JList!["hash", value, "unrestricted"], + Token::IDHash(value) => JList!["hash", value, "id"], + Token::QuotedString(value) => 
JList!["string", value], + Token::Url(value) => JList!["url", value], + Token::Delim('\\') => "\\".to_json(), + Token::Delim(value) => String::from_char(1, value).to_json(), + + Token::Number(value) => json::List(vec!["number".to_json()] + numeric(value)), + Token::Percentage(value) => json::List( + vec!["percentage".to_json()] + numeric(value)), + Token::Dimension(value, unit) => json::List( + vec!["dimension".to_json()] + numeric(value) + [unit.to_json()].as_slice()), + + Token::UnicodeRange(start, end) => JList!["unicode-range", start, end], + + Token::WhiteSpace => " ".to_json(), + Token::Colon => ":".to_json(), + Token::Semicolon => ";".to_json(), + Token::Comma => ",".to_json(), + Token::IncludeMatch => "~=".to_json(), + Token::DashMatch => "|=".to_json(), + Token::PrefixMatch => "^=".to_json(), + Token::SuffixMatch => "$=".to_json(), + Token::SubstringMatch => "*=".to_json(), + Token::Column => "||".to_json(), + Token::CDO => "".to_json(), + + Token::Function(name) => { + json::List(vec!["function".to_json(), name.to_json()] + + component_values_to_json(&mut input.parse_nested_block())) } - } -} - - -impl ToJson for Declaration { - fn to_json(&self) -> json::Json { - match *self { - Declaration{ ref name, ref value, ref important, ..} - => json::List(vec!(JString!("declaration"), name.to_json(), - value.to_json(), important.to_json())) + Token::ParenthesisBlock => { + json::List(vec!["()".to_json()] + + component_values_to_json(&mut input.parse_nested_block())) } - } -} - - -impl ToJson for ComponentValue { - fn to_json(&self) -> json::Json { - fn numeric(value: &NumericValue) -> Vec { - match *value { - NumericValue{representation: ref r, value: ref v, int_value: ref i} - => vec!(r.to_json(), v.to_json(), - JString!(match *i { Some(_) => "integer", _ => "number" })) - } + Token::SquareBracketBlock => { + json::List(vec!["[]".to_json()] + + component_values_to_json(&mut input.parse_nested_block())) } - - match *self { - Ident(ref value) => 
JList!(JString!("ident"), value.to_json()), - AtKeyword(ref value) => JList!(JString!("at-keyword"), value.to_json()), - Hash(ref value) => JList!(JString!("hash"), value.to_json(), - JString!("unrestricted")), - IDHash(ref value) => JList!(JString!("hash"), value.to_json(), JString!("id")), - QuotedString(ref value) => JList!(JString!("string"), value.to_json()), - URL(ref value) => JList!(JString!("url"), value.to_json()), - Delim('\\') => JString!("\\"), - Delim(value) => json::String(String::from_char(1, value)), - - Number(ref value) => json::List(vec!(JString!("number")) + numeric(value)), - Percentage(ref value) => json::List(vec!(JString!("percentage")) + numeric(value)), - Dimension(ref value, ref unit) => json::List( - vec!(JString!("dimension")) + numeric(value) + [unit.to_json()].as_slice()), - - UnicodeRange(start, end) - => JList!(JString!("unicode-range"), start.to_json(), end.to_json()), - - WhiteSpace => JString!(" "), - Colon => JString!(":"), - Semicolon => JString!(";"), - Comma => JString!(","), - IncludeMatch => JString!("~="), - DashMatch => JString!("|="), - PrefixMatch => JString!("^="), - SuffixMatch => JString!("$="), - SubstringMatch => JString!("*="), - Column => JString!("||"), - CDO => JString!(""), - - Function(ref name, ref arguments) - => json::List(vec!(JString!("function"), name.to_json()) + - arguments.iter().map(|a| a.to_json()).collect::>()), - ParenthesisBlock(ref content) - => json::List(vec!(JString!("()")) + content.iter().map(|c| c.to_json()).collect::>()), - SquareBracketBlock(ref content) - => json::List(vec!(JString!("[]")) + content.iter().map(|c| c.to_json()).collect::>()), - CurlyBracketBlock(ref content) - => json::List(vec!(JString!("{}")) + list_to_json(content)), - - BadURL => JList!(JString!("error"), JString!("bad-url")), - BadString => JList!(JString!("error"), JString!("bad-string")), - CloseParenthesis => JList!(JString!("error"), JString!(")")), - CloseSquareBracket => JList!(JString!("error"), 
JString!("]")), - CloseCurlyBracket => JList!(JString!("error"), JString!("}")), + Token::CurlyBracketBlock => { + json::List(vec!["{}".to_json()] + + component_values_to_json(&mut input.parse_nested_block())) } + Token::BadUrl => JList!["error", "bad-url"], + Token::BadString => JList!["error", "bad-string"], + Token::CloseParenthesis => JList!["error", ")"], + Token::CloseSquareBracket => JList!["error", "]"], + Token::CloseCurlyBracket => JList!["error", "}"], } } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 195ea28d..bcd22c4a 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -7,32 +7,68 @@ use std::{char, num}; use std::ascii::AsciiExt; -use ast::*; -use ast::ComponentValue::*; - - -/// Returns a `Iterator<(ComponentValue, SourceLocation)>` -pub fn tokenize(input: &str) -> Tokenizer { - Tokenizer { - length: input.len(), - input: input, - position: 0, - } -} - -impl<'a> Iterator for Tokenizer<'a> { - #[inline] - fn next(&mut self) -> Option { next_component_value(self) } +use self::Token::*; + + +#[deriving(PartialEq, Show)] +pub enum Token { + // Preserved tokens. + Ident(String), + AtKeyword(String), + Hash(String), + IDHash(String), // Hash that is a valid ID selector. 
+ QuotedString(String), + Url(String), + Delim(char), + Number(NumericValue), + Percentage(NumericValue), + Dimension(NumericValue, String), + UnicodeRange(u32, u32), // (start, end) of range + WhiteSpace, + Colon, // : + Semicolon, // ; + Comma, // , + IncludeMatch, // ~= + DashMatch, // |= + PrefixMatch, // ^= + SuffixMatch, // $= + SubstringMatch, // *= + Column, // || + CDO, // + + // Function + Function(String), // name + + // Simple block + ParenthesisBlock, // (…) + SquareBracketBlock, // […] + CurlyBracketBlock, // {…} + + // These are always invalid + BadUrl, + BadString, + CloseParenthesis, // ) + CloseSquareBracket, // ] + CloseCurlyBracket, // } } -// *********** End of public API *********** +#[deriving(PartialEq, Show)] +pub struct NumericValue { + pub representation: String, + pub value: f64, + pub int_value: Option, +} pub struct Tokenizer<'a> { input: &'a str, length: uint, // All counted in bytes, not characters position: uint, // All counted in bytes, not characters + + /// For `peek` and `push_back` + buffer: Option, } macro_rules! is_match( @@ -43,6 +79,43 @@ macro_rules! 
is_match( impl<'a> Tokenizer<'a> { + #[inline] + pub fn new(input: &str) -> Tokenizer { + Tokenizer { + length: input.len(), + input: input, + position: 0, + buffer: None, + } + } + + #[inline] + pub fn next(&mut self) -> Result { + if let Some(token) = self.buffer.take() { + Ok(token) + } else { + next_token(self).ok_or(()) + } + } + + #[inline] + pub fn peek(&mut self) -> Result<&Token, ()> { + match self.buffer { + Some(ref token) => Ok(token), + None => { + self.buffer = next_token(self); + self.buffer.as_ref().ok_or(()) + } + } + } + + #[inline] + pub fn push_back(&mut self, token: Token) { + assert!(self.buffer.is_none(), + "Parser::push_back can only be called after Parser::next"); + self.buffer = Some(token); + } + #[inline] fn is_eof(&self) -> bool { self.position >= self.length } @@ -75,18 +148,13 @@ impl<'a> Tokenizer<'a> { } -fn next_component_value(tokenizer: &mut Tokenizer) -> Option { +fn next_token(tokenizer: &mut Tokenizer) -> Option { consume_comments(tokenizer); if tokenizer.is_eof() { return None } - let start_location = SourceLocation{ - // FIXME - line: 0, - column: tokenizer.position, - }; let c = tokenizer.current_char(); - let component_value = match c { + let token = match c { '\t' | '\n' | ' ' | '\r' | '\x0C' => { while !tokenizer.is_eof() { match tokenizer.current_char() { @@ -112,7 +180,7 @@ fn next_component_value(tokenizer: &mut Tokenizer) -> Option { else { tokenizer.position += 1; Delim(c) } }, '\'' => consume_string(tokenizer, true), - '(' => ParenthesisBlock(consume_block(tokenizer, CloseParenthesis)), + '(' => { tokenizer.position += 1; ParenthesisBlock }, ')' => { tokenizer.position += 1; CloseParenthesis }, '*' => { if tokenizer.starts_with("*=") { tokenizer.position += 2; SubstringMatch } @@ -189,7 +257,7 @@ fn next_component_value(tokenizer: &mut Tokenizer) -> Option { else { consume_ident_like(tokenizer) } }, 'a'...'z' | 'A'...'Z' | '_' | '\0' => consume_ident_like(tokenizer), - '[' => 
SquareBracketBlock(consume_block(tokenizer, CloseSquareBracket)), + '[' => { tokenizer.position += 1; SquareBracketBlock }, '\\' => { if !tokenizer.has_newline_at(1) { consume_ident_like(tokenizer) } else { tokenizer.position += 1; Delim(c) } @@ -199,7 +267,7 @@ fn next_component_value(tokenizer: &mut Tokenizer) -> Option { if tokenizer.starts_with("^=") { tokenizer.position += 2; PrefixMatch } else { tokenizer.position += 1; Delim(c) } }, - '{' => CurlyBracketBlock(consume_block_with_location(tokenizer, CloseCurlyBracket)), + '{' => { tokenizer.position += 1; CurlyBracketBlock }, '|' => { if tokenizer.starts_with("|=") { tokenizer.position += 2; DashMatch } else if tokenizer.starts_with("||") { tokenizer.position += 2; Column } @@ -219,7 +287,7 @@ fn next_component_value(tokenizer: &mut Tokenizer) -> Option { } }, }; - Some((component_value, start_location)) + Some(token) } @@ -239,39 +307,7 @@ fn consume_comments(tokenizer: &mut Tokenizer) { } -fn consume_block(tokenizer: &mut Tokenizer, ending_token: ComponentValue) -> Vec { - tokenizer.position += 1; // Skip the initial {[( - let mut content = Vec::new(); - loop { - match next_component_value(tokenizer) { - Some((component_value, _location)) => { - if component_value == ending_token { break } - else { content.push(component_value) } - }, - None => break, - } - } - content -} - - -fn consume_block_with_location(tokenizer: &mut Tokenizer, ending_token: ComponentValue) -> Vec { - tokenizer.position += 1; // Skip the initial {[( - let mut content = Vec::new(); - loop { - match next_component_value(tokenizer) { - Some((component_value, location)) => { - if component_value == ending_token { break } - else { content.push((component_value, location)) } - }, - None => break, - } - } - content -} - - -fn consume_string(tokenizer: &mut Tokenizer, single_quote: bool) -> ComponentValue { +fn consume_string(tokenizer: &mut Tokenizer, single_quote: bool) -> Token { match consume_quoted_string(tokenizer, single_quote) { 
Ok(value) => QuotedString(value), Err(()) => BadString @@ -330,11 +366,12 @@ fn is_ident_start(tokenizer: &mut Tokenizer) -> bool { } -fn consume_ident_like(tokenizer: &mut Tokenizer) -> ComponentValue { +fn consume_ident_like(tokenizer: &mut Tokenizer) -> Token { let value = consume_name(tokenizer); if !tokenizer.is_eof() && tokenizer.current_char() == '(' { - if value.as_slice().eq_ignore_ascii_case("url") { consume_url(tokenizer) } - else { Function(value, consume_block(tokenizer, CloseParenthesis)) } + tokenizer.position += 1; + if value.eq_ignore_ascii_case("url") { consume_url(tokenizer) } + else { Function(value) } } else { Ident(value) } @@ -360,7 +397,7 @@ fn consume_name(tokenizer: &mut Tokenizer) -> String { } -fn consume_numeric(tokenizer: &mut Tokenizer) -> ComponentValue { +fn consume_numeric(tokenizer: &mut Tokenizer) -> Token { // Parse [+-]?\d*(\.\d+)?([eE][+-]?\d+)? // But this is always called so that there is at least one digit in \d*(\.\d+)? let mut representation = String::new(); @@ -436,8 +473,7 @@ fn consume_numeric(tokenizer: &mut Tokenizer) -> ComponentValue { } -fn consume_url(tokenizer: &mut Tokenizer) -> ComponentValue { - tokenizer.position += 1; // Skip the ( of url( +fn consume_url(tokenizer: &mut Tokenizer) -> Token { while !tokenizer.is_eof() { match tokenizer.current_char() { ' ' | '\t' | '\n' | '\r' | '\x0C' => tokenizer.position += 1, @@ -447,16 +483,16 @@ fn consume_url(tokenizer: &mut Tokenizer) -> ComponentValue { _ => return consume_unquoted_url(tokenizer), } } - return URL(String::new()); + return Url(String::new()); - fn consume_quoted_url(tokenizer: &mut Tokenizer, single_quote: bool) -> ComponentValue { + fn consume_quoted_url(tokenizer: &mut Tokenizer, single_quote: bool) -> Token { match consume_quoted_string(tokenizer, single_quote) { Ok(value) => consume_url_end(tokenizer, value), Err(()) => consume_bad_url(tokenizer), } } - fn consume_unquoted_url(tokenizer: &mut Tokenizer) -> ComponentValue { + fn 
consume_unquoted_url(tokenizer: &mut Tokenizer) -> Token { let mut string = String::new(); while !tokenizer.is_eof() { let next_char = match tokenizer.consume_char() { @@ -475,10 +511,10 @@ fn consume_url(tokenizer: &mut Tokenizer) -> ComponentValue { }; string.push(next_char) } - URL(string) + Url(string) } - fn consume_url_end(tokenizer: &mut Tokenizer, string: String) -> ComponentValue { + fn consume_url_end(tokenizer: &mut Tokenizer, string: String) -> Token { while !tokenizer.is_eof() { match tokenizer.consume_char() { ' ' | '\t' | '\n' | '\r' | '\x0C' => (), @@ -486,10 +522,10 @@ fn consume_url(tokenizer: &mut Tokenizer) -> ComponentValue { _ => return consume_bad_url(tokenizer) } } - URL(string) + Url(string) } - fn consume_bad_url(tokenizer: &mut Tokenizer) -> ComponentValue { + fn consume_bad_url(tokenizer: &mut Tokenizer) -> Token { // Consume up to the closing ) while !tokenizer.is_eof() { match tokenizer.consume_char() { @@ -498,13 +534,13 @@ fn consume_url(tokenizer: &mut Tokenizer) -> ComponentValue { _ => () } } - BadURL + BadUrl } } -fn consume_unicode_range(tokenizer: &mut Tokenizer) -> ComponentValue { +fn consume_unicode_range(tokenizer: &mut Tokenizer) -> Token { tokenizer.position += 2; // Skip U+ let mut hex = String::new(); while hex.len() < 6 && !tokenizer.is_eof() From 3afdf405e357168aa147f9fba1dc487fd936d780 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 29 Dec 2014 14:54:12 +0100 Subject: [PATCH 16/69] Use the `matches` crate rather than re-defining multiple copies of it. 
--- Cargo.toml | 1 + src/lib.rs | 11 ++++------- src/nth.rs | 7 ++----- src/serializer.rs | 9 ++------- src/tokenizer.rs | 36 +++++++++++++++--------------------- 5 files changed, 24 insertions(+), 40 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index cbb4d647..5f8e9d0c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,6 +7,7 @@ authors = [ "Simon Sapin " ] [dependencies] text_writer = "0.1.1" +matches = "0.1" [dependencies.encoding] diff --git a/src/lib.rs b/src/lib.rs index 74b2f310..3ad3480c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,16 +5,13 @@ #![crate_name = "cssparser"] #![crate_type = "rlib"] -#![feature(globs, macro_rules, if_let, while_let, unsafe_destructor)] +#![feature(globs, macro_rules, if_let, while_let, unsafe_destructor, phase)] extern crate encoding; extern crate text_writer; - -#[cfg(test)] -extern crate test; - -#[cfg(test)] -extern crate serialize; +#[phase(plugin)] extern crate matches; +#[cfg(test)] extern crate test; +#[cfg(test)] extern crate serialize; pub use tokenizer::{Tokenizer, Token, NumericValue}; pub use rules_and_declarations::{Priority, parse_important}; diff --git a/src/nth.rs b/src/nth.rs index dd1bca82..37f0054c 100644 --- a/src/nth.rs +++ b/src/nth.rs @@ -86,7 +86,7 @@ fn parse_signless_b(input: &mut Parser, a: i32, b_sign: i32) -> Result<(i32, i32 fn parse_n_dash_digits(string: &str) -> Result { if string.len() >= 3 && string.slice_to(2).eq_ignore_ascii_case("n-") - && string.slice_from(2).chars().all(|c| match c { '0'...'9' => true, _ => false }) + && string.slice_from(2).chars().all(|c| matches!(c, '0'...'9')) { Ok(from_str(string.slice_from(1)).unwrap()) // Include the minus sign } else { @@ -96,8 +96,5 @@ fn parse_n_dash_digits(string: &str) -> Result { #[inline] fn has_sign(value: &NumericValue) -> bool { - match value.representation.as_bytes()[0] { - b'+' | b'-' => true, - _ => false - } + matches!(value.representation.as_bytes()[0], b'+' | b'-') } diff --git a/src/serializer.rs b/src/serializer.rs index 
37234c39..f15e8d9d 100644 --- a/src/serializer.rs +++ b/src/serializer.rs @@ -228,11 +228,6 @@ impl<'a> ToCss for [Token] { None => return Ok(()), Some(first) => { try!(first.to_css(dest)); first } }; - macro_rules! matches( - ($value:expr, $($pattern:pat)|+) => ( - match $value { $($pattern)|+ => true, _ => false } - ); - ) // This does not borrow-check: for component_value in iter { loop { match iter.next() { None => break, Some(component_value) => { let (a, b) = (previous, component_value); @@ -260,7 +255,7 @@ impl<'a> ToCss for [Token] { ) || ( matches!(*a, UnicodeRange(..)) && matches!(*b, Ident(..) | Function(..) | Delim('?')) - ) || (match (a, b) { (&Delim(a), &Delim(b)) => matches!((a, b), + ) || matches!((a, b), (&Delim(a), &Delim(b)) if matches!((a, b), ('#', '-') | ('$', '=') | ('*', '=') | @@ -269,7 +264,7 @@ impl<'a> ToCss for [Token] { ('|', '=') | ('|', '|') | ('/', '*') - ), _ => false }) { + )) { try!(dest.write_str("/**/")); } // Skip whitespace when '\n' was previously written at the previous iteration. diff --git a/src/tokenizer.rs b/src/tokenizer.rs index bcd22c4a..3b781d8c 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -71,12 +71,6 @@ pub struct Tokenizer<'a> { buffer: Option, } -macro_rules! is_match( - ($value:expr, $($pattern:pat)|+) => ( - match $value { $($pattern)|+ => true, _ => false } - ); -) - impl<'a> Tokenizer<'a> { #[inline] @@ -131,7 +125,7 @@ impl<'a> Tokenizer<'a> { #[inline] fn has_newline_at(&self, offset: uint) -> bool { self.position + offset < self.length && - is_match!(self.char_at(offset), '\n' | '\r' | '\x0C') + matches!(self.char_at(offset), '\n' | '\r' | '\x0C') } #[inline] @@ -189,11 +183,11 @@ fn next_token(tokenizer: &mut Tokenizer) -> Option { '+' => { if ( tokenizer.position + 1 < tokenizer.length - && is_match!(tokenizer.char_at(1), '0'...'9') + && matches!(tokenizer.char_at(1), '0'...'9') ) || ( tokenizer.position + 2 < tokenizer.length && tokenizer.char_at(1) == '.' 
- && is_match!(tokenizer.char_at(2), '0'...'9') + && matches!(tokenizer.char_at(2), '0'...'9') ) { consume_numeric(tokenizer) } else { @@ -205,11 +199,11 @@ fn next_token(tokenizer: &mut Tokenizer) -> Option { '-' => { if ( tokenizer.position + 1 < tokenizer.length - && is_match!(tokenizer.char_at(1), '0'...'9') + && matches!(tokenizer.char_at(1), '0'...'9') ) || ( tokenizer.position + 2 < tokenizer.length && tokenizer.char_at(1) == '.' - && is_match!(tokenizer.char_at(2), '0'...'9') + && matches!(tokenizer.char_at(2), '0'...'9') ) { consume_numeric(tokenizer) } else if tokenizer.starts_with("-->") { @@ -224,7 +218,7 @@ fn next_token(tokenizer: &mut Tokenizer) -> Option { }, '.' => { if tokenizer.position + 1 < tokenizer.length - && is_match!(tokenizer.char_at(1), '0'...'9' + && matches!(tokenizer.char_at(1), '0'...'9' ) { consume_numeric(tokenizer) } else { @@ -252,7 +246,7 @@ fn next_token(tokenizer: &mut Tokenizer) -> Option { 'u' | 'U' => { if tokenizer.position + 2 < tokenizer.length && tokenizer.char_at(1) == '+' - && is_match!(tokenizer.char_at(2), '0'...'9' | 'a'...'f' | 'A'...'F' | '?') + && matches!(tokenizer.char_at(2), '0'...'9' | 'a'...'f' | 'A'...'F' | '?') { consume_unicode_range(tokenizer) } else { consume_ident_like(tokenizer) } }, @@ -402,7 +396,7 @@ fn consume_numeric(tokenizer: &mut Tokenizer) -> Token { // But this is always called so that there is at least one digit in \d*(\.\d+)? let mut representation = String::new(); let mut is_integer = true; - if is_match!(tokenizer.current_char(), '-' | '+') { + if matches!(tokenizer.current_char(), '-' | '+') { representation.push(tokenizer.consume_char()) } while !tokenizer.is_eof() { @@ -412,7 +406,7 @@ fn consume_numeric(tokenizer: &mut Tokenizer) -> Token { } } if tokenizer.position + 1 < tokenizer.length && tokenizer.current_char() == '.' 
- && is_match!(tokenizer.char_at(1), '0'...'9') { + && matches!(tokenizer.char_at(1), '0'...'9') { is_integer = false; representation.push(tokenizer.consume_char()); // '.' representation.push(tokenizer.consume_char()); // digit @@ -425,13 +419,13 @@ fn consume_numeric(tokenizer: &mut Tokenizer) -> Token { } if ( tokenizer.position + 1 < tokenizer.length - && is_match!(tokenizer.current_char(), 'e' | 'E') - && is_match!(tokenizer.char_at(1), '0'...'9') + && matches!(tokenizer.current_char(), 'e' | 'E') + && matches!(tokenizer.char_at(1), '0'...'9') ) || ( tokenizer.position + 2 < tokenizer.length - && is_match!(tokenizer.current_char(), 'e' | 'E') - && is_match!(tokenizer.char_at(1), '+' | '-') - && is_match!(tokenizer.char_at(2), '0'...'9') + && matches!(tokenizer.current_char(), 'e' | 'E') + && matches!(tokenizer.char_at(1), '+' | '-') + && matches!(tokenizer.char_at(2), '0'...'9') ) { is_integer = false; representation.push(tokenizer.consume_char()); // 'e' or 'E' @@ -544,7 +538,7 @@ fn consume_unicode_range(tokenizer: &mut Tokenizer) -> Token { tokenizer.position += 2; // Skip U+ let mut hex = String::new(); while hex.len() < 6 && !tokenizer.is_eof() - && is_match!(tokenizer.current_char(), '0'...'9' | 'A'...'F' | 'a'...'f') { + && matches!(tokenizer.current_char(), '0'...'9' | 'A'...'F' | 'a'...'f') { hex.push(tokenizer.consume_char()); } let max_question_marks = 6u - hex.len(); From 9965b0d8515aae9307ed4c8acc7b92fe08d888db Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 29 Dec 2014 14:56:35 +0100 Subject: [PATCH 17/69] Use `while let` instead of `loop` + `match` + `break`. 
--- src/serializer.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/serializer.rs b/src/serializer.rs index f15e8d9d..f8783b23 100644 --- a/src/serializer.rs +++ b/src/serializer.rs @@ -228,8 +228,7 @@ impl<'a> ToCss for [Token] { None => return Ok(()), Some(first) => { try!(first.to_css(dest)); first } }; - // This does not borrow-check: for component_value in iter { - loop { match iter.next() { None => break, Some(component_value) => { + while let Some(component_value) = iter.next() { let (a, b) = (previous, component_value); if ( matches!(*a, Ident(..) | AtKeyword(..) | Hash(..) | IDHash(..) | @@ -275,7 +274,7 @@ impl<'a> ToCss for [Token] { try!(dest.write_char('\n')); } previous = component_value; - }}} + } Ok(()) } } From 307180fc71608faecd72c91aa18928d8468fc84d Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 29 Dec 2014 17:28:47 +0100 Subject: [PATCH 18/69] Remove the Tokenizer::length field. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let’s assume that str::len() calls are optimized away. 
--- src/tokenizer.rs | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 3b781d8c..ff3a5554 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -64,7 +64,6 @@ pub struct NumericValue { pub struct Tokenizer<'a> { input: &'a str, - length: uint, // All counted in bytes, not characters position: uint, // All counted in bytes, not characters /// For `peek` and `push_back` @@ -76,7 +75,6 @@ impl<'a> Tokenizer<'a> { #[inline] pub fn new(input: &str) -> Tokenizer { Tokenizer { - length: input.len(), input: input, position: 0, buffer: None, @@ -111,7 +109,7 @@ impl<'a> Tokenizer<'a> { } #[inline] - fn is_eof(&self) -> bool { self.position >= self.length } + fn is_eof(&self) -> bool { self.position >= self.input.len() } // Assumes non-EOF #[inline] @@ -124,7 +122,7 @@ impl<'a> Tokenizer<'a> { #[inline] fn has_newline_at(&self, offset: uint) -> bool { - self.position + offset < self.length && + self.position + offset < self.input.len() && matches!(self.char_at(offset), '\n' | '\r' | '\x0C') } @@ -182,10 +180,10 @@ fn next_token(tokenizer: &mut Tokenizer) -> Option { }, '+' => { if ( - tokenizer.position + 1 < tokenizer.length + tokenizer.position + 1 < tokenizer.input.len() && matches!(tokenizer.char_at(1), '0'...'9') ) || ( - tokenizer.position + 2 < tokenizer.length + tokenizer.position + 2 < tokenizer.input.len() && tokenizer.char_at(1) == '.' && matches!(tokenizer.char_at(2), '0'...'9') ) { @@ -198,10 +196,10 @@ fn next_token(tokenizer: &mut Tokenizer) -> Option { ',' => { tokenizer.position += 1; Comma }, '-' => { if ( - tokenizer.position + 1 < tokenizer.length + tokenizer.position + 1 < tokenizer.input.len() && matches!(tokenizer.char_at(1), '0'...'9') ) || ( - tokenizer.position + 2 < tokenizer.length + tokenizer.position + 2 < tokenizer.input.len() && tokenizer.char_at(1) == '.' 
&& matches!(tokenizer.char_at(2), '0'...'9') ) { @@ -217,7 +215,7 @@ fn next_token(tokenizer: &mut Tokenizer) -> Option { } }, '.' => { - if tokenizer.position + 1 < tokenizer.length + if tokenizer.position + 1 < tokenizer.input.len() && matches!(tokenizer.char_at(1), '0'...'9' ) { consume_numeric(tokenizer) @@ -244,7 +242,7 @@ fn next_token(tokenizer: &mut Tokenizer) -> Option { else { Delim(c) } }, 'u' | 'U' => { - if tokenizer.position + 2 < tokenizer.length + if tokenizer.position + 2 < tokenizer.input.len() && tokenizer.char_at(1) == '+' && matches!(tokenizer.char_at(2), '0'...'9' | 'a'...'f' | 'A'...'F' | '?') { consume_unicode_range(tokenizer) } @@ -349,7 +347,7 @@ fn consume_quoted_string(tokenizer: &mut Tokenizer, single_quote: bool) -> Resul fn is_ident_start(tokenizer: &mut Tokenizer) -> bool { !tokenizer.is_eof() && match tokenizer.current_char() { 'a'...'z' | 'A'...'Z' | '_' | '\0' => true, - '-' => tokenizer.position + 1 < tokenizer.length && match tokenizer.char_at(1) { + '-' => tokenizer.position + 1 < tokenizer.input.len() && match tokenizer.char_at(1) { 'a'...'z' | 'A'...'Z' | '-' | '_' | '\0' => true, '\\' => !tokenizer.has_newline_at(1), c => c > '\x7F', // Non-ASCII @@ -405,7 +403,7 @@ fn consume_numeric(tokenizer: &mut Tokenizer) -> Token { _ => break } } - if tokenizer.position + 1 < tokenizer.length && tokenizer.current_char() == '.' + if tokenizer.position + 1 < tokenizer.input.len() && tokenizer.current_char() == '.' && matches!(tokenizer.char_at(1), '0'...'9') { is_integer = false; representation.push(tokenizer.consume_char()); // '.' 
@@ -418,11 +416,11 @@ fn consume_numeric(tokenizer: &mut Tokenizer) -> Token { } } if ( - tokenizer.position + 1 < tokenizer.length + tokenizer.position + 1 < tokenizer.input.len() && matches!(tokenizer.current_char(), 'e' | 'E') && matches!(tokenizer.char_at(1), '0'...'9') ) || ( - tokenizer.position + 2 < tokenizer.length + tokenizer.position + 2 < tokenizer.input.len() && matches!(tokenizer.current_char(), 'e' | 'E') && matches!(tokenizer.char_at(1), '+' | '-') && matches!(tokenizer.char_at(2), '0'...'9') From 90168bede48fe20e077611140d927f6c90f793fb Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 29 Dec 2014 17:52:26 +0100 Subject: [PATCH 19/69] Encapsulate input/position access in tokenizer. This would help taking input incrementally (rather than having a single `&str`) if we want to do that. --- src/tokenizer.rs | 137 +++++++++++++++++++++++++---------------------- 1 file changed, 73 insertions(+), 64 deletions(-) diff --git a/src/tokenizer.rs b/src/tokenizer.rs index ff3a5554..c1d37e7d 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -108,8 +108,17 @@ impl<'a> Tokenizer<'a> { self.buffer = Some(token); } + // `tokenizer.current_char()` will not fail. #[inline] - fn is_eof(&self) -> bool { self.position >= self.input.len() } + fn is_eof(&self) -> bool { !self.has_at_least(0) } + + // The input has at least `n` bytes left *after* the current one. + // That is, `tokenizer.char_at(n)` will not fail. 
+ #[inline] + fn has_at_least(&self, n: uint) -> bool { self.position + n < self.input.len() } + + #[inline] + fn advance(&mut self, n: uint) { self.position += n } // Assumes non-EOF #[inline] @@ -150,7 +159,7 @@ fn next_token(tokenizer: &mut Tokenizer) -> Option { '\t' | '\n' | ' ' | '\r' | '\x0C' => { while !tokenizer.is_eof() { match tokenizer.current_char() { - ' ' | '\t' | '\n' | '\r' | '\x0C' => tokenizer.position += 1, + ' ' | '\t' | '\n' | '\r' | '\x0C' => tokenizer.advance(1), _ => break, } } @@ -158,7 +167,7 @@ fn next_token(tokenizer: &mut Tokenizer) -> Option { }, '"' => consume_string(tokenizer, false), '#' => { - tokenizer.position += 1; + tokenizer.advance(1); if is_ident_start(tokenizer) { IDHash(consume_name(tokenizer)) } else if !tokenizer.is_eof() && match tokenizer.current_char() { 'a'...'z' | 'A'...'Z' | '0'...'9' | '-' | '_' => true, @@ -168,113 +177,113 @@ fn next_token(tokenizer: &mut Tokenizer) -> Option { else { Delim(c) } }, '$' => { - if tokenizer.starts_with("$=") { tokenizer.position += 2; SuffixMatch } - else { tokenizer.position += 1; Delim(c) } + if tokenizer.starts_with("$=") { tokenizer.advance(2); SuffixMatch } + else { tokenizer.advance(1); Delim(c) } }, '\'' => consume_string(tokenizer, true), - '(' => { tokenizer.position += 1; ParenthesisBlock }, - ')' => { tokenizer.position += 1; CloseParenthesis }, + '(' => { tokenizer.advance(1); ParenthesisBlock }, + ')' => { tokenizer.advance(1); CloseParenthesis }, '*' => { - if tokenizer.starts_with("*=") { tokenizer.position += 2; SubstringMatch } - else { tokenizer.position += 1; Delim(c) } + if tokenizer.starts_with("*=") { tokenizer.advance(2); SubstringMatch } + else { tokenizer.advance(1); Delim(c) } }, '+' => { if ( - tokenizer.position + 1 < tokenizer.input.len() + tokenizer.has_at_least(1) && matches!(tokenizer.char_at(1), '0'...'9') ) || ( - tokenizer.position + 2 < tokenizer.input.len() + tokenizer.has_at_least(2) && tokenizer.char_at(1) == '.' 
&& matches!(tokenizer.char_at(2), '0'...'9') ) { consume_numeric(tokenizer) } else { - tokenizer.position += 1; + tokenizer.advance(1); Delim(c) } }, - ',' => { tokenizer.position += 1; Comma }, + ',' => { tokenizer.advance(1); Comma }, '-' => { if ( - tokenizer.position + 1 < tokenizer.input.len() + tokenizer.has_at_least(1) && matches!(tokenizer.char_at(1), '0'...'9') ) || ( - tokenizer.position + 2 < tokenizer.input.len() + tokenizer.has_at_least(2) && tokenizer.char_at(1) == '.' && matches!(tokenizer.char_at(2), '0'...'9') ) { consume_numeric(tokenizer) } else if tokenizer.starts_with("-->") { - tokenizer.position += 3; + tokenizer.advance(3); CDC } else if is_ident_start(tokenizer) { consume_ident_like(tokenizer) } else { - tokenizer.position += 1; + tokenizer.advance(1); Delim(c) } }, '.' => { - if tokenizer.position + 1 < tokenizer.input.len() + if tokenizer.has_at_least(1) && matches!(tokenizer.char_at(1), '0'...'9' ) { consume_numeric(tokenizer) } else { - tokenizer.position += 1; + tokenizer.advance(1); Delim(c) } } '0'...'9' => consume_numeric(tokenizer), - ':' => { tokenizer.position += 1; Colon }, - ';' => { tokenizer.position += 1; Semicolon }, + ':' => { tokenizer.advance(1); Colon }, + ';' => { tokenizer.advance(1); Semicolon }, '<' => { if tokenizer.starts_with(" // Function - Function(String), // name + Function(CowString<'a>), // name // Simple block ParenthesisBlock, // (…) @@ -68,7 +70,7 @@ pub struct Tokenizer<'a> { position: uint, // All counted in bytes, not characters /// For `peek` and `push_back` - buffer: Option, + buffer: Option>, } @@ -83,7 +85,7 @@ impl<'a> Tokenizer<'a> { } #[inline] - pub fn next(&mut self) -> Result { + pub fn next(&mut self) -> Result, ()> { if let Some(token) = self.buffer.take() { Ok(token) } else { @@ -92,7 +94,7 @@ impl<'a> Tokenizer<'a> { } #[inline] - pub fn peek(&mut self) -> Result<&Token, ()> { + pub fn peek(&mut self) -> Result<&Token<'a>, ()> { match self.buffer { Some(ref token) => Ok(token), None => { 
@@ -103,7 +105,7 @@ impl<'a> Tokenizer<'a> { } #[inline] - pub fn push_back(&mut self, token: Token) { + pub fn push_back(&mut self, token: Token<'a>) { assert!(self.buffer.is_none(), "Parser::push_back can only be called after Parser::next"); self.buffer = Some(token); @@ -155,7 +157,7 @@ impl<'a> Tokenizer<'a> { } -fn next_token(tokenizer: &mut Tokenizer) -> Option { +fn next_token<'a>(tokenizer: &mut Tokenizer<'a>) -> Option> { consume_comments(tokenizer); if tokenizer.is_eof() { return None @@ -314,7 +316,7 @@ fn consume_comments(tokenizer: &mut Tokenizer) { } -fn consume_string(tokenizer: &mut Tokenizer, single_quote: bool) -> Token { +fn consume_string<'a>(tokenizer: &mut Tokenizer<'a>, single_quote: bool) -> Token<'a> { match consume_quoted_string(tokenizer, single_quote) { Ok(value) => QuotedString(value), Err(()) => BadString @@ -323,7 +325,8 @@ fn consume_string(tokenizer: &mut Tokenizer, single_quote: bool) -> Token { /// Return `Err(())` on syntax error (ie. unescaped newline) -fn consume_quoted_string(tokenizer: &mut Tokenizer, single_quote: bool) -> Result { +fn consume_quoted_string<'a>(tokenizer: &mut Tokenizer<'a>, single_quote: bool) + -> Result, ()> { tokenizer.advance(1); // Skip the initial quote let mut string = String::new(); while !tokenizer.is_eof() { @@ -353,7 +356,7 @@ fn consume_quoted_string(tokenizer: &mut Tokenizer, single_quote: bool) -> Resul c => string.push(c), } } - Ok(string) + Ok(Owned(string)) } @@ -372,7 +375,7 @@ fn is_ident_start(tokenizer: &mut Tokenizer) -> bool { } -fn consume_ident_like(tokenizer: &mut Tokenizer) -> Token { +fn consume_ident_like<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> { let value = consume_name(tokenizer); if !tokenizer.is_eof() && tokenizer.current_char() == '(' { tokenizer.advance(1); @@ -383,7 +386,7 @@ fn consume_ident_like(tokenizer: &mut Tokenizer) -> Token { } } -fn consume_name(tokenizer: &mut Tokenizer) -> String { +fn consume_name<'a>(tokenizer: &mut Tokenizer<'a>) -> CowString<'a> { 
let mut value = String::new(); while !tokenizer.is_eof() { let c = tokenizer.current_char(); @@ -399,7 +402,7 @@ fn consume_name(tokenizer: &mut Tokenizer) -> String { else { break } }) } - value + Owned(value) } @@ -413,7 +416,7 @@ fn consume_digits(tokenizer: &mut Tokenizer) { } -fn consume_numeric(tokenizer: &mut Tokenizer) -> Token { +fn consume_numeric<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> { // Parse [+-]?\d*(\.\d+)?([eE][+-]?\d+)? // But this is always called so that there is at least one digit in \d*(\.\d+)? let start_pos = tokenizer.position; @@ -470,7 +473,7 @@ fn consume_numeric(tokenizer: &mut Tokenizer) -> Token { } -fn consume_url(tokenizer: &mut Tokenizer) -> Token { +fn consume_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> { while !tokenizer.is_eof() { match tokenizer.current_char() { ' ' | '\t' | '\n' | '\r' | '\x0C' => tokenizer.advance(1), @@ -480,20 +483,22 @@ fn consume_url(tokenizer: &mut Tokenizer) -> Token { _ => return consume_unquoted_url(tokenizer), } } - return Url(String::new()); + return Url(Borrowed("")); - fn consume_quoted_url(tokenizer: &mut Tokenizer, single_quote: bool) -> Token { + fn consume_quoted_url<'a>(tokenizer: &mut Tokenizer<'a>, single_quote: bool) -> Token<'a> { match consume_quoted_string(tokenizer, single_quote) { Ok(value) => consume_url_end(tokenizer, value), Err(()) => consume_bad_url(tokenizer), } } - fn consume_unquoted_url(tokenizer: &mut Tokenizer) -> Token { + fn consume_unquoted_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> { let mut string = String::new(); while !tokenizer.is_eof() { let next_char = match tokenizer.consume_char() { - ' ' | '\t' | '\n' | '\r' | '\x0C' => return consume_url_end(tokenizer, string), + ' ' | '\t' | '\n' | '\r' | '\x0C' => { + return consume_url_end(tokenizer, Owned(string)) + } ')' => break, '\x01'...'\x08' | '\x0B' | '\x0E'...'\x1F' | '\x7F' // non-printable | '"' | '\'' | '(' => return consume_bad_url(tokenizer), @@ -508,10 +513,10 @@ fn 
consume_url(tokenizer: &mut Tokenizer) -> Token { }; string.push(next_char) } - Url(string) + Url(Owned(string)) } - fn consume_url_end(tokenizer: &mut Tokenizer, string: String) -> Token { + fn consume_url_end<'a>(tokenizer: &mut Tokenizer<'a>, string: CowString<'a>) -> Token<'a> { while !tokenizer.is_eof() { match tokenizer.consume_char() { ' ' | '\t' | '\n' | '\r' | '\x0C' => (), @@ -522,7 +527,7 @@ fn consume_url(tokenizer: &mut Tokenizer) -> Token { Url(string) } - fn consume_bad_url(tokenizer: &mut Tokenizer) -> Token { + fn consume_bad_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> { // Consume up to the closing ) while !tokenizer.is_eof() { match tokenizer.consume_char() { @@ -537,7 +542,7 @@ fn consume_url(tokenizer: &mut Tokenizer) -> Token { -fn consume_unicode_range(tokenizer: &mut Tokenizer) -> Token { +fn consume_unicode_range<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> { tokenizer.advance(2); // Skip U+ let mut hex = String::new(); while hex.len() < 6 && !tokenizer.is_eof() From c9632be48051d2def2863c0b6aafdae88d9f6fae Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 6 Jan 2015 00:11:54 +0000 Subject: [PATCH 26/69] Use `Cow::Borrowed` for the value of ident-like tokens without escapes or NUL. 
--- src/tokenizer.rs | 45 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 39 insertions(+), 6 deletions(-) diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 8f4a32ca..56de3de0 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -6,6 +6,7 @@ use std::{char, num}; use std::ascii::AsciiExt; +use std::borrow::ToOwned; use std::str::CowString; use std::borrow::Cow::{Owned, Borrowed}; @@ -151,7 +152,7 @@ impl<'a> Tokenizer<'a> { } #[inline] - fn slice_from(&self, start_pos: uint) -> &str { + fn slice_from(&self, start_pos: uint) -> &'a str { self.input.slice(start_pos, self.position) } } @@ -387,19 +388,51 @@ fn consume_ident_like<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> { } fn consume_name<'a>(tokenizer: &mut Tokenizer<'a>) -> CowString<'a> { - let mut value = String::new(); + let start_pos = tokenizer.position; + let mut value; + loop { + if tokenizer.is_eof() { + return Borrowed(tokenizer.slice_from(start_pos)) + } + match tokenizer.current_char() { + 'a'...'z' | 'A'...'Z' | '0'...'9' | '_' | '-' => tokenizer.advance(1), + '\\' => { + if tokenizer.has_newline_at(1) { + return Borrowed(tokenizer.slice_from(start_pos)) + } + value = tokenizer.slice_from(start_pos).to_owned(); + tokenizer.advance(1); + value.push(consume_escape(tokenizer)); + break + } + '\0' => { + value = tokenizer.slice_from(start_pos).to_owned(); + tokenizer.advance(1); + value.push_str("\u{FFFD}"); + break + } + c if c.is_ascii() => return Borrowed(tokenizer.slice_from(start_pos)), + _ => { + tokenizer.consume_char(); + } + } + } + while !tokenizer.is_eof() { let c = tokenizer.current_char(); value.push(match c { - 'a'...'z' | 'A'...'Z' | '0'...'9' | '_' | '-' => { tokenizer.advance(1); c }, + 'a'...'z' | 'A'...'Z' | '0'...'9' | '_' | '-' => { + tokenizer.advance(1); + c + } '\\' => { if tokenizer.has_newline_at(1) { break } tokenizer.advance(1); consume_escape(tokenizer) - }, + } '\0' => { tokenizer.advance(1); '\u{FFFD}' }, - _ => if c > '\x7F' { 
tokenizer.consume_char() } // Non-ASCII - else { break } + c if c.is_ascii() => break, + _ => tokenizer.consume_char(), }) } Owned(value) From f307a40a4bc7ea08bd17310ca99fd5e441fc0df6 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 6 Jan 2015 00:23:33 +0000 Subject: [PATCH 27/69] Use `Cow::Borrowed` for the value of string-like tokens without escapes or NUL. --- src/tokenizer.rs | 42 +++++++++++++++++++++++++++++------------- 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 56de3de0..80ac1215 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -329,7 +329,34 @@ fn consume_string<'a>(tokenizer: &mut Tokenizer<'a>, single_quote: bool) -> Toke fn consume_quoted_string<'a>(tokenizer: &mut Tokenizer<'a>, single_quote: bool) -> Result, ()> { tokenizer.advance(1); // Skip the initial quote - let mut string = String::new(); + let start_pos = tokenizer.position; + let mut string; + loop { + if tokenizer.is_eof() { + return Ok(Borrowed(tokenizer.slice_from(start_pos))) + } + match tokenizer.current_char() { + '"' if !single_quote => { + let value = tokenizer.slice_from(start_pos); + tokenizer.advance(1); + return Ok(Borrowed(value)) + } + '\'' if single_quote => { + let value = tokenizer.slice_from(start_pos); + tokenizer.advance(1); + return Ok(Borrowed(value)) + } + '\\' | '\0' => { + string = tokenizer.slice_from(start_pos).to_owned(); + break + } + '\n' | '\r' | '\x0C' => return Err(()), + _ => { + tokenizer.consume_char(); + } + } + } + while !tokenizer.is_eof() { if matches!(tokenizer.current_char(), '\n' | '\r' | '\x0C') { return Err(()); @@ -396,19 +423,8 @@ fn consume_name<'a>(tokenizer: &mut Tokenizer<'a>) -> CowString<'a> { } match tokenizer.current_char() { 'a'...'z' | 'A'...'Z' | '0'...'9' | '_' | '-' => tokenizer.advance(1), - '\\' => { - if tokenizer.has_newline_at(1) { - return Borrowed(tokenizer.slice_from(start_pos)) - } - value = tokenizer.slice_from(start_pos).to_owned(); - 
tokenizer.advance(1); - value.push(consume_escape(tokenizer)); - break - } - '\0' => { + '\\' | '\0' => { value = tokenizer.slice_from(start_pos).to_owned(); - tokenizer.advance(1); - value.push_str("\u{FFFD}"); break } c if c.is_ascii() => return Borrowed(tokenizer.slice_from(start_pos)), From f52dad39ff92d875863c4489e9587d9f960c23fc Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 6 Jan 2015 00:30:19 +0000 Subject: [PATCH 28/69] Use `Cow::Borrowed` for the value of unquoted URL tokens without escapes or NUL. --- src/tokenizer.rs | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 80ac1215..d2273b3c 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -542,7 +542,37 @@ fn consume_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> { } fn consume_unquoted_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> { - let mut string = String::new(); + let start_pos = tokenizer.position; + let mut string; + loop { + if tokenizer.is_eof() { + return Url(Borrowed(tokenizer.slice_from(start_pos))) + } + match tokenizer.current_char() { + ' ' | '\t' | '\n' | '\r' | '\x0C' => { + let value = tokenizer.slice_from(start_pos); + tokenizer.advance(1); + return consume_url_end(tokenizer, Borrowed(value)) + } + ')' => { + let value = tokenizer.slice_from(start_pos); + tokenizer.advance(1); + return Url(Borrowed(value)) + } + '\x01'...'\x08' | '\x0B' | '\x0E'...'\x1F' | '\x7F' // non-printable + | '"' | '\'' | '(' => { + tokenizer.advance(1); + return consume_bad_url(tokenizer) + }, + '\\' | '\0' => { + string = tokenizer.slice_from(start_pos).to_owned(); + break + } + _ => { + tokenizer.consume_char(); + } + } + } while !tokenizer.is_eof() { let next_char = match tokenizer.consume_char() { ' ' | '\t' | '\n' | '\r' | '\x0C' => { From 0172d5837e14fa1d30df5f6f3b0ea58930eb5ff8 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 6 Jan 2015 15:34:17 +0000 Subject: [PATCH 29/69] Fix escaped 
NULs --- src/css-parsing-tests/component_value_list.json | 4 ++-- src/tokenizer.rs | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/css-parsing-tests/component_value_list.json b/src/css-parsing-tests/component_value_list.json index e5c24ec0..e5e28713 100644 --- a/src/css-parsing-tests/component_value_list.json +++ b/src/css-parsing-tests/component_value_list.json @@ -22,7 +22,7 @@ ["ident", "red--"], ">" ], -"\\- red0 -red --red -\\-red\\ blue 0red -0red \u0000red _Red .red rêd r\\êd \u007F\u0080\u0081", [ +"\\- red0 -red --red -\\-red\\ blue 0red -0red \u0000\\\u0000red _Red .red rêd r\\êd \u007F\u0080\u0081", [ ["ident", "-"], " ", ["ident", "red0"], " ", ["ident", "-red"], " ", @@ -30,7 +30,7 @@ ["ident", "--red blue"], " ", ["dimension", "0", 0, "integer", "red"], " ", ["dimension", "0", 0, "integer", "red"], " ", - ["ident", "\uFFFDred"], " ", + ["ident", "\uFFFD\uFFFDred"], " ", ["ident", "_Red"], " ", ".", ["ident", "red"], " ", ["ident", "rêd"], " ", diff --git a/src/tokenizer.rs b/src/tokenizer.rs index d2273b3c..ebc6e303 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -701,6 +701,7 @@ fn consume_escape(tokenizer: &mut Tokenizer) -> char { REPLACEMENT_CHAR } }, + '\0' => '\u{FFFD}', c => c } } From d2531ef28276eab042e7b9c90d290ed82c7a9623 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 6 Jan 2015 16:11:21 +0000 Subject: [PATCH 30/69] Make Parser.tokenizer not be an Option. Have a boolean field instead. 
--- src/parser.rs | 63 ++++++++++++++++++++++++--------------------------- 1 file changed, 30 insertions(+), 33 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index f015513d..e0d3e066 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -7,7 +7,7 @@ use super::{Token, NumericValue, Tokenizer}; pub struct Parser<'i: 't, 't> { - tokenizer: Option<&'t mut Tokenizer<'i>>, + tokenizer: &'t mut Tokenizer<'i>, parent_state: Option<&'t mut ParserState>, state: ParserState, } @@ -20,6 +20,7 @@ struct ParserState { stop_at_end_of: Option, /// For parsers from `parse_until` stop_before: Delimiters, + exhausted: bool, } @@ -90,7 +91,7 @@ impl NestedBlockListExt for NestedBlockList { #[unsafe_destructor] // FIXME What does this mean? impl<'i, 't> Drop for Parser<'i, 't> { fn drop(&mut self) { - if self.tokenizer.is_none() { + if self.state.exhausted { // We’ve already reached the end of our delimited input: // nothing to inform the parent of. debug_assert!(self.state.nested_blocks.is_none()); @@ -163,13 +164,14 @@ impl<'i, 't> Parser<'i, 't> { #[inline] pub fn new(tokenizer: &'t mut Tokenizer<'i>) -> Parser<'i, 't> { Parser { - tokenizer: Some(tokenizer), + tokenizer: tokenizer, parent_state: None, state: ParserState { nested_blocks: None, at_start_of: None, stop_at_end_of: None, stop_before: Delimiter::None, + exhausted: false, }, } } @@ -199,7 +201,7 @@ impl<'i, 't> Parser<'i, 't> { let token = try!(self.next()); self.push_back(token); - self.tokenizer().peek() + self.tokenizer.peek() } #[inline] @@ -207,9 +209,7 @@ impl<'i, 't> Parser<'i, 't> { if BlockType::opening(&token) == self.state.at_start_of { self.state.at_start_of = None; } - self.tokenizer.as_mut().expect( - "Can not use Parser::push_back after the end of the input was reached." 
- ).push_back(token) + self.tokenizer.push_back(token) } #[inline] @@ -225,11 +225,6 @@ impl<'i, 't> Parser<'i, 't> { Err(()) } - #[inline] - fn tokenizer<'a>(&'a mut self) -> &'a mut Tokenizer<'i> { - &mut **self.tokenizer.as_mut().unwrap() - } - pub fn next(&mut self) -> Result, ()> { loop { match self.next_including_whitespace() { @@ -240,32 +235,33 @@ impl<'i, 't> Parser<'i, 't> { } pub fn next_including_whitespace(&mut self) -> Result, ()> { - if self.tokenizer.is_none() { + if self.state.exhausted { return Err(()) } - if self.state.nested_blocks.take().consume(self.tokenizer()) { - self.tokenizer = None; + if self.state.nested_blocks.take().consume(self.tokenizer) { + self.state.exhausted = true; return Err(()) } if let Some(block_type) = self.state.at_start_of.take() { - if consume_until_end_of_block(block_type, self.tokenizer()) { - self.tokenizer = None; + if consume_until_end_of_block(block_type, self.tokenizer) { + self.state.exhausted = true; return Err(()) } } - match self.tokenizer().next() { + match self.tokenizer.next() { Err(()) => { - self.tokenizer = None; + self.state.exhausted = true; Err(()) }, Ok(token) => { if self.state.stop_before.contains(Delimiters::from_token(&token)) { - self.tokenizer.take().unwrap().push_back(token); + self.tokenizer.push_back(token); + self.state.exhausted = true; return Err(()) } if self.state.stop_at_end_of.is_some() && BlockType::closing(&token) == self.state.stop_at_end_of { - self.tokenizer = None; + self.state.exhausted = true; return Err(()) } if let Some(block_type) = BlockType::opening(&token) { @@ -283,31 +279,32 @@ impl<'i, 't> Parser<'i, 't> { } pub fn consume_until_after(&mut self, stop_after: Delimiters) { - if self.tokenizer.is_none() { + if self.state.exhausted { return } // FIXME: have a special-purpose tokenizer method for this that does less work. 
- while let Ok(token) = self.tokenizer().next() { + while let Ok(token) = self.tokenizer.next() { if stop_after.contains(Delimiters::from_token(&token)) { return } if self.state.stop_before.contains(Delimiters::from_token(&token)) { - self.tokenizer.take().unwrap().push_back(token); + self.tokenizer.push_back(token); + self.state.exhausted = true; return } if self.state.stop_at_end_of.is_some() && BlockType::closing(&token) == self.state.stop_at_end_of { - self.tokenizer = None; + self.state.exhausted = true; return } if let Some(block_type) = BlockType::opening(&token) { - if consume_until_end_of_block(block_type, self.tokenizer()) { - self.tokenizer = None; + if consume_until_end_of_block(block_type, self.tokenizer) { + self.state.exhausted = true; return } } } - self.tokenizer = None; + self.state.exhausted = true; } // FIXME: Take an unboxed `FnOnce` closure. @@ -322,16 +319,15 @@ impl<'i, 't> Parser<'i, 't> { #[inline] pub fn parse_nested_block<'a>(&'a mut self) -> Parser<'i, 'a> { if let Some(block_type) = self.state.at_start_of.take() { + debug_assert!(!self.state.exhausted); Parser { - // Unwrap here should never fail - // because `self.tokenizer` is only ever set to `None` - // when `self.state` is also `None`. 
- tokenizer: Some(&mut **self.tokenizer.as_mut().unwrap()), + tokenizer: self.tokenizer, state: ParserState { nested_blocks: self.state.nested_blocks.take(), at_start_of: None, stop_at_end_of: Some(block_type), stop_before: Delimiter::None, + exhausted: false, }, parent_state: Some(&mut self.state), } @@ -351,12 +347,13 @@ impl<'i, 't> Parser<'i, 't> { that is itself from `parse_until_before`") } Parser { - tokenizer: self.tokenizer.as_mut().map(|t| &mut **t), + tokenizer: self.tokenizer, state: ParserState { nested_blocks: self.state.nested_blocks.take(), at_start_of: self.state.at_start_of.take(), stop_at_end_of: self.state.stop_at_end_of, stop_before: stop_before, + exhausted: self.state.exhausted, }, parent_state: Some(&mut self.state), } From e59f5cf00cc1fa368671ca8d3cc7b9e208727db9 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 6 Jan 2015 16:13:34 +0000 Subject: [PATCH 31/69] Make Tokenizer::slice_from public. --- src/lib.rs | 2 +- src/parser.rs | 12 +++++++++++- src/tokenizer.rs | 27 ++++++++++++++++++--------- 3 files changed, 30 insertions(+), 11 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 24f0e691..323d104b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,7 +13,7 @@ extern crate text_writer; #[cfg(test)] extern crate test; #[cfg(test)] extern crate serialize; -pub use tokenizer::{Tokenizer, Token, NumericValue}; +pub use tokenizer::{Tokenizer, Token, NumericValue, TokenizerPosition}; pub use rules_and_declarations::{Priority, parse_important}; pub use rules_and_declarations::{DeclarationParser, DeclarationListParser, parse_one_declaration}; pub use rules_and_declarations::{RuleListParser, parse_one_rule}; diff --git a/src/parser.rs b/src/parser.rs index e0d3e066..2bb15f0c 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -3,7 +3,7 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ use std::str::CowString; -use super::{Token, NumericValue, Tokenizer}; +use super::{Token, NumericValue, Tokenizer, TokenizerPosition}; pub struct Parser<'i: 't, 't> { @@ -225,6 +225,16 @@ impl<'i, 't> Parser<'i, 't> { Err(()) } + #[inline] + pub fn position(&self) -> TokenizerPosition { + self.tokenizer.position() + } + + #[inline] + pub fn slice_from(&self, start_pos: TokenizerPosition) -> &'i str { + self.tokenizer.slice_from(start_pos) + } + pub fn next(&mut self) -> Result, ()> { loop { match self.next_including_whitespace() { diff --git a/src/tokenizer.rs b/src/tokenizer.rs index ebc6e303..73d5e211 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -112,6 +112,16 @@ impl<'a> Tokenizer<'a> { self.buffer = Some(token); } + #[inline] + pub fn position(&self) -> TokenizerPosition { + TokenizerPosition(self.position) + } + + #[inline] + pub fn slice_from(&self, start_pos: TokenizerPosition) -> &'a str { + self.input.slice(start_pos.0, self.position) + } + // If false, `tokenizer.current_char()` will not panic. 
#[inline] fn is_eof(&self) -> bool { !self.has_at_least(0) } @@ -150,14 +160,13 @@ impl<'a> Tokenizer<'a> { fn starts_with(&self, needle: &str) -> bool { self.input.slice_from(self.position).starts_with(needle) } - - #[inline] - fn slice_from(&self, start_pos: uint) -> &'a str { - self.input.slice(start_pos, self.position) - } } +#[deriving(PartialEq, Eq, Show, Clone, Copy)] +pub struct TokenizerPosition(uint); + + fn next_token<'a>(tokenizer: &mut Tokenizer<'a>) -> Option> { consume_comments(tokenizer); if tokenizer.is_eof() { @@ -329,7 +338,7 @@ fn consume_string<'a>(tokenizer: &mut Tokenizer<'a>, single_quote: bool) -> Toke fn consume_quoted_string<'a>(tokenizer: &mut Tokenizer<'a>, single_quote: bool) -> Result, ()> { tokenizer.advance(1); // Skip the initial quote - let start_pos = tokenizer.position; + let start_pos = tokenizer.position(); let mut string; loop { if tokenizer.is_eof() { @@ -415,7 +424,7 @@ fn consume_ident_like<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> { } fn consume_name<'a>(tokenizer: &mut Tokenizer<'a>) -> CowString<'a> { - let start_pos = tokenizer.position; + let start_pos = tokenizer.position(); let mut value; loop { if tokenizer.is_eof() { @@ -468,7 +477,7 @@ fn consume_digits(tokenizer: &mut Tokenizer) { fn consume_numeric<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> { // Parse [+-]?\d*(\.\d+)?([eE][+-]?\d+)? // But this is always called so that there is at least one digit in \d*(\.\d+)? 
- let start_pos = tokenizer.position; + let start_pos = tokenizer.position(); let mut is_integer = true; let signed = matches!(tokenizer.current_char(), '-' | '+'); if signed { @@ -542,7 +551,7 @@ fn consume_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> { } fn consume_unquoted_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> { - let start_pos = tokenizer.position; + let start_pos = tokenizer.position(); let mut string; loop { if tokenizer.is_eof() { From 66dfce857294aaeb902f2d60fbf0499f08013b62 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 6 Jan 2015 17:10:12 +0000 Subject: [PATCH 32/69] Add SourceLocation (line numbers) back, but on demand. --- src/lib.rs | 2 +- src/parser.rs | 13 +++++++++- src/tests.rs | 28 ++++++++++++++++++++- src/tokenizer.rs | 65 +++++++++++++++++++++++++++++++++++++++++++++--- 4 files changed, 101 insertions(+), 7 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 323d104b..fdc95edf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,7 +13,7 @@ extern crate text_writer; #[cfg(test)] extern crate test; #[cfg(test)] extern crate serialize; -pub use tokenizer::{Tokenizer, Token, NumericValue, TokenizerPosition}; +pub use tokenizer::{Tokenizer, Token, NumericValue, TokenizerPosition, SourceLocation}; pub use rules_and_declarations::{Priority, parse_important}; pub use rules_and_declarations::{DeclarationParser, DeclarationListParser, parse_one_declaration}; pub use rules_and_declarations::{RuleListParser, parse_one_rule}; diff --git a/src/parser.rs b/src/parser.rs index 2bb15f0c..68e4aa2c 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -3,7 +3,7 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ use std::str::CowString; -use super::{Token, NumericValue, Tokenizer, TokenizerPosition}; +use super::{Token, NumericValue, Tokenizer, TokenizerPosition, SourceLocation}; pub struct Parser<'i: 't, 't> { @@ -235,6 +235,17 @@ impl<'i, 't> Parser<'i, 't> { self.tokenizer.slice_from(start_pos) } + #[inline] + pub fn current_source_location(&mut self) -> SourceLocation { + self.tokenizer.current_source_location() + } + + #[inline] + pub fn source_location(&mut self, target: TokenizerPosition) -> SourceLocation { + self.tokenizer.source_location(target) + } + + pub fn next(&mut self) -> Result, ()> { loop { match self.next_including_whitespace() { diff --git a/src/tests.rs b/src/tests.rs index b0dc80a2..b3d25708 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -11,7 +11,7 @@ use test; use encoding::label::encoding_from_whatwg_label; -use super::{Tokenizer, Parser, Token, NumericValue, +use super::{Tokenizer, Parser, Token, NumericValue, SourceLocation, DeclarationListParser, DeclarationParser, RuleListParser, AtRulePrelude, AtRuleParser, QualifiedRuleParser, Priority, parse_one_declaration, parse_one_rule, parse_important, @@ -383,6 +383,32 @@ fn serialize_rgba() { assert!(c.to_css_string().as_slice() == "rgba(26, 51, 77, 0.5)"); } +#[test] +fn line_numbers() { + Parser::parse_str("foo bar\nbaz\r\n\n\"a\\\r\nb\"", |input| { + assert_eq!(input.current_source_location(), SourceLocation { line: 1, column: 1 }); + assert_eq!(input.next_including_whitespace(), Ok(Token::Ident(Borrowed("foo")))); + assert_eq!(input.current_source_location(), SourceLocation { line: 1, column: 4 }); + assert_eq!(input.next_including_whitespace(), Ok(Token::WhiteSpace)); + assert_eq!(input.current_source_location(), SourceLocation { line: 1, column: 5 }); + assert_eq!(input.next_including_whitespace(), Ok(Token::Ident(Borrowed("bar")))); + assert_eq!(input.current_source_location(), SourceLocation { line: 1, column: 8 }); + assert_eq!(input.next_including_whitespace(), Ok(Token::WhiteSpace)); + 
assert_eq!(input.current_source_location(), SourceLocation { line: 2, column: 1 }); + assert_eq!(input.next_including_whitespace(), Ok(Token::Ident(Borrowed("baz")))); + assert_eq!(input.current_source_location(), SourceLocation { line: 2, column: 4 }); + let position = input.position(); + + assert_eq!(input.next_including_whitespace(), Ok(Token::WhiteSpace)); + assert_eq!(input.current_source_location(), SourceLocation { line: 4, column: 1 }); + + assert_eq!(input.source_location(position), SourceLocation { line: 2, column: 4 }); + + assert_eq!(input.next_including_whitespace(), Ok(Token::QuotedString(Borrowed("ab")))); + assert_eq!(input.current_source_location(), SourceLocation { line: 5, column: 3 }); + assert_eq!(input.next_including_whitespace(), Err(())); + }) +} impl ToJson for Color { fn to_json(&self) -> json::Json { diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 73d5e211..91dd78b4 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -61,17 +61,23 @@ pub enum Token<'a> { pub struct NumericValue { pub value: f64, pub int_value: Option, - // Whether the number had a `+` or `-` sign. + /// Whether the number had a `+` or `-` sign. pub signed: bool, } pub struct Tokenizer<'a> { input: &'a str, - position: uint, // All counted in bytes, not characters - /// For `peek` and `push_back` + /// Counted in bytes, not code points. From 0. 
+ position: uint, + + /// For `peek()` and `push_back()` buffer: Option>, + + /// Cache for `source_location()` + last_known_line_number: uint, + position_after_last_known_newline: uint, } @@ -82,6 +88,8 @@ impl<'a> Tokenizer<'a> { input: input, position: 0, buffer: None, + last_known_line_number: 1, + position_after_last_known_newline: 0, } } @@ -122,6 +130,46 @@ impl<'a> Tokenizer<'a> { self.input.slice(start_pos.0, self.position) } + #[inline] + pub fn current_source_location(&mut self) -> SourceLocation { + let position = TokenizerPosition(self.position); + self.source_location(position) + } + + pub fn source_location(&mut self, position: TokenizerPosition) -> SourceLocation { + let target = position.0; + let mut line_number; + let mut position; + if target >= self.position_after_last_known_newline { + position = self.position_after_last_known_newline; + line_number = self.last_known_line_number; + } else { + position = 0; + line_number = 1; + } + let mut source = self.input.slice(position, target); + while let Some(newline_position) = source.find(['\n', '\r', '\x0C'].as_slice()) { + let offset = newline_position + + if source.slice_from(newline_position).starts_with("\r\n") { + 2 + } else { + 1 + }; + source = source.slice_from(offset); + position += offset; + line_number += 1; + } + debug_assert!(position <= target); + self.position_after_last_known_newline = position; + self.last_known_line_number = line_number; + SourceLocation { + line: line_number, + // `target == position` when `target` is at the beginning of the line, + // so add 1 so that the column numbers start at 1. + column: target - position + 1, + } + } + // If false, `tokenizer.current_char()` will not panic. 
#[inline] fn is_eof(&self) -> bool { !self.has_at_least(0) } @@ -163,10 +211,19 @@ impl<'a> Tokenizer<'a> { } -#[deriving(PartialEq, Eq, Show, Clone, Copy)] +#[deriving(PartialEq, Eq, PartialOrd, Ord, Show, Clone, Copy)] pub struct TokenizerPosition(uint); +#[deriving(PartialEq, Eq, Show, Clone, Copy)] +pub struct SourceLocation { + /// Starts at 1 + pub line: uint, + /// Starts at 1 + pub column: uint, +} + + fn next_token<'a>(tokenizer: &mut Tokenizer<'a>) -> Option> { consume_comments(tokenizer); if tokenizer.is_eof() { From fe4c3b9f27fcba7a1efaf2827eb3c80296cb7123 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 6 Jan 2015 17:17:23 +0000 Subject: [PATCH 33/69] Make {Rule,Declaration}RuleListParser fields public. --- src/rules_and_declarations.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/rules_and_declarations.rs b/src/rules_and_declarations.rs index 33393bb3..73b33062 100644 --- a/src/rules_and_declarations.rs +++ b/src/rules_and_declarations.rs @@ -87,8 +87,8 @@ pub trait QualifiedRuleParser { pub struct DeclarationListParser<'i: 't, 't: 'a, 'a, AP, I, P> where P: DeclarationParser + AtRuleParser { - input: &'a mut Parser<'i, 't>, - parser: P, + pub input: &'a mut Parser<'i, 't>, + pub parser: P, } @@ -128,8 +128,8 @@ where P: DeclarationParser + AtRuleParser { pub struct RuleListParser<'i: 't, 't: 'a, 'a, R, QP, AP, P> where P: QualifiedRuleParser + AtRuleParser { - input: &'a mut Parser<'i, 't>, - parser: P, + pub input: &'a mut Parser<'i, 't>, + pub parser: P, is_stylesheet: bool, } From 516c1c69b86e03856b9dafb3b4813de43533d700 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 6 Jan 2015 17:24:24 +0000 Subject: [PATCH 34/69] Rename TokenizerPosition to SourcePosition --- src/lib.rs | 2 +- src/parser.rs | 8 ++++---- src/tokenizer.rs | 12 ++++++------ 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index fdc95edf..98a4191d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,7 
+13,7 @@ extern crate text_writer; #[cfg(test)] extern crate test; #[cfg(test)] extern crate serialize; -pub use tokenizer::{Tokenizer, Token, NumericValue, TokenizerPosition, SourceLocation}; +pub use tokenizer::{Tokenizer, Token, NumericValue, SourcePosition, SourceLocation}; pub use rules_and_declarations::{Priority, parse_important}; pub use rules_and_declarations::{DeclarationParser, DeclarationListParser, parse_one_declaration}; pub use rules_and_declarations::{RuleListParser, parse_one_rule}; diff --git a/src/parser.rs b/src/parser.rs index 68e4aa2c..0f8c6c37 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -3,7 +3,7 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ use std::str::CowString; -use super::{Token, NumericValue, Tokenizer, TokenizerPosition, SourceLocation}; +use super::{Token, NumericValue, Tokenizer, SourcePosition, SourceLocation}; pub struct Parser<'i: 't, 't> { @@ -226,12 +226,12 @@ impl<'i, 't> Parser<'i, 't> { } #[inline] - pub fn position(&self) -> TokenizerPosition { + pub fn position(&self) -> SourcePosition { self.tokenizer.position() } #[inline] - pub fn slice_from(&self, start_pos: TokenizerPosition) -> &'i str { + pub fn slice_from(&self, start_pos: SourcePosition) -> &'i str { self.tokenizer.slice_from(start_pos) } @@ -241,7 +241,7 @@ impl<'i, 't> Parser<'i, 't> { } #[inline] - pub fn source_location(&mut self, target: TokenizerPosition) -> SourceLocation { + pub fn source_location(&mut self, target: SourcePosition) -> SourceLocation { self.tokenizer.source_location(target) } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 91dd78b4..875700e6 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -121,22 +121,22 @@ impl<'a> Tokenizer<'a> { } #[inline] - pub fn position(&self) -> TokenizerPosition { - TokenizerPosition(self.position) + pub fn position(&self) -> SourcePosition { + SourcePosition(self.position) } #[inline] - pub fn slice_from(&self, start_pos: TokenizerPosition) -> &'a str { + pub fn 
slice_from(&self, start_pos: SourcePosition) -> &'a str { self.input.slice(start_pos.0, self.position) } #[inline] pub fn current_source_location(&mut self) -> SourceLocation { - let position = TokenizerPosition(self.position); + let position = SourcePosition(self.position); self.source_location(position) } - pub fn source_location(&mut self, position: TokenizerPosition) -> SourceLocation { + pub fn source_location(&mut self, position: SourcePosition) -> SourceLocation { let target = position.0; let mut line_number; let mut position; @@ -212,7 +212,7 @@ impl<'a> Tokenizer<'a> { #[deriving(PartialEq, Eq, PartialOrd, Ord, Show, Clone, Copy)] -pub struct TokenizerPosition(uint); +pub struct SourcePosition(uint); #[deriving(PartialEq, Eq, Show, Clone, Copy)] From 8d8ea17e7de98a3e56ac6b127f86fc7f1e57df85 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 6 Jan 2015 19:04:48 +0000 Subject: [PATCH 35/69] Add Parser::expect_url_or_string --- src/parser.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/parser.rs b/src/parser.rs index 0f8c6c37..f257fd4d 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -404,6 +404,15 @@ impl<'i, 't> Parser<'i, 't> { } } + #[inline] + pub fn expect_url_or_string(&mut self) -> Result, ()> { + match try!(self.next()) { + Token::Url(value) => Ok(value), + Token::QuotedString(value) => Ok(value), + token => self.unexpected(token) + } + } + #[inline] pub fn expect_number(&mut self) -> Result { match try!(self.next()) { From 702ec925acd56e3a64cc0fe73c0f52ee325462d3 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 6 Jan 2015 19:15:01 +0000 Subject: [PATCH 36/69] Allow parse_until_before to be reentrant Previous revisions of it were not because they consumed the delimiter token. 
--- src/parser.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index f257fd4d..ec63c1ac 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -363,17 +363,13 @@ impl<'i, 't> Parser<'i, 't> { #[inline] pub fn parse_until_before<'a>(&'a mut self, stop_before: Delimiters) -> Parser<'i, 'a> { - if self.state.stop_before != Delimiter::None { - panic!("`parse_until_before` cannot be used on a `Parser` \ - that is itself from `parse_until_before`") - } Parser { tokenizer: self.tokenizer, state: ParserState { nested_blocks: self.state.nested_blocks.take(), at_start_of: self.state.at_start_of.take(), stop_at_end_of: self.state.stop_at_end_of, - stop_before: stop_before, + stop_before: self.state.stop_before | stop_before, exhausted: self.state.exhausted, }, parent_state: Some(&mut self.state), From 40b851ee08cd1fabbcc1b4db88e52af391c2ff45 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 6 Jan 2015 19:15:14 +0000 Subject: [PATCH 37/69] Add Parser::parse_comma_separated --- src/parser.rs | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/parser.rs b/src/parser.rs index ec63c1ac..ff17b0ff 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -337,6 +337,23 @@ impl<'i, 't> Parser<'i, 't> { result } + #[inline] + pub fn parse_comma_separated(&mut self, parse_one: |&mut Parser| -> Result) + -> Result, ()> { + let mut values = vec![try!(self.parse_until_before(Delimiter::Comma) + .parse_entirely(|parser| parse_one(parser)))]; + loop { + match self.next() { + Ok(Token::Comma) => { + values.push(try!(self.parse_until_before(Delimiter::Comma) + .parse_entirely(|parser| parse_one(parser)))) + } + Ok(token) => return self.unexpected(token), + Err(()) => return Ok(values), + } + } + } + #[inline] pub fn parse_nested_block<'a>(&'a mut self) -> Parser<'i, 'a> { if let Some(block_type) = self.state.at_start_of.take() { From 2a69030e86fd8e7a02ccd44aff43466ce107b235 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: 
Wed, 7 Jan 2015 16:27:31 +0000 Subject: [PATCH 38/69] Add Parser::expect_ident_or_string --- src/parser.rs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/parser.rs b/src/parser.rs index ff17b0ff..b47033ab 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -402,13 +402,22 @@ impl<'i, 't> Parser<'i, 't> { } #[inline] - pub fn expect_quoted_string(&mut self) -> Result, ()> { + pub fn expect_string(&mut self) -> Result, ()> { match try!(self.next()) { Token::QuotedString(value) => Ok(value), token => self.unexpected(token) } } + #[inline] + pub fn expect_ident_or_string(&mut self) -> Result, ()> { + match try!(self.next()) { + Token::Ident(value) => Ok(value), + Token::QuotedString(value) => Ok(value), + token => self.unexpected(token) + } + } + #[inline] pub fn expect_url(&mut self) -> Result, ()> { match try!(self.next()) { From bd837b1cbe410d1b059e70a107db05813258eb89 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Wed, 7 Jan 2015 16:28:28 +0000 Subject: [PATCH 39/69] Add DeclarationListParser::run and RuleListParser::run. --- src/rules_and_declarations.rs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/rules_and_declarations.rs b/src/rules_and_declarations.rs index 73b33062..36b9e3ec 100644 --- a/src/rules_and_declarations.rs +++ b/src/rules_and_declarations.rs @@ -101,6 +101,13 @@ where P: DeclarationParser + AtRuleParser { parser: parser, } } + + /// Parse until the input is exhausted, and ignore all results. + /// This can be useful when `parser` collects results by mutating itself. + pub fn run(mut self) -> P { + while let Some(_) = self.next() {} + self.parser + } } impl<'i, 't, 'a, AP, I, P> Iterator> @@ -153,6 +160,13 @@ where P: QualifiedRuleParser + AtRuleParser { is_stylesheet: false, } } + + /// Parse until the input is exhausted, and ignore all results. + /// This can be useful when `parser` collects results by mutating itself. 
+ pub fn run(mut self) -> P { + while let Some(_) = self.next() {} + self.parser + } } From f5b224a9e2b9888b1ec27fb7bcd27b6fb72fc42d Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 8 Jan 2015 18:53:59 +0000 Subject: [PATCH 40/69] Add Parser::expect_{ident_matching,delim,function} --- src/parser.rs | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/src/parser.rs b/src/parser.rs index b47033ab..049f6fd1 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -2,6 +2,7 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +use std::ascii::AsciiExt; use std::str::CowString; use super::{Token, NumericValue, Tokenizer, SourcePosition, SourceLocation}; @@ -401,6 +402,15 @@ impl<'i, 't> Parser<'i, 't> { } } + /// Expect an *ident* token whose value is an ASCII-insensitive match for the given value. + #[inline] + pub fn expect_ident_matching<'a>(&mut self, expected_value: &str) -> Result<(), ()> { + match try!(self.next()) { + Token::Ident(ref value) if value.eq_ignore_ascii_case(expected_value) => Ok(()), + token => self.unexpected(token) + } + } + #[inline] pub fn expect_string(&mut self) -> Result, ()> { match try!(self.next()) { @@ -484,12 +494,28 @@ impl<'i, 't> Parser<'i, 't> { } #[inline] - pub fn expect_curly_bracke_block(&mut self) -> Result<(), ()> { + pub fn expect_delim(&mut self, expected_value: char) -> Result<(), ()> { + match try!(self.next()) { + Token::Delim(value) if value == expected_value => Ok(()), + token => self.unexpected(token) + } + } + + #[inline] + pub fn expect_curly_bracket_block(&mut self) -> Result<(), ()> { match try!(self.next()) { Token::CurlyBracketBlock => Ok(()), token => self.unexpected(token) } } + + #[inline] + pub fn expect_function(&mut self) -> Result, ()> { + match try!(self.next()) { + Token::Function(name) => Ok(name), + token => self.unexpected(token) + } + } } From 
c971b14e91071c292a4c76fc3f1da6a7069165be Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 8 Jan 2015 23:55:52 +0000 Subject: [PATCH 41/69] Add Parser::unexpected_ident --- src/parser.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/parser.rs b/src/parser.rs index 049f6fd1..7f553e4e 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -226,6 +226,12 @@ impl<'i, 't> Parser<'i, 't> { Err(()) } + #[inline] + pub fn unexpected_ident(&mut self, value: CowString<'i>) -> Result { + self.push_back(Token::Ident(value)); + Err(()) + } + #[inline] pub fn position(&self) -> SourcePosition { self.tokenizer.position() From 29a6941d9d286feb30b2d8dd8efc648982d15192 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 8 Jan 2015 23:56:07 +0000 Subject: [PATCH 42/69] Rename AtRulePrelude to AtRuleType --- src/lib.rs | 2 +- src/rules_and_declarations.rs | 10 +++++----- src/tests.rs | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 98a4191d..10e90ded 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -17,7 +17,7 @@ pub use tokenizer::{Tokenizer, Token, NumericValue, SourcePosition, SourceLocati pub use rules_and_declarations::{Priority, parse_important}; pub use rules_and_declarations::{DeclarationParser, DeclarationListParser, parse_one_declaration}; pub use rules_and_declarations::{RuleListParser, parse_one_rule}; -pub use rules_and_declarations::{AtRulePrelude, QualifiedRuleParser, AtRuleParser}; +pub use rules_and_declarations::{AtRuleType, QualifiedRuleParser, AtRuleParser}; pub use from_bytes::{decode_stylesheet_bytes, parse_stylesheet_rules_from_bytes}; pub use color::{RGBA, Color, parse_color_keyword}; pub use nth::parse_nth; diff --git a/src/rules_and_declarations.rs b/src/rules_and_declarations.rs index 36b9e3ec..bd92cf33 100644 --- a/src/rules_and_declarations.rs +++ b/src/rules_and_declarations.rs @@ -34,7 +34,7 @@ pub fn parse_important(input: &mut Parser) -> Result { } -pub enum AtRulePrelude { +pub enum 
AtRuleType { WithoutBlock(R), WithBlock(P), OptionalBlock(P), @@ -52,7 +52,7 @@ pub trait DeclarationParser { pub trait AtRuleParser { fn parse_prelude(&mut self, name: &str, input: &mut Parser) - -> Result, ()> { + -> Result, ()> { let _ = name; let _ = input; Err(()) @@ -241,13 +241,13 @@ fn parse_at_rule(name: CowString, input: &mut Parser, parser: &mut P) parser.parse_prelude(name.as_slice(), input) }).or_else(|()| input.err_consume_until_after(delimiters))); match result { - AtRulePrelude::WithoutBlock(rule) => { + AtRuleType::WithoutBlock(rule) => { match input.next() { Ok(Token::Semicolon) | Err(()) => Ok(rule), _ => input.err_consume_until_after(delimiters) } } - AtRulePrelude::WithBlock(prelude) => { + AtRuleType::WithBlock(prelude) => { match input.next() { Ok(Token::CurlyBracketBlock) => { // FIXME: Make parse_entirely take `FnOnce` @@ -260,7 +260,7 @@ fn parse_at_rule(name: CowString, input: &mut Parser, parser: &mut P) _ => input.err_consume_until_after(delimiters) } } - AtRulePrelude::OptionalBlock(prelude) => { + AtRuleType::OptionalBlock(prelude) => { match input.next() { Ok(Token::Semicolon) | Err(()) => parser.rule_without_block(prelude), Ok(Token::CurlyBracketBlock) => { diff --git a/src/tests.rs b/src/tests.rs index b3d25708..1a75ae5d 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -13,7 +13,7 @@ use encoding::label::encoding_from_whatwg_label; use super::{Tokenizer, Parser, Token, NumericValue, SourceLocation, DeclarationListParser, DeclarationParser, RuleListParser, - AtRulePrelude, AtRuleParser, QualifiedRuleParser, Priority, + AtRuleType, AtRuleParser, QualifiedRuleParser, Priority, parse_one_declaration, parse_one_rule, parse_important, parse_stylesheet_rules_from_bytes, Color, RGBA, parse_color_keyword, parse_nth, ToCss}; @@ -466,8 +466,8 @@ impl DeclarationParser for JsonParser { impl AtRuleParser, Json> for JsonParser { fn parse_prelude(&mut self, name: &str, input: &mut Parser) - -> Result, Json>, ()> { - 
Ok(AtRulePrelude::OptionalBlock(vec![ + -> Result, Json>, ()> { + Ok(AtRuleType::OptionalBlock(vec![ "at-rule".to_json(), name.to_json(), Json::Array(component_values_to_json(input)), From 1796776e4aa688469bc5c46999fc57f43bb2d65d Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 13 Jan 2015 19:25:09 +0000 Subject: [PATCH 43/69] Refactor parser.rs to simplify it and probably fix multiple bugs. "Nested" or "delimited" parsers can now only be used in a closure given to the corresponding `Parser` method. This allows doing cleanup after calling the closure, rather than in a `Drop` implementation. This more local reasoning makes things easier to understand and less likely to be buggy. --- src/color.rs | 2 +- src/lib.rs | 2 +- src/parser.rs | 268 +++++++++++++--------------------- src/rules_and_declarations.rs | 51 +++---- src/serializer.rs | 122 +++++++++------- src/tests.rs | 34 ++--- src/tokenizer.rs | 46 +++++- 7 files changed, 249 insertions(+), 276 deletions(-) diff --git a/src/color.rs b/src/color.rs index 51056d45..a310491c 100644 --- a/src/color.rs +++ b/src/color.rs @@ -70,7 +70,7 @@ impl Color { } Token::Ident(ref value) => parse_color_keyword(value.as_slice()), Token::Function(ref name) => { - input.parse_nested_block().parse_entirely(|arguments| { + input.parse_nested_block(|arguments| { parse_color_function(name.as_slice(), arguments) }) } diff --git a/src/lib.rs b/src/lib.rs index 10e90ded..6e3da2ee 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -21,7 +21,7 @@ pub use rules_and_declarations::{AtRuleType, QualifiedRuleParser, AtRuleParser}; pub use from_bytes::{decode_stylesheet_bytes, parse_stylesheet_rules_from_bytes}; pub use color::{RGBA, Color, parse_color_keyword}; pub use nth::parse_nth; -pub use serializer::{ToCss, CssStringWriter, serialize_identifier, serialize_string}; +pub use serializer::{ToCss, CssStringWriter, serialize_identifier, serialize_string, TokenWriter}; pub use parser::{Parser, Delimiter, Delimiters}; diff --git a/src/parser.rs
b/src/parser.rs index 7f553e4e..a849f376 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -9,18 +9,12 @@ use super::{Token, NumericValue, Tokenizer, SourcePosition, SourceLocation}; pub struct Parser<'i: 't, 't> { tokenizer: &'t mut Tokenizer<'i>, - parent_state: Option<&'t mut ParserState>, - state: ParserState, -} - -struct ParserState { - nested_blocks: NestedBlockList, - /// For block/function parsers that need to stop at the matching `)`, `]`, or `}` + /// If `Some(_)`, .parse_nested_block() can be called. at_start_of: Option, - /// For block/function parsers that need to stop at the matching `)`, `]`, or `}` - stop_at_end_of: Option, + /// If `Some(_)`, this parser is from .parse_nested_block() + parse_until_after_end_of: Option, /// For parsers from `parse_until` - stop_before: Delimiters, + parse_until_before: Delimiters, exhausted: bool, } @@ -55,65 +49,6 @@ impl BlockType { } -type NestedBlockList = Option>; - -struct NestedBlockListItem { - next: NestedBlockList, - block_type: BlockType, -} - -trait NestedBlockListExt { - fn push(&mut self, block_type: BlockType); - fn consume(self, tokenizer: &mut Tokenizer) -> bool; -} - -impl NestedBlockListExt for NestedBlockList { - fn push(&mut self, block_type: BlockType) { - debug_assert!(self.is_none()); - *self = Some(box NestedBlockListItem { - next: self.take(), - block_type: block_type, - }) - } - - /// Return value indicates whether the end of the input was reached. - fn consume(self, tokenizer: &mut Tokenizer) -> bool { - if let Some(box NestedBlockListItem { block_type, next }) = self { - // Recursion first: the inner-most item of the list - // is for the inner-most nested block. - next.consume(tokenizer) || consume_until_end_of_block(block_type, tokenizer) - } else { - false - } - } -} - - -#[unsafe_destructor] // FIXME What does this mean? 
-impl<'i, 't> Drop for Parser<'i, 't> { - fn drop(&mut self) { - if self.state.exhausted { - // We’ve already reached the end of our delimited input: - // nothing to inform the parent of. - debug_assert!(self.state.nested_blocks.is_none()); - return - } - if let Some(parent_state) = self.parent_state.take() { - // Inform our parent parser of what they need to consume. - debug_assert!(parent_state.nested_blocks.is_none()); - parent_state.nested_blocks = self.state.nested_blocks.take(); - if let Some(block_type) = self.state.at_start_of { - parent_state.nested_blocks.push(block_type) - } - // Don’t propagate stop_at_end_of back for delimited parsers: - if self.state.stop_before.is_none() { - debug_assert!(parent_state.at_start_of.is_none()); - parent_state.at_start_of = self.state.stop_at_end_of; - } - } - } -} - #[deriving(Copy, PartialEq, Eq, Show)] pub struct Delimiters { @@ -166,14 +101,10 @@ impl<'i, 't> Parser<'i, 't> { pub fn new(tokenizer: &'t mut Tokenizer<'i>) -> Parser<'i, 't> { Parser { tokenizer: tokenizer, - parent_state: None, - state: ParserState { - nested_blocks: None, - at_start_of: None, - stop_at_end_of: None, - stop_before: Delimiter::None, - exhausted: false, - }, + at_start_of: None, + parse_until_after_end_of: None, + parse_until_before: Delimiter::None, + exhausted: false, } } @@ -184,15 +115,17 @@ impl<'i, 't> Parser<'i, 't> { #[inline] pub fn is_exhausted(&mut self) -> bool { - self.peek().is_err() + self.expect_exhausted().is_ok() } #[inline] pub fn expect_exhausted(&mut self) -> Result<(), ()> { - if self.is_exhausted() { - Ok(()) - } else { - Err(()) + match self.next() { + Err(()) => Ok(()), + Ok(token) => { + self.push_back(token); + Err(()) + } } } @@ -201,14 +134,13 @@ impl<'i, 't> Parser<'i, 't> { // Consume whatever needs to be consumed (e.g. open blocks). 
let token = try!(self.next()); self.push_back(token); - self.tokenizer.peek() } #[inline] pub fn push_back(&mut self, token: Token<'i>) { - if BlockType::opening(&token) == self.state.at_start_of { - self.state.at_start_of = None; + if BlockType::opening(&token) == self.at_start_of { + self.at_start_of = None; } self.tokenizer.push_back(token) } @@ -263,78 +195,42 @@ impl<'i, 't> Parser<'i, 't> { } pub fn next_including_whitespace(&mut self) -> Result, ()> { - if self.state.exhausted { + if self.exhausted { return Err(()) } - if self.state.nested_blocks.take().consume(self.tokenizer) { - self.state.exhausted = true; - return Err(()) - } - if let Some(block_type) = self.state.at_start_of.take() { + if let Some(block_type) = self.at_start_of.take() { if consume_until_end_of_block(block_type, self.tokenizer) { - self.state.exhausted = true; + self.exhausted = true; return Err(()) } } match self.tokenizer.next() { Err(()) => { - self.state.exhausted = true; + self.exhausted = true; Err(()) }, Ok(token) => { - if self.state.stop_before.contains(Delimiters::from_token(&token)) { + if self.parse_until_before.contains(Delimiters::from_token(&token)) { self.tokenizer.push_back(token); - self.state.exhausted = true; + self.exhausted = true; return Err(()) } - if self.state.stop_at_end_of.is_some() && - BlockType::closing(&token) == self.state.stop_at_end_of { - self.state.exhausted = true; + if self.parse_until_after_end_of.is_some() && + BlockType::closing(&token) == self.parse_until_after_end_of { + if !self.parse_until_before.is_none() { + self.tokenizer.push_back(token); + } + self.exhausted = true; return Err(()) } if let Some(block_type) = BlockType::opening(&token) { - self.state.at_start_of = Some(block_type); + self.at_start_of = Some(block_type); } Ok(token) } } } - #[inline] - pub fn err_consume_until_after(&mut self, stop_after: Delimiters) -> Result { - self.consume_until_after(stop_after); - Err(()) - } - - pub fn consume_until_after(&mut self, stop_after: 
Delimiters) { - if self.state.exhausted { - return - } - // FIXME: have a special-purpose tokenizer method for this that does less work. - while let Ok(token) = self.tokenizer.next() { - if stop_after.contains(Delimiters::from_token(&token)) { - return - } - if self.state.stop_before.contains(Delimiters::from_token(&token)) { - self.tokenizer.push_back(token); - self.state.exhausted = true; - return - } - if self.state.stop_at_end_of.is_some() && - BlockType::closing(&token) == self.state.stop_at_end_of { - self.state.exhausted = true; - return - } - if let Some(block_type) = BlockType::opening(&token) { - if consume_until_end_of_block(block_type, self.tokenizer) { - self.state.exhausted = true; - return - } - } - } - self.state.exhausted = true; - } - // FIXME: Take an unboxed `FnOnce` closure. #[inline] pub fn parse_entirely(&mut self, parse: |&mut Parser| -> Result) @@ -347,57 +243,89 @@ impl<'i, 't> Parser<'i, 't> { #[inline] pub fn parse_comma_separated(&mut self, parse_one: |&mut Parser| -> Result) -> Result, ()> { - let mut values = vec![try!(self.parse_until_before(Delimiter::Comma) - .parse_entirely(|parser| parse_one(parser)))]; + let mut values = vec![]; loop { + values.push(try!(self.parse_until_before(Delimiter::Comma, |parser| parse_one(parser)))); match self.next() { - Ok(Token::Comma) => { - values.push(try!(self.parse_until_before(Delimiter::Comma) - .parse_entirely(|parser| parse_one(parser)))) - } - Ok(token) => return self.unexpected(token), Err(()) => return Ok(values), + Ok(Token::Comma) => continue, + Ok(_) => unreachable!(), + } + } + } + + #[inline] + pub fn parse_nested_block(&mut self, parse: |&mut Parser| -> Result) + -> Result { + let block_type = self.at_start_of.take().expect("\ + A nested parser can only be created when a Function, \ + ParenthesisBlock, SquareBracketBlock, or CurlyBracketBlock \ + token was just consumed.\ + "); + debug_assert!(!self.exhausted); + let (result, nested_parser_is_exhausted) = { + let mut nested_parser = 
Parser { + tokenizer: self.tokenizer, + at_start_of: None, + parse_until_after_end_of: Some(block_type), + parse_until_before: Delimiter::None, + exhausted: false, + }; + (nested_parser.parse_entirely(parse), nested_parser.exhausted) + }; + if !nested_parser_is_exhausted { + if consume_until_end_of_block(block_type, self.tokenizer) { + self.exhausted = true; } } + result } #[inline] - pub fn parse_nested_block<'a>(&'a mut self) -> Parser<'i, 'a> { - if let Some(block_type) = self.state.at_start_of.take() { - debug_assert!(!self.state.exhausted); - Parser { + pub fn parse_until_before(&mut self, delimiters: Delimiters, + parse: |&mut Parser| -> Result) + -> Result { + let (result, delimited_parser_is_exhausted) = { + let mut delimited_parser = Parser { tokenizer: self.tokenizer, - state: ParserState { - nested_blocks: self.state.nested_blocks.take(), - at_start_of: None, - stop_at_end_of: Some(block_type), - stop_before: Delimiter::None, - exhausted: false, - }, - parent_state: Some(&mut self.state), + at_start_of: self.at_start_of.take(), + parse_until_after_end_of: self.parse_until_after_end_of, + parse_until_before: self.parse_until_before | delimiters, + exhausted: self.exhausted, + }; + (delimited_parser.parse_entirely(parse), delimited_parser.exhausted) + }; + if !delimited_parser_is_exhausted { + // FIXME: have a special-purpose tokenizer method for this that does less work. 
+ while let Ok(token) = self.tokenizer.next() { + if delimiters.contains(Delimiters::from_token(&token)) || ( + self.parse_until_after_end_of.is_some() && + BlockType::closing(&token) == self.parse_until_after_end_of + ) { + self.tokenizer.push_back(token); + break + } + if let Some(block_type) = BlockType::opening(&token) { + if consume_until_end_of_block(block_type, self.tokenizer) { + self.exhausted = true; + break + } + } } - } else { - panic!("\ - parse_block can only be called when a Function, \ - ParenthesisBlock, SquareBracketBlock, or CurlyBracketBlock \ - token was just consumed.\ - "); } + result } #[inline] - pub fn parse_until_before<'a>(&'a mut self, stop_before: Delimiters) -> Parser<'i, 'a> { - Parser { - tokenizer: self.tokenizer, - state: ParserState { - nested_blocks: self.state.nested_blocks.take(), - at_start_of: self.state.at_start_of.take(), - stop_at_end_of: self.state.stop_at_end_of, - stop_before: self.state.stop_before | stop_before, - exhausted: self.state.exhausted, - }, - parent_state: Some(&mut self.state), + pub fn parse_until_after(&mut self, delimiters: Delimiters, + parse: |&mut Parser| -> Result) + -> Result { + let result = self.parse_until_before(delimiters, parse); + // Expect exhausted input or a relevant delimiter (which we consume): + if let Ok(token) = self.next() { + debug_assert!(delimiters.contains(Delimiters::from_token(&token))); } + result } #[inline] diff --git a/src/rules_and_declarations.rs b/src/rules_and_declarations.rs index bd92cf33..6e50b61e 100644 --- a/src/rules_and_declarations.rs +++ b/src/rules_and_declarations.rs @@ -118,13 +118,17 @@ where P: DeclarationParser + AtRuleParser { match self.input.next() { Ok(Token::Semicolon) => {} Ok(Token::Ident(name)) => { - return Some(parse_declaration(name, self.input, &mut self.parser)) + let parser = &mut self.parser; + return Some(self.input.parse_until_after(Delimiter::Semicolon, |input| { + try!(input.expect_colon()); + parser.parse_value(name.as_slice(), 
input) + })) } Ok(Token::AtKeyword(name)) => { return Some(parse_at_rule(name, self.input, &mut self.parser)) } Ok(_) => { - return Some(self.input.err_consume_until_after(Delimiter::Semicolon)) + return Some(self.input.parse_until_after(Delimiter::Semicolon, |_| Err(()))) } Err(()) => return None, } @@ -219,32 +223,19 @@ pub fn parse_one_rule(input: &mut Parser, parser: &mut P) } -fn parse_declaration(name: CowString, input: &mut Parser, parser: &mut P) - -> Result - where P: DeclarationParser { - let result = input.parse_until_before(Delimiter::Semicolon).parse_entirely(|input| { - try!(input.expect_colon()); - parser.parse_value(name.as_slice(), input) - }); - match input.next() { - Ok(Token::Semicolon) | Err(()) => result, - _ => input.err_consume_until_after(Delimiter::Semicolon) - } -} - - fn parse_at_rule(name: CowString, input: &mut Parser, parser: &mut P) -> Result where P: AtRuleParser { let delimiters = Delimiter::Semicolon | Delimiter::CurlyBracketBlock; - let result = try!(input.parse_until_before(delimiters).parse_entirely(|input| { + let result = try!(input.parse_until_before(delimiters, |input| { parser.parse_prelude(name.as_slice(), input) - }).or_else(|()| input.err_consume_until_after(delimiters))); + })); match result { AtRuleType::WithoutBlock(rule) => { match input.next() { Ok(Token::Semicolon) | Err(()) => Ok(rule), - _ => input.err_consume_until_after(delimiters) + Ok(Token::CurlyBracketBlock) => Err(()), + Ok(_) => unreachable!() } } AtRuleType::WithBlock(prelude) => { @@ -253,11 +244,12 @@ fn parse_at_rule(name: CowString, input: &mut Parser, parser: &mut P) // FIXME: Make parse_entirely take `FnOnce` // and remove this Option dance. 
let mut prelude = Some(prelude); - input.parse_nested_block().parse_entirely(|input| { + input.parse_nested_block(|input| { parser.parse_block(prelude.take().unwrap(), input) }) } - _ => input.err_consume_until_after(delimiters) + Ok(Token::Semicolon) | Err(()) => Err(()), + Ok(_) => unreachable!() } } AtRuleType::OptionalBlock(prelude) => { @@ -267,11 +259,11 @@ fn parse_at_rule(name: CowString, input: &mut Parser, parser: &mut P) // FIXME: Make parse_entirely take `FnOnce` // and remove this Option dance. let mut prelude = Some(prelude); - input.parse_nested_block().parse_entirely(|input| { + input.parse_nested_block(|input| { parser.parse_block(prelude.take().unwrap(), input) }) } - _ => input.err_consume_until_after(delimiters) + _ => unreachable!() } } } @@ -281,19 +273,18 @@ fn parse_at_rule(name: CowString, input: &mut Parser, parser: &mut P) fn parse_qualified_rule(input: &mut Parser, parser: &mut P) -> Result where P: QualifiedRuleParser { - let prelude = try!(input.parse_until_before(Delimiter::CurlyBracketBlock) - .parse_entirely(|input| { + let prelude = try!(input.parse_until_before(Delimiter::CurlyBracketBlock, |input| { parser.parse_prelude(input) - }).or_else(|()| input.err_consume_until_after(Delimiter::CurlyBracketBlock))); - match input.next() { - Ok(Token::CurlyBracketBlock) => { + })); + match try!(input.next()) { + Token::CurlyBracketBlock => { // FIXME: Make parse_entirely take `FnOnce` // and remove this Option dance. let mut prelude = Some(prelude); - input.parse_nested_block().parse_entirely(|input| { + input.parse_nested_block(|input| { parser.parse_block(prelude.take().unwrap(), input) }) } - _ => input.err_consume_until_after(Delimiter::CurlyBracketBlock) + _ => unreachable!() } } diff --git a/src/serializer.rs b/src/serializer.rs index f7fc5c3a..a1784ebc 100644 --- a/src/serializer.rs +++ b/src/serializer.rs @@ -3,6 +3,7 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ use std::fmt; +use std::mem; use std::num::Float; use text_writer::{mod, TextWriter}; @@ -237,59 +238,76 @@ impl<'a, W> TextWriter for CssStringWriter<'a, W> where W: TextWriter { impl<'a> ToCss for [Token<'a>] { fn to_css(&self, dest: &mut W) -> text_writer::Result where W: TextWriter { - use Token::*; + let mut writer = TokenWriter::new(dest); + for token in self.iter() { + try!(writer.write(token)) + } + Ok(()) + } +} - let mut iter = self.iter(); - let mut previous = match iter.next() { - None => return Ok(()), - Some(first) => { try!(first.to_css(dest)); first } - }; - while let Some(component_value) = iter.next() { - let (a, b) = (previous, component_value); - if ( - matches!(*a, Ident(..) | AtKeyword(..) | Hash(..) | IDHash(..) | - Dimension(..) | Delim('#') | Delim('-') | Number(..)) && - matches!(*b, Ident(..) | Function(..) | Url(..) | BadUrl(..) | - Number(..) | Percentage(..) | Dimension(..) | UnicodeRange(..)) - ) || ( - matches!(*a, Ident(..)) && - matches!(*b, ParenthesisBlock(..)) - ) || ( - matches!(*a, Ident(..) | AtKeyword(..) | Hash(..) | IDHash(..) | Dimension(..)) && - matches!(*b, Delim('-') | CDC) - ) || ( - matches!(*a, Delim('#') | Delim('-') | Number(..) | Delim('@')) && - matches!(*b, Ident(..) | Function(..) | Url(..) | BadUrl(..)) - ) || ( - matches!(*a, Delim('@')) && - matches!(*b, Ident(..) | Function(..) | Url(..) | BadUrl(..) | - UnicodeRange(..) | Delim('-')) - ) || ( - matches!(*a, UnicodeRange(..) | Delim('.') | Delim('+')) && - matches!(*b, Number(..) | Percentage(..) | Dimension(..)) - ) || ( - matches!(*a, UnicodeRange(..)) && - matches!(*b, Ident(..) | Function(..) | Delim('?')) - ) || matches!((a, b), (&Delim(a), &Delim(b)) if matches!((a, b), - ('#', '-') | - ('$', '=') | - ('*', '=') | - ('^', '=') | - ('~', '=') | - ('|', '=') | - ('|', '|') | - ('/', '*') - )) { - try!(dest.write_str("/**/")); - } - // Skip whitespace when '\n' was previously written at the previous iteration. 
- if !matches!((previous, component_value), (&Delim('\\'), &WhiteSpace)) { - try!(component_value.to_css(dest)); - } - if component_value == &Delim('\\') { - try!(dest.write_char('\n')); - } - previous = component_value; + +pub struct TokenWriter<'i, 'a, W: 'a> { + dest: &'a mut W, + previous_token: Option>, +} + +impl<'i, 'a, W> TokenWriter<'i, 'a, W> where W: TextWriter { + pub fn new<'a>(dest: &'a mut W) -> TokenWriter<'i, 'a, W> { + TokenWriter { + dest: dest, + previous_token: None, + } + } + + pub fn write(&mut self, token: &Token<'i>) -> text_writer::Result { + use Token::*; + let previous = &mem::replace(&mut self.previous_token, Some((*token).clone())) + // A "not special" token: + .unwrap_or(Colon); + let (a, b) = (previous, token); + if ( + matches!(*a, Ident(..) | AtKeyword(..) | Hash(..) | IDHash(..) | + Dimension(..) | Delim('#') | Delim('-') | Number(..)) && + matches!(*b, Ident(..) | Function(..) | Url(..) | BadUrl(..) | + Number(..) | Percentage(..) | Dimension(..) | UnicodeRange(..)) + ) || ( + matches!(*a, Ident(..)) && + matches!(*b, ParenthesisBlock(..)) + ) || ( + matches!(*a, Ident(..) | AtKeyword(..) | Hash(..) | IDHash(..) | Dimension(..)) && + matches!(*b, Delim('-') | CDC) + ) || ( + matches!(*a, Delim('#') | Delim('-') | Number(..) | Delim('@')) && + matches!(*b, Ident(..) | Function(..) | Url(..) | BadUrl(..)) + ) || ( + matches!(*a, Delim('@')) && + matches!(*b, Ident(..) | Function(..) | Url(..) | BadUrl(..) | + UnicodeRange(..) | Delim('-')) + ) || ( + matches!(*a, UnicodeRange(..) | Delim('.') | Delim('+')) && + matches!(*b, Number(..) | Percentage(..) | Dimension(..)) + ) || ( + matches!(*a, UnicodeRange(..)) && + matches!(*b, Ident(..) | Function(..) 
| Delim('?')) + ) || matches!((a, b), (&Delim(a), &Delim(b)) if matches!((a, b), + ('#', '-') | + ('$', '=') | + ('*', '=') | + ('^', '=') | + ('~', '=') | + ('|', '=') | + ('|', '|') | + ('/', '*') + )) { + try!(self.dest.write_str("/**/")); + } + // Skip whitespace when '\n' was previously written at the previous iteration. + if !matches!((previous, token), (&Delim('\\'), &WhiteSpace)) { + try!(token.to_css(self.dest)); + } + if token == &Delim('\\') { + try!(self.dest.write_char('\n')); } Ok(()) } diff --git a/src/tests.rs b/src/tests.rs index 1a75ae5d..3cadcfc2 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -340,7 +340,7 @@ fn nth() { #[test] fn serializer() { run_json_tests(include_str!("css-parsing-tests/component_value_list.json"), |input| { - fn flatten<'i, 't>(input: &mut Parser<'i, 't>, tokens: &mut Vec>) { + fn flatten(input: &mut Parser, tokens: &mut Vec>) { while let Ok(token) = input.next_including_whitespace() { let closing_token = match token { Token::Function(_) | Token::ParenthesisBlock => Some(Token::CloseParenthesis), @@ -348,9 +348,12 @@ fn serializer() { Token::CurlyBracketBlock => Some(Token::CloseCurlyBracket), _ => None }; - tokens.push(token); + tokens.push(token.into_owned()); if let Some(closing_token) = closing_token { - flatten(&mut input.parse_nested_block(), tokens); + input.parse_nested_block(|input| { + flatten(input, tokens); + Ok(()) + }).unwrap(); tokens.push(closing_token); } } @@ -517,6 +520,10 @@ fn one_component_value_to_json(token: Token, input: &mut Parser) -> Json { ] } + fn nested(input: &mut Parser) -> Vec { + input.parse_nested_block(|input| Ok(component_values_to_json(input))).unwrap() + } + match token { Token::Ident(value) => JArray!["ident", value], Token::AtKeyword(value) => JArray!["at-keyword", value], @@ -548,22 +555,11 @@ fn one_component_value_to_json(token: Token, input: &mut Parser) -> Json { Token::CDO => "".to_json(), - Token::Function(name) => { - Json::Array(vec!["function".to_json(), name.to_json()] + 
- component_values_to_json(&mut input.parse_nested_block())) - } - Token::ParenthesisBlock => { - Json::Array(vec!["()".to_json()] + - component_values_to_json(&mut input.parse_nested_block())) - } - Token::SquareBracketBlock => { - Json::Array(vec!["[]".to_json()] + - component_values_to_json(&mut input.parse_nested_block())) - } - Token::CurlyBracketBlock => { - Json::Array(vec!["{}".to_json()] + - component_values_to_json(&mut input.parse_nested_block())) - } + Token::Function(name) => Json::Array(vec!["function".to_json(), name.to_json()] + + nested(input)), + Token::ParenthesisBlock => Json::Array(vec!["()".to_json()] + nested(input)), + Token::SquareBracketBlock => Json::Array(vec!["[]".to_json()] + nested(input)), + Token::CurlyBracketBlock => Json::Array(vec!["{}".to_json()] + nested(input)), Token::BadUrl => JArray!["error", "bad-url"], Token::BadString => JArray!["error", "bad-string"], Token::CloseParenthesis => JArray!["error", ")"], diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 875700e6..ced8a4ee 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -6,14 +6,14 @@ use std::{char, num}; use std::ascii::AsciiExt; -use std::borrow::ToOwned; +use std::borrow::{Cow, ToOwned}; use std::str::CowString; use std::borrow::Cow::{Owned, Borrowed}; use self::Token::*; -#[deriving(PartialEq, Show)] +#[deriving(PartialEq, Show, Clone)] pub enum Token<'a> { // Preserved tokens. 
Ident(CowString<'a>), @@ -57,7 +57,47 @@ pub enum Token<'a> { } -#[deriving(PartialEq, Show, Copy)] +impl<'a> Token<'a> { + pub fn into_owned(self) -> Token<'static> { + match self { + Token::Ident(value) => Token::Ident(Cow::Owned(value.into_owned())), + Token::AtKeyword(value) => Token::AtKeyword(Cow::Owned(value.into_owned())), + Token::Hash(value) => Token::Hash(Cow::Owned(value.into_owned())), + Token::IDHash(value) => Token::IDHash(Cow::Owned(value.into_owned())), + Token::QuotedString(value) => Token::QuotedString(Cow::Owned(value.into_owned())), + Token::Url(value) => Token::Url(Cow::Owned(value.into_owned())), + Token::Delim(ch) => Token::Delim(ch), + Token::Number(value) => Token::Number(value), + Token::Percentage(value) => Token::Percentage(value), + Token::Dimension(value, unit) => Token::Dimension(value, Cow::Owned(unit.into_owned())), + Token::UnicodeRange(start, end) => Token::UnicodeRange(start, end), + Token::WhiteSpace => Token::WhiteSpace, + Token::Colon => Token::Colon, + Token::Semicolon => Token::Semicolon, + Token::Comma => Token::Comma, + Token::IncludeMatch => Token::IncludeMatch, + Token::DashMatch => Token::DashMatch, + Token::PrefixMatch => Token::PrefixMatch, + Token::SuffixMatch => Token::SuffixMatch, + Token::SubstringMatch => Token::SubstringMatch, + Token::Column => Token::Column, + Token::CDO => Token::CDO, + Token::CDC => Token::CDC, + Token::Function(name) => Token::Function(Cow::Owned(name.into_owned())), + Token::ParenthesisBlock => Token::ParenthesisBlock, + Token::SquareBracketBlock => Token::SquareBracketBlock, + Token::CurlyBracketBlock => Token::CurlyBracketBlock, + Token::BadUrl => Token::BadUrl, + Token::BadString => Token::BadString, + Token::CloseParenthesis => Token::CloseParenthesis, + Token::CloseSquareBracket => Token::CloseSquareBracket, + Token::CloseCurlyBracket => Token::CloseCurlyBracket, + } + } +} + + +#[deriving(PartialEq, Show, Copy, Clone)] pub struct NumericValue { pub value: f64, pub int_value: Option, 
From cdcb0b4a35a5b8cfba6347f056ce9e0664f1168a Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 12 Jan 2015 18:17:35 +0000 Subject: [PATCH 44/69] Make Parser::new take a &str instead of &mut Tokenizer, and remove Parser::parse_str. --- src/from_bytes.rs | 5 ++--- src/lib.rs | 2 +- src/parser.rs | 52 +++++++++++++++++++++++++++++++++------------- src/tests.rs | 53 +++++++++++++++++++++++------------------------ 4 files changed, 67 insertions(+), 45 deletions(-) diff --git a/src/from_bytes.rs b/src/from_bytes.rs index 9e474bef..5dd68fa1 100644 --- a/src/from_bytes.rs +++ b/src/from_bytes.rs @@ -91,7 +91,6 @@ pub fn parse_stylesheet_rules_from_bytes( css_bytes, protocol_encoding_label, environment_encoding); // FIXME: Remove option dance when unboxed closures permit. let mut rules_parser = Some(rules_parser); - Parser::parse_str(css_unicode.as_slice(), |input| { - parse(encoding, RuleListParser::new_for_stylesheet(input, rules_parser.take().unwrap())) - }) + let input = &mut Parser::new(css_unicode.as_slice()); + parse(encoding, RuleListParser::new_for_stylesheet(input, rules_parser.take().unwrap())) } diff --git a/src/lib.rs b/src/lib.rs index 6e3da2ee..0e4b6548 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,7 +13,7 @@ extern crate text_writer; #[cfg(test)] extern crate test; #[cfg(test)] extern crate serialize; -pub use tokenizer::{Tokenizer, Token, NumericValue, SourcePosition, SourceLocation}; +pub use tokenizer::{Token, NumericValue, SourcePosition, SourceLocation}; pub use rules_and_declarations::{Priority, parse_important}; pub use rules_and_declarations::{DeclarationParser, DeclarationListParser, parse_one_declaration}; pub use rules_and_declarations::{RuleListParser, parse_one_rule}; diff --git a/src/parser.rs b/src/parser.rs index a849f376..7c545fc9 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -4,11 +4,40 @@ use std::ascii::AsciiExt; use std::str::CowString; -use super::{Token, NumericValue, Tokenizer, SourcePosition, SourceLocation}; +use 
std::ops; +use tokenizer::{Token, NumericValue, Tokenizer, SourcePosition, SourceLocation}; + + +/// Like std::borrow::Cow, except: +/// +/// * The Owned variant is boxed +/// * The Borrowed variant contains a mutable reference. +enum MaybeOwned<'a, T: 'a> { + Owned(Box), + Borrowed(&'a mut T), +} + +impl<'a, T> ops::Deref for MaybeOwned<'a, T> { + fn deref<'a>(&'a self) -> &'a T { + match *self { + MaybeOwned::Owned(ref pointer) => &**pointer, + MaybeOwned::Borrowed(ref pointer) => &**pointer, + } + } +} + +impl<'a, T> ops::DerefMut for MaybeOwned<'a, T> { + fn deref_mut<'a>(&'a mut self) -> &'a mut T { + match *self { + MaybeOwned::Owned(ref mut pointer) => &mut **pointer, + MaybeOwned::Borrowed(ref mut pointer) => &mut **pointer, + } + } +} pub struct Parser<'i: 't, 't> { - tokenizer: &'t mut Tokenizer<'i>, + tokenizer: MaybeOwned<'t, Tokenizer<'i>>, /// If `Some(_)`, .parse_nested_block() can be called. at_start_of: Option, /// If `Some(_)`, this parser is from .parse_nested_block() @@ -98,9 +127,9 @@ impl Delimiters { impl<'i, 't> Parser<'i, 't> { #[inline] - pub fn new(tokenizer: &'t mut Tokenizer<'i>) -> Parser<'i, 't> { + pub fn new(input: &'i str) -> Parser<'i, 'i> { Parser { - tokenizer: tokenizer, + tokenizer: MaybeOwned::Owned(box Tokenizer::new(input)), at_start_of: None, parse_until_after_end_of: None, parse_until_before: Delimiter::None, @@ -108,11 +137,6 @@ impl<'i, 't> Parser<'i, 't> { } } - #[inline] - pub fn parse_str(input: &str, parse: |&mut Parser| -> T) -> T { - parse(&mut Parser::new(&mut Tokenizer::new(input.as_slice()))) - } - #[inline] pub fn is_exhausted(&mut self) -> bool { self.expect_exhausted().is_ok() @@ -199,7 +223,7 @@ impl<'i, 't> Parser<'i, 't> { return Err(()) } if let Some(block_type) = self.at_start_of.take() { - if consume_until_end_of_block(block_type, self.tokenizer) { + if consume_until_end_of_block(block_type, &mut *self.tokenizer) { self.exhausted = true; return Err(()) } @@ -265,7 +289,7 @@ impl<'i, 't> Parser<'i, 't> 
{ debug_assert!(!self.exhausted); let (result, nested_parser_is_exhausted) = { let mut nested_parser = Parser { - tokenizer: self.tokenizer, + tokenizer: MaybeOwned::Borrowed(&mut *self.tokenizer), at_start_of: None, parse_until_after_end_of: Some(block_type), parse_until_before: Delimiter::None, @@ -274,7 +298,7 @@ impl<'i, 't> Parser<'i, 't> { (nested_parser.parse_entirely(parse), nested_parser.exhausted) }; if !nested_parser_is_exhausted { - if consume_until_end_of_block(block_type, self.tokenizer) { + if consume_until_end_of_block(block_type, &mut *self.tokenizer) { self.exhausted = true; } } @@ -287,7 +311,7 @@ impl<'i, 't> Parser<'i, 't> { -> Result { let (result, delimited_parser_is_exhausted) = { let mut delimited_parser = Parser { - tokenizer: self.tokenizer, + tokenizer: MaybeOwned::Borrowed(&mut *self.tokenizer), at_start_of: self.at_start_of.take(), parse_until_after_end_of: self.parse_until_after_end_of, parse_until_before: self.parse_until_before | delimiters, @@ -306,7 +330,7 @@ impl<'i, 't> Parser<'i, 't> { break } if let Some(block_type) = BlockType::opening(&token) { - if consume_until_end_of_block(block_type, self.tokenizer) { + if consume_until_end_of_block(block_type, &mut *self.tokenizer) { self.exhausted = true; break } diff --git a/src/tests.rs b/src/tests.rs index 3cadcfc2..d6735b9b 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -11,7 +11,7 @@ use test; use encoding::label::encoding_from_whatwg_label; -use super::{Tokenizer, Parser, Token, NumericValue, SourceLocation, +use super::{Parser, Token, NumericValue, SourceLocation, DeclarationListParser, DeclarationParser, RuleListParser, AtRuleType, AtRuleParser, QualifiedRuleParser, Priority, parse_one_declaration, parse_one_rule, parse_important, @@ -155,8 +155,7 @@ fn run_json_tests(json_data: &str, parse: |input: &mut Parser| -> Json) { run_raw_json_tests(json_data, |input, expected| { match input { Json::String(input) => { - // FIXME: Use Parser::parse_str when unboxed closures permit. 
- let result = parse(&mut Parser::new(&mut Tokenizer::new(input.as_slice()))); + let result = parse(&mut Parser::new(input.as_slice())); assert_json_eq(result, expected, input); }, _ => panic!("Unexpected JSON") @@ -361,7 +360,8 @@ fn serializer() { let mut tokens = vec![]; flatten(input, &mut tokens); let serialized = tokens.to_css_string(); - Json::Array(Parser::parse_str(serialized.as_slice(), component_values_to_json)) + let parser = &mut Parser::new(serialized.as_slice()); + Json::Array(component_values_to_json(parser)) }); } @@ -388,29 +388,28 @@ fn serialize_rgba() { #[test] fn line_numbers() { - Parser::parse_str("foo bar\nbaz\r\n\n\"a\\\r\nb\"", |input| { - assert_eq!(input.current_source_location(), SourceLocation { line: 1, column: 1 }); - assert_eq!(input.next_including_whitespace(), Ok(Token::Ident(Borrowed("foo")))); - assert_eq!(input.current_source_location(), SourceLocation { line: 1, column: 4 }); - assert_eq!(input.next_including_whitespace(), Ok(Token::WhiteSpace)); - assert_eq!(input.current_source_location(), SourceLocation { line: 1, column: 5 }); - assert_eq!(input.next_including_whitespace(), Ok(Token::Ident(Borrowed("bar")))); - assert_eq!(input.current_source_location(), SourceLocation { line: 1, column: 8 }); - assert_eq!(input.next_including_whitespace(), Ok(Token::WhiteSpace)); - assert_eq!(input.current_source_location(), SourceLocation { line: 2, column: 1 }); - assert_eq!(input.next_including_whitespace(), Ok(Token::Ident(Borrowed("baz")))); - assert_eq!(input.current_source_location(), SourceLocation { line: 2, column: 4 }); - let position = input.position(); - - assert_eq!(input.next_including_whitespace(), Ok(Token::WhiteSpace)); - assert_eq!(input.current_source_location(), SourceLocation { line: 4, column: 1 }); - - assert_eq!(input.source_location(position), SourceLocation { line: 2, column: 4 }); - - assert_eq!(input.next_including_whitespace(), Ok(Token::QuotedString(Borrowed("ab")))); - 
assert_eq!(input.current_source_location(), SourceLocation { line: 5, column: 3 }); - assert_eq!(input.next_including_whitespace(), Err(())); - }) + let mut input = Parser::new("foo bar\nbaz\r\n\n\"a\\\r\nb\""); + assert_eq!(input.current_source_location(), SourceLocation { line: 1, column: 1 }); + assert_eq!(input.next_including_whitespace(), Ok(Token::Ident(Borrowed("foo")))); + assert_eq!(input.current_source_location(), SourceLocation { line: 1, column: 4 }); + assert_eq!(input.next_including_whitespace(), Ok(Token::WhiteSpace)); + assert_eq!(input.current_source_location(), SourceLocation { line: 1, column: 5 }); + assert_eq!(input.next_including_whitespace(), Ok(Token::Ident(Borrowed("bar")))); + assert_eq!(input.current_source_location(), SourceLocation { line: 1, column: 8 }); + assert_eq!(input.next_including_whitespace(), Ok(Token::WhiteSpace)); + assert_eq!(input.current_source_location(), SourceLocation { line: 2, column: 1 }); + assert_eq!(input.next_including_whitespace(), Ok(Token::Ident(Borrowed("baz")))); + assert_eq!(input.current_source_location(), SourceLocation { line: 2, column: 4 }); + let position = input.position(); + + assert_eq!(input.next_including_whitespace(), Ok(Token::WhiteSpace)); + assert_eq!(input.current_source_location(), SourceLocation { line: 4, column: 1 }); + + assert_eq!(input.source_location(position), SourceLocation { line: 2, column: 4 }); + + assert_eq!(input.next_including_whitespace(), Ok(Token::QuotedString(Borrowed("ab")))); + assert_eq!(input.current_source_location(), SourceLocation { line: 5, column: 3 }); + assert_eq!(input.next_including_whitespace(), Err(())); } impl ToJson for Color { From d53c0d3a1c6d668d879e48f6e7632576df92d584 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 12 Jan 2015 19:03:16 +0000 Subject: [PATCH 45/69] Implement Clone for Parser. A cloned parser is "detached" from its parent and will progress independently, but it keeps referencing the same input.
--- src/parser.rs | 11 +++++++++-- src/tokenizer.rs | 1 + 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 7c545fc9..2e71cb8c 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -35,7 +35,14 @@ impl<'a, T> ops::DerefMut for MaybeOwned<'a, T> { } } +impl<'a, T> Clone for MaybeOwned<'a, T> where T: Clone { + fn clone(&self) -> MaybeOwned<'a, T> { + MaybeOwned::Owned(box() (**self).clone()) + } +} + +#[deriving(Clone)] pub struct Parser<'i: 't, 't> { tokenizer: MaybeOwned<'t, Tokenizer<'i>>, /// If `Some(_)`, .parse_nested_block() can be called. @@ -48,7 +55,7 @@ pub struct Parser<'i: 't, 't> { } -#[deriving(Copy, PartialEq, Eq, Show)] +#[deriving(Copy, Clone, PartialEq, Eq, Show)] enum BlockType { Parenthesis, SquareBracket, @@ -79,7 +86,7 @@ impl BlockType { -#[deriving(Copy, PartialEq, Eq, Show)] +#[deriving(Copy, Clone, PartialEq, Eq, Show)] pub struct Delimiters { bits: u8, } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index ced8a4ee..f6eb178d 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -106,6 +106,7 @@ pub struct NumericValue { } +#[deriving(Clone)] pub struct Tokenizer<'a> { input: &'a str, From df19cde5b12b00301ca15bdf51d6c799b2ea6b8c Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 13 Jan 2015 20:17:45 +0000 Subject: [PATCH 46/69] Make source location methods take &self instead of &mut self. (Use a std::cell::Cell.) 
--- src/parser.rs | 4 ++-- src/tokenizer.rs | 25 +++++++++++++------------ 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 2e71cb8c..c1cab402 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -206,12 +206,12 @@ impl<'i, 't> Parser<'i, 't> { } #[inline] - pub fn current_source_location(&mut self) -> SourceLocation { + pub fn current_source_location(&self) -> SourceLocation { self.tokenizer.current_source_location() } #[inline] - pub fn source_location(&mut self, target: SourcePosition) -> SourceLocation { + pub fn source_location(&self, target: SourcePosition) -> SourceLocation { self.tokenizer.source_location(target) } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index f6eb178d..28a1872d 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -4,7 +4,9 @@ // http://dev.w3.org/csswg/css3-syntax/#tokenization -use std::{char, num}; +use std::cell::Cell; +use std::char; +use std::num; use std::ascii::AsciiExt; use std::borrow::{Cow, ToOwned}; use std::str::CowString; @@ -117,8 +119,7 @@ pub struct Tokenizer<'a> { buffer: Option>, /// Cache for `source_location()` - last_known_line_number: uint, - position_after_last_known_newline: uint, + last_known_line_break: Cell<(uint, uint)>, } @@ -129,8 +130,7 @@ impl<'a> Tokenizer<'a> { input: input, position: 0, buffer: None, - last_known_line_number: 1, - position_after_last_known_newline: 0, + last_known_line_break: Cell::new((1, 0)), } } @@ -172,18 +172,20 @@ impl<'a> Tokenizer<'a> { } #[inline] - pub fn current_source_location(&mut self) -> SourceLocation { + pub fn current_source_location(&self) -> SourceLocation { let position = SourcePosition(self.position); self.source_location(position) } - pub fn source_location(&mut self, position: SourcePosition) -> SourceLocation { + pub fn source_location(&self, position: SourcePosition) -> SourceLocation { let target = position.0; let mut line_number; let mut position; - if target >= self.position_after_last_known_newline { 
- position = self.position_after_last_known_newline; - line_number = self.last_known_line_number; + let (last_known_line_number, position_after_last_known_newline) = + self.last_known_line_break.get(); + if target >= position_after_last_known_newline { + position = position_after_last_known_newline; + line_number = last_known_line_number; } else { position = 0; line_number = 1; @@ -201,8 +203,7 @@ impl<'a> Tokenizer<'a> { line_number += 1; } debug_assert!(position <= target); - self.position_after_last_known_newline = position; - self.last_known_line_number = line_number; + self.last_known_line_break.set((line_number, position)); SourceLocation { line: line_number, // `target == position` when `target` is at the beginning of the line, From 22317ac874531e63bf9cc784bc72c6b9b213b962 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Wed, 14 Jan 2015 13:44:46 +0000 Subject: [PATCH 47/69] Reformat some code for readability. --- src/parser.rs | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index c1cab402..305bc545 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -294,7 +294,10 @@ impl<'i, 't> Parser<'i, 't> { token was just consumed.\ "); debug_assert!(!self.exhausted); - let (result, nested_parser_is_exhausted) = { + let result; + let nested_parser_is_exhausted; + // Introduce a new scope to limit duration of nested_parser’s borrow + { let mut nested_parser = Parser { tokenizer: MaybeOwned::Borrowed(&mut *self.tokenizer), at_start_of: None, @@ -302,8 +305,9 @@ impl<'i, 't> Parser<'i, 't> { parse_until_before: Delimiter::None, exhausted: false, }; - (nested_parser.parse_entirely(parse), nested_parser.exhausted) - }; + result = nested_parser.parse_entirely(parse); + nested_parser_is_exhausted = nested_parser.exhausted; + } if !nested_parser_is_exhausted { if consume_until_end_of_block(block_type, &mut *self.tokenizer) { self.exhausted = true; @@ -316,7 +320,10 @@ impl<'i, 't> Parser<'i, 't> { pub fn 
parse_until_before(&mut self, delimiters: Delimiters, parse: |&mut Parser| -> Result) -> Result { - let (result, delimited_parser_is_exhausted) = { + let result; + let delimited_parser_is_exhausted; + // Introduce a new scope to limit duration of nested_parser’s borrow + { let mut delimited_parser = Parser { tokenizer: MaybeOwned::Borrowed(&mut *self.tokenizer), at_start_of: self.at_start_of.take(), @@ -324,8 +331,9 @@ impl<'i, 't> Parser<'i, 't> { parse_until_before: self.parse_until_before | delimiters, exhausted: self.exhausted, }; - (delimited_parser.parse_entirely(parse), delimited_parser.exhausted) - }; + result = delimited_parser.parse_entirely(parse); + delimited_parser_is_exhausted = delimited_parser.exhausted; + } if !delimited_parser_is_exhausted { // FIXME: have a special-purpose tokenizer method for this that does less work. while let Ok(token) = self.tokenizer.next() { From 563d1786bc37124cba75e35c1f2e3e58adff81f6 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 15 Jan 2015 13:46:24 +0000 Subject: [PATCH 48/69] Fix some parsing bugs. 
--- src/parser.rs | 9 +++------ src/rules_and_declarations.rs | 8 ++++---- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 305bc545..a1c5db31 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -153,10 +153,7 @@ impl<'i, 't> Parser<'i, 't> { pub fn expect_exhausted(&mut self) -> Result<(), ()> { match self.next() { Err(()) => Ok(()), - Ok(token) => { - self.push_back(token); - Err(()) - } + Ok(token) => self.unexpected(token), } } @@ -215,7 +212,6 @@ impl<'i, 't> Parser<'i, 't> { self.tokenizer.source_location(target) } - pub fn next(&mut self) -> Result, ()> { loop { match self.next_including_whitespace() { @@ -320,6 +316,7 @@ impl<'i, 't> Parser<'i, 't> { pub fn parse_until_before(&mut self, delimiters: Delimiters, parse: |&mut Parser| -> Result) -> Result { + let delimiters = self.parse_until_before | delimiters; let result; let delimited_parser_is_exhausted; // Introduce a new scope to limit duration of nested_parser’s borrow @@ -328,7 +325,7 @@ impl<'i, 't> Parser<'i, 't> { tokenizer: MaybeOwned::Borrowed(&mut *self.tokenizer), at_start_of: self.at_start_of.take(), parse_until_after_end_of: self.parse_until_after_end_of, - parse_until_before: self.parse_until_before | delimiters, + parse_until_before: delimiters, exhausted: self.exhausted, }; result = delimited_parser.parse_entirely(parse); diff --git a/src/rules_and_declarations.rs b/src/rules_and_declarations.rs index 6e50b61e..f413e6e1 100644 --- a/src/rules_and_declarations.rs +++ b/src/rules_and_declarations.rs @@ -273,18 +273,18 @@ fn parse_at_rule(name: CowString, input: &mut Parser, parser: &mut P) fn parse_qualified_rule(input: &mut Parser, parser: &mut P) -> Result where P: QualifiedRuleParser { - let prelude = try!(input.parse_until_before(Delimiter::CurlyBracketBlock, |input| { + let prelude = input.parse_until_before(Delimiter::CurlyBracketBlock, |input| { parser.parse_prelude(input) - })); + }); match try!(input.next()) { Token::CurlyBracketBlock 
=> { // FIXME: Make parse_entirely take `FnOnce` // and remove this Option dance. - let mut prelude = Some(prelude); + let mut prelude = Some(try!(prelude)); input.parse_nested_block(|input| { parser.parse_block(prelude.take().unwrap(), input) }) } - _ => unreachable!() + _ => unreachable!(), } } From 03d06b3733162005a64a8a36a87b7156181b9793 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 15 Jan 2015 14:28:32 +0000 Subject: [PATCH 49/69] Remove ast.rs file left over from 41fa928b7df1acc05b16af5f4ec3edb229274957. Its content is now merged into tokenizer.rs. --- src/ast.rs | 125 ----------------------------------------------------- 1 file changed, 125 deletions(-) delete mode 100644 src/ast.rs diff --git a/src/ast.rs b/src/ast.rs deleted file mode 100644 index b07570f4..00000000 --- a/src/ast.rs +++ /dev/null @@ -1,125 +0,0 @@ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -use std::fmt; - - -#[deriving(PartialEq, Show)] -pub struct NumericValue { - pub representation: String, - pub value: f64, - pub int_value: Option, -} - - -#[deriving(PartialEq, Show, Copy)] -pub struct SourceLocation { - pub line: uint, // First line is 1 - pub column: uint, // First character of a line is at column 1 -} - - -pub type Node = (Token, SourceLocation); // TODO this is not a good name - - -#[deriving(PartialEq, Show)] -pub enum Token { - // Preserved tokens. - Ident(String), - AtKeyword(String), - Hash(String), - IDHash(String), // Hash that is a valid ID selector. 
- QuotedString(String), - Url(String), - Delim(char), - Number(NumericValue), - Percentage(NumericValue), - Dimension(NumericValue, String), - UnicodeRange(u32, u32), // (start, end) of range - WhiteSpace, - Colon, // : - Semicolon, // ; - Comma, // , - IncludeMatch, // ~= - DashMatch, // |= - PrefixMatch, // ^= - SuffixMatch, // $= - SubstringMatch, // *= - Column, // || - CDO, // - - // Function - Function(String), // name - - // Simple block - ParenthesisBlock, // (…) - SquareBracketBlock, // […] - CurlyBracketBlock, // {…} - - // These are always invalid - BadUrl, - BadString, - CloseParenthesis, // ) - CloseSquareBracket, // ] - CloseCurlyBracket, // } -} - - -#[deriving(PartialEq)] -pub struct Declaration { - pub location: SourceLocation, - pub name: String, - pub value: Vec, - pub important: bool, -} - -#[deriving(PartialEq)] -pub struct QualifiedRule { - pub location: SourceLocation, - pub prelude: Vec, - pub block: Vec, -} - -#[deriving(PartialEq)] -pub struct AtRule { - pub location: SourceLocation, - pub name: String, - pub prelude: Vec, - pub block: Option>, -} - -#[deriving(PartialEq)] -pub enum DeclarationListItem { - Declaration(Declaration), - AtRule(AtRule), -} - -#[deriving(PartialEq)] -pub enum Rule { - QualifiedRule(QualifiedRule), - AtRule(AtRule), -} - -#[deriving(PartialEq, Copy)] -pub struct SyntaxError { - pub location: SourceLocation, - pub reason: ErrorReason, -} - -#[deriving(PartialEq, Show, Copy)] -pub enum ErrorReason { - EmptyInput, // Parsing a single "thing", found only whitespace. - ExtraInput, // Found more non-whitespace after parsing a single "thing". 
- MissingQualifiedRuleBlock, // EOF in a qualified rule prelude, before '{' - InvalidDeclarationSyntax, - InvalidBangImportantSyntax, - // This is meant to be extended -} - -impl fmt::Show for SyntaxError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}:{} {}", self.location.line, self.location.column, self.reason) - } -} From 0480fa985131b53464aa7b3e2f9e002ddea15baf Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 15 Jan 2015 14:56:04 +0000 Subject: [PATCH 50/69] Change Token::Percentage to have values in 0..1 instead of 0..100 --- src/color.rs | 10 +++++----- src/lib.rs | 2 +- src/parser.rs | 4 ++-- src/serializer.rs | 9 +++++++-- src/tests.rs | 10 +++++++--- src/tokenizer.rs | 26 ++++++++++++++++++++------ 6 files changed, 42 insertions(+), 19 deletions(-) diff --git a/src/color.rs b/src/color.rs index a310491c..5420c606 100644 --- a/src/color.rs +++ b/src/color.rs @@ -313,11 +313,11 @@ fn parse_color_function(name: &str, arguments: &mut Parser) -> Result blue = try!(arguments.expect_integer()) as f32 / 255.; } Token::Percentage(ref v) => { - red = (v.value / 100.) as f32; + red = v.unit_value as f32; try!(arguments.expect_comma()); - green = (try!(arguments.expect_percentage()) / 100.) as f32; + green = try!(arguments.expect_percentage()) as f32; try!(arguments.expect_comma()); - blue = (try!(arguments.expect_percentage()) / 100.) 
as f32; + blue = try!(arguments.expect_percentage()) as f32; } _ => return Err(()) }; @@ -325,9 +325,9 @@ fn parse_color_function(name: &str, arguments: &mut Parser) -> Result let hue = try!(arguments.expect_number()) / 360.; let hue = hue - hue.floor(); try!(arguments.expect_comma()); - let saturation = (try!(arguments.expect_percentage()) / 100.).max(0.).min(1.); + let saturation = (try!(arguments.expect_percentage())).max(0.).min(1.); try!(arguments.expect_comma()); - let lightness = (try!(arguments.expect_percentage()) / 100.).max(0.).min(1.); + let lightness = (try!(arguments.expect_percentage())).max(0.).min(1.); // http://www.w3.org/TR/css3-color/#hsl-color fn hue_to_rgb(m1: f64, m2: f64, mut h: f64) -> f64 { diff --git a/src/lib.rs b/src/lib.rs index 0e4b6548..ed4eb96d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,7 +13,7 @@ extern crate text_writer; #[cfg(test)] extern crate test; #[cfg(test)] extern crate serialize; -pub use tokenizer::{Token, NumericValue, SourcePosition, SourceLocation}; +pub use tokenizer::{Token, NumericValue, PercentageValue, SourcePosition, SourceLocation}; pub use rules_and_declarations::{Priority, parse_important}; pub use rules_and_declarations::{DeclarationParser, DeclarationListParser, parse_one_declaration}; pub use rules_and_declarations::{RuleListParser, parse_one_rule}; diff --git a/src/parser.rs b/src/parser.rs index a1c5db31..0ab364ad 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -5,7 +5,7 @@ use std::ascii::AsciiExt; use std::str::CowString; use std::ops; -use tokenizer::{Token, NumericValue, Tokenizer, SourcePosition, SourceLocation}; +use tokenizer::{Token, NumericValue, PercentageValue, Tokenizer, SourcePosition, SourceLocation}; /// Like std::borrow::Cow, except: @@ -434,7 +434,7 @@ impl<'i, 't> Parser<'i, 't> { #[inline] pub fn expect_percentage(&mut self) -> Result { match try!(self.next()) { - Token::Percentage(NumericValue { value, .. }) => Ok(value), + Token::Percentage(PercentageValue { unit_value, .. 
}) => Ok(unit_value), token => self.unexpected(token) } } diff --git a/src/serializer.rs b/src/serializer.rs index a1784ebc..3a81178f 100644 --- a/src/serializer.rs +++ b/src/serializer.rs @@ -8,7 +8,7 @@ use std::num::Float; use text_writer::{mod, TextWriter}; -use super::{Token, NumericValue}; +use super::{Token, NumericValue, PercentageValue}; pub trait ToCss for Sized? { @@ -86,7 +86,12 @@ impl<'a> ToCss for Token<'a> { Token::Delim(value) => try!(dest.write_char(value)), Token::Number(value) => try!(write_numeric(value, dest)), - Token::Percentage(value) => { + Token::Percentage(PercentageValue { unit_value, int_value, signed }) => { + let value = NumericValue { + value: unit_value * 100., + int_value: int_value, + signed: signed, + }; try!(write_numeric(value, dest)); try!(dest.write_char('%')); }, diff --git a/src/tests.rs b/src/tests.rs index d6735b9b..fbf717ac 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -11,7 +11,7 @@ use test; use encoding::label::encoding_from_whatwg_label; -use super::{Parser, Token, NumericValue, SourceLocation, +use super::{Parser, Token, NumericValue, PercentageValue, SourceLocation, DeclarationListParser, DeclarationParser, RuleListParser, AtRuleType, AtRuleParser, QualifiedRuleParser, Priority, parse_one_declaration, parse_one_rule, parse_important, @@ -534,8 +534,12 @@ fn one_component_value_to_json(token: Token, input: &mut Parser) -> Json { Token::Delim(value) => String::from_char(1, value).to_json(), Token::Number(value) => Json::Array(vec!["number".to_json()] + numeric(value)), - Token::Percentage(value) => Json::Array( - vec!["percentage".to_json()] + numeric(value)), + Token::Percentage(PercentageValue { unit_value, int_value, signed }) => Json::Array( + vec!["percentage".to_json()] + numeric(NumericValue { + value: unit_value * 100., + int_value: int_value, + signed: signed, + })), Token::Dimension(value, unit) => Json::Array( vec!["dimension".to_json()] + numeric(value) + [unit.to_json()].as_slice()), diff --git 
a/src/tokenizer.rs b/src/tokenizer.rs index 28a1872d..fd340d7c 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -26,7 +26,7 @@ pub enum Token<'a> { Url(CowString<'a>), Delim(char), Number(NumericValue), - Percentage(NumericValue), + Percentage(PercentageValue), Dimension(NumericValue, CowString<'a>), UnicodeRange(u32, u32), // (start, end) of range WhiteSpace, @@ -108,6 +108,16 @@ pub struct NumericValue { } +#[deriving(PartialEq, Show, Copy, Clone)] +pub struct PercentageValue { + /// This (but not int_value) is divided by 100 + pub unit_value: f64, + pub int_value: Option, + /// Whether the number had a `+` or `-` sign. + pub signed: bool, +} + + #[deriving(Clone)] pub struct Tokenizer<'a> { input: &'a str, @@ -616,16 +626,20 @@ fn consume_numeric<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> { None }) }; + if !tokenizer.is_eof() && tokenizer.current_char() == '%' { + tokenizer.advance(1); + return Percentage(PercentageValue { + unit_value: value / 100., + int_value: int_value, + signed: signed, + }) + } let value = NumericValue { value: value, int_value: int_value, signed: signed, }; - if !tokenizer.is_eof() && tokenizer.current_char() == '%' { - tokenizer.advance(1); - Percentage(value) - } - else if is_ident_start(tokenizer) { Dimension(value, consume_name(tokenizer)) } + if is_ident_start(tokenizer) { Dimension(value, consume_name(tokenizer)) } else { Number(value) } } From 4ebb01adaa5289f2aee8803c61810dfaac6c33bc Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 16 Jan 2015 17:59:30 +0000 Subject: [PATCH 51/69] Internal refactor: have "nested" parsers not consume their end token. This simplifies the code by having less special cases. 
--- src/parser.rs | 32 +++++++++++--------------------- 1 file changed, 11 insertions(+), 21 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 0ab364ad..de6e87c7 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -48,7 +48,7 @@ pub struct Parser<'i: 't, 't> { /// If `Some(_)`, .parse_nested_block() can be called. at_start_of: Option, /// If `Some(_)`, this parser is from .parse_nested_block() - parse_until_after_end_of: Option, + parse_until_before_end_of: Option, /// For parsers from `parse_until` parse_until_before: Delimiters, exhausted: bool, @@ -117,10 +117,6 @@ impl Delimiters { (self.bits & other.bits) != 0 } - fn is_none(&self) -> bool { - self.bits == 0 - } - fn from_token(token: &Token) -> Delimiters { match *token { Token::Semicolon => Delimiter::Semicolon, @@ -138,7 +134,7 @@ impl<'i, 't> Parser<'i, 't> { Parser { tokenizer: MaybeOwned::Owned(box Tokenizer::new(input)), at_start_of: None, - parse_until_after_end_of: None, + parse_until_before_end_of: None, parse_until_before: Delimiter::None, exhausted: false, } @@ -242,11 +238,9 @@ impl<'i, 't> Parser<'i, 't> { self.exhausted = true; return Err(()) } - if self.parse_until_after_end_of.is_some() && - BlockType::closing(&token) == self.parse_until_after_end_of { - if !self.parse_until_before.is_none() { - self.tokenizer.push_back(token); - } + if self.parse_until_before_end_of.is_some() && + BlockType::closing(&token) == self.parse_until_before_end_of { + self.tokenizer.push_back(token); self.exhausted = true; return Err(()) } @@ -291,23 +285,19 @@ impl<'i, 't> Parser<'i, 't> { "); debug_assert!(!self.exhausted); let result; - let nested_parser_is_exhausted; // Introduce a new scope to limit duration of nested_parser’s borrow { let mut nested_parser = Parser { tokenizer: MaybeOwned::Borrowed(&mut *self.tokenizer), at_start_of: None, - parse_until_after_end_of: Some(block_type), + parse_until_before_end_of: Some(block_type), parse_until_before: Delimiter::None, exhausted: false, }; result = 
nested_parser.parse_entirely(parse); - nested_parser_is_exhausted = nested_parser.exhausted; } - if !nested_parser_is_exhausted { - if consume_until_end_of_block(block_type, &mut *self.tokenizer) { - self.exhausted = true; - } + if consume_until_end_of_block(block_type, &mut *self.tokenizer) { + self.exhausted = true; } result } @@ -324,7 +314,7 @@ impl<'i, 't> Parser<'i, 't> { let mut delimited_parser = Parser { tokenizer: MaybeOwned::Borrowed(&mut *self.tokenizer), at_start_of: self.at_start_of.take(), - parse_until_after_end_of: self.parse_until_after_end_of, + parse_until_before_end_of: self.parse_until_before_end_of, parse_until_before: delimiters, exhausted: self.exhausted, }; @@ -335,8 +325,8 @@ impl<'i, 't> Parser<'i, 't> { // FIXME: have a special-purpose tokenizer method for this that does less work. while let Ok(token) = self.tokenizer.next() { if delimiters.contains(Delimiters::from_token(&token)) || ( - self.parse_until_after_end_of.is_some() && - BlockType::closing(&token) == self.parse_until_after_end_of + self.parse_until_before_end_of.is_some() && + BlockType::closing(&token) == self.parse_until_before_end_of ) { self.tokenizer.push_back(token); break From d124386006014593b43fd8b8be582a197feb71c0 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 16 Jan 2015 18:02:38 +0000 Subject: [PATCH 52/69] Internal refactor: remove the Parser::exhausted boolean flag. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It wasn’t really necessary, and confusingly had a different meaning than Parser::is_exhausted().
--- src/parser.rs | 77 +++++++++++++++++---------------------------------- 1 file changed, 26 insertions(+), 51 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index de6e87c7..09d41a44 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -51,7 +51,6 @@ pub struct Parser<'i: 't, 't> { parse_until_before_end_of: Option, /// For parsers from `parse_until` parse_until_before: Delimiters, - exhausted: bool, } @@ -136,7 +135,6 @@ impl<'i, 't> Parser<'i, 't> { at_start_of: None, parse_until_before_end_of: None, parse_until_before: Delimiter::None, - exhausted: false, } } @@ -218,38 +216,25 @@ impl<'i, 't> Parser<'i, 't> { } pub fn next_including_whitespace(&mut self) -> Result, ()> { - if self.exhausted { - return Err(()) - } if let Some(block_type) = self.at_start_of.take() { if consume_until_end_of_block(block_type, &mut *self.tokenizer) { - self.exhausted = true; return Err(()) } } - match self.tokenizer.next() { - Err(()) => { - self.exhausted = true; - Err(()) - }, - Ok(token) => { - if self.parse_until_before.contains(Delimiters::from_token(&token)) { - self.tokenizer.push_back(token); - self.exhausted = true; - return Err(()) - } - if self.parse_until_before_end_of.is_some() && - BlockType::closing(&token) == self.parse_until_before_end_of { - self.tokenizer.push_back(token); - self.exhausted = true; - return Err(()) - } - if let Some(block_type) = BlockType::opening(&token) { - self.at_start_of = Some(block_type); - } - Ok(token) - } + let token = try!(self.tokenizer.next()); + if self.parse_until_before.contains(Delimiters::from_token(&token)) { + self.tokenizer.push_back(token); + return Err(()) } + if self.parse_until_before_end_of.is_some() && + BlockType::closing(&token) == self.parse_until_before_end_of { + self.tokenizer.push_back(token); + return Err(()) + } + if let Some(block_type) = BlockType::opening(&token) { + self.at_start_of = Some(block_type); + } + Ok(token) } // FIXME: Take an unboxed `FnOnce` closure. 
@@ -283,7 +268,6 @@ impl<'i, 't> Parser<'i, 't> { ParenthesisBlock, SquareBracketBlock, or CurlyBracketBlock \ token was just consumed.\ "); - debug_assert!(!self.exhausted); let result; // Introduce a new scope to limit duration of nested_parser’s borrow { @@ -292,13 +276,10 @@ impl<'i, 't> Parser<'i, 't> { at_start_of: None, parse_until_before_end_of: Some(block_type), parse_until_before: Delimiter::None, - exhausted: false, }; result = nested_parser.parse_entirely(parse); } - if consume_until_end_of_block(block_type, &mut *self.tokenizer) { - self.exhausted = true; - } + consume_until_end_of_block(block_type, &mut *self.tokenizer); result } @@ -308,7 +289,6 @@ impl<'i, 't> Parser<'i, 't> { -> Result { let delimiters = self.parse_until_before | delimiters; let result; - let delimited_parser_is_exhausted; // Introduce a new scope to limit duration of nested_parser’s borrow { let mut delimited_parser = Parser { @@ -316,27 +296,22 @@ impl<'i, 't> Parser<'i, 't> { at_start_of: self.at_start_of.take(), parse_until_before_end_of: self.parse_until_before_end_of, parse_until_before: delimiters, - exhausted: self.exhausted, }; result = delimited_parser.parse_entirely(parse); - delimited_parser_is_exhausted = delimited_parser.exhausted; - } - if !delimited_parser_is_exhausted { - // FIXME: have a special-purpose tokenizer method for this that does less work. - while let Ok(token) = self.tokenizer.next() { - if delimiters.contains(Delimiters::from_token(&token)) || ( - self.parse_until_before_end_of.is_some() && - BlockType::closing(&token) == self.parse_until_before_end_of - ) { - self.tokenizer.push_back(token); + } + // FIXME: have a special-purpose tokenizer method for this that does less work. 
+ while let Ok(token) = self.tokenizer.next() { + if delimiters.contains(Delimiters::from_token(&token)) || ( + self.parse_until_before_end_of.is_some() && + BlockType::closing(&token) == self.parse_until_before_end_of + ) { + self.tokenizer.push_back(token); + break + } + if let Some(block_type) = BlockType::opening(&token) { + if consume_until_end_of_block(block_type, &mut *self.tokenizer) { break } - if let Some(block_type) = BlockType::opening(&token) { - if consume_until_end_of_block(block_type, &mut *self.tokenizer) { - self.exhausted = true; - break - } - } } } result From 12b29390390e3c6bbe3a1a1e3fa2e2ec6626b483 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 16 Jan 2015 18:06:05 +0000 Subject: [PATCH 53/69] =?UTF-8?q?Internal=20refactor:=20remove=20consume?= =?UTF-8?q?=5Funtil=5Fend=5Fof=5Fblock=E2=80=99s=20return=20value.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/parser.rs | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 09d41a44..e8320dbb 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -217,9 +217,7 @@ impl<'i, 't> Parser<'i, 't> { pub fn next_including_whitespace(&mut self) -> Result, ()> { if let Some(block_type) = self.at_start_of.take() { - if consume_until_end_of_block(block_type, &mut *self.tokenizer) { - return Err(()) - } + consume_until_end_of_block(block_type, &mut *self.tokenizer); } let token = try!(self.tokenizer.next()); if self.parse_until_before.contains(Delimiters::from_token(&token)) { @@ -309,9 +307,7 @@ impl<'i, 't> Parser<'i, 't> { break } if let Some(block_type) = BlockType::opening(&token) { - if consume_until_end_of_block(block_type, &mut *self.tokenizer) { - break - } + consume_until_end_of_block(block_type, &mut *self.tokenizer); } } result @@ -455,17 +451,14 @@ impl<'i, 't> Parser<'i, 't> { /// Return value indicates whether the end of the input was reached. 
-fn consume_until_end_of_block(block_type: BlockType, tokenizer: &mut Tokenizer) -> bool { +fn consume_until_end_of_block(block_type: BlockType, tokenizer: &mut Tokenizer) { // FIXME: have a special-purpose tokenizer method for this that does less work. while let Ok(ref token) = tokenizer.next() { if BlockType::closing(token) == Some(block_type) { - return false + return } if let Some(block_type) = BlockType::opening(token) { - if consume_until_end_of_block(block_type, tokenizer) { - return true - } + consume_until_end_of_block(block_type, tokenizer); } } - true } From 8da3a37a0ffa772b79c72fcb9343708d97887b9c Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 16 Jan 2015 18:40:44 +0000 Subject: [PATCH 54/69] Internal refactor: merge parse_until_before_end_of into parse_until_before. --- src/parser.rs | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index e8320dbb..a56e1f36 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -47,9 +47,7 @@ pub struct Parser<'i: 't, 't> { tokenizer: MaybeOwned<'t, Tokenizer<'i>>, /// If `Some(_)`, .parse_nested_block() can be called. 
at_start_of: Option, - /// If `Some(_)`, this parser is from .parse_nested_block() - parse_until_before_end_of: Option, - /// For parsers from `parse_until` + /// For parsers from `parse_until` or `parse_nested_block` parse_until_before: Delimiters, } @@ -105,6 +103,15 @@ pub mod Delimiter { pub const Comma: Delimiters = Delimiters { bits: 1 << 4 }; } +#[allow(non_upper_case_globals, non_snake_case)] +mod ClosingDelimiter { + use super::Delimiters; + + pub const CloseCurlyBracket: Delimiters = Delimiters { bits: 1 << 5 }; + pub const CloseSquareBracket: Delimiters = Delimiters { bits: 1 << 6 }; + pub const CloseParenthesis: Delimiters = Delimiters { bits: 1 << 7 }; +} + impl BitOr for Delimiters { fn bitor(&self, other: &Delimiters) -> Delimiters { Delimiters { bits: self.bits | other.bits } @@ -112,7 +119,7 @@ impl BitOr for Delimiters { } impl Delimiters { - fn contains(&self, other: Delimiters) -> bool { + fn contains(self, other: Delimiters) -> bool { (self.bits & other.bits) != 0 } @@ -122,6 +129,9 @@ impl Delimiters { Token::Comma => Delimiter::Comma, Token::Delim('!') => Delimiter::Bang, Token::CurlyBracketBlock => Delimiter::CurlyBracketBlock, + Token::CloseCurlyBracket => ClosingDelimiter::CloseCurlyBracket, + Token::CloseSquareBracket => ClosingDelimiter::CloseSquareBracket, + Token::CloseParenthesis => ClosingDelimiter::CloseParenthesis, _ => Delimiter::None, } } @@ -133,7 +143,6 @@ impl<'i, 't> Parser<'i, 't> { Parser { tokenizer: MaybeOwned::Owned(box Tokenizer::new(input)), at_start_of: None, - parse_until_before_end_of: None, parse_until_before: Delimiter::None, } } @@ -224,11 +233,6 @@ impl<'i, 't> Parser<'i, 't> { self.tokenizer.push_back(token); return Err(()) } - if self.parse_until_before_end_of.is_some() && - BlockType::closing(&token) == self.parse_until_before_end_of { - self.tokenizer.push_back(token); - return Err(()) - } if let Some(block_type) = BlockType::opening(&token) { self.at_start_of = Some(block_type); } @@ -266,14 +270,18 @@ 
impl<'i, 't> Parser<'i, 't> { ParenthesisBlock, SquareBracketBlock, or CurlyBracketBlock \ token was just consumed.\ "); + let closing_delimiter = match block_type { + BlockType::CurlyBracket => ClosingDelimiter::CloseCurlyBracket, + BlockType::SquareBracket => ClosingDelimiter::CloseSquareBracket, + BlockType::Parenthesis => ClosingDelimiter::CloseParenthesis, + }; let result; // Introduce a new scope to limit duration of nested_parser’s borrow { let mut nested_parser = Parser { tokenizer: MaybeOwned::Borrowed(&mut *self.tokenizer), at_start_of: None, - parse_until_before_end_of: Some(block_type), - parse_until_before: Delimiter::None, + parse_until_before: closing_delimiter, }; result = nested_parser.parse_entirely(parse); } @@ -292,17 +300,13 @@ impl<'i, 't> Parser<'i, 't> { let mut delimited_parser = Parser { tokenizer: MaybeOwned::Borrowed(&mut *self.tokenizer), at_start_of: self.at_start_of.take(), - parse_until_before_end_of: self.parse_until_before_end_of, parse_until_before: delimiters, }; result = delimited_parser.parse_entirely(parse); } // FIXME: have a special-purpose tokenizer method for this that does less work. while let Ok(token) = self.tokenizer.next() { - if delimiters.contains(Delimiters::from_token(&token)) || ( - self.parse_until_before_end_of.is_some() && - BlockType::closing(&token) == self.parse_until_before_end_of - ) { + if delimiters.contains(Delimiters::from_token(&token)) { self.tokenizer.push_back(token); break } From 1389597c32c530470e3b8996293ef6dc5f9fa5d2 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 16 Jan 2015 19:43:45 +0000 Subject: [PATCH 55/69] Internal refactor: rename Tokenizer::current_char to next_char. 
--- src/tokenizer.rs | 58 ++++++++++++++++++++++++------------------------ 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/src/tokenizer.rs b/src/tokenizer.rs index fd340d7c..ff2d875d 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -222,7 +222,7 @@ impl<'a> Tokenizer<'a> { } } - // If false, `tokenizer.current_char()` will not panic. + // If false, `tokenizer.next_char()` will not panic. #[inline] fn is_eof(&self) -> bool { !self.has_at_least(0) } @@ -236,7 +236,7 @@ impl<'a> Tokenizer<'a> { // Assumes non-EOF #[inline] - fn current_char(&self) -> char { self.char_at(0) } + fn next_char(&self) -> char { self.char_at(0) } #[inline] fn char_at(&self, offset: uint) -> char { @@ -281,11 +281,11 @@ fn next_token<'a>(tokenizer: &mut Tokenizer<'a>) -> Option> { if tokenizer.is_eof() { return None } - let c = tokenizer.current_char(); + let c = tokenizer.next_char(); let token = match c { '\t' | '\n' | ' ' | '\r' | '\x0C' => { while !tokenizer.is_eof() { - match tokenizer.current_char() { + match tokenizer.next_char() { ' ' | '\t' | '\n' | '\r' | '\x0C' => tokenizer.advance(1), _ => break, } @@ -296,7 +296,7 @@ fn next_token<'a>(tokenizer: &mut Tokenizer<'a>) -> Option> { '#' => { tokenizer.advance(1); if is_ident_start(tokenizer) { IDHash(consume_name(tokenizer)) } - else if !tokenizer.is_eof() && match tokenizer.current_char() { + else if !tokenizer.is_eof() && match tokenizer.next_char() { 'a'...'z' | 'A'...'Z' | '0'...'9' | '-' | '_' => true, '\\' => !tokenizer.has_newline_at(1), _ => c > '\x7F', // Non-ASCII @@ -426,7 +426,7 @@ fn consume_comments(tokenizer: &mut Tokenizer) { while !tokenizer.is_eof() { if tokenizer.consume_char() == '*' && !tokenizer.is_eof() && - tokenizer.current_char() == '/' { + tokenizer.next_char() == '/' { tokenizer.advance(1); break } @@ -453,7 +453,7 @@ fn consume_quoted_string<'a>(tokenizer: &mut Tokenizer<'a>, single_quote: bool) if tokenizer.is_eof() { return Ok(Borrowed(tokenizer.slice_from(start_pos))) } - match 
tokenizer.current_char() { + match tokenizer.next_char() { '"' if !single_quote => { let value = tokenizer.slice_from(start_pos); tokenizer.advance(1); @@ -476,7 +476,7 @@ fn consume_quoted_string<'a>(tokenizer: &mut Tokenizer<'a>, single_quote: bool) } while !tokenizer.is_eof() { - if matches!(tokenizer.current_char(), '\n' | '\r' | '\x0C') { + if matches!(tokenizer.next_char(), '\n' | '\r' | '\x0C') { return Err(()); } match tokenizer.consume_char() { @@ -484,12 +484,12 @@ fn consume_quoted_string<'a>(tokenizer: &mut Tokenizer<'a>, single_quote: bool) '\'' if single_quote => break, '\\' => { if !tokenizer.is_eof() { - match tokenizer.current_char() { + match tokenizer.next_char() { // Escaped newline '\n' | '\x0C' => tokenizer.advance(1), '\r' => { tokenizer.advance(1); - if !tokenizer.is_eof() && tokenizer.current_char() == '\n' { + if !tokenizer.is_eof() && tokenizer.next_char() == '\n' { tokenizer.advance(1); } } @@ -508,7 +508,7 @@ fn consume_quoted_string<'a>(tokenizer: &mut Tokenizer<'a>, single_quote: bool) #[inline] fn is_ident_start(tokenizer: &mut Tokenizer) -> bool { - !tokenizer.is_eof() && match tokenizer.current_char() { + !tokenizer.is_eof() && match tokenizer.next_char() { 'a'...'z' | 'A'...'Z' | '_' | '\0' => true, '-' => tokenizer.has_at_least(1) && match tokenizer.char_at(1) { 'a'...'z' | 'A'...'Z' | '-' | '_' | '\0' => true, @@ -523,7 +523,7 @@ fn is_ident_start(tokenizer: &mut Tokenizer) -> bool { fn consume_ident_like<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> { let value = consume_name(tokenizer); - if !tokenizer.is_eof() && tokenizer.current_char() == '(' { + if !tokenizer.is_eof() && tokenizer.next_char() == '(' { tokenizer.advance(1); if value.eq_ignore_ascii_case("url") { consume_url(tokenizer) } else { Function(value) } @@ -539,7 +539,7 @@ fn consume_name<'a>(tokenizer: &mut Tokenizer<'a>) -> CowString<'a> { if tokenizer.is_eof() { return Borrowed(tokenizer.slice_from(start_pos)) } - match tokenizer.current_char() { + match 
tokenizer.next_char() { 'a'...'z' | 'A'...'Z' | '0'...'9' | '_' | '-' => tokenizer.advance(1), '\\' | '\0' => { value = tokenizer.slice_from(start_pos).to_owned(); @@ -553,7 +553,7 @@ fn consume_name<'a>(tokenizer: &mut Tokenizer<'a>) -> CowString<'a> { } while !tokenizer.is_eof() { - let c = tokenizer.current_char(); + let c = tokenizer.next_char(); value.push(match c { 'a'...'z' | 'A'...'Z' | '0'...'9' | '_' | '-' => { tokenizer.advance(1); @@ -575,7 +575,7 @@ fn consume_name<'a>(tokenizer: &mut Tokenizer<'a>) -> CowString<'a> { fn consume_digits(tokenizer: &mut Tokenizer) { while !tokenizer.is_eof() { - match tokenizer.current_char() { + match tokenizer.next_char() { '0'...'9' => tokenizer.advance(1), _ => break } @@ -588,12 +588,12 @@ fn consume_numeric<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> { // But this is always called so that there is at least one digit in \d*(\.\d+)? let start_pos = tokenizer.position(); let mut is_integer = true; - let signed = matches!(tokenizer.current_char(), '-' | '+'); + let signed = matches!(tokenizer.next_char(), '-' | '+'); if signed { tokenizer.advance(1); } consume_digits(tokenizer); - if tokenizer.has_at_least(1) && tokenizer.current_char() == '.' + if tokenizer.has_at_least(1) && tokenizer.next_char() == '.' && matches!(tokenizer.char_at(1), '0'...'9') { is_integer = false; tokenizer.advance(2); // '.' 
and first digit @@ -601,11 +601,11 @@ fn consume_numeric<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> { } if ( tokenizer.has_at_least(1) - && matches!(tokenizer.current_char(), 'e' | 'E') + && matches!(tokenizer.next_char(), 'e' | 'E') && matches!(tokenizer.char_at(1), '0'...'9') ) || ( tokenizer.has_at_least(2) - && matches!(tokenizer.current_char(), 'e' | 'E') + && matches!(tokenizer.next_char(), 'e' | 'E') && matches!(tokenizer.char_at(1), '+' | '-') && matches!(tokenizer.char_at(2), '0'...'9') ) { @@ -626,7 +626,7 @@ fn consume_numeric<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> { None }) }; - if !tokenizer.is_eof() && tokenizer.current_char() == '%' { + if !tokenizer.is_eof() && tokenizer.next_char() == '%' { tokenizer.advance(1); return Percentage(PercentageValue { unit_value: value / 100., @@ -646,7 +646,7 @@ fn consume_numeric<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> { fn consume_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> { while !tokenizer.is_eof() { - match tokenizer.current_char() { + match tokenizer.next_char() { ' ' | '\t' | '\n' | '\r' | '\x0C' => tokenizer.advance(1), '"' => return consume_quoted_url(tokenizer, false), '\'' => return consume_quoted_url(tokenizer, true), @@ -670,7 +670,7 @@ fn consume_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> { if tokenizer.is_eof() { return Url(Borrowed(tokenizer.slice_from(start_pos))) } - match tokenizer.current_char() { + match tokenizer.next_char() { ' ' | '\t' | '\n' | '\r' | '\x0C' => { let value = tokenizer.slice_from(start_pos); tokenizer.advance(1); @@ -747,13 +747,13 @@ fn consume_unicode_range<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> { tokenizer.advance(2); // Skip U+ let mut hex = String::new(); while hex.len() < 6 && !tokenizer.is_eof() - && matches!(tokenizer.current_char(), '0'...'9' | 'A'...'F' | 'a'...'f') { + && matches!(tokenizer.next_char(), '0'...'9' | 'A'...'F' | 'a'...'f') { hex.push(tokenizer.consume_char()); } let max_question_marks = 6u - hex.len(); 
let mut question_marks = 0u; while question_marks < max_question_marks && !tokenizer.is_eof() - && tokenizer.current_char() == '?' { + && tokenizer.next_char() == '?' { question_marks += 1; tokenizer.advance(1) } @@ -768,10 +768,10 @@ fn consume_unicode_range<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> { } else { start = first; hex.truncate(0); - if !tokenizer.is_eof() && tokenizer.current_char() == '-' { + if !tokenizer.is_eof() && tokenizer.next_char() == '-' { tokenizer.advance(1); while hex.len() < 6 && !tokenizer.is_eof() { - let c = tokenizer.current_char(); + let c = tokenizer.next_char(); match c { '0'...'9' | 'A'...'F' | 'a'...'f' => { hex.push(c); tokenizer.advance(1) }, @@ -795,7 +795,7 @@ fn consume_escape(tokenizer: &mut Tokenizer) -> char { '0'...'9' | 'A'...'F' | 'a'...'f' => { let mut hex = String::from_char(1, c); while hex.len() < 6 && !tokenizer.is_eof() { - let c = tokenizer.current_char(); + let c = tokenizer.next_char(); match c { '0'...'9' | 'A'...'F' | 'a'...'f' => { hex.push(c); tokenizer.advance(1) }, @@ -803,11 +803,11 @@ fn consume_escape(tokenizer: &mut Tokenizer) -> char { } } if !tokenizer.is_eof() { - match tokenizer.current_char() { + match tokenizer.next_char() { ' ' | '\t' | '\n' | '\x0C' => tokenizer.advance(1), '\r' => { tokenizer.advance(1); - if !tokenizer.is_eof() && tokenizer.current_char() == '\n' { + if !tokenizer.is_eof() && tokenizer.next_char() == '\n' { tokenizer.advance(1); } } From 2111d6647d823ea27fb354da483f06ef31c53ac9 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 16 Jan 2015 20:03:14 +0000 Subject: [PATCH 56/69] Remove peek/push_back/unexpect, add Parser::reset instead. The new convention is that any parsing function returning `Err(())` may or may not have consumed any number of tokens from the input. Therefore, if the caller wishes to recover and try parsing something else, it should call `parser.reset(position)` where `position` is the result of `parser.position()` that was saved beforehand. 
The Parser::try helper does this for you by taking a closure returns Result, and resetting the position if the result is Err. --- src/color.rs | 2 +- src/lib.rs | 4 +- src/nth.rs | 7 +- src/parser.rs | 138 +++++++++++++++++----------------- src/rules_and_declarations.rs | 33 ++++---- src/tests.rs | 33 ++++---- src/tokenizer.rs | 42 ++++------- 7 files changed, 125 insertions(+), 134 deletions(-) diff --git a/src/color.rs b/src/color.rs index 5420c606..553c3bca 100644 --- a/src/color.rs +++ b/src/color.rs @@ -74,7 +74,7 @@ impl Color { parse_color_function(name.as_slice(), arguments) }) } - token => input.unexpected(token) + _ => Err(()) } } } diff --git a/src/lib.rs b/src/lib.rs index ed4eb96d..2996e0d3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,7 +13,7 @@ extern crate text_writer; #[cfg(test)] extern crate test; #[cfg(test)] extern crate serialize; -pub use tokenizer::{Token, NumericValue, PercentageValue, SourcePosition, SourceLocation}; +pub use tokenizer::{Token, NumericValue, PercentageValue, SourceLocation}; pub use rules_and_declarations::{Priority, parse_important}; pub use rules_and_declarations::{DeclarationParser, DeclarationListParser, parse_one_declaration}; pub use rules_and_declarations::{RuleListParser, parse_one_rule}; @@ -22,7 +22,7 @@ pub use from_bytes::{decode_stylesheet_bytes, parse_stylesheet_rules_from_bytes} pub use color::{RGBA, Color, parse_color_keyword}; pub use nth::parse_nth; pub use serializer::{ToCss, CssStringWriter, serialize_identifier, serialize_string, TokenWriter}; -pub use parser::{Parser, Delimiter, Delimiters}; +pub use parser::{Parser, Delimiter, Delimiters, SourcePosition}; /** diff --git a/src/nth.rs b/src/nth.rs index a5d5ed97..d235c953 100644 --- a/src/nth.rs +++ b/src/nth.rs @@ -47,20 +47,21 @@ pub fn parse_nth(input: &mut Parser) -> Result<(i32, i32), ()> { } _ => Err(()) }, - token => input.unexpected(token) + _ => Err(()) } } fn parse_b(input: &mut Parser, a: i32) -> Result<(i32, i32), ()> { + let 
start_position = input.position(); match input.next() { Ok(Token::Delim('+')) => parse_signless_b(input, a, 1), Ok(Token::Delim('-')) => parse_signless_b(input, a, -1), Ok(Token::Number(ref value)) if value.signed => { Ok((a, try!(value.int_value.ok_or(())) as i32)) } - token => { - input.push_back_result(token); + _ => { + input.reset(start_position); Ok((a, 0)) } } diff --git a/src/parser.rs b/src/parser.rs index a56e1f36..6bd22d4f 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -5,7 +5,14 @@ use std::ascii::AsciiExt; use std::str::CowString; use std::ops; -use tokenizer::{Token, NumericValue, PercentageValue, Tokenizer, SourcePosition, SourceLocation}; +use tokenizer::{mod, Token, NumericValue, PercentageValue, Tokenizer, SourceLocation}; + + +#[deriving(PartialEq, Eq, Show, Clone, Copy)] +pub struct SourcePosition { + position: tokenizer::SourcePosition, + at_start_of: Option, +} /// Like std::borrow::Cow, except: @@ -123,15 +130,15 @@ impl Delimiters { (self.bits & other.bits) != 0 } - fn from_token(token: &Token) -> Delimiters { - match *token { - Token::Semicolon => Delimiter::Semicolon, - Token::Comma => Delimiter::Comma, - Token::Delim('!') => Delimiter::Bang, - Token::CurlyBracketBlock => Delimiter::CurlyBracketBlock, - Token::CloseCurlyBracket => ClosingDelimiter::CloseCurlyBracket, - Token::CloseSquareBracket => ClosingDelimiter::CloseSquareBracket, - Token::CloseParenthesis => ClosingDelimiter::CloseParenthesis, + fn from_byte(byte: Option) -> Delimiters { + match byte { + Some(b';') => Delimiter::Semicolon, + Some(b',') => Delimiter::Comma, + Some(b'!') => Delimiter::Bang, + Some(b'{') => Delimiter::CurlyBracketBlock, + Some(b'}') => ClosingDelimiter::CloseCurlyBracket, + Some(b']') => ClosingDelimiter::CloseSquareBracket, + Some(b')') => ClosingDelimiter::CloseParenthesis, _ => Delimiter::None, } } @@ -154,55 +161,43 @@ impl<'i, 't> Parser<'i, 't> { #[inline] pub fn expect_exhausted(&mut self) -> Result<(), ()> { + let start_position = 
self.position(); match self.next() { Err(()) => Ok(()), - Ok(token) => self.unexpected(token), - } - } - - #[inline] - pub fn peek(&mut self) -> Result<&Token<'i>, ()> { - // Consume whatever needs to be consumed (e.g. open blocks). - let token = try!(self.next()); - self.push_back(token); - self.tokenizer.peek() - } - - #[inline] - pub fn push_back(&mut self, token: Token<'i>) { - if BlockType::opening(&token) == self.at_start_of { - self.at_start_of = None; + Ok(_) => { + self.reset(start_position); + Err(()) + } } - self.tokenizer.push_back(token) } #[inline] - pub fn push_back_result(&mut self, token_result: Result, ()>) { - if let Ok(token) = token_result { - self.push_back(token) + pub fn position(&self) -> SourcePosition { + SourcePosition { + position: self.tokenizer.position(), + at_start_of: self.at_start_of, } } #[inline] - pub fn unexpected(&mut self, token: Token<'i>) -> Result { - self.push_back(token); - Err(()) - } - - #[inline] - pub fn unexpected_ident(&mut self, value: CowString<'i>) -> Result { - self.push_back(Token::Ident(value)); - Err(()) + pub fn reset(&mut self, new_position: SourcePosition) { + self.tokenizer.reset(new_position.position); + self.at_start_of = new_position.at_start_of; } #[inline] - pub fn position(&self) -> SourcePosition { - self.tokenizer.position() + pub fn try(&mut self, thing: |&mut Parser| -> Result) -> Result { + let start_position = self.position(); + let result = thing(self); + if result.is_err() { + self.reset(start_position) + } + result } #[inline] - pub fn slice_from(&self, start_pos: SourcePosition) -> &'i str { - self.tokenizer.slice_from(start_pos) + pub fn slice_from(&self, start_position: SourcePosition) -> &'i str { + self.tokenizer.slice_from(start_position.position) } #[inline] @@ -212,7 +207,7 @@ impl<'i, 't> Parser<'i, 't> { #[inline] pub fn source_location(&self, target: SourcePosition) -> SourceLocation { - self.tokenizer.source_location(target) + self.tokenizer.source_location(target.position) } 
pub fn next(&mut self) -> Result, ()> { @@ -228,11 +223,10 @@ impl<'i, 't> Parser<'i, 't> { if let Some(block_type) = self.at_start_of.take() { consume_until_end_of_block(block_type, &mut *self.tokenizer); } - let token = try!(self.tokenizer.next()); - if self.parse_until_before.contains(Delimiters::from_token(&token)) { - self.tokenizer.push_back(token); + if self.parse_until_before.contains(Delimiters::from_byte(self.tokenizer.next_byte())) { return Err(()) } + let token = try!(self.tokenizer.next()); if let Some(block_type) = BlockType::opening(&token) { self.at_start_of = Some(block_type); } @@ -305,13 +299,16 @@ impl<'i, 't> Parser<'i, 't> { result = delimited_parser.parse_entirely(parse); } // FIXME: have a special-purpose tokenizer method for this that does less work. - while let Ok(token) = self.tokenizer.next() { - if delimiters.contains(Delimiters::from_token(&token)) { - self.tokenizer.push_back(token); + loop { + if delimiters.contains(Delimiters::from_byte(self.tokenizer.next_byte())) { break } - if let Some(block_type) = BlockType::opening(&token) { - consume_until_end_of_block(block_type, &mut *self.tokenizer); + if let Ok(token) = self.tokenizer.next() { + if let Some(block_type) = BlockType::opening(&token) { + consume_until_end_of_block(block_type, &mut *self.tokenizer); + } + } else { + break } } result @@ -322,9 +319,10 @@ impl<'i, 't> Parser<'i, 't> { parse: |&mut Parser| -> Result) -> Result { let result = self.parse_until_before(delimiters, parse); - // Expect exhausted input or a relevant delimiter (which we consume): - if let Ok(token) = self.next() { - debug_assert!(delimiters.contains(Delimiters::from_token(&token))); + let next_byte = self.tokenizer.next_byte(); + if next_byte.is_some() && !self.parse_until_before.contains(Delimiters::from_byte(next_byte)) { + debug_assert!(delimiters.contains(Delimiters::from_byte(next_byte))); + self.tokenizer.advance(1); } result } @@ -333,7 +331,7 @@ impl<'i, 't> Parser<'i, 't> { pub fn 
expect_ident(&mut self) -> Result, ()> { match try!(self.next()) { Token::Ident(value) => Ok(value), - token => self.unexpected(token) + _ => Err(()) } } @@ -342,7 +340,7 @@ impl<'i, 't> Parser<'i, 't> { pub fn expect_ident_matching<'a>(&mut self, expected_value: &str) -> Result<(), ()> { match try!(self.next()) { Token::Ident(ref value) if value.eq_ignore_ascii_case(expected_value) => Ok(()), - token => self.unexpected(token) + _ => Err(()) } } @@ -350,7 +348,7 @@ impl<'i, 't> Parser<'i, 't> { pub fn expect_string(&mut self) -> Result, ()> { match try!(self.next()) { Token::QuotedString(value) => Ok(value), - token => self.unexpected(token) + _ => Err(()) } } @@ -359,7 +357,7 @@ impl<'i, 't> Parser<'i, 't> { match try!(self.next()) { Token::Ident(value) => Ok(value), Token::QuotedString(value) => Ok(value), - token => self.unexpected(token) + _ => Err(()) } } @@ -367,7 +365,7 @@ impl<'i, 't> Parser<'i, 't> { pub fn expect_url(&mut self) -> Result, ()> { match try!(self.next()) { Token::Url(value) => Ok(value), - token => self.unexpected(token) + _ => Err(()) } } @@ -376,7 +374,7 @@ impl<'i, 't> Parser<'i, 't> { match try!(self.next()) { Token::Url(value) => Ok(value), Token::QuotedString(value) => Ok(value), - token => self.unexpected(token) + _ => Err(()) } } @@ -384,7 +382,7 @@ impl<'i, 't> Parser<'i, 't> { pub fn expect_number(&mut self) -> Result { match try!(self.next()) { Token::Number(NumericValue { value, .. }) => Ok(value), - token => self.unexpected(token) + _ => Err(()) } } @@ -392,7 +390,7 @@ impl<'i, 't> Parser<'i, 't> { pub fn expect_integer(&mut self) -> Result { match try!(self.next()) { Token::Number(NumericValue { int_value, .. }) => int_value.ok_or(()), - token => self.unexpected(token) + _ => Err(()) } } @@ -400,7 +398,7 @@ impl<'i, 't> Parser<'i, 't> { pub fn expect_percentage(&mut self) -> Result { match try!(self.next()) { Token::Percentage(PercentageValue { unit_value, .. 
}) => Ok(unit_value), - token => self.unexpected(token) + _ => Err(()) } } @@ -408,7 +406,7 @@ impl<'i, 't> Parser<'i, 't> { pub fn expect_colon(&mut self) -> Result<(), ()> { match try!(self.next()) { Token::Colon => Ok(()), - token => self.unexpected(token) + _ => Err(()) } } @@ -416,7 +414,7 @@ impl<'i, 't> Parser<'i, 't> { pub fn expect_semicolon(&mut self) -> Result<(), ()> { match try!(self.next()) { Token::Semicolon => Ok(()), - token => self.unexpected(token) + _ => Err(()) } } @@ -424,7 +422,7 @@ impl<'i, 't> Parser<'i, 't> { pub fn expect_comma(&mut self) -> Result<(), ()> { match try!(self.next()) { Token::Comma => Ok(()), - token => self.unexpected(token) + _ => Err(()) } } @@ -432,7 +430,7 @@ impl<'i, 't> Parser<'i, 't> { pub fn expect_delim(&mut self, expected_value: char) -> Result<(), ()> { match try!(self.next()) { Token::Delim(value) if value == expected_value => Ok(()), - token => self.unexpected(token) + _ => Err(()) } } @@ -440,7 +438,7 @@ impl<'i, 't> Parser<'i, 't> { pub fn expect_curly_bracket_block(&mut self) -> Result<(), ()> { match try!(self.next()) { Token::CurlyBracketBlock => Ok(()), - token => self.unexpected(token) + _ => Err(()) } } @@ -448,7 +446,7 @@ impl<'i, 't> Parser<'i, 't> { pub fn expect_function(&mut self) -> Result, ()> { match try!(self.next()) { Token::Function(name) => Ok(name), - token => self.unexpected(token) + _ => Err(()) } } } diff --git a/src/rules_and_declarations.rs b/src/rules_and_declarations.rs index f413e6e1..3766f7c2 100644 --- a/src/rules_and_declarations.rs +++ b/src/rules_and_declarations.rs @@ -17,17 +17,18 @@ pub enum Priority { pub fn parse_important(input: &mut Parser) -> Result { + let start_position = input.position(); match input.next() { Ok(Token::Delim('!')) => { match try!(input.next()) { Token::Ident(ref value) if value.eq_ignore_ascii_case("important") => { Ok(Priority::Important) } - token => input.unexpected(token) + _ => Err(()) } } - token => { - input.push_back_result(token); + _ => { 
+ input.reset(start_position); Ok(Priority::Normal) } } @@ -180,13 +181,15 @@ for RuleListParser<'i, 't, 'a, R, QP, AP, P> where P: QualifiedRuleParser + AtRuleParser { fn next(&mut self) -> Option> { loop { - match self.input.next() { + let start_position = self.input.position(); + match self.input.next_including_whitespace() { + Ok(Token::WhiteSpace) => {} Ok(Token::CDO) | Ok(Token::CDC) if self.is_stylesheet => {} Ok(Token::AtKeyword(name)) => { return Some(parse_at_rule(name, self.input, &mut self.parser)) } - Ok(token) => { - self.input.push_back(token); + Ok(_) => { + self.input.reset(start_position); return Some(parse_qualified_rule(self.input, &mut self.parser)) } Err(()) => return None, @@ -210,13 +213,17 @@ pub fn parse_one_rule(input: &mut Parser, parser: &mut P) -> Result where P: QualifiedRuleParser + AtRuleParser { input.parse_entirely(|input| { - match try!(input.next()) { - Token::AtKeyword(name) => { - parse_at_rule(name, input, parser) - } - token => { - input.push_back(token); - parse_qualified_rule(input, parser) + loop { + let start_position = input.position(); + match try!(input.next_including_whitespace()) { + Token::WhiteSpace => {} + Token::AtKeyword(name) => { + return parse_at_rule(name, input, parser) + } + _ => { + input.reset(start_position); + return parse_qualified_rule(input, parser) + } } } }) diff --git a/src/tests.rs b/src/tests.rs index fbf717ac..4bd9f7ea 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -431,31 +431,28 @@ impl DeclarationParser for JsonParser { fn parse_value(&mut self, name: &str, input: &mut Parser) -> Result { let mut value = vec![]; let mut important = false; - while let Ok(mut token) = input.next_including_whitespace() { - if token == Token::Delim('!') { - input.push_back(token); - match parse_important(input) { - Ok(Priority::Important) => { + loop { + let start_position = input.position(); + if let Ok(mut token) = input.next_including_whitespace() { + // Hack to deal with css-parsing-tests assuming that + 
// `!important` in the middle of a declaration value is OK. + // This can never happen per spec + // (even CSS Variables forbid top-level `!`) + if token == Token::Delim('!') { + input.reset(start_position); + if parse_important(input) == Ok(Priority::Important) { if input.is_exhausted() { important = true; break } - // Hack to deal with css-parsing-tests assuming that - // `!important` in the middle of a declaration value is OK. - // This can never happen per spec - // (even CSS Variables forbid top-level `!`) - value.push("!".to_json()); - token = Token::Ident(Borrowed("important")); } - // More hacks - Ok(Priority::Normal) => { - token = input.next_including_whitespace().unwrap(); - assert!(token == Token::Delim('!')); - } - Err(()) => token = Token::Delim('!') + input.reset(start_position); + token = input.next_including_whitespace().unwrap(); } + value.push(one_component_value_to_json(token, input)); + } else { + break } - value.push(one_component_value_to_json(token, input)); } Ok(JArray![ "declaration", diff --git a/src/tokenizer.rs b/src/tokenizer.rs index ff2d875d..915c4732 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -125,9 +125,6 @@ pub struct Tokenizer<'a> { /// Counted in bytes, not code points. From 0. 
position: uint, - /// For `peek()` and `push_back()` - buffer: Option>, - /// Cache for `source_location()` last_known_line_break: Cell<(uint, uint)>, } @@ -139,41 +136,23 @@ impl<'a> Tokenizer<'a> { Tokenizer { input: input, position: 0, - buffer: None, last_known_line_break: Cell::new((1, 0)), } } #[inline] pub fn next(&mut self) -> Result, ()> { - if let Some(token) = self.buffer.take() { - Ok(token) - } else { - next_token(self).ok_or(()) - } + next_token(self).ok_or(()) } #[inline] - pub fn peek(&mut self) -> Result<&Token<'a>, ()> { - match self.buffer { - Some(ref token) => Ok(token), - None => { - self.buffer = next_token(self); - self.buffer.as_ref().ok_or(()) - } - } - } - - #[inline] - pub fn push_back(&mut self, token: Token<'a>) { - assert!(self.buffer.is_none(), - "Parser::push_back can only be called after Parser::next"); - self.buffer = Some(token); + pub fn position(&self) -> SourcePosition { + SourcePosition(self.position) } #[inline] - pub fn position(&self) -> SourcePosition { - SourcePosition(self.position) + pub fn reset(&mut self, new_position: SourcePosition) { + self.position = new_position.0; } #[inline] @@ -222,6 +201,15 @@ impl<'a> Tokenizer<'a> { } } + #[inline] + pub fn next_byte(&self) -> Option { + if self.is_eof() { + None + } else { + Some(self.input.as_bytes()[self.position]) + } + } + // If false, `tokenizer.next_char()` will not panic. #[inline] fn is_eof(&self) -> bool { !self.has_at_least(0) } @@ -232,7 +220,7 @@ impl<'a> Tokenizer<'a> { fn has_at_least(&self, n: uint) -> bool { self.position + n < self.input.len() } #[inline] - fn advance(&mut self, n: uint) { self.position += n } + pub fn advance(&mut self, n: uint) { self.position += n } // Assumes non-EOF #[inline] From d40971b83f32b36a9be6abadf7e3e3abcabd15fe Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 16 Jan 2015 21:45:10 +0000 Subject: [PATCH 57/69] Internal refactor: Rename Parser::parse_until_before to stop_before. 
--- src/parser.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 6bd22d4f..e3c8147a 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -55,7 +55,7 @@ pub struct Parser<'i: 't, 't> { /// If `Some(_)`, .parse_nested_block() can be called. at_start_of: Option, /// For parsers from `parse_until` or `parse_nested_block` - parse_until_before: Delimiters, + stop_before: Delimiters, } @@ -150,7 +150,7 @@ impl<'i, 't> Parser<'i, 't> { Parser { tokenizer: MaybeOwned::Owned(box Tokenizer::new(input)), at_start_of: None, - parse_until_before: Delimiter::None, + stop_before: Delimiter::None, } } @@ -223,7 +223,7 @@ impl<'i, 't> Parser<'i, 't> { if let Some(block_type) = self.at_start_of.take() { consume_until_end_of_block(block_type, &mut *self.tokenizer); } - if self.parse_until_before.contains(Delimiters::from_byte(self.tokenizer.next_byte())) { + if self.stop_before.contains(Delimiters::from_byte(self.tokenizer.next_byte())) { return Err(()) } let token = try!(self.tokenizer.next()); @@ -275,7 +275,7 @@ impl<'i, 't> Parser<'i, 't> { let mut nested_parser = Parser { tokenizer: MaybeOwned::Borrowed(&mut *self.tokenizer), at_start_of: None, - parse_until_before: closing_delimiter, + stop_before: closing_delimiter, }; result = nested_parser.parse_entirely(parse); } @@ -287,14 +287,14 @@ impl<'i, 't> Parser<'i, 't> { pub fn parse_until_before(&mut self, delimiters: Delimiters, parse: |&mut Parser| -> Result) -> Result { - let delimiters = self.parse_until_before | delimiters; + let delimiters = self.stop_before | delimiters; let result; // Introduce a new scope to limit duration of nested_parser’s borrow { let mut delimited_parser = Parser { tokenizer: MaybeOwned::Borrowed(&mut *self.tokenizer), at_start_of: self.at_start_of.take(), - parse_until_before: delimiters, + stop_before: delimiters, }; result = delimited_parser.parse_entirely(parse); } @@ -320,7 +320,7 @@ impl<'i, 't> Parser<'i, 't> { -> Result { let 
result = self.parse_until_before(delimiters, parse); let next_byte = self.tokenizer.next_byte(); - if next_byte.is_some() && !self.parse_until_before.contains(Delimiters::from_byte(next_byte)) { + if next_byte.is_some() && !self.stop_before.contains(Delimiters::from_byte(next_byte)) { debug_assert!(delimiters.contains(Delimiters::from_byte(next_byte))); self.tokenizer.advance(1); } From b8bfa5fdb2e2be3b44331d49657230b5247256c5 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 19 Jan 2015 18:25:16 +0100 Subject: [PATCH 58/69] =?UTF-8?q?Enable=20Parser::try=E2=80=99s=20return?= =?UTF-8?q?=20value=20to=20borrow=20the=20input=20(e.g.=20be=20a=20token)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/parser.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser.rs b/src/parser.rs index e3c8147a..0395cb85 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -186,7 +186,7 @@ impl<'i, 't> Parser<'i, 't> { } #[inline] - pub fn try(&mut self, thing: |&mut Parser| -> Result) -> Result { + pub fn try(&mut self, thing: |&mut Parser<'i, 't>| -> Result) -> Result { let start_position = self.position(); let result = thing(self); if result.is_err() { From 281f911954373beca42dfb6a38ab3c3afcef48fd Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 19 Jan 2015 18:25:39 +0100 Subject: [PATCH 59/69] Add Parser::expect_{square_bracket,parenthesis}_block --- src/parser.rs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/parser.rs b/src/parser.rs index 0395cb85..092aaf0a 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -442,6 +442,22 @@ impl<'i, 't> Parser<'i, 't> { } } + #[inline] + pub fn expect_square_bracket_block(&mut self) -> Result<(), ()> { + match try!(self.next()) { + Token::SquareBracketBlock => Ok(()), + _ => Err(()) + } + } + + #[inline] + pub fn expect_parenthesis_block(&mut self) -> Result<(), ()> { + match try!(self.next()) { + Token::ParenthesisBlock => Ok(()), + _ => 
Err(()) + } + } + #[inline] pub fn expect_function(&mut self) -> Result, ()> { match try!(self.next()) { From 1c6672a15af106e5f07649f27da1a60988234763 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 19 Jan 2015 23:26:42 +0100 Subject: [PATCH 60/69] Internal refactor: make some code easier to understand. --- src/parser.rs | 7 ++++--- src/tokenizer.rs | 14 +++++++------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 092aaf0a..f6fe53f2 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -162,13 +162,14 @@ impl<'i, 't> Parser<'i, 't> { #[inline] pub fn expect_exhausted(&mut self) -> Result<(), ()> { let start_position = self.position(); - match self.next() { + let result = match self.next() { Err(()) => Ok(()), Ok(_) => { - self.reset(start_position); Err(()) } - } + }; + self.reset(start_position); + result } #[inline] diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 915c4732..34d2ae46 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -410,13 +410,13 @@ fn next_token<'a>(tokenizer: &mut Tokenizer<'a>) -> Option> { #[inline] fn consume_comments(tokenizer: &mut Tokenizer) { while tokenizer.starts_with("/*") { - tokenizer.advance(2); // +2 to consume "/*" - while !tokenizer.is_eof() { - if tokenizer.consume_char() == '*' && - !tokenizer.is_eof() && - tokenizer.next_char() == '/' { - tokenizer.advance(1); - break + tokenizer.advance(2); // consume "/*" + match tokenizer.input.slice_from(tokenizer.position).match_indices("*/").next() { + Some((_start, end)) => { + tokenizer.advance(end) + } + None => { + tokenizer.position = tokenizer.input.len() } } } From 45741e093a5a5273c717b04c2ba557ba8326341b Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 19 Jan 2015 23:27:15 +0100 Subject: [PATCH 61/69] Fix bug with delimiter parser going over their limit because of CSS comments. 
--- src/css-parsing-tests/declaration_list.json | 2 +- src/tokenizer.rs | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/css-parsing-tests/declaration_list.json b/src/css-parsing-tests/declaration_list.json index efdb28e9..8f171265 100644 --- a/src/css-parsing-tests/declaration_list.json +++ b/src/css-parsing-tests/declaration_list.json @@ -22,7 +22,7 @@ ["error", "invalid"] ], -"@import 'foo.css'; a:b; @import 'bar.css'", [ +"@import 'foo.css'; a:b/**/; @import 'bar.css'", [ ["at-rule", "import", [" ", ["string", "foo.css"]], null], ["declaration", "a", [["ident", "b"]], false], ["at-rule", "import", [" ", ["string", "bar.css"]], null] diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 34d2ae46..5ec8bc60 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -202,7 +202,8 @@ impl<'a> Tokenizer<'a> { } #[inline] - pub fn next_byte(&self) -> Option { + pub fn next_byte(&mut self) -> Option { + consume_comments(self); if self.is_eof() { None } else { From 0c8be7563007d497e0331b86e8d1c1d5f72d138c Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 20 Jan 2015 00:11:57 +0100 Subject: [PATCH 62/69] Simplify parse_important. 
--- src/lib.rs | 2 +- src/rules_and_declarations.rs | 27 +++------------------------ src/tests.rs | 4 ++-- 3 files changed, 6 insertions(+), 27 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 2996e0d3..a9079b05 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -14,7 +14,7 @@ extern crate text_writer; #[cfg(test)] extern crate serialize; pub use tokenizer::{Token, NumericValue, PercentageValue, SourceLocation}; -pub use rules_and_declarations::{Priority, parse_important}; +pub use rules_and_declarations::{parse_important}; pub use rules_and_declarations::{DeclarationParser, DeclarationListParser, parse_one_declaration}; pub use rules_and_declarations::{RuleListParser, parse_one_rule}; pub use rules_and_declarations::{AtRuleType, QualifiedRuleParser, AtRuleParser}; diff --git a/src/rules_and_declarations.rs b/src/rules_and_declarations.rs index 3766f7c2..554c83a6 100644 --- a/src/rules_and_declarations.rs +++ b/src/rules_and_declarations.rs @@ -4,34 +4,13 @@ // http://dev.w3.org/csswg/css-syntax/#parsing -use std::ascii::AsciiExt; use std::str::CowString; use super::{Token, Parser, Delimiter}; -#[deriving(Copy, Eq, PartialEq)] -pub enum Priority { - Normal, - Important, -} - - -pub fn parse_important(input: &mut Parser) -> Result { - let start_position = input.position(); - match input.next() { - Ok(Token::Delim('!')) => { - match try!(input.next()) { - Token::Ident(ref value) if value.eq_ignore_ascii_case("important") => { - Ok(Priority::Important) - } - _ => Err(()) - } - } - _ => { - input.reset(start_position); - Ok(Priority::Normal) - } - } +pub fn parse_important(input: &mut Parser) -> Result<(), ()> { + try!(input.expect_delim('!')); + input.expect_ident_matching("important") } diff --git a/src/tests.rs b/src/tests.rs index 4bd9f7ea..0a8c9e57 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -13,7 +13,7 @@ use encoding::label::encoding_from_whatwg_label; use super::{Parser, Token, NumericValue, PercentageValue, SourceLocation, DeclarationListParser, 
DeclarationParser, RuleListParser, - AtRuleType, AtRuleParser, QualifiedRuleParser, Priority, + AtRuleType, AtRuleParser, QualifiedRuleParser, parse_one_declaration, parse_one_rule, parse_important, parse_stylesheet_rules_from_bytes, Color, RGBA, parse_color_keyword, parse_nth, ToCss}; @@ -440,7 +440,7 @@ impl DeclarationParser for JsonParser { // (even CSS Variables forbid top-level `!`) if token == Token::Delim('!') { input.reset(start_position); - if parse_important(input) == Ok(Priority::Important) { + if parse_important(input).is_ok() { if input.is_exhausted() { important = true; break From ca3bfb9bba8639f72e6a4ddf09a5d21df8ac0bd6 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 20 Jan 2015 13:23:36 +0100 Subject: [PATCH 63/69] =?UTF-8?q?Emit=20a=20token=20for=20CSS=C2=A0comment?= =?UTF-8?q?s.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/parser.rs | 13 +++++++++++-- src/rules_and_declarations.rs | 8 ++++---- src/serializer.rs | 1 + src/tests.rs | 1 + src/tokenizer.rs | 34 +++++++++++++++------------------- 5 files changed, 32 insertions(+), 25 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index f6fe53f2..14716d0e 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -213,14 +213,23 @@ impl<'i, 't> Parser<'i, 't> { pub fn next(&mut self) -> Result, ()> { loop { - match self.next_including_whitespace() { - Ok(Token::WhiteSpace) => {}, + match self.next_including_whitespace_and_comments() { + Ok(Token::WhiteSpace) | Ok(Token::Comment) => {}, result => return result } } } pub fn next_including_whitespace(&mut self) -> Result, ()> { + loop { + match self.next_including_whitespace_and_comments() { + Ok(Token::Comment) => {}, + result => return result + } + } + } + + pub fn next_including_whitespace_and_comments(&mut self) -> Result, ()> { if let Some(block_type) = self.at_start_of.take() { consume_until_end_of_block(block_type, &mut *self.tokenizer); } diff --git a/src/rules_and_declarations.rs 
b/src/rules_and_declarations.rs index 554c83a6..8d299442 100644 --- a/src/rules_and_declarations.rs +++ b/src/rules_and_declarations.rs @@ -161,8 +161,8 @@ where P: QualifiedRuleParser + AtRuleParser { fn next(&mut self) -> Option> { loop { let start_position = self.input.position(); - match self.input.next_including_whitespace() { - Ok(Token::WhiteSpace) => {} + match self.input.next_including_whitespace_and_comments() { + Ok(Token::WhiteSpace) | Ok(Token::Comment) => {} Ok(Token::CDO) | Ok(Token::CDC) if self.is_stylesheet => {} Ok(Token::AtKeyword(name)) => { return Some(parse_at_rule(name, self.input, &mut self.parser)) @@ -194,8 +194,8 @@ pub fn parse_one_rule(input: &mut Parser, parser: &mut P) input.parse_entirely(|input| { loop { let start_position = input.position(); - match try!(input.next_including_whitespace()) { - Token::WhiteSpace => {} + match try!(input.next_including_whitespace_and_comments()) { + Token::WhiteSpace | Token::Comment => {} Token::AtKeyword(name) => { return parse_at_rule(name, input, parser) } diff --git a/src/serializer.rs b/src/serializer.rs index 3a81178f..f03c3000 100644 --- a/src/serializer.rs +++ b/src/serializer.rs @@ -117,6 +117,7 @@ impl<'a> ToCss for Token<'a> { } Token::WhiteSpace => try!(dest.write_char(' ')), + Token::Comment => try!(dest.write_str("/**/")), Token::Colon => try!(dest.write_char(':')), Token::Semicolon => try!(dest.write_char(';')), Token::Comma => try!(dest.write_char(',')), diff --git a/src/tests.rs b/src/tests.rs index 0a8c9e57..6381a46c 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -543,6 +543,7 @@ fn one_component_value_to_json(token: Token, input: &mut Parser) -> Json { Token::UnicodeRange(start, end) => JArray!["unicode-range", start, end], Token::WhiteSpace => " ".to_json(), + Token::Comment => "/**/".to_json(), Token::Colon => ":".to_json(), Token::Semicolon => ";".to_json(), Token::Comma => ",".to_json(), diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 5ec8bc60..5f15f555 100644 --- 
a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -30,6 +30,7 @@ pub enum Token<'a> { Dimension(NumericValue, CowString<'a>), UnicodeRange(u32, u32), // (start, end) of range WhiteSpace, + Comment, Colon, // : Semicolon, // ; Comma, // , @@ -74,6 +75,7 @@ impl<'a> Token<'a> { Token::Dimension(value, unit) => Token::Dimension(value, Cow::Owned(unit.into_owned())), Token::UnicodeRange(start, end) => Token::UnicodeRange(start, end), Token::WhiteSpace => Token::WhiteSpace, + Token::Comment => Token::Comment, Token::Colon => Token::Colon, Token::Semicolon => Token::Semicolon, Token::Comma => Token::Comma, @@ -202,8 +204,7 @@ impl<'a> Tokenizer<'a> { } #[inline] - pub fn next_byte(&mut self) -> Option { - consume_comments(self); + pub fn next_byte(&self) -> Option { if self.is_eof() { None } else { @@ -266,7 +267,6 @@ pub struct SourceLocation { fn next_token<'a>(tokenizer: &mut Tokenizer<'a>) -> Option> { - consume_comments(tokenizer); if tokenizer.is_eof() { return None } @@ -349,6 +349,18 @@ fn next_token<'a>(tokenizer: &mut Tokenizer<'a>) -> Option> { Delim(c) } } + '/' if tokenizer.starts_with("/*") => { + tokenizer.advance(2); // consume "/*" + match tokenizer.input.slice_from(tokenizer.position).match_indices("*/").next() { + Some((_start, end)) => { + tokenizer.advance(end) + } + None => { + tokenizer.position = tokenizer.input.len() + } + } + Comment + } '0'...'9' => consume_numeric(tokenizer), ':' => { tokenizer.advance(1); Colon }, ';' => { tokenizer.advance(1); Semicolon }, @@ -408,22 +420,6 @@ fn next_token<'a>(tokenizer: &mut Tokenizer<'a>) -> Option> { } -#[inline] -fn consume_comments(tokenizer: &mut Tokenizer) { - while tokenizer.starts_with("/*") { - tokenizer.advance(2); // consume "/*" - match tokenizer.input.slice_from(tokenizer.position).match_indices("*/").next() { - Some((_start, end)) => { - tokenizer.advance(end) - } - None => { - tokenizer.position = tokenizer.input.len() - } - } - } -} - - fn consume_string<'a>(tokenizer: &mut Tokenizer<'a>, 
single_quote: bool) -> Token<'a> { match consume_quoted_string(tokenizer, single_quote) { Ok(value) => QuotedString(value), From 83b7cc8392d095aaacbe051841b81d86630bb0eb Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 20 Jan 2015 13:38:25 +0100 Subject: [PATCH 64/69] Keep in tokens the origin content of whitespace and comments. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It’s cheap, and could be useful for tools such as pre-processors. --- src/parser.rs | 4 ++-- src/rules_and_declarations.rs | 4 ++-- src/serializer.rs | 6 +++--- src/tests.rs | 10 +++++----- src/tokenizer.rs | 27 +++++++++++++++++---------- 5 files changed, 29 insertions(+), 22 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 14716d0e..b80f99bb 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -214,7 +214,7 @@ impl<'i, 't> Parser<'i, 't> { pub fn next(&mut self) -> Result, ()> { loop { match self.next_including_whitespace_and_comments() { - Ok(Token::WhiteSpace) | Ok(Token::Comment) => {}, + Ok(Token::WhiteSpace(_)) | Ok(Token::Comment(_)) => {}, result => return result } } @@ -223,7 +223,7 @@ impl<'i, 't> Parser<'i, 't> { pub fn next_including_whitespace(&mut self) -> Result, ()> { loop { match self.next_including_whitespace_and_comments() { - Ok(Token::Comment) => {}, + Ok(Token::Comment(_)) => {}, result => return result } } diff --git a/src/rules_and_declarations.rs b/src/rules_and_declarations.rs index 8d299442..f7d82501 100644 --- a/src/rules_and_declarations.rs +++ b/src/rules_and_declarations.rs @@ -162,7 +162,7 @@ where P: QualifiedRuleParser + AtRuleParser { loop { let start_position = self.input.position(); match self.input.next_including_whitespace_and_comments() { - Ok(Token::WhiteSpace) | Ok(Token::Comment) => {} + Ok(Token::WhiteSpace(_)) | Ok(Token::Comment(_)) => {} Ok(Token::CDO) | Ok(Token::CDC) if self.is_stylesheet => {} Ok(Token::AtKeyword(name)) => { return Some(parse_at_rule(name, self.input, &mut self.parser)) 
@@ -195,7 +195,7 @@ pub fn parse_one_rule(input: &mut Parser, parser: &mut P) loop { let start_position = input.position(); match try!(input.next_including_whitespace_and_comments()) { - Token::WhiteSpace | Token::Comment => {} + Token::WhiteSpace(_) | Token::Comment(_) => {} Token::AtKeyword(name) => { return parse_at_rule(name, input, parser) } diff --git a/src/serializer.rs b/src/serializer.rs index f03c3000..0415030f 100644 --- a/src/serializer.rs +++ b/src/serializer.rs @@ -116,8 +116,8 @@ impl<'a> ToCss for Token<'a> { } } - Token::WhiteSpace => try!(dest.write_char(' ')), - Token::Comment => try!(dest.write_str("/**/")), + Token::WhiteSpace(content) => try!(dest.write_str(content)), + Token::Comment(content) => try!(write!(dest, "/*{}*/", content)), Token::Colon => try!(dest.write_char(':')), Token::Semicolon => try!(dest.write_char(';')), Token::Comma => try!(dest.write_char(',')), @@ -309,7 +309,7 @@ impl<'i, 'a, W> TokenWriter<'i, 'a, W> where W: TextWriter { try!(self.dest.write_str("/**/")); } // Skip whitespace when '\n' was previously written at the previous iteration. 
- if !matches!((previous, token), (&Delim('\\'), &WhiteSpace)) { + if !matches!((previous, token), (&Delim('\\'), &WhiteSpace(_))) { try!(token.to_css(self.dest)); } if token == &Delim('\\') { diff --git a/src/tests.rs b/src/tests.rs index 6381a46c..d25818f8 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -392,17 +392,17 @@ fn line_numbers() { assert_eq!(input.current_source_location(), SourceLocation { line: 1, column: 1 }); assert_eq!(input.next_including_whitespace(), Ok(Token::Ident(Borrowed("foo")))); assert_eq!(input.current_source_location(), SourceLocation { line: 1, column: 4 }); - assert_eq!(input.next_including_whitespace(), Ok(Token::WhiteSpace)); + assert_eq!(input.next_including_whitespace(), Ok(Token::WhiteSpace(" "))); assert_eq!(input.current_source_location(), SourceLocation { line: 1, column: 5 }); assert_eq!(input.next_including_whitespace(), Ok(Token::Ident(Borrowed("bar")))); assert_eq!(input.current_source_location(), SourceLocation { line: 1, column: 8 }); - assert_eq!(input.next_including_whitespace(), Ok(Token::WhiteSpace)); + assert_eq!(input.next_including_whitespace(), Ok(Token::WhiteSpace("\n"))); assert_eq!(input.current_source_location(), SourceLocation { line: 2, column: 1 }); assert_eq!(input.next_including_whitespace(), Ok(Token::Ident(Borrowed("baz")))); assert_eq!(input.current_source_location(), SourceLocation { line: 2, column: 4 }); let position = input.position(); - assert_eq!(input.next_including_whitespace(), Ok(Token::WhiteSpace)); + assert_eq!(input.next_including_whitespace(), Ok(Token::WhiteSpace("\r\n\n"))); assert_eq!(input.current_source_location(), SourceLocation { line: 4, column: 1 }); assert_eq!(input.source_location(position), SourceLocation { line: 2, column: 4 }); @@ -542,8 +542,8 @@ fn one_component_value_to_json(token: Token, input: &mut Parser) -> Json { Token::UnicodeRange(start, end) => JArray!["unicode-range", start, end], - Token::WhiteSpace => " ".to_json(), - Token::Comment => "/**/".to_json(), + 
Token::WhiteSpace(_) => " ".to_json(), + Token::Comment(_) => "/**/".to_json(), Token::Colon => ":".to_json(), Token::Semicolon => ";".to_json(), Token::Comma => ",".to_json(), diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 5f15f555..4d020886 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -29,8 +29,8 @@ pub enum Token<'a> { Percentage(PercentageValue), Dimension(NumericValue, CowString<'a>), UnicodeRange(u32, u32), // (start, end) of range - WhiteSpace, - Comment, + WhiteSpace(&'a str), + Comment(&'a str), Colon, // : Semicolon, // ; Comma, // , @@ -74,8 +74,8 @@ impl<'a> Token<'a> { Token::Percentage(value) => Token::Percentage(value), Token::Dimension(value, unit) => Token::Dimension(value, Cow::Owned(unit.into_owned())), Token::UnicodeRange(start, end) => Token::UnicodeRange(start, end), - Token::WhiteSpace => Token::WhiteSpace, - Token::Comment => Token::Comment, + Token::WhiteSpace(_) => Token::WhiteSpace(" "), + Token::Comment(_) => Token::Comment(""), Token::Colon => Token::Colon, Token::Semicolon => Token::Semicolon, Token::Comma => Token::Comma, @@ -273,13 +273,15 @@ fn next_token<'a>(tokenizer: &mut Tokenizer<'a>) -> Option> { let c = tokenizer.next_char(); let token = match c { '\t' | '\n' | ' ' | '\r' | '\x0C' => { + let start_position = tokenizer.position(); + tokenizer.advance(1); while !tokenizer.is_eof() { match tokenizer.next_char() { ' ' | '\t' | '\n' | '\r' | '\x0C' => tokenizer.advance(1), _ => break, } } - WhiteSpace + WhiteSpace(tokenizer.slice_from(start_position)) }, '"' => consume_string(tokenizer, false), '#' => { @@ -351,15 +353,20 @@ fn next_token<'a>(tokenizer: &mut Tokenizer<'a>) -> Option> { } '/' if tokenizer.starts_with("/*") => { tokenizer.advance(2); // consume "/*" - match tokenizer.input.slice_from(tokenizer.position).match_indices("*/").next() { - Some((_start, end)) => { - tokenizer.advance(end) + let start_position = tokenizer.position(); + let content; + match 
tokenizer.input.slice_from(tokenizer.position).find_str("*/") { + Some(offset) => { + tokenizer.advance(offset); + content = tokenizer.slice_from(start_position); + tokenizer.advance(2); } None => { - tokenizer.position = tokenizer.input.len() + tokenizer.position = tokenizer.input.len(); + content = tokenizer.slice_from(start_position); } } - Comment + Comment(content) } '0'...'9' => consume_numeric(tokenizer), ':' => { tokenizer.advance(1); Colon }, From 55e082470304682934c373f584e722c80a60407e Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Wed, 21 Jan 2015 17:19:17 +0100 Subject: [PATCH 65/69] Serialize negative zero with a minus sign. --- src/css-parsing-tests/component_value_list.json | 8 ++++---- src/serializer.rs | 9 ++++++++- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/css-parsing-tests/component_value_list.json b/src/css-parsing-tests/component_value_list.json index e5e28713..70dc06d9 100644 --- a/src/css-parsing-tests/component_value_list.json +++ b/src/css-parsing-tests/component_value_list.json @@ -29,7 +29,7 @@ ["ident", "--red"], " ", ["ident", "--red blue"], " ", ["dimension", "0", 0, "integer", "red"], " ", - ["dimension", "0", 0, "integer", "red"], " ", + ["dimension", "-0", 0, "integer", "red"], " ", ["ident", "\uFFFD\uFFFDred"], " ", ["ident", "_Red"], " ", ".", ["ident", "red"], " ", @@ -58,7 +58,7 @@ ["function", "--rgba"], " ", ["function", "--rgba"], " ", ["dimension", "0", 0, "integer", "rgba"], ["()"], " ", - ["dimension", "0", 0, "integer", "rgba"], ["()"], " ", + ["dimension", "-0", 0, "integer", "rgba"], ["()"], " ", ["function", "_rgba"], " ", ".", ["function", "rgba"], " ", ["function", "rgbâ"], " ", @@ -74,7 +74,7 @@ ["at-keyword", "--media"], " ", ["at-keyword", "--media"], " ", "@", ["dimension", "0", 0, "integer", "media"], " ", - "@", ["dimension", "0", 0, "integer", "media"], " ", + "@", ["dimension", "-0", 0, "integer", "media"], " ", ["at-keyword", "_media"], " ", "@", ".", ["ident", "media"], " ", 
["at-keyword", "medİa"], " ", @@ -318,7 +318,7 @@ ["dimension", "12", 12, "integer", "--red"], " ", ["dimension", "12", 12, "integer", "--red"], " ", ["dimension", "120", 120, "integer", "red"], " ", - ["number", "12", 12, "integer"], ["dimension", "0", 0, "integer", "red"], " ", + ["number", "12", 12, "integer"], ["dimension", "-0", 0, "integer", "red"], " ", ["dimension", "12", 12, "integer", "\uFFFDred"], " ", ["dimension", "12", 12, "integer", "_Red"], " ", ["number", "12", 12, "integer"], ".", ["ident", "red"], " ", diff --git a/src/serializer.rs b/src/serializer.rs index 0415030f..3bcb0cbb 100644 --- a/src/serializer.rs +++ b/src/serializer.rs @@ -51,7 +51,14 @@ where W: TextWriter { if value.signed && value.value.is_positive() { try!(dest.write_str("+")); } - try!(write!(dest, "{}", value.value)) + + if value.value == 0.0 && value.value.is_negative() { + // Negative zero. Work around #20596. + try!(dest.write_str("-0")) + } else { + try!(write!(dest, "{}", value.value)) + } + if value.int_value.is_none() && value.value.fract() == 0. { try!(dest.write_str(".0")); } From d91196271789a1f3bac88876cae86b82d405e9e4 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Wed, 21 Jan 2015 17:19:43 +0100 Subject: [PATCH 66/69] Serialize with question marks when possible. 
--- src/serializer.rs | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/src/serializer.rs b/src/serializer.rs index 3bcb0cbb..bf308edd 100644 --- a/src/serializer.rs +++ b/src/serializer.rs @@ -4,7 +4,8 @@ use std::fmt; use std::mem; -use std::num::Float; +use std::cmp; +use std::num::{Float, Int}; use text_writer::{mod, TextWriter}; @@ -117,9 +118,22 @@ impl<'a> ToCss for Token<'a> { }, Token::UnicodeRange(start, end) => { - try!(dest.write_str(format!("U+{:X}", start).as_slice())); - if end != start { - try!(dest.write_str(format!("-{:X}", end).as_slice())); + try!(dest.write_str("U+")); + let bits = cmp::min(start.trailing_zeros(), (!end).trailing_zeros()); + if bits >= 4 && start >> bits == end >> bits { + let question_marks = bits / 4; + let common = start >> question_marks * 4; + if common != 0 { + try!(write!(dest, "{:X}", common)); + } + for _ in range(0, question_marks) { + try!(dest.write_str("?")); + } + } else { + try!(write!(dest, "{:X}", start)); + if end != start { + try!(write!(dest, "-{:X}", end)); + } } } From f94502ff92e035b6c4cfd1bbae5491778288a746 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Wed, 21 Jan 2015 17:20:21 +0100 Subject: [PATCH 67/69] Remove some unnecessary conversions. 
--- src/serializer.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/serializer.rs b/src/serializer.rs index bf308edd..56471bf2 100644 --- a/src/serializer.rs +++ b/src/serializer.rs @@ -77,7 +77,7 @@ impl<'a> ToCss for Token<'a> { }, Token::Hash(ref value) => { try!(dest.write_char('#')); - for c in value.as_slice().chars() { + for c in value.chars() { try!(serialize_char(c, dest, /* is_identifier_start = */ false)); } }, @@ -193,7 +193,7 @@ where W:TextWriter { fn serialize_char(c: char, dest: &mut W, is_identifier_start: bool) -> text_writer::Result where W: TextWriter { match c { - '0'...'9' if is_identifier_start => try!(dest.write_str(format!("\\3{} ", c).as_slice())), + '0'...'9' if is_identifier_start => try!(write!(dest, "\\3{} ", c)), '-' if is_identifier_start => try!(dest.write_str("\\-")), '0'...'9' | 'A'...'Z' | 'a'...'z' | '_' | '-' => try!(dest.write_char(c)), _ if c > '\x7F' => try!(dest.write_char(c)), From 863662352b4a5950f3efa759e9713969b7ae6e2a Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Wed, 21 Jan 2015 17:24:44 +0100 Subject: [PATCH 68/69] Remove serialization special cases now that comments are tokenized. 
--- src/lib.rs | 2 +- src/serializer.rs | 79 ----------------------------------------------- src/tests.rs | 15 +++++---- src/tokenizer.rs | 4 ++- 4 files changed, 11 insertions(+), 89 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index a9079b05..780f33ce 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -21,7 +21,7 @@ pub use rules_and_declarations::{AtRuleType, QualifiedRuleParser, AtRuleParser}; pub use from_bytes::{decode_stylesheet_bytes, parse_stylesheet_rules_from_bytes}; pub use color::{RGBA, Color, parse_color_keyword}; pub use nth::parse_nth; -pub use serializer::{ToCss, CssStringWriter, serialize_identifier, serialize_string, TokenWriter}; +pub use serializer::{ToCss, CssStringWriter, serialize_identifier, serialize_string}; pub use parser::{Parser, Delimiter, Delimiters, SourcePosition}; diff --git a/src/serializer.rs b/src/serializer.rs index 56471bf2..0841db86 100644 --- a/src/serializer.rs +++ b/src/serializer.rs @@ -3,7 +3,6 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ use std::fmt; -use std::mem; use std::cmp; use std::num::{Float, Int}; @@ -261,81 +260,3 @@ impl<'a, W> TextWriter for CssStringWriter<'a, W> where W: TextWriter { } } } - - -impl<'a> ToCss for [Token<'a>] { - fn to_css(&self, dest: &mut W) -> text_writer::Result where W: TextWriter { - let mut writer = TokenWriter::new(dest); - for token in self.iter() { - try!(writer.write(token)) - } - Ok(()) - } -} - - -pub struct TokenWriter<'i, 'a, W: 'a> { - dest: &'a mut W, - previous_token: Option>, -} - -impl<'i, 'a, W> TokenWriter<'i, 'a, W> where W: TextWriter { - pub fn new<'a>(dest: &'a mut W) -> TokenWriter<'i, 'a, W> { - TokenWriter { - dest: dest, - previous_token: None, - } - } - - pub fn write(&mut self, token: &Token<'i>) -> text_writer::Result { - use Token::*; - let previous = &mem::replace(&mut self.previous_token, Some((*token).clone())) - // A "not special" token: - .unwrap_or(Colon); - let (a, b) = (previous, token); - if ( - matches!(*a, Ident(..) 
| AtKeyword(..) | Hash(..) | IDHash(..) | - Dimension(..) | Delim('#') | Delim('-') | Number(..)) && - matches!(*b, Ident(..) | Function(..) | Url(..) | BadUrl(..) | - Number(..) | Percentage(..) | Dimension(..) | UnicodeRange(..)) - ) || ( - matches!(*a, Ident(..)) && - matches!(*b, ParenthesisBlock(..)) - ) || ( - matches!(*a, Ident(..) | AtKeyword(..) | Hash(..) | IDHash(..) | Dimension(..)) && - matches!(*b, Delim('-') | CDC) - ) || ( - matches!(*a, Delim('#') | Delim('-') | Number(..) | Delim('@')) && - matches!(*b, Ident(..) | Function(..) | Url(..) | BadUrl(..)) - ) || ( - matches!(*a, Delim('@')) && - matches!(*b, Ident(..) | Function(..) | Url(..) | BadUrl(..) | - UnicodeRange(..) | Delim('-')) - ) || ( - matches!(*a, UnicodeRange(..) | Delim('.') | Delim('+')) && - matches!(*b, Number(..) | Percentage(..) | Dimension(..)) - ) || ( - matches!(*a, UnicodeRange(..)) && - matches!(*b, Ident(..) | Function(..) | Delim('?')) - ) || matches!((a, b), (&Delim(a), &Delim(b)) if matches!((a, b), - ('#', '-') | - ('$', '=') | - ('*', '=') | - ('^', '=') | - ('~', '=') | - ('|', '=') | - ('|', '|') | - ('/', '*') - )) { - try!(self.dest.write_str("/**/")); - } - // Skip whitespace when '\n' was previously written at the previous iteration. 
- if !matches!((previous, token), (&Delim('\\'), &WhiteSpace(_))) { - try!(token.to_css(self.dest)); - } - if token == &Delim('\\') { - try!(self.dest.write_char('\n')); - } - Ok(()) - } -} diff --git a/src/tests.rs b/src/tests.rs index d25818f8..b4465b75 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -339,27 +339,26 @@ fn nth() { #[test] fn serializer() { run_json_tests(include_str!("css-parsing-tests/component_value_list.json"), |input| { - fn flatten(input: &mut Parser, tokens: &mut Vec>) { - while let Ok(token) = input.next_including_whitespace() { + fn write_to(input: &mut Parser, string: &mut String) { + while let Ok(token) = input.next_including_whitespace_and_comments() { + token.to_css(string).unwrap(); let closing_token = match token { Token::Function(_) | Token::ParenthesisBlock => Some(Token::CloseParenthesis), Token::SquareBracketBlock => Some(Token::CloseSquareBracket), Token::CurlyBracketBlock => Some(Token::CloseCurlyBracket), _ => None }; - tokens.push(token.into_owned()); if let Some(closing_token) = closing_token { input.parse_nested_block(|input| { - flatten(input, tokens); + write_to(input, string); Ok(()) }).unwrap(); - tokens.push(closing_token); + closing_token.to_css(string).unwrap(); } } } - let mut tokens = vec![]; - flatten(input, &mut tokens); - let serialized = tokens.to_css_string(); + let mut serialized = String::new(); + write_to(input, &mut serialized); let parser = &mut Parser::new(serialized.as_slice()); Json::Array(component_values_to_json(parser)) }); diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 4d020886..4f3b35f7 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -74,7 +74,9 @@ impl<'a> Token<'a> { Token::Percentage(value) => Token::Percentage(value), Token::Dimension(value, unit) => Token::Dimension(value, Cow::Owned(unit.into_owned())), Token::UnicodeRange(start, end) => Token::UnicodeRange(start, end), - Token::WhiteSpace(_) => Token::WhiteSpace(" "), + Token::WhiteSpace(content) => { + Token::WhiteSpace(if 
matches!(content.as_bytes()[0], b'\n' | b'\r' | b'\x0C') { "\n" } else { " " }) + } Token::Comment(_) => Token::Comment(""), Token::Colon => Token::Colon, Token::Semicolon => Token::Semicolon, From 8d1b3e220e795f7baaa940919059d5f4ef4ec28c Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Wed, 21 Jan 2015 22:12:08 +0100 Subject: [PATCH 69/69] Add Parser::expect_function_matching --- src/parser.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/parser.rs b/src/parser.rs index b80f99bb..68d3a820 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -475,6 +475,14 @@ impl<'i, 't> Parser<'i, 't> { _ => Err(()) } } + + #[inline] + pub fn expect_function_matching(&mut self, expected_name: &str) -> Result<(), ()> { + match try!(self.next()) { + Token::Function(ref name) if name.eq_ignore_ascii_case(expected_name) => Ok(()), + _ => Err(()) + } + } }