From aa3b075a94d3a0eebcd8c217a6c4fd7a055e3564 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 23 Aug 2016 15:40:00 +0200 Subject: [PATCH] =?UTF-8?q?Remove=20the=20first=20rule=20if=20it=E2=80=99s?= =?UTF-8?q?=20`@charset`=20in=20"parse=20a=20stylesheet"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per spec change https://drafts.csswg.org/css-syntax/#parse-stylesheet --- Cargo.toml | 2 +- src/css-parsing-tests/rule_list.json | 5 +++ src/css-parsing-tests/stylesheet.json | 7 ++++ src/css-parsing-tests/stylesheet_bytes.json | 36 +++++++-------------- src/parser.rs | 3 ++ src/rules_and_declarations.rs | 14 +++++++- 6 files changed, 41 insertions(+), 26 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 616d7d96..e1be5d7d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "cssparser" -version = "0.5.7" +version = "0.5.8" authors = [ "Simon Sapin " ] description = "Rust implementation of CSS Syntax Level 3" diff --git a/src/css-parsing-tests/rule_list.json b/src/css-parsing-tests/rule_list.json index 4edeb146..875978e6 100644 --- a/src/css-parsing-tests/rule_list.json +++ b/src/css-parsing-tests/rule_list.json @@ -6,6 +6,11 @@ "@foo", [["at-rule", "foo", [], null]], +"@charset; @foo", [ + ["at-rule", "charset", [], null], + ["at-rule", "foo", [], null] +], + "@foo bar; \t/* comment */", [["at-rule", "foo", [" ", ["ident", "bar"]], null]], " /**/ @foo bar{[(4", [["at-rule", "foo", diff --git a/src/css-parsing-tests/stylesheet.json b/src/css-parsing-tests/stylesheet.json index 6806d92f..574ad7ac 100644 --- a/src/css-parsing-tests/stylesheet.json +++ b/src/css-parsing-tests/stylesheet.json @@ -6,6 +6,13 @@ "@foo", [["at-rule", "foo", [], null]], +"@charset 4 {} @foo", [["at-rule", "foo", [], null]], + +"@foo; @charset 4 {}", [ + ["at-rule", "foo", [], null], + ["at-rule", "charset", [" ", ["number", "4", 4, "integer"], " "], []] +], + "@foo bar; \t/* comment */", [["at-rule", "foo", [" ", ["ident", "bar"]], null]], " /**/ @foo bar{[(4", [["at-rule", "foo", diff --git a/src/css-parsing-tests/stylesheet_bytes.json b/src/css-parsing-tests/stylesheet_bytes.json index 9f061997..d8b14a1b 100644 --- a/src/css-parsing-tests/stylesheet_bytes.json +++ b/src/css-parsing-tests/stylesheet_bytes.json @@ -41,32 +41,27 @@ {"css_bytes": "@charset \"ISO-8859-5\"; @\u00E9"}, -[[["at-rule", "charset", [" ", ["string", "ISO-8859-5"]], null], - ["at-rule", "щ", [], null]], +[[["at-rule", "щ", [], null]], "iso-8859-5"], {"css_bytes": "@Charset \"ISO-8859-5\"; @\u00E9", "comment": "@charset has to match an exact byte pattern"}, -[[["at-rule", "Charset", [" ", ["string", "ISO-8859-5"]], null], - ["at-rule", "�", [], null]], +[[["at-rule", "�", [], null]], "utf-8"], {"css_bytes": "@charset \"ISO-8859-5\"; @\u00E9", "comment": "@charset has to match an exact byte pattern"}, -[[["at-rule", "charset", [" ", ["string", "ISO-8859-5"]], null], - ["at-rule", "�", [], null]], +[[["at-rule", "�", [], null]], "utf-8"], {"css_bytes": "@charset 'ISO-8859-5'; @\u00E9", "comment": "@charset has to match an exact byte pattern"}, -[[["at-rule", "charset", [" ", ["string", "ISO-8859-5"]], null], - ["at-rule", "�", [], null]], +[[["at-rule", "�", [], null]], "utf-8"], {"css_bytes": "@charset \"ISO-8859-5\" ; @\u00E9", "comment": "@charset has to match an exact byte pattern"}, -[[["at-rule", "charset", [" ", ["string", "ISO-8859-5"], " "], null], - ["at-rule", "�", [], null]], +[[["at-rule", "�", [], null]], "utf-8"], @@ -79,35 +74,30 @@ {"css_bytes": "@charset \"UTF-16LE\"; @\u00C3\u00A9", "comment": "@charset can only specify ASCII-compatible encodings"}, -[[["at-rule", "charset", [" ", ["string", "UTF-16LE"]], null], - ["at-rule", "é", [], null]], +[[["at-rule", "é", [], null]], "utf-8"], {"css_bytes": "\u00EF\u00BB\u00BF @charset \"ISO-8859-5\"; @\u00E9", "comment": "BOM takes precedence over @charset"}, -[[["at-rule", "charset", [" ", ["string", "ISO-8859-5"]], null], - ["at-rule", "�", [], null]], +[[["at-rule", "�", [], null]], "utf-8"], {"css_bytes": "\u00EF\u00BB\u00BF @charset \"ISO-8859-5\"; @\u00C3\u00A9", "comment": "BOM takes precedence over @charset"}, -[[["at-rule", "charset", [" ", ["string", "ISO-8859-5"]], null], - ["at-rule", "é", [], null]], +[[["at-rule", "é", [], null]], "utf-8"], {"css_bytes": "@charset \"ISO-8859-5\"; @\u00E9", "protocol_encoding": " Iso-8859-2", "comment": "Protocol takes precedence over @charset"}, -[[["at-rule", "charset", [" ", ["string", "ISO-8859-5"]], null], - ["at-rule", "é", [], null]], +[[["at-rule", "é", [], null]], "iso-8859-2"], {"css_bytes": "@charset \"ISO-8859-5\"; @\u00E9", "protocol_encoding": "kamoulox", "comment": "Unknow protocol encoding falls back to @charset"}, -[[["at-rule", "charset", [" ", ["string", "ISO-8859-5"]], null], - ["at-rule", "щ", [], null]], +[[["at-rule", "щ", [], null]], "iso-8859-5"], @@ -120,15 +110,13 @@ {"css_bytes": "@charset \"ISO-8859-5\"; @\u00E9", "environment_encoding": "ISO-8859-2", "comment": "@character takes precedence over environment"}, -[[["at-rule", "charset", [" ", ["string", "ISO-8859-5"]], null], - ["at-rule", "щ", [], null]], +[[["at-rule", "щ", [], null]], "iso-8859-5"], {"css_bytes": "@charset \"kamoulox\"; @\u00E9", "environment_encoding": "ISO-8859-2", "comment": "@character with unknown encoding falls back to environment encoding"}, -[[["at-rule", "charset", [" ", ["string", "kamoulox"]], null], - ["at-rule", "é", [], null]], +[[["at-rule", "é", [], null]], "iso-8859-2"], {"css_bytes": "@\u00E9", diff --git a/src/parser.rs b/src/parser.rs index 2240c4de..6abe2b34 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -471,6 +471,9 @@ impl<'i, 't> Parser<'i, 't> { if next_byte.is_some() && !self.stop_before.contains(Delimiters::from_byte(next_byte)) { debug_assert!(delimiters.contains(Delimiters::from_byte(next_byte))); self.tokenizer.advance(1); + if next_byte == Some(b'{') { + consume_until_end_of_block(BlockType::CurlyBracket, &mut *self.tokenizer); + } } result } diff --git a/src/rules_and_declarations.rs b/src/rules_and_declarations.rs index bcd8211b..5dda1b20 100644 --- a/src/rules_and_declarations.rs +++ b/src/rules_and_declarations.rs @@ -4,6 +4,7 @@ // https://drafts.csswg.org/css-syntax/#parsing +use std::ascii::AsciiExt; use std::ops::Range; use std::borrow::Cow; use super::{Token, Parser, Delimiter, SourcePosition}; @@ -264,6 +265,7 @@ where P: QualifiedRuleParser + AtRuleParser { pub parser: P, is_stylesheet: bool, + any_rule_so_far: bool, } @@ -285,6 +287,7 @@ where P: QualifiedRuleParser + AtRuleParser { input: input, parser: parser, is_stylesheet: true, + any_rule_so_far: false, } } @@ -300,6 +303,7 @@ where P: QualifiedRuleParser + AtRuleParser { input: input, parser: parser, is_stylesheet: false, + any_rule_so_far: false, } } } @@ -318,9 +322,17 @@ where P: QualifiedRuleParser + AtRuleParser { Ok(Token::WhiteSpace(_)) | Ok(Token::Comment(_)) => {} Ok(Token::CDO) | Ok(Token::CDC) if self.is_stylesheet => {} Ok(Token::AtKeyword(name)) => { - return Some(parse_at_rule(start_position, name, self.input, &mut self.parser)) + let first_stylesheet_rule = self.is_stylesheet && !self.any_rule_so_far; + self.any_rule_so_far = true; + if first_stylesheet_rule && name.eq_ignore_ascii_case("charset") { + let delimiters = Delimiter::Semicolon | Delimiter::CurlyBracketBlock; + let _ = self.input.parse_until_after(delimiters, |_input| Ok(())); + } else { + return Some(parse_at_rule(start_position, name, self.input, &mut self.parser)) + } } Ok(_) => { + self.any_rule_so_far = true; self.input.reset(start_position); return Some(parse_qualified_rule(self.input, &mut self.parser) .map_err(|()| start_position..self.input.position()))