From b56e1bbc0f1197031d4aad33e54014a33dae029a Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Wed, 19 Aug 2015 17:57:22 +0200 Subject: [PATCH 1/2] Pass complete context when parsing fragments The fragment parsing algorithm requires the qualified name and the attributes of the context element. --- src/driver.rs | 29 ++++++++++++++++------------- tests/serializer.rs | 5 ++++- tests/tree_builder.rs | 5 +++-- 3 files changed, 23 insertions(+), 16 deletions(-) diff --git a/src/driver.rs b/src/driver.rs index a488d59c..63eb6b65 100644 --- a/src/driver.rs +++ b/src/driver.rs @@ -9,13 +9,13 @@ //! High-level interface to the parser. -use tokenizer::{TokenizerOpts, Tokenizer, TokenSink}; +use tokenizer::{Attribute, TokenSink, Tokenizer, TokenizerOpts}; use tree_builder::{TreeBuilderOpts, TreeBuilder, TreeSink}; use std::option; use std::default::Default; -use string_cache::{Atom, QualName}; +use string_cache::QualName; use tendril::StrTendril; /// Convenience function to turn a single value into an iterator. 
@@ -28,7 +28,7 @@ pub fn one_input<T>(x: T) -> option::IntoIter<T> { /// ## Example /// /// ```ignore -/// let mut sink = MySink; +/// let sink = MySink; /// tokenize_to(&mut sink, one_input(my_str), Default::default()); /// ``` pub fn tokenize_to<Sink, It>(sink: Sink, input: It, opts: TokenizerOpts) -> Sink @@ -58,8 +58,8 @@ pub struct ParseOpts { /// ## Example /// /// ```ignore -/// let mut sink = MySink; -/// parse_to(&mut sink, one_input(my_str), Default::default()); +/// let sink = MySink; +/// parse_to(sink, one_input(my_str), Default::default()); /// ``` pub fn parse_to<Sink, It>(sink: Sink, input: It, opts: ParseOpts) -> Sink where Sink: TreeSink, @@ -79,18 +79,18 @@ pub fn parse_to<Sink, It>(sink: Sink, input: It, opts: ParseOpts) -> Sink /// ## Example /// /// ```ignore -/// let mut sink = MySink; -/// parse_fragment_to(&mut sink, one_input(my_str), context_token, Default::default()); +/// let sink = MySink; +/// parse_fragment_to(sink, one_input(my_str), context_name, context_attrs, Default::default()); /// ``` -pub fn parse_fragment_to<Sink, It>(sink: Sink, +pub fn parse_fragment_to<Sink, It>(mut sink: Sink, input: It, - context: Atom, + context_name: QualName, + context_attrs: Vec<Attribute>, opts: ParseOpts) -> Sink where Sink: TreeSink, It: Iterator<Item=StrTendril> { - let mut sink = sink; - let context_elem = sink.create_element(QualName::new(ns!(HTML), context), vec!()); + let context_elem = sink.create_element(context_name, context_attrs); let tb = TreeBuilder::new_for_fragment(sink, context_elem, None, opts.tree_builder); let tok_opts = TokenizerOpts { initial_state: Some(tb.tokenizer_state_for_context_elem()), @@ -135,10 +135,13 @@ pub fn parse<Output, It>(input: It, opts: ParseOpts) -> Output /// ```ignore /// let dom: RcDom = parse_fragment(one_input(my_str), context_token, Default::default()); /// ``` -pub fn parse_fragment<Output, It>(input: It, context: Atom, opts: ParseOpts) -> Output +pub fn parse_fragment<Output, It>(input: It, + context_name: QualName, + context_attrs: Vec<Attribute>, + opts: ParseOpts) -> Output where Output: ParseResult, It: Iterator<Item=StrTendril>, { - let 
sink = parse_fragment_to(Default::default(), input, context, opts); + let sink = parse_fragment_to(Default::default(), input, context_name, context_attrs, opts); ParseResult::get_result(sink) } diff --git a/tests/serializer.rs b/tests/serializer.rs index 2515de4a..a4570e99 100644 --- a/tests/serializer.rs +++ b/tests/serializer.rs @@ -23,7 +23,10 @@ use html5ever::{parse_fragment, parse, one_input, serialize}; use html5ever::rcdom::RcDom; fn parse_and_serialize(input: StrTendril) -> StrTendril { - let dom: RcDom = parse_fragment(one_input(input), atom!(body), ParseOpts::default()); + let dom: RcDom = parse_fragment(one_input(input), + qualname!(HTML, body), + vec![], + ParseOpts::default()); let inner = &dom.document.borrow().children[0]; let mut result = vec![]; diff --git a/tests/tree_builder.rs b/tests/tree_builder.rs index 19561d44..8340aa33 100644 --- a/tests/tree_builder.rs +++ b/tests/tree_builder.rs @@ -34,7 +34,7 @@ use html5ever::{ParseOpts, parse, parse_fragment, one_input}; use html5ever::rcdom::{Comment, Document, Doctype, Element, Handle, RcDom}; use html5ever::rcdom::{Template, Text}; -use string_cache::Atom; +use string_cache::{Atom, QualName}; use tendril::StrTendril; fn parse_tests>(mut lines: It) -> Vec> { @@ -223,7 +223,8 @@ fn make_test_desc_with_scripting_flag( }, Some(ref context) => { let dom: RcDom = parse_fragment(one_input(data.clone()), - context.clone(), + QualName::new(ns!(HTML), context.clone()), + vec![], opts); // fragment case: serialize children of the html element // rather than children of the document From b4845c204140b7791b8019bc5001ac89601d7c35 Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Wed, 19 Aug 2015 18:06:56 +0200 Subject: [PATCH 2/2] Properly parse the document-fragment field in tree_builder tests --- data/test/ignore | 20 -------------------- src/driver.rs | 3 ++- tests/tree_builder.rs | 15 +++++++++++++-- 3 files changed, 15 insertions(+), 23 deletions(-) diff --git a/data/test/ignore b/data/test/ignore index 
2873874a..af3dd507 100644 --- a/data/test/ignore +++ b/data/test/ignore @@ -21,26 +21,6 @@ tb: tests19.dat-7 tb: tests19.dat-8 tb: tests19.dat-9 tb: tests2.dat-44 -tb: foreign-fragment.dat-0 -tb: foreign-fragment.dat-1 -tb: foreign-fragment.dat-18 -tb: foreign-fragment.dat-19 -tb: foreign-fragment.dat-2 -tb: foreign-fragment.dat-22 -tb: foreign-fragment.dat-23 -tb: foreign-fragment.dat-26 -tb: foreign-fragment.dat-27 -tb: foreign-fragment.dat-3 -tb: foreign-fragment.dat-30 -tb: foreign-fragment.dat-31 -tb: foreign-fragment.dat-34 -tb: foreign-fragment.dat-35 -tb: foreign-fragment.dat-38 -tb: foreign-fragment.dat-39 -tb: foreign-fragment.dat-40 -tb: foreign-fragment.dat-41 -tb: foreign-fragment.dat-47 -tb: foreign-fragment.dat-48 tb: tests20.dat-34 tb: tests20.dat-35 tb: tests20.dat-36 diff --git a/src/driver.rs b/src/driver.rs index 63eb6b65..0b8cdb70 100644 --- a/src/driver.rs +++ b/src/driver.rs @@ -133,7 +133,8 @@ pub fn parse<Output, It>(input: It, opts: ParseOpts) -> Output /// ## Example /// /// ```ignore -/// let dom: RcDom = parse_fragment(one_input(my_str), context_token, Default::default()); +/// let dom: RcDom = parse_fragment( +/// one_input(my_str), context_name, context_attrs, Default::default()); /// ``` pub fn parse_fragment<Output, It>(input: It, context_name: QualName, diff --git a/tests/tree_builder.rs b/tests/tree_builder.rs index 8340aa33..f09486aa 100644 --- a/tests/tree_builder.rs +++ b/tests/tree_builder.rs @@ -193,7 +193,7 @@ fn make_test_desc_with_scripting_flag( data.pop(); let expected = get_field("document"); let context = fields.get("document-fragment") - .map(|field| Atom::from_slice(field.trim_right_matches('\n'))); + .map(|field| context_name(field.trim_right_matches('\n'))); let ignore = ignores.contains(name); let mut name = name.to_owned(); if scripting_enabled { @@ -223,7 +223,7 @@ fn make_test_desc_with_scripting_flag( }, Some(ref context) => { let dom: RcDom = parse_fragment(one_input(data.clone()), - QualName::new(ns!(HTML), context.clone()), + 
context.clone(), vec![], opts); // fragment case: serialize children of the html element @@ -246,6 +246,17 @@ fn make_test_desc_with_scripting_flag( } } +#[cfg(feature = "unstable")] +fn context_name(context: &str) -> QualName { + if context.starts_with("svg ") { + QualName::new(ns!(SVG), Atom::from_slice(&context[4..])) + } else if context.starts_with("math ") { + QualName::new(ns!(MathML), Atom::from_slice(&context[5..])) + } else { + QualName::new(ns!(HTML), Atom::from_slice(context)) + } +} + #[cfg(feature = "unstable")] fn tests(src_dir: &Path, ignores: &HashSet) -> Vec { let mut tests = vec!();