diff --git a/Cargo.toml b/Cargo.toml index ee3fbcb8..5b651714 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "html5ever" -version = "0.5.1" +version = "0.5.2" authors = [ "The html5ever Project Developers" ] license = "MIT / Apache-2.0" repository = "https://github.com/servo/html5ever" @@ -37,7 +37,7 @@ log = "0" phf = "0.7" string_cache = "0.2.0" mac = "0" -tendril = "0.2" +tendril = "0.2.2" heapsize = { version = ">=0.1.1, <0.4", optional = true } heapsize_plugin = { version = "0.1.0", optional = true } diff --git a/src/driver.rs b/src/driver.rs index fcb3c4c2..ec7e35bd 100644 --- a/src/driver.rs +++ b/src/driver.rs @@ -56,7 +56,17 @@ pub fn parse_fragment(mut sink: Sink, opts: ParseOpts, -> Parser where Sink: TreeSink { let context_elem = sink.create_element(context_name, context_attrs); - let tb = TreeBuilder::new_for_fragment(sink, context_elem, None, opts.tree_builder); + parse_fragment_for_element(sink, opts, context_elem, None) +} + +/// Like `parse_fragment`, but with an existing context element +/// and optionally a form element. +pub fn parse_fragment_for_element(sink: Sink, opts: ParseOpts, + context_element: Sink::Handle, + form_element: Option) + -> Parser + where Sink: TreeSink { + let tb = TreeBuilder::new_for_fragment(sink, context_element, form_element, opts.tree_builder); let tok_opts = TokenizerOpts { initial_state: Some(tb.tokenizer_state_for_context_elem()), .. opts.tokenizer @@ -68,7 +78,7 @@ pub fn parse_fragment(mut sink: Sink, opts: ParseOpts, /// An HTML parser, /// ready to recieve Unicode input through the `tendril::TendrilSink` trait’s methods. pub struct Parser where Sink: TreeSink { - tokenizer: Tokenizer>, + pub tokenizer: Tokenizer>, } impl TendrilSink for Parser { @@ -147,6 +157,60 @@ enum BytesParserState where Sink: TreeSink { Transient } +impl BytesParser { + /// Access the underlying Parser + pub fn str_parser(&self) -> &Parser { + match self.state { + BytesParserState::Initial { ref parser } => parser, + BytesParserState::Buffering { ref parser, .. } => parser, + BytesParserState::Parsing { ref decoder } => decoder.inner_sink(), + BytesParserState::Transient => unreachable!(), + } + } + + /// Access the underlying Parser + pub fn str_parser_mut(&mut self) -> &mut Parser { + match self.state { + BytesParserState::Initial { ref mut parser } => parser, + BytesParserState::Buffering { ref mut parser, .. } => parser, + BytesParserState::Parsing { ref mut decoder } => decoder.inner_sink_mut(), + BytesParserState::Transient => unreachable!(), + } + } + + /// Insert a Unicode chunk in the middle of the byte stream. + /// + /// This is e.g. for supporting `document.write`. + pub fn process_unicode(&mut self, t: StrTendril) { + if let BytesParserState::Parsing { ref mut decoder } = self.state { + decoder.inner_sink_mut().process(t) + } else { + match mem::replace(&mut self.state, BytesParserState::Transient) { + BytesParserState::Initial { mut parser } => { + parser.process(t); + self.start_parsing(parser, ByteTendril::new()) + } + BytesParserState::Buffering { parser, buffer } => { + self.start_parsing(parser, buffer); + if let BytesParserState::Parsing { ref mut decoder } = self.state { + decoder.inner_sink_mut().process(t) + } else { + unreachable!() + } + } + BytesParserState::Parsing { .. } | BytesParserState::Transient => unreachable!(), + } + } + } + + fn start_parsing(&mut self, parser: Parser, buffer: ByteTendril) { + let encoding = detect_encoding(&buffer, &self.opts); + let mut decoder = LossyDecoder::new(encoding, parser); + decoder.process(buffer); + self.state = BytesParserState::Parsing { decoder: decoder } + } +} + impl TendrilSink for BytesParser { fn process(&mut self, t: ByteTendril) { if let &mut BytesParserState::Parsing { ref mut decoder } = &mut self.state { @@ -161,10 +225,7 @@ impl TendrilSink for BytesParser { BytesParserState::Parsing { .. } | BytesParserState::Transient => unreachable!(), }; if buffer.len32() >= PRESCAN_BYTES { - let encoding = detect_encoding(&buffer, &self.opts); - let mut decoder = LossyDecoder::new(encoding, parser); - decoder.process(buffer); - self.state = BytesParserState::Parsing { decoder: decoder } + self.start_parsing(parser, buffer) } else { self.state = BytesParserState::Buffering { parser: parser, diff --git a/src/tree_builder/mod.rs b/src/tree_builder/mod.rs index 3c68b1a4..5bbc1027 100644 --- a/src/tree_builder/mod.rs +++ b/src/tree_builder/mod.rs @@ -57,14 +57,7 @@ pub struct TreeBuilderOpts { /// Should we drop the DOCTYPE (if any) from the tree? pub drop_doctype: bool, - /// The ``, ``, and `