diff --git a/Cargo.toml b/Cargo.toml
index f413f8e6..211fdce0 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 
 name = "html5ever"
-version = "0.4.0"
+version = "0.4.1"
 authors = [ "The html5ever Project Developers" ]
 license = "MIT / Apache-2.0"
 repository = "https://github.com/servo/html5ever"
diff --git a/src/driver.rs b/src/driver.rs
index f94ef0f0..fcb3c4c2 100644
--- a/src/driver.rs
+++ b/src/driver.rs
@@ -162,7 +162,8 @@ impl TendrilSink for BytesParser {
         };
         if buffer.len32() >= PRESCAN_BYTES {
             let encoding = detect_encoding(&buffer, &self.opts);
-            let decoder = LossyDecoder::new(encoding, parser);
+            let mut decoder = LossyDecoder::new(encoding, parser);
+            decoder.process(buffer);
             self.state = BytesParserState::Parsing { decoder: decoder }
         } else {
             self.state = BytesParserState::Buffering {
@@ -188,7 +189,8 @@ impl TendrilSink for BytesParser {
             BytesParserState::Initial { parser } => parser.finish(),
             BytesParserState::Buffering { parser, buffer } => {
                 let encoding = detect_encoding(&buffer, &self.opts);
-                let decoder = LossyDecoder::new(encoding, parser);
+                let mut decoder = LossyDecoder::new(encoding, parser);
+                decoder.process(buffer);
                 decoder.finish()
             },
             BytesParserState::Parsing { decoder } => decoder.finish(),
@@ -218,3 +220,56 @@ fn detect_encoding(bytes: &ByteTendril, opts: &BytesOpts) -> EncodingRef {
     // FIXME: etc.
     return encoding::all::UTF_8
 }
+
+#[cfg(test)]
+mod tests {
+    use rcdom::RcDom;
+    use serialize::serialize;
+    use std::iter::repeat;
+    use tendril::TendrilSink;
+    use super::*;
+
+    // Regression tests for this patch: bytes buffered before the encoding is
+    // detected must be handed to the decoder rather than dropped.
+
+    // Same document via from_utf8(), for comparison with the from_bytes() cases.
+    #[test]
+    fn from_utf8() {
+        assert_serialization(
+            parse_document(RcDom::default(), ParseOpts::default())
+                .from_utf8()
+                .one("<title>Test".as_bytes()));
+    }
+
+    // Whole input arrives as one short chunk; this presumably exercises the
+    // Buffering branch of finish() (assumes 11 bytes < PRESCAN_BYTES -- confirm).
+    #[test]
+    fn from_bytes_one() {
+        assert_serialization(
+            parse_document(RcDom::default(), ParseOpts::default())
+                .from_bytes(BytesOpts::default())
+                .one("<title>Test".as_bytes()));
+    }
+
+    // The 1200 spaces are presumably there to push the buffer past PRESCAN_BYTES
+    // inside process() (value not visible in this patch -- confirm).
+    #[test]
+    fn from_bytes_iter() {
+        assert_serialization(
+            parse_document(RcDom::default(), ParseOpts::default())
+                .from_bytes(BytesOpts::default())
+                .from_iter([
+                    "<title>Test".as_bytes(),
+                    repeat(' ').take(1200).collect::<String>().as_bytes(),
+                ].iter().cloned()));
+    }
+
+    // Serializes the document and compares it with the expected markup; spaces
+    // are removed first so the padding fed by from_bytes_iter does not matter.
+    fn assert_serialization(dom: RcDom) {
+        let mut serialized = Vec::new();
+        serialize(&mut serialized, &dom.document, Default::default()).unwrap();
+        assert_eq!(String::from_utf8(serialized).unwrap().replace(" ", ""),
+                   "<html><head><title>Test</title></head><body></body></html>");
+    }
+}