From a02008a33a927ae4b66d21f71106e28c80a81279 Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Mon, 6 Apr 2015 14:28:15 -0700 Subject: [PATCH 1/7] Add some serializer tests --- tests/serializer.rs | 90 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 tests/serializer.rs diff --git a/tests/serializer.rs b/tests/serializer.rs new file mode 100644 index 00000000..6a40b08a --- /dev/null +++ b/tests/serializer.rs @@ -0,0 +1,90 @@ +// Copyright 2015 The html5ever Project Developers. See the +// COPYRIGHT file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +#![feature(plugin)] +#![plugin(string_cache_plugin)] + +extern crate string_cache; +extern crate html5ever; + +use std::default::Default; +use std::borrow::ToOwned; + +use html5ever::sink::rcdom::RcDom; +use html5ever::driver::ParseOpts; +use html5ever::{parse_fragment, one_input, serialize}; + +fn parse_and_serialize(input: String) -> String { + let dom: RcDom = parse_fragment(one_input(input), atom!(body), ParseOpts::default()); + let inner = &dom.document.borrow().children[0]; + + let mut result = vec![]; + serialize(&mut result, inner, Default::default()).unwrap(); + String::from_utf8(result).unwrap() +} + +macro_rules! test { + ($name:ident, $input:expr, $output:expr) => { + #[test] + fn $name() { + assert_eq!($output, parse_and_serialize($input.to_owned())); + } + }; + + // Shorthand for $output = $input + ($name:ident, $input:expr) => { + test!($name, $input, $input); + }; +} + +test!(empty, r#""#); +test!(smoke_test, r#"

Hello, World!

"#); + +test!(misnest, r#"

Hello!

, World!"#, + r#"

Hello!

, World!"#); + +test!(attr_literal, r#""#); +test!(attr_escape_amp, r#""#); +test!(attr_escape_amp_2, r#""#, r#""#); +test!(attr_escape_nbsp, "", r#""#); +test!(attr_escape_quot, r#""#, r#""#); +test!(attr_escape_several, r#""#, + r#""#); + +test!(text_literal, r#"

"'"

"#); +test!(text_escape_amp, r#"

&

"#); +test!(text_escape_amp_2, r#"

&

"#, r#"

&

"#); +test!(text_escape_nbsp, "

x\u{a0}y

", r#"

x y

"#); +test!(text_escape_lt, r#"

<

"#); +test!(text_escape_gt, r#"

>

"#); +test!(text_escape_gt2, r#"

>

"#, r#"

>

"#); + +test!(script_literal, r#""#); +test!(style_literal, r#""#); +test!(xmp_literal, r#"(x & 1) < 2; y > "foo" + 'bar'"#); +test!(iframe_literal, r#""#); +test!(noembed_literal, r#"(x & 1) < 2; y > "foo" + 'bar'"#); +test!(noframes_literal, r#"(x & 1) < 2; y > "foo" + 'bar'"#); + +test!(pre_lf_0, "
foo bar
"); +test!(pre_lf_1, "
\nfoo bar
", "
foo bar
"); +test!(pre_lf_2, "
\n\nfoo bar
"); + +test!(textarea_lf_0, ""); +test!(textarea_lf_1, "", ""); +test!(textarea_lf_2, ""); + +test!(listing_lf_0, "foo bar"); +test!(listing_lf_1, "\nfoo bar", "foo bar"); +test!(listing_lf_2, "\n\nfoo bar"); + +test!(comment_1, r#"

hi

"#); +test!(comment_2, r#"

hi

"#); +test!(comment_3, r#"

hi

"#); +test!(comment_4, r#"

hi

"#); From 107a20771965c1d67b9fce7ee17db937ca891e49 Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Mon, 6 Apr 2015 16:33:29 -0700 Subject: [PATCH 2/7] Warning police --- benches/tokenizer.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benches/tokenizer.rs b/benches/tokenizer.rs index 86b35116..8da759d4 100644 --- a/benches/tokenizer.rs +++ b/benches/tokenizer.rs @@ -7,7 +7,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -#![feature(box_syntax, std_misc, start, test, convert)] +#![feature(box_syntax, std_misc, start, test)] extern crate test; extern crate html5ever; From 6b83c943ccbbdb033b0704e40665902038e73fb6 Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Mon, 6 Apr 2015 16:43:48 -0700 Subject: [PATCH 3/7] Partial support for tag and attribute namespaces in serialization Fixes #14. --- src/serialize/mod.rs | 40 +++++++++++++++++++++++++++++++++------- tests/serializer.rs | 8 ++++++++ 2 files changed, 41 insertions(+), 7 deletions(-) diff --git a/src/serialize/mod.rs b/src/serialize/mod.rs index 49c8d52a..a1a2f058 100644 --- a/src/serialize/mod.rs +++ b/src/serialize/mod.rs @@ -66,6 +66,18 @@ pub struct Serializer<'wr, Wr:'wr> { stack: Vec, } +fn tagname(name: &QualName) -> Atom { + match name.ns { + ns!(HTML) | ns!(MathML) | ns!(SVG) => (), + ref ns => { + // FIXME(#122) + h5e_warn!("node with weird namespace {:?}", &*ns.0); + } + } + + name.local.clone() +} + impl<'wr, Wr: Write> Serializer<'wr, Wr> { fn new(writer: &'wr mut Wr, opts: SerializeOpts) -> Serializer<'wr, Wr> { Serializer { @@ -104,7 +116,7 @@ impl<'wr, Wr: Write> Serializer<'wr, Wr> { let html_name = match name.ns { ns!(HTML) => Some(name.local.clone()), - _ => panic!("FIXME(#14): Handle qualified tag names"), + _ => None, }; if self.parent().ignore_children { @@ -117,12 +129,27 @@ impl<'wr, Wr: Write> Serializer<'wr, Wr> { } try!(self.writer.write_all(b"<")); - try!(self.writer.write_all(name.local.as_slice().as_bytes())); + try!(self.writer.write_all(tagname(&name).as_bytes())); for (name, value) in attrs { try!(self.writer.write_all(b" ")); - // FIXME(#14): qualified names - assert!(name.ns == ns!("")); - try!(self.writer.write_all(name.local.as_slice().as_bytes())); + + match name.ns { + ns!("") => (), + ns!(XML) => try!(self.writer.write_all(b"xml:")), + ns!(XMLNS) => { + if name.local != atom!(xmlns) { + try!(self.writer.write_all(b"xmlns:")); + } + } + ns!(XLink) => try!(self.writer.write_all(b"xlink:")), + ref ns => { + // FIXME(#122) + h5e_warn!("attr with weird namespace {:?}", &*ns.0); + try!(self.writer.write_all(b"unknown_namespace:")); + } + } + + try!(self.writer.write_all(name.local.as_bytes())); try!(self.writer.write_all(b"=\"")); try!(self.write_escaped(value, true)); try!(self.writer.write_all(b"\"")); @@ -155,9 +182,8 @@ impl<'wr, Wr: Write> Serializer<'wr, Wr> { return Ok(()); } - // FIXME: Handle qualified tag names try!(self.writer.write_all(b"") } diff --git a/tests/serializer.rs b/tests/serializer.rs index 6a40b08a..3e880a04 100644 --- a/tests/serializer.rs +++ b/tests/serializer.rs @@ -88,3 +88,11 @@ test!(comment_1, r#"

hi

"#); test!(comment_2, r#"

hi

"#); test!(comment_3, r#"

hi

"#); test!(comment_4, r#"

hi

"#); + +// FIXME: test serialization of qualified tag/attribute names that can't be +// parsed from HTML + +test!(attr_ns_1, r#""#); +test!(attr_ns_2, r#""#); +test!(attr_ns_3, r#""#); +test!(attr_ns_4, r#""#); From 1e1c0611a8cb40f3ab14a8ac45bc10345652f24a Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Mon, 6 Apr 2015 16:49:10 -0700 Subject: [PATCH 4/7] Remove unneeded as_slice --- examples/print-rcdom.rs | 4 ++-- examples/tokenize.rs | 6 +++--- src/tokenizer/mod.rs | 4 ++-- tests/tree_builder.rs | 4 ++-- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/examples/print-rcdom.rs b/examples/print-rcdom.rs index dc46164a..25ea51ad 100644 --- a/examples/print-rcdom.rs +++ b/examples/print-rcdom.rs @@ -45,10 +45,10 @@ fn walk(indent: usize, handle: Handle) { Element(ref name, ref attrs) => { assert!(name.ns == ns!(html)); - print!("<{}", name.local.as_slice()); + print!("<{}", name.local); for attr in attrs.iter() { assert!(attr.name.ns == ns!("")); - print!(" {}=\"{}\"", attr.name.local.as_slice(), attr.value); + print!(" {}=\"{}\"", attr.name.local, attr.value); } println!(">"); } diff --git a/examples/tokenize.rs b/examples/tokenize.rs index 5635b038..636a1905 100644 --- a/examples/tokenize.rs +++ b/examples/tokenize.rs @@ -52,12 +52,12 @@ impl TokenSink for TokenPrinter { self.is_char(false); // This is not proper HTML serialization, of course. match tag.kind { - StartTag => print!("TAG : <\x1b[32m{}\x1b[0m", tag.name.as_slice()), - EndTag => print!("TAG : <\x1b[31m/{}\x1b[0m", tag.name.as_slice()), + StartTag => print!("TAG : <\x1b[32m{}\x1b[0m", tag.name), + EndTag => print!("TAG : <\x1b[31m/{}\x1b[0m", tag.name), } for attr in tag.attrs.iter() { print!(" \x1b[36m{}\x1b[0m='\x1b[34m{}\x1b[0m'", - attr.name.local.as_slice(), attr.value); + attr.name.local, attr.value); } if tag.self_closing { print!(" \x1b[31m/\x1b[0m"); diff --git a/src/tokenizer/mod.rs b/src/tokenizer/mod.rs index 1fc9c153..e47ddc18 100644 --- a/src/tokenizer/mod.rs +++ b/src/tokenizer/mod.rs @@ -445,7 +445,7 @@ impl Tokenizer { match self.last_start_tag_name.as_ref() { Some(last) => (self.current_tag_kind == EndTag) - && (self.current_tag_name == last.as_slice()), + && (*self.current_tag_name == **last), None => false, } } @@ -466,7 +466,7 @@ impl Tokenizer { // FIXME: linear time search, do we care? let dup = { let name = &self.current_attr_name[..]; - self.current_tag_attrs.iter().any(|a| a.name.local.as_slice() == name) + self.current_tag_attrs.iter().any(|a| &*a.name.local == name) }; if dup { diff --git a/tests/tree_builder.rs b/tests/tree_builder.rs index c2149987..6285a961 100644 --- a/tests/tree_builder.rs +++ b/tests/tree_builder.rs @@ -116,7 +116,7 @@ fn serialize(buf: &mut String, indent: usize, handle: Handle) { ns!(MathML) => buf.push_str("math "), _ => (), } - buf.push_str(name.local.as_slice()); + buf.push_str(&*name.local); buf.push_str(">\n"); let mut attrs = attrs.clone(); @@ -133,7 +133,7 @@ fn serialize(buf: &mut String, indent: usize, handle: Handle) { _ => (), } buf.push_str(&format!("{}=\"{}\"\n", - attr.name.local.as_slice(), attr.value)); + attr.name.local, attr.value)); } } } From 5614706160978e89c2c73ddeb2530d4c1be9db33 Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Mon, 6 Apr 2015 16:54:32 -0700 Subject: [PATCH 5/7] Remove most special-casing of the for_c build Fixes #103. --- src/lib.rs | 4 ---- src/macros.rs | 21 --------------------- src/serialize/mod.rs | 4 ++-- src/tokenizer/char_ref/mod.rs | 2 +- src/tokenizer/mod.rs | 26 +++++++------------------- src/tree_builder/actions.rs | 4 ++-- src/tree_builder/mod.rs | 9 +-------- src/tree_builder/rules.rs | 2 +- 8 files changed, 14 insertions(+), 58 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 89115c49..8f799172 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -35,7 +35,6 @@ extern crate libc; #[macro_use] extern crate collections; -#[cfg(not(for_c))] #[macro_use] extern crate log; @@ -52,7 +51,6 @@ extern crate time; pub use tokenizer::Attribute; pub use driver::{one_input, ParseOpts, parse_to, parse_fragment_to, parse, parse_fragment}; -#[cfg(not(for_c))] pub use serialize::serialize; #[macro_use] @@ -67,11 +65,9 @@ mod util { pub mod tokenizer; pub mod tree_builder; -#[cfg(not(for_c))] pub mod serialize; /// Consumers of the parser API. -#[cfg(not(for_c))] pub mod sink { pub mod common; pub mod rcdom; diff --git a/src/macros.rs b/src/macros.rs index 0b49e918..15eeff59 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -30,24 +30,3 @@ macro_rules! time { (result, dt) }} } - -// Disable logging when building without the runtime. -#[cfg(for_c)] -#[macro_use] -mod log { - macro_rules! h5e_log (($($x:tt)*) => (())); - macro_rules! h5e_debug (($($x:tt)*) => (())); - macro_rules! h5e_info (($($x:tt)*) => (())); - macro_rules! h5e_warn (($($x:tt)*) => (())); - macro_rules! h5e_error (($($x:tt)*) => (())); -} - -#[cfg(not(for_c))] -#[macro_use] -mod log { - macro_rules! h5e_log (($($x:tt)*) => (log!($($x)*))); - macro_rules! h5e_debug (($($x:tt)*) => (debug!($($x)*))); - macro_rules! h5e_info (($($x:tt)*) => (info!($($x)*))); - macro_rules! h5e_warn (($($x:tt)*) => (warn!($($x)*))); - macro_rules! h5e_error (($($x:tt)*) => (error!($($x)*))); -} diff --git a/src/serialize/mod.rs b/src/serialize/mod.rs index a1a2f058..57dc91d9 100644 --- a/src/serialize/mod.rs +++ b/src/serialize/mod.rs @@ -71,7 +71,7 @@ fn tagname(name: &QualName) -> Atom { ns!(HTML) | ns!(MathML) | ns!(SVG) => (), ref ns => { // FIXME(#122) - h5e_warn!("node with weird namespace {:?}", &*ns.0); + warn!("node with weird namespace {:?}", &*ns.0); } } @@ -144,7 +144,7 @@ impl<'wr, Wr: Write> Serializer<'wr, Wr> { ns!(XLink) => try!(self.writer.write_all(b"xlink:")), ref ns => { // FIXME(#122) - h5e_warn!("attr with weird namespace {:?}", &*ns.0); + warn!("attr with weird namespace {:?}", &*ns.0); try!(self.writer.write_all(b"unknown_namespace:")); } } diff --git a/src/tokenizer/char_ref/mod.rs b/src/tokenizer/char_ref/mod.rs index 25b1c373..acbb0968 100644 --- a/src/tokenizer/char_ref/mod.rs +++ b/src/tokenizer/char_ref/mod.rs @@ -119,7 +119,7 @@ impl CharRefTokenizer { return Done; } - h5e_debug!("char ref tokenizer stepping in state {:?}", self.state); + debug!("char ref tokenizer stepping in state {:?}", self.state); match self.state { Begin => self.do_begin(tokenizer), Octothorpe => self.do_octothorpe(tokenizer), diff --git a/src/tokenizer/mod.rs b/src/tokenizer/mod.rs index e47ddc18..ff0ea49f 100644 --- a/src/tokenizer/mod.rs +++ b/src/tokenizer/mod.rs @@ -176,10 +176,6 @@ pub struct Tokenizer { impl Tokenizer { /// Create a new tokenizer which feeds tokens to a particular `TokenSink`. pub fn new(sink: Sink, mut opts: TokenizerOpts) -> Tokenizer { - if opts.profile && cfg!(for_c) { - panic!("Can't profile tokenizer when built as a C library"); - } - let start_tag_name = opts.last_start_tag_name.take() .map(|s| Atom::from_slice(&s)); let state = opts.initial_state.unwrap_or(states::Data); @@ -269,13 +265,11 @@ impl Tokenizer { n if (n & 0xFFFE) == 0xFFFE => true, _ => false, } { - // format_if!(true) will still use the static error when built for C. - let msg = format_if!(true, "Bad character", - "Bad character {}", c); - self.emit_error(msg); + let msg = format!("Bad character {}", c); + self.emit_error(Cow::Owned(msg)); } - h5e_debug!("got character {}", c); + debug!("got character {}", c); self.current_char = c; Some(c) } @@ -302,7 +296,7 @@ impl Tokenizer { } let d = self.input_buffers.pop_except_from(set); - h5e_debug!("got characters {:?}", d); + debug!("got characters {:?}", d); match d { Some(FromSet(c)) => self.get_preprocessed_char(c).map(|x| FromSet(x)), @@ -568,7 +562,7 @@ macro_rules! shorthand ( // so it's behind a cfg flag. #[cfg(trace_tokenizer)] macro_rules! sh_trace ( ( $me:ident : $($cmds:tt)* ) => ({ - h5e_debug!(" {:s}", stringify!($($cmds)*)); + debug!(" {:s}", stringify!($($cmds)*)); shorthand!($me:expr : $($cmds)*); })); @@ -644,7 +638,7 @@ impl Tokenizer { return self.step_char_ref_tokenizer(); } - h5e_debug!("processing in state {:?}", self.state); + debug!("processing in state {:?}", self.state); match self.state { // Reachable only through `query_state_change`. The tree builder wants // the tokenizer to suspend processing. @@ -1245,12 +1239,6 @@ impl Tokenizer { } } - #[cfg(for_c)] - fn dump_profile(&self) { - unreachable!(); - } - - #[cfg(not(for_c))] fn dump_profile(&self) { let mut results: Vec<(states::State, u64)> = self.state_profile.iter().map(|(s, t)| (*s, *t)).collect(); @@ -1268,7 +1256,7 @@ impl Tokenizer { } fn eof_step(&mut self) -> bool { - h5e_debug!("processing EOF in state {:?}", self.state); + debug!("processing EOF in state {:?}", self.state); match self.state { states::Data | states::RawData(Rcdata) | states::RawData(Rawtext) | states::RawData(ScriptData) | states::Plaintext | states::Quiescent diff --git a/src/tree_builder/actions.rs b/src/tree_builder/actions.rs index 5364abe3..60f4d786 100644 --- a/src/tree_builder/actions.rs +++ b/src/tree_builder/actions.rs @@ -168,7 +168,7 @@ impl TreeBuilderActions } fn stop_parsing(&mut self) -> ProcessResult { - h5e_warn!("stop_parsing not implemented, full speed ahead!"); + warn!("stop_parsing not implemented, full speed ahead!"); Done } @@ -660,7 +660,7 @@ impl TreeBuilderActions } fn foster_parent_in_body(&mut self, token: Token) -> ProcessResult { - h5e_warn!("foster parenting not implemented"); + warn!("foster parenting not implemented"); self.foster_parenting = true; let res = self.step(InBody, token); // FIXME: what if res is Reprocess? diff --git a/src/tree_builder/mod.rs b/src/tree_builder/mod.rs index e18b80c2..2e304803 100644 --- a/src/tree_builder/mod.rs +++ b/src/tree_builder/mod.rs @@ -279,8 +279,6 @@ impl TreeBuilder self.context_elem.as_ref().map(|h| tracer.trace_handle(h.clone())); } - // Debug helper - #[cfg(not(for_c))] #[allow(dead_code)] fn dump_state(&self, label: String) { use string_cache::QualName; @@ -311,14 +309,9 @@ impl TreeBuilder println!(""); } - #[cfg(for_c)] - fn debug_step(&self, _mode: InsertionMode, _token: &Token) { - } - - #[cfg(not(for_c))] fn debug_step(&self, mode: InsertionMode, token: &Token) { use util::str::to_escaped_string; - h5e_debug!("processing {} in insertion mode {:?}", to_escaped_string(token), mode); + debug!("processing {} in insertion mode {:?}", to_escaped_string(token), mode); } fn process_to_completion(&mut self, mut token: Token) { diff --git a/src/tree_builder/rules.rs b/src/tree_builder/rules.rs index 94578a41..0bb30e14 100644 --- a/src/tree_builder/rules.rs +++ b/src/tree_builder/rules.rs @@ -725,7 +725,7 @@ impl TreeBuilderStep tag @ => { let node = self.pop(); if tag.name == atom!(script) { - h5e_warn!("FIXME: not fully implemented"); + warn!("FIXME: not fully implemented"); if self.sink.complete_script(node) == NextParserState::Suspend { self.next_tokenizer_state = Some(Quiescent); } From 5293725a434bcd6edde1474f55d60a08617c03e3 Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Mon, 6 Apr 2015 17:04:56 -0700 Subject: [PATCH 6/7] Switch back to using std Fixes #63. --- src/driver.rs | 7 ++----- src/for_c/common.rs | 9 ++------- src/for_c/tokenizer.rs | 9 +++------ src/lib.rs | 16 +--------------- src/serialize/mod.rs | 5 +---- src/sink/common.rs | 2 -- src/sink/owned_dom.rs | 14 ++++---------- src/sink/rcdom.rs | 10 +++------- src/tokenizer/buffer_queue.rs | 9 ++------- src/tokenizer/char_ref/data.rs | 2 -- src/tokenizer/char_ref/mod.rs | 5 +---- src/tokenizer/interface.rs | 6 ------ src/tokenizer/mod.rs | 15 ++------------- src/tokenizer/states.rs | 2 -- src/tree_builder/actions.rs | 15 +++++---------- src/tree_builder/data.rs | 4 ---- src/tree_builder/interface.rs | 4 ---- src/tree_builder/mod.rs | 10 +++------- src/tree_builder/rules.rs | 5 +---- src/tree_builder/types.rs | 4 ---- src/util/smallcharset.rs | 6 +----- src/util/str.rs | 11 +++-------- 22 files changed, 34 insertions(+), 136 deletions(-) diff --git a/src/driver.rs b/src/driver.rs index d81e9897..6fb359de 100644 --- a/src/driver.rs +++ b/src/driver.rs @@ -9,14 +9,11 @@ //! High-level interface to the parser. -use core::prelude::*; - use tokenizer::{TokenizerOpts, Tokenizer, TokenSink}; use tree_builder::{TreeBuilderOpts, TreeBuilder, TreeSink}; -use core::default::Default; -use core::option; -use collections::string::String; +use std::option; +use std::default::Default; use string_cache::{Atom, QualName}; diff --git a/src/for_c/common.rs b/src/for_c/common.rs index d469a484..03a7d74e 100644 --- a/src/for_c/common.rs +++ b/src/for_c/common.rs @@ -7,14 +7,9 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -use core::prelude::*; - -use core::ptr; -use core::slice; -use core::str; -use core::marker::PhantomData; +use std::{ptr, slice, str}; +use std::marker::PhantomData; use std::borrow::Cow; -use collections::string::String; use libc::{size_t, c_int, c_char, strlen}; diff --git a/src/for_c/tokenizer.rs b/src/for_c/tokenizer.rs index bdb02fb0..dac46cbf 100644 --- a/src/for_c/tokenizer.rs +++ b/src/for_c/tokenizer.rs @@ -9,18 +9,15 @@ #![allow(non_camel_case_types)] -use core::prelude::*; - use for_c::common::{LifetimeBuf, AsLifetimeBuf, h5e_buf, c_bool}; use tokenizer::{TokenSink, Token, Doctype, Tag, ParseError, DoctypeToken}; use tokenizer::{CommentToken, CharacterTokens, NullCharacterToken}; use tokenizer::{TagToken, StartTag, EndTag, EOFToken, Tokenizer}; -use core::mem; -use core::default::Default; -use alloc::boxed::Box; -use collections::String; +use std::mem; +use std::default::Default; + use libc::{c_void, c_int, size_t}; #[repr(C)] diff --git a/src/lib.rs b/src/lib.rs index 8f799172..a2ae4936 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -10,7 +10,7 @@ #![crate_name="html5ever"] #![crate_type="dylib"] -#![feature(plugin, box_syntax, no_std, core, collections, alloc, str_char, slice_patterns)] +#![feature(plugin, box_syntax, core, collections, alloc, str_char, slice_patterns)] #![deny(warnings)] #![allow(unused_parens)] @@ -18,23 +18,9 @@ #![plugin(string_cache_plugin)] #![plugin(html5ever_macros)] -// FIXME(#63): switch back to using std -#![no_std] - -extern crate alloc; - -#[macro_use] -extern crate core; - -#[macro_use] -extern crate std; - #[cfg(for_c)] extern crate libc; -#[macro_use] -extern crate collections; - #[macro_use] extern crate log; diff --git a/src/serialize/mod.rs b/src/serialize/mod.rs index 57dc91d9..6501ca18 100644 --- a/src/serialize/mod.rs +++ b/src/serialize/mod.rs @@ -7,11 +7,8 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -use core::prelude::*; - use std::io::{self, Write}; -use core::default::Default; -use collections::vec::Vec; +use std::default::Default; use string_cache::{Atom, QualName}; diff --git a/src/sink/common.rs b/src/sink/common.rs index 54b1f7bd..b6e14b61 100644 --- a/src/sink/common.rs +++ b/src/sink/common.rs @@ -9,8 +9,6 @@ use tokenizer::Attribute; -use collections::vec::Vec; -use collections::string::String; use string_cache::QualName; pub use self::NodeEnum::{Document, Doctype, Text, Comment, Element}; diff --git a/src/sink/owned_dom.rs b/src/sink/owned_dom.rs index 320edad5..670b5fbb 100644 --- a/src/sink/owned_dom.rs +++ b/src/sink/owned_dom.rs @@ -19,8 +19,6 @@ //! This is believed to be memory safe, but if you want to be extra //! careful you can use `RcDom` instead. -use core::prelude::*; - use sink::common::{NodeEnum, Document, Doctype, Text, Comment, Element}; use tokenizer::Attribute; @@ -31,14 +29,10 @@ use serialize::TraversalScope; use serialize::TraversalScope::{IncludeNode, ChildrenOnly}; use driver::ParseResult; -use core::cell::UnsafeCell; -use core::default::Default; -use core::mem::transmute; -use core::mem; -use core::ptr; -use alloc::boxed::Box; -use collections::vec::Vec; -use collections::string::String; +use std::{mem, ptr}; +use std::cell::UnsafeCell; +use std::default::Default; +use std::mem::transmute; use std::borrow::Cow; use std::io::{self, Write}; use std::collections::HashSet; diff --git a/src/sink/rcdom.rs b/src/sink/rcdom.rs index ed6ec00b..67f4296e 100644 --- a/src/sink/rcdom.rs +++ b/src/sink/rcdom.rs @@ -12,8 +12,6 @@ //! This is sufficient as a static parse tree, but don't build a //! web browser using it. :) -use core::prelude::*; - use sink::common::{NodeEnum, Document, Doctype, Text, Comment, Element}; use tokenizer::Attribute; @@ -24,11 +22,9 @@ use serialize::TraversalScope; use serialize::TraversalScope::{IncludeNode, ChildrenOnly}; use driver::ParseResult; -use core::cell::RefCell; -use core::default::Default; -use alloc::rc::{Rc, Weak}; -use collections::vec::Vec; -use collections::string::String; +use std::cell::RefCell; +use std::default::Default; +use std::rc::{Rc, Weak}; use std::borrow::Cow; use std::io::{self, Write}; use std::ops::DerefMut; diff --git a/src/tokenizer/buffer_queue.rs b/src/tokenizer/buffer_queue.rs index b3efd478..791e99ae 100644 --- a/src/tokenizer/buffer_queue.rs +++ b/src/tokenizer/buffer_queue.rs @@ -7,14 +7,11 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -use core::prelude::*; - use util::str::AsciiCast; use util::smallcharset::SmallCharSet; -use core::str::CharRange; -use collections::string::String; -use collections::VecDeque; +use std::str::CharRange; +use std::collections::VecDeque; pub use self::SetResult::{FromSet, NotFromSet}; @@ -177,8 +174,6 @@ impl BufferQueue { #[cfg(test)] #[allow(non_snake_case)] mod test { - use core::prelude::*; - use collections::string::String; use super::{BufferQueue, FromSet, NotFromSet}; #[test] diff --git a/src/tokenizer/char_ref/data.rs b/src/tokenizer/char_ref/data.rs index 462a8063..aa2a3624 100644 --- a/src/tokenizer/char_ref/data.rs +++ b/src/tokenizer/char_ref/data.rs @@ -7,8 +7,6 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -use core::prelude::*; - use phf::Map; /// The spec replaces most characters in the ISO-2022 C1 control code range diff --git a/src/tokenizer/char_ref/mod.rs b/src/tokenizer/char_ref/mod.rs index acbb0968..5141de4f 100644 --- a/src/tokenizer/char_ref/mod.rs +++ b/src/tokenizer/char_ref/mod.rs @@ -7,15 +7,12 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -use core::prelude::*; - use super::{Tokenizer, TokenSink}; use util::str::{is_ascii_alnum, empty_str}; -use core::char::from_u32; +use std::char::from_u32; use std::borrow::Cow::Borrowed; -use collections::string::String; pub use self::Status::*; use self::State::*; diff --git a/src/tokenizer/interface.rs b/src/tokenizer/interface.rs index 1eb91f00..28123a67 100644 --- a/src/tokenizer/interface.rs +++ b/src/tokenizer/interface.rs @@ -7,15 +7,9 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -use core::option::Option::{self, None}; -use core::clone::Clone; - use tokenizer::states; -use collections::vec::Vec; -use collections::string::String; use std::borrow::Cow; -use std::marker::Send; use string_cache::{Atom, QualName}; diff --git a/src/tokenizer/mod.rs b/src/tokenizer/mod.rs index ff0ea49f..476db92e 100644 --- a/src/tokenizer/mod.rs +++ b/src/tokenizer/mod.rs @@ -11,11 +11,6 @@ #![allow(unused_imports)] -use core::clone::Clone; -use core::cmp::Ord; -use core::iter::Iterator; -use core::option::Option::{self, Some, None}; - pub use self::interface::{Doctype, Attribute, TagKind, StartTag, EndTag, Tag}; pub use self::interface::{Token, DoctypeToken, TagToken, CommentToken}; pub use self::interface::{CharacterTokens, NullCharacterToken, EOFToken, ParseError}; @@ -34,11 +29,8 @@ use self::buffer_queue::{BufferQueue, SetResult, FromSet, NotFromSet}; use util::str::{lower_ascii, lower_ascii_letter, empty_str}; use util::smallcharset::SmallCharSet; -use core::mem::replace; -use core::default::Default; -use alloc::boxed::Box; -use collections::vec::Vec; -use collections::string::{String, ToString}; +use std::mem::replace; +use std::default::Default; use std::borrow::Cow::{self, Borrowed}; use std::collections::BTreeMap; @@ -1329,9 +1321,6 @@ impl Tokenizer { #[cfg(test)] #[allow(non_snake_case)] mod test { - use core::prelude::*; - use collections::vec::Vec; - use collections::string::String; use super::{option_push, append_strings}; // private items #[test] diff --git a/src/tokenizer/states.rs b/src/tokenizer/states.rs index f45d1cd5..1f04075c 100644 --- a/src/tokenizer/states.rs +++ b/src/tokenizer/states.rs @@ -12,8 +12,6 @@ //! This is public for use by the tokenizer tests. Other library //! users should not have to care about this. -use core::prelude::*; - pub use self::ScriptEscapeKind::*; pub use self::DoctypeIdKind::*; pub use self::RawKind::*; diff --git a/src/tree_builder/actions.rs b/src/tree_builder/actions.rs index 60f4d786..25bc99ca 100644 --- a/src/tree_builder/actions.rs +++ b/src/tree_builder/actions.rs @@ -12,8 +12,6 @@ //! Many of these are named within the spec, e.g. "reset the insertion //! mode appropriately". -use core::prelude::*; - use tree_builder::types::*; use tree_builder::tag_sets::*; use tree_builder::interface::{TreeSink, QuirksMode, NodeOrText, AppendNode, AppendText}; @@ -24,12 +22,9 @@ use tokenizer::states::{RawData, RawKind}; use util::str::{AsciiExt, to_escaped_string}; -use core::mem::replace; -use core::iter::{Rev, Enumerate}; -use core::slice; -use core::fmt::Debug; -use collections::vec::Vec; -use collections::string::String; +use std::{slice, fmt}; +use std::mem::replace; +use std::iter::{Rev, Enumerate}; use std::borrow::Cow::Borrowed; use string_cache::{Atom, Namespace, QualName}; @@ -62,7 +57,7 @@ enum Bookmark { // These go in a trait so that we can control visibility. pub trait TreeBuilderActions { - fn unexpected(&mut self, thing: &T) -> ProcessResult; + fn unexpected(&mut self, thing: &T) -> ProcessResult; fn assert_named(&mut self, node: Handle, name: Atom); fn clear_active_formatting_to_marker(&mut self); fn create_formatting_element_for(&mut self, tag: Tag) -> Handle; @@ -131,7 +126,7 @@ impl TreeBuilderActions where Handle: Clone, Sink: TreeSink, { - fn unexpected(&mut self, _thing: &T) -> ProcessResult { + fn unexpected(&mut self, _thing: &T) -> ProcessResult { self.sink.parse_error(format_if!( self.opts.exact_errors, "Unexpected token", diff --git a/src/tree_builder/data.rs b/src/tree_builder/data.rs index 36818639..a38f2f9a 100644 --- a/src/tree_builder/data.rs +++ b/src/tree_builder/data.rs @@ -7,14 +7,10 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -use core::prelude::*; - use tokenizer::Doctype; use tree_builder::interface::{QuirksMode, Quirks, LimitedQuirks, NoQuirks}; use util::str::AsciiExt; -use collections::string::String; - // These should all be lowercase, for ASCII-case-insensitive matching. static QUIRKY_PUBLIC_PREFIXES: &'static [&'static str] = &[ "-//advasoft ltd//dtd html 3.0 aswedit + extensions//", diff --git a/src/tree_builder/interface.rs b/src/tree_builder/interface.rs index 4b948c35..a6086af5 100644 --- a/src/tree_builder/interface.rs +++ b/src/tree_builder/interface.rs @@ -10,12 +10,8 @@ //! The interface for consumers of the tree builder (and thus the //! parser overall). -use core::prelude::*; - use tokenizer::Attribute; -use collections::vec::Vec; -use collections::string::String; use std::borrow::Cow; use string_cache::QualName; diff --git a/src/tree_builder/mod.rs b/src/tree_builder/mod.rs index 2e304803..3d9e88b5 100644 --- a/src/tree_builder/mod.rs +++ b/src/tree_builder/mod.rs @@ -9,8 +9,6 @@ //! The HTML5 tree builder. -use core::prelude::*; - pub use self::interface::{QuirksMode, Quirks, LimitedQuirks, NoQuirks}; pub use self::interface::{NodeOrText, AppendNode, AppendText}; pub use self::interface::{TreeSink, Tracer, NextParserState}; @@ -28,12 +26,10 @@ use tokenizer::states as tok_state; use util::str::{is_ascii_whitespace, char_run}; -use core::default::Default; -use core::mem::replace; -use collections::vec::Vec; -use collections::string::String; +use std::default::Default; +use std::mem::replace; use std::borrow::Cow::Borrowed; -use collections::VecDeque; +use std::collections::VecDeque; #[macro_use] mod tag_sets; // "pub" is a workaround for rust#18241 (?) diff --git a/src/tree_builder/rules.rs b/src/tree_builder/rules.rs index 0bb30e14..ce01bfab 100644 --- a/src/tree_builder/rules.rs +++ b/src/tree_builder/rules.rs @@ -9,8 +9,6 @@ //! The tree builder rules, as a single, enormous nested match expression. -use core::prelude::*; - use tree_builder::types::*; use tree_builder::tag_sets::*; use tree_builder::actions::TreeBuilderActions; @@ -21,8 +19,7 @@ use tokenizer::states::{Rcdata, Rawtext, ScriptData, Plaintext, Quiescent}; use util::str::{AsciiExt, is_ascii_whitespace}; -use core::mem::replace; -use collections::string::String; +use std::mem::replace; use std::borrow::Cow::Borrowed; use std::borrow::ToOwned; diff --git a/src/tree_builder/types.rs b/src/tree_builder/types.rs index dfca9ce7..5ed8ab12 100644 --- a/src/tree_builder/types.rs +++ b/src/tree_builder/types.rs @@ -9,12 +9,8 @@ //! Types used within the tree builder code. Not exported to users. -use core::prelude::*; - use tokenizer::Tag; -use collections::string::String; - pub use self::InsertionMode::*; pub use self::SplitStatus::*; pub use self::Token::*; diff --git a/src/util/smallcharset.rs b/src/util/smallcharset.rs index 53f07e1d..673fd2b3 100644 --- a/src/util/smallcharset.rs +++ b/src/util/smallcharset.rs @@ -7,8 +7,6 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -use core::prelude::*; - /// Represents a set of "small characters", those with Unicode scalar /// values less than 64. pub struct SmallCharSet { @@ -45,9 +43,7 @@ macro_rules! small_char_set ( ($($e:expr)+) => ( #[cfg(test)] mod test { - use core::prelude::*; - use core::iter::repeat; - use collections::string::String; + use std::iter::repeat; #[test] fn nonmember_prefix() { diff --git a/src/util/str.rs b/src/util/str.rs index 892d9707..70a5c19b 100644 --- a/src/util/str.rs +++ b/src/util/str.rs @@ -7,14 +7,10 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -use core::prelude::*; +use std::fmt; -use collections::vec::Vec; -use collections::string::String; -use core::fmt::Debug; - -pub fn to_escaped_string(x: &T) -> String { - use core::fmt::Write; +pub fn to_escaped_string(x: &T) -> String { + use std::fmt::Write; // FIXME: don't allocate twice let mut buf = String::new(); @@ -206,7 +202,6 @@ pub fn char_run(mut pred: Pred, buf: &str) -> Option<(usize, bool)> #[cfg(test)] #[allow(non_snake_case)] mod test { - use core::prelude::*; use super::{char_run, is_ascii_whitespace, is_ascii_alnum, lower_ascii, lower_ascii_letter}; test_eq!(lower_letter_a_is_a, lower_ascii_letter('a'), Some('a')); From bf2124710be7fb8b7ff092ed1b8a66495e713a29 Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Sun, 19 Apr 2015 13:03:35 -0700 Subject: [PATCH 7/7] Build the C bindings via Cargo Fixes #95. --- .gitignore | 5 +- Makefile.in | 65 -------------------------- README.md | 11 ++--- STRUCTURE.md | 4 +- capi/Cargo.toml | 20 ++++++++ capi/{ => include}/html5ever.h | 0 src/for_c/common.rs => capi/src/lib.rs | 6 ++- {src/for_c => capi/src}/tokenizer.rs | 8 ++-- configure | 4 -- scripts/travis-build.sh | 13 ++++-- src/lib.rs | 9 ---- 11 files changed, 43 insertions(+), 102 deletions(-) delete mode 100644 Makefile.in create mode 100644 capi/Cargo.toml rename capi/{ => include}/html5ever.h (100%) rename src/for_c/common.rs => capi/src/lib.rs (94%) rename {src/for_c => capi/src}/tokenizer.rs (94%) delete mode 100755 configure diff --git a/.gitignore b/.gitignore index 0fdc5e52..7fc3c6bc 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,3 @@ -/build /data/bench/uncommitted -/target -/Cargo.lock +target +Cargo.lock diff --git a/Makefile.in b/Makefile.in deleted file mode 100644 index 01729d76..00000000 --- a/Makefile.in +++ /dev/null @@ -1,65 +0,0 @@ -# Copyright 2014 The html5ever Project Developers. See the -# COPYRIGHT file at the top-level directory of this distribution. -# -# Licensed under the Apache License, Version 2.0 or the MIT license -# , at your -# option. This file may not be copied, modified, or distributed -# except according to those terms. - -VPATH := %VPATH% - -RUSTC ?= rustc -RUST_DIRS := -L $(VPATH)/target/debug -L $(VPATH)/target/debug/deps - -RUSTC_CMD := $(RUSTC) -D warnings -C rpath $(RUST_DIRS) \ - --extern time=`find $(VPATH)/target/debug/deps -name 'libtime-*.rlib'` \ - --extern log=`find $(VPATH)/target/debug/deps -name 'liblog-*.rlib'` \ - --extern libc=`find $(VPATH)/target/debug/deps -name 'liblibc-*.rlib'` \ - $(RUSTFLAGS) - -# We build the library itself using Cargo. -CARGO_SOURCES := $(shell find $(VPATH)/src $(VPATH)/macros/src -type f -name '*.rs') - -LIB := libhtml5ever.dummy - -.PHONY: all -all: $(LIB) - -.PHONY: examples -examples: - (cd $(VPATH) && cargo test --no-run) - -$(LIB): $(CARGO_SOURCES) - (cd $(VPATH) && cargo build) - touch $(LIB) - -.PHONY: for_c -for_c: libhtml5ever_for_c.a - -libhtml5ever_for_c.a: $(LIB) $(CARGO_SOURCES) - $(RUSTC_CMD) -o $@ --cfg for_c --crate-type staticlib $(VPATH)/src/lib.rs - -.PHONY: check -check: check-build - (cd $(VPATH) && cargo test) - -.PHONY: check-build -check-build: all examples check-build-bench - -.PHONY: check-build-bench -check-build-bench: - (cd $(VPATH) && cargo bench --no-run) - -.PHONY: bench -bench: - (cd $(VPATH) && cargo bench) - -.PHONY: clean -clean: - (cd $(VPATH) && cargo clean) - rm -f *.o *.a *.so *.dylib *.dll *.dummy - -.PHONY: docs -docs: - (cd $(VPATH) && cargo doc) diff --git a/README.md b/README.md index a6c64001..8a4c9e6b 100644 --- a/README.md +++ b/README.md @@ -33,18 +33,13 @@ Bindings for Python and other languages are much desired. ## Working on html5ever -To build examples and tests, do something like +To fetch the test suite, you need to run ``` -git submodule update --init # to fetch html5lib-tests -mkdir build && cd build -../configure -make examples check bench +git submodule update --init ``` -This will invoke Cargo when necessary. - -Run `cargo doc` in the repository root (or `make docs` in the build directory) to build local documentation under `target/doc/`. +Run `cargo doc` in the repository root to build local documentation under `target/doc/`. ## Details diff --git a/STRUCTURE.md b/STRUCTURE.md index 8a4b2bac..fd30013a 100644 --- a/STRUCTURE.md +++ b/STRUCTURE.md @@ -14,11 +14,9 @@ The module structure is also documented in the output produced by `cargo doc`, a `src/sink/`: Types that html5ever can use to represent the DOM, if you do not provide your own DOM implementation. -`src/for_c/`: Implementation of the C API for html5ever (as yet incomplete) - `macros/`: Rust syntax extensions used within html5ever. Users of the library do not need this crate. -`capi/html5ever.h`: C header for the C API +`capi/`: Implementation of the C API for html5ever (as yet incomplete) `tests/`: Integration tests. This is a single executable crate that runs html5ever on the various [html5lib-tests](https://github.com/html5lib/html5lib-tests). There are also unit tests throughout the library code. See `README.md` for information on running tests. diff --git a/capi/Cargo.toml b/capi/Cargo.toml new file mode 100644 index 00000000..d665d752 --- /dev/null +++ b/capi/Cargo.toml @@ -0,0 +1,20 @@ +[package] + +name = "html5ever_capi" +version = "0.0.0" +authors = [ "The html5ever Project Developers" ] + +[lib] +name = "html5ever_capi" +crate-type = ["staticlib"] + +[dependencies] +libc = "0" + +[dependencies.html5ever] +path = "../" + +[dependencies.string_cache] +git = "https://github.com/servo/string-cache" +[dependencies.string_cache_plugin] +git = "https://github.com/servo/string-cache" diff --git a/capi/html5ever.h b/capi/include/html5ever.h similarity index 100% rename from capi/html5ever.h rename to capi/include/html5ever.h diff --git a/src/for_c/common.rs b/capi/src/lib.rs similarity index 94% rename from src/for_c/common.rs rename to capi/src/lib.rs index 03a7d74e..2f1dbc62 100644 --- a/src/for_c/common.rs +++ b/capi/src/lib.rs @@ -1,4 +1,4 @@ -// Copyright 2014 The html5ever Project Developers. See the +// Copyright 2014-2015 The html5ever Project Developers. See the // COPYRIGHT file at the top-level directory of this distribution. // // Licensed under the Apache License, Version 2.0 Makefile diff --git a/scripts/travis-build.sh b/scripts/travis-build.sh index 8a0148c1..7d927ac9 100755 --- a/scripts/travis-build.sh +++ b/scripts/travis-build.sh @@ -10,8 +10,11 @@ set -ex -mkdir build -cd build -../configure -make check docs for_c | ../scripts/shrink-test-output.py -exit ${PIPESTATUS[0]} +cargo doc +cargo test --no-run +cargo test | ./scripts/shrink-test-output.py +r=${PIPESTATUS[0]} +if [ $r -ne 0 ]; then exit $r; fi + +cd capi +cargo test diff --git a/src/lib.rs b/src/lib.rs index a2ae4936..ec94ce6b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -18,9 +18,6 @@ #![plugin(string_cache_plugin)] #![plugin(html5ever_macros)] -#[cfg(for_c)] -extern crate libc; - #[macro_use] extern crate log; @@ -61,9 +58,3 @@ pub mod sink { } pub mod driver; - -#[cfg(for_c)] -pub mod for_c { - pub mod common; - pub mod tokenizer; -}