From 1babdb823dddbed777e90de179b75c37801920ea Mon Sep 17 00:00:00 2001 From: Josh Matthews Date: Wed, 11 Mar 2015 13:38:18 -0400 Subject: [PATCH] Add a quiescent state to the tokenizer to allow interrupting the parser and resuming it later. --- src/tokenizer/interface.rs | 2 +- src/tokenizer/mod.rs | 21 +++++++++++++-------- src/tokenizer/states.rs | 1 + src/tree_builder/interface.rs | 13 ++++++++++++- src/tree_builder/mod.rs | 2 +- src/tree_builder/rules.rs | 8 +++++--- 6 files changed, 33 insertions(+), 14 deletions(-) diff --git a/src/tokenizer/interface.rs b/src/tokenizer/interface.rs index d06094a2..77f63f37 100644 --- a/src/tokenizer/interface.rs +++ b/src/tokenizer/interface.rs @@ -108,7 +108,7 @@ pub trait TokenSink { /// Process a token. fn process_token(&mut self, token: Token); - /// The tokenizer will call this after emitting any start tag. + /// The tokenizer will call this after emitting any tag. /// This allows the tree builder to change the tokenizer's state. /// By default no state changes occur. fn query_state_change(&mut self) -> Option { diff --git a/src/tokenizer/mod.rs b/src/tokenizer/mod.rs index c923a427..0a2addc9 100644 --- a/src/tokenizer/mod.rs +++ b/src/tokenizer/mod.rs @@ -328,8 +328,8 @@ impl Tokenizer { } } - // Run the state machine for as long as we can. - fn run(&mut self) { + /// Run the state machine for as long as we can. + pub fn run(&mut self) { if self.opts.profile { loop { let state = self.state; @@ -410,11 +410,9 @@ impl Tokenizer { }); self.process_token(token); - if self.current_tag_kind == StartTag { - match self.sink.query_state_change() { - None => (), - Some(s) => self.state = s, - } + match self.sink.query_state_change() { + None => (), + Some(s) => self.state = s, } } @@ -651,6 +649,13 @@ impl Tokenizer { h5e_debug!("processing in state {:?}", self.state); match self.state { + // Reachable only through `query_state_change`. The tree builder wants + // the tokenizer to suspend processing. 
+ states::Quiescent => { + self.state = states::Data; + return false; + } + //§ data-state states::Data => loop { match pop_except_from!(self, small_char_set!('\r' '\0' '&' '<')) { @@ -1271,7 +1276,7 @@ impl Tokenizer { h5e_debug!("processing EOF in state {:?}", self.state); match self.state { states::Data | states::RawData(Rcdata) | states::RawData(Rawtext) - | states::RawData(ScriptData) | states::Plaintext + | states::RawData(ScriptData) | states::Plaintext | states::Quiescent => go!(self: eof), states::TagName | states::RawData(ScriptDataEscaped(_)) diff --git a/src/tokenizer/states.rs b/src/tokenizer/states.rs index e99ad033..f45d1cd5 100644 --- a/src/tokenizer/states.rs +++ b/src/tokenizer/states.rs @@ -90,4 +90,5 @@ pub enum State { BetweenDoctypePublicAndSystemIdentifiers, BogusDoctype, CdataSection, + Quiescent, } diff --git a/src/tree_builder/interface.rs b/src/tree_builder/interface.rs index 0a872b7d..4b948c35 100644 --- a/src/tree_builder/interface.rs +++ b/src/tree_builder/interface.rs @@ -40,6 +40,15 @@ pub enum NodeOrText { AppendText(String), } +/// Whether to interrupt further parsing of the current input until +/// the next explicit resumption of the tokenizer, or continue without +/// any interruption. +#[derive(PartialEq, Eq, Copy, Clone, Hash, Debug)] +pub enum NextParserState { + Suspend, + Continue, +} + /// Types which can process tree modifications from the tree builder. pub trait TreeSink { /// `Handle` is a reference to a DOM node. The tree builder requires @@ -108,7 +117,9 @@ pub trait TreeSink { fn mark_script_already_started(&mut self, node: Self::Handle); /// Indicate that a ` not fully implemented"); - self.sink.complete_script(node); + if self.sink.complete_script(node) == NextParserState::Suspend { + self.next_tokenizer_state = Some(Quiescent); + } } self.mode = self.orig_mode.take().unwrap(); Done