From e058a7787efc6eb603cf94bf2f3999806a3a6cd9 Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Sun, 23 Oct 2016 12:58:57 +0200 Subject: [PATCH] Properly encode the states for CDATA --- Cargo.toml | 2 +- src/tokenizer/mod.rs | 37 ++++++++++++++++++++++++++----------- src/tokenizer/states.rs | 2 ++ 3 files changed, 29 insertions(+), 12 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index c4384911..5b9429bf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "html5ever" -version = "0.6.1" +version = "0.7.0" authors = [ "The html5ever Project Developers" ] license = "MIT / Apache-2.0" repository = "https://github.com/servo/html5ever" diff --git a/src/tokenizer/mod.rs b/src/tokenizer/mod.rs index ea23e490..18a90ae6 100644 --- a/src/tokenizer/mod.rs +++ b/src/tokenizer/mod.rs @@ -1167,16 +1167,25 @@ impl Tokenizer { }, //§ cdata-section-state - states::CdataSection => loop { - if eat_exact!(self, "]]>") { - go!(self: emit_temp; to Data); - } else { - match get_char!(self) { - '\0' => go!(self: emit_temp; emit '\0'), - c => go!(self: push_temp c) - } - } - } + states::CdataSection => loop { match get_char!(self) { + ']' => go!(self: to CdataSectionBracket), + '\0' => go!(self: emit_temp; emit '\0'), + c => go!(self: push_temp c), + }}, + + //§ cdata-section-bracket + states::CdataSectionBracket => match get_char!(self) { + ']' => go!(self: to CdataSectionEnd), + _ => go!(self: push_temp ']'; reconsume CdataSection), + }, + + //§ cdata-section-end + states::CdataSectionEnd => loop { match get_char!(self) { + ']' => go!(self: push_temp ']'), + '>' => go!(self: emit_temp; to Data), + _ => go!(self: push_temp ']'; push_temp ']'; reconsume CdataSection), + }}, + //§ END } } @@ -1331,7 +1340,13 @@ impl Tokenizer { => go!(self: error; to BogusComment), states::CdataSection - => go!(self: emit_temp; to Data), + => go!(self: emit_temp; error_eof; to Data), + + states::CdataSectionBracket + => go!(self: push_temp ']'; to CdataSection), + + states::CdataSectionEnd + => go!(self: push_temp ']'; push_temp ']'; to CdataSection), } } } diff --git a/src/tokenizer/states.rs b/src/tokenizer/states.rs index 1f04075c..4b0da1a0 100644 --- a/src/tokenizer/states.rs +++ b/src/tokenizer/states.rs @@ -88,5 +88,7 @@ pub enum State { BetweenDoctypePublicAndSystemIdentifiers, BogusDoctype, CdataSection, + CdataSectionBracket, + CdataSectionEnd, Quiescent, }