From adb5f7e485f3870be6dba550daae2f630a0804d2 Mon Sep 17 00:00:00 2001 From: Luke Wilde Date: Sun, 29 May 2022 22:25:43 +0100 Subject: LibXML+Tests: Consume `>` in the character data ending `]]>` and test it For example, with this input: ```xml <C>]]> ``` After seeing `<C>`, the parser will start parsing the content of the element. The content parser will then parse any character data it sees. The character parser would see the first two `]]` and consume them. Then, it would see the `>` and set the state machine to say we have seen this, but it did _not_ consume it and would instead tell GenericLexer that it should stop consuming characters. Therefore, we only consumed 2 characters. Then, it would see that we are in the state where we've seen the full `]]>` and try to take off three characters from the end of the consumed input when we only have 2 characters, causing an assertion failure as we are asking to take off more characters than there really are. --- Userland/Libraries/LibXML/Parser/Parser.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'Userland/Libraries/LibXML') diff --git a/Userland/Libraries/LibXML/Parser/Parser.cpp b/Userland/Libraries/LibXML/Parser/Parser.cpp index 0940d76fab..d32ca51c75 100644 --- a/Userland/Libraries/LibXML/Parser/Parser.cpp +++ b/Userland/Libraries/LibXML/Parser/Parser.cpp @@ -891,7 +891,7 @@ ErrorOr Parser::parse_char_data() // CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) auto cend_state = 0; // 1: ], 2: ], 3: > auto text = m_lexer.consume_while([&](auto ch) { - if (ch == '<' || ch == '&') + if (ch == '<' || ch == '&' || cend_state == 3) return false; switch (cend_state) { case 0: @@ -904,7 +904,7 @@ ErrorOr Parser::parse_char_data() case 2: if (ch == '>') { cend_state++; - return false; + return true; } cend_state = 0; return true; -- cgit v1.2.3