summaryrefslogtreecommitdiff
path: root/Tests
diff options
context:
space:
mode:
authorLuke Wilde <lukew@serenityos.org>2022-05-29 22:25:43 +0100
committerLinus Groh <mail@linusgroh.de>2022-05-30 00:16:17 +0100
commitadb5f7e485f3870be6dba550daae2f630a0804d2 (patch)
tree99f97307c4bdc8e0eb4982eb01a98beb3c7a6032 /Tests
parent9a97ffe8837727ce339bada865be1ee08d0d7859 (diff)
downloadserenity-adb5f7e485f3870be6dba550daae2f630a0804d2.zip
LibXML+Tests: Consume `>` in the character data ending `]]>` and test it
For example, with this input: `<C>]]>`. After seeing `<C>`, the parser will start parsing the content of the element. The content parser will then parse any character data it sees. The character data parser would see the first two characters, `]]`, and consume them. Then it would see the `>` and update the state machine to record that the full `]]>` had been seen, but it did _not_ consume the `>` and would instead tell GenericLexer to stop consuming characters. Therefore, only 2 characters were consumed. Then, seeing that it was in the state where the full `]]>` had been seen, it would try to take three characters off the end of the consumed input when there were only 2, causing an assertion failure because it was asking to remove more characters than there really are.
Diffstat (limited to 'Tests')
-rw-r--r--Tests/CMakeLists.txt1
-rw-r--r--Tests/LibXML/CMakeLists.txt7
-rw-r--r--Tests/LibXML/TestParser.cpp22
3 files changed, 30 insertions, 0 deletions
diff --git a/Tests/CMakeLists.txt b/Tests/CMakeLists.txt
index 8bd6281bc3..d0180ef086 100644
--- a/Tests/CMakeLists.txt
+++ b/Tests/CMakeLists.txt
@@ -23,6 +23,7 @@ add_subdirectory(LibTimeZone)
add_subdirectory(LibUnicode)
add_subdirectory(LibWasm)
add_subdirectory(LibWeb)
+add_subdirectory(LibXML)
if (${SERENITY_ARCH} STREQUAL "i686")
add_subdirectory(UserspaceEmulator)
endif()
diff --git a/Tests/LibXML/CMakeLists.txt b/Tests/LibXML/CMakeLists.txt
new file mode 100644
index 0000000000..107f39bc11
--- /dev/null
+++ b/Tests/LibXML/CMakeLists.txt
@@ -0,0 +1,7 @@
+set(TEST_SOURCES
+ TestParser.cpp
+)
+
+foreach(source IN LISTS TEST_SOURCES) # Invoke serenity_test once for every file listed in TEST_SOURCES.
+ serenity_test("${source}" LibXML LIBS LibXML)
+endforeach()
diff --git a/Tests/LibXML/TestParser.cpp b/Tests/LibXML/TestParser.cpp
new file mode 100644
index 0000000000..54a9621a22
--- /dev/null
+++ b/Tests/LibXML/TestParser.cpp
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2022, Luke Wilde <lukew@serenityos.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <LibTest/TestCase.h>
+#include <LibXML/Parser/Parser.h>
+
+TEST_CASE(char_data_ending)
+{
+ EXPECT_NO_CRASH("parsing character data ending by itself should not crash", [] {
+ // Regression test: the element content below consists solely of the character data ending `]]>`.
+ // After seeing `<C>`, the parser starts parsing the element's content, and the content parser parses any character data it sees.
+ // The character data parser used to consume the two `]` characters, then see the `>` and record in its state machine that the full `]]>` had been seen,
+ // but it did _not_ consume the `>` and instead told GenericLexer to stop consuming characters, so only 2 characters were consumed.
+ // It then tried to take three characters off the end of that 2-character consumed input, causing an assertion failure.
+ XML::Parser parser("<C>]]>");
+ (void)parser.parse();
+ return Test::Crash::Failure::DidNotCrash;
+ });
+}