summaryrefslogtreecommitdiff
path: root/Tests/LibJS
diff options
context:
space:
mode:
authordavidot <davidot@serenityos.org>2021-10-03 13:10:35 +0200
committerAndreas Kling <kling@serenityos.org>2021-10-03 17:42:05 +0200
commitac2c3a73b136787d5dad0e00a43c1fcbed8f1ff3 (patch)
tree0da429bab9e03696b5d4f22db1a94d9732e44b99 /Tests/LibJS
parent962298b04005f59e34d93b6db088e98efc8a3ba1 (diff)
downloadserenity-ac2c3a73b136787d5dad0e00a43c1fcbed8f1ff3.zip
LibJS: Add a specific test for invalid unicode characters in the lexer
Also fixes the lexer trying to create substrings past the end of the source when it had overrun the source length.
Diffstat (limited to 'Tests/LibJS')
-rw-r--r--Tests/LibJS/CMakeLists.txt3
-rw-r--r--Tests/LibJS/test-invalid-unicode-js.cpp76
2 files changed, 79 insertions, 0 deletions
diff --git a/Tests/LibJS/CMakeLists.txt b/Tests/LibJS/CMakeLists.txt
index 7bd2772c9f..b800d6ca21 100644
--- a/Tests/LibJS/CMakeLists.txt
+++ b/Tests/LibJS/CMakeLists.txt
@@ -1,2 +1,5 @@
serenity_testjs_test(test-js.cpp test-js)
+
install(TARGETS test-js RUNTIME DESTINATION bin OPTIONAL)
+
+serenity_test(test-invalid-unicode-js.cpp LibJS LIBS LibJS) # Registers the invalid-unicode lexer test; `LIBS LibJS` presumably links it against LibJS - confirm against the serenity_test() macro.
diff --git a/Tests/LibJS/test-invalid-unicode-js.cpp b/Tests/LibJS/test-invalid-unicode-js.cpp
new file mode 100644
index 0000000000..9e209f29d5
--- /dev/null
+++ b/Tests/LibJS/test-invalid-unicode-js.cpp
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2021, David Tuin <davidot@serenityos.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <LibJS/Parser.h>
+#include <LibTest/TestCase.h>
+
+TEST_CASE(invalid_unicode_only) // The whole source is a single invalid byte sequence.
+{
+ char const* code = "\xEA\xFD"; // 0xEA is a three-byte UTF-8 lead byte; 0xFD is not a valid continuation byte.
+ auto lexer = JS::Lexer(code);
+ auto token = lexer.next();
+ EXPECT_EQ(token.type(), JS::TokenType::Invalid);
+
+ // Once the invalid token has been returned, repeated next() calls are expected to keep returning Eof (checked ten times).
+ for (auto i = 0; i < 10; i++) {
+ auto eof_token = lexer.next();
+ EXPECT_EQ(eof_token.type(), JS::TokenType::Eof);
+ }
+}
+
+TEST_CASE(long_invalid_unicode) // The source ends right after a lead byte that announces a longer sequence.
+{
+ char const* code = "\xF7"; // 0xF7 has the four-byte UTF-8 lead-byte pattern, but no further bytes follow it.
+ auto lexer = JS::Lexer(code);
+ auto token = lexer.next();
+ EXPECT_EQ(token.type(), JS::TokenType::Invalid);
+
+ // Once the invalid token has been returned, repeated next() calls are expected to keep returning Eof (checked ten times).
+ for (auto i = 0; i < 10; i++) {
+ auto eof_token = lexer.next();
+ EXPECT_EQ(eof_token.type(), JS::TokenType::Eof);
+ }
+}
+
+TEST_CASE(invalid_unicode_and_valid_code) // Invalid bytes directly followed by otherwise valid source text.
+{
+ char const* code = "\xEA\xFDthrow 1;";
+ auto lexer = JS::Lexer(code);
+ auto invalid_token = lexer.next();
+ EXPECT_EQ(invalid_token.type(), JS::TokenType::Invalid);
+ // 0xEA is the lead byte of a three-byte UTF-8 sequence, so the invalid token spans 0xEA, 0xFD and the 't'; the next token therefore starts at "hrow".
+ auto token_after = lexer.next();
+ EXPECT_EQ(token_after.value(), "hrow");
+}
+
+TEST_CASE(long_invalid_unicode_and_valid_code) // A four-byte lead byte directly followed by otherwise valid source text.
+{
+ char const* code = "\xF7throw 1;";
+ auto lexer = JS::Lexer(code);
+ auto invalid_token = lexer.next();
+ EXPECT_EQ(invalid_token.type(), JS::TokenType::Invalid);
+ // 0xF7 has the lead-byte pattern of a four-byte UTF-8 sequence, so the invalid token spans 0xF7 and "thr"; the next token therefore starts at "ow".
+ auto token_after = lexer.next();
+ EXPECT_EQ(token_after.value(), "ow");
+}
+
+TEST_CASE(invalid_unicode_after_valid_code_and_before_eof) // Invalid bytes in the middle of the source, after a valid token.
+{
+ char const* code = "let \xEA\xFD;";
+ auto lexer = JS::Lexer(code);
+ auto let_token = lexer.next();
+ EXPECT_EQ(let_token.type(), JS::TokenType::Let);
+ auto invalid_token = lexer.next();
+ EXPECT_EQ(invalid_token.type(), JS::TokenType::Invalid);
+ // The space between `let` and the invalid bytes must still be reported as the invalid token's trivia.
+ EXPECT_EQ(invalid_token.trivia(), " ");
+
+ // The test expects Eof immediately after the invalid token (no separate token for the ';'), and on each of ten repeated next() calls.
+ for (auto i = 0; i < 10; i++) {
+ auto eof_token = lexer.next();
+ EXPECT_EQ(eof_token.type(), JS::TokenType::Eof);
+ }
+}