summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAli Mohammad Pur <ali.mpfard@gmail.com>2022-02-19 17:34:55 +0330
committerAndreas Kling <kling@serenityos.org>2022-02-20 11:53:59 +0100
commit4be7239626c02709a34a825be44a5a73a2c1ea77 (patch)
tree4f676af2654edead6bc0985f99ce8d839d6d3a97
parent627bbee05580163b869d45fa67df0a4529eb3ca5 (diff)
downloadserenity-4be7239626c02709a34a825be44a5a73a2c1ea77.zip
LibRegex: Make parse_disjunction() consume all disjunctions in one frame
This helps us not blow up when too many disjunctions are chained togther in the regex we're parsing. Fixes #12615.
-rw-r--r--Tests/LibRegex/Regex.cpp7
-rw-r--r--Userland/Libraries/LibRegex/RegexParser.cpp47
2 files changed, 33 insertions, 21 deletions
diff --git a/Tests/LibRegex/Regex.cpp b/Tests/LibRegex/Regex.cpp
index 2b11be4d37..97aeec48c4 100644
--- a/Tests/LibRegex/Regex.cpp
+++ b/Tests/LibRegex/Regex.cpp
@@ -498,6 +498,8 @@ TEST_CASE(posix_extended_nested_capture_group)
EXPECT_EQ(result.capture_group_matches[0][2].view, "llo"sv);
}
+auto parse_test_case_long_disjunction_chain = String::repeated("a|"sv, 10000);
+
TEST_CASE(ECMA262_parse)
{
struct _test {
@@ -506,7 +508,7 @@ TEST_CASE(ECMA262_parse)
regex::ECMAScriptFlags flags {};
};
- constexpr _test tests[] {
+ _test const tests[] {
{ "^hello.$"sv },
{ "^(hello.)$"sv },
{ "^h{0,1}ello.$"sv },
@@ -599,7 +601,8 @@ TEST_CASE(ECMA262_parse)
{ "(?<$$_$$>a)"sv },
{ "(?<ΓΏ>a)"sv },
{ "(?<𝓑𝓻𝓸𝔀𝓷>a)"sv },
- { "((?=lg)?[vl]k\\-?\\d{3}) bui| 3\\.[-\\w; ]{10}lg?-([06cv9]{3,4})"sv, regex::Error::NoError, ECMAScriptFlags::BrowserExtended } // #12373, quantifiable assertions.
+ { "((?=lg)?[vl]k\\-?\\d{3}) bui| 3\\.[-\\w; ]{10}lg?-([06cv9]{3,4})"sv, regex::Error::NoError, ECMAScriptFlags::BrowserExtended }, // #12373, quantifiable assertions.
+ { parse_test_case_long_disjunction_chain.view() }, // A whole lot of disjunctions, should not overflow the stack.
};
for (auto& test : tests) {
diff --git a/Userland/Libraries/LibRegex/RegexParser.cpp b/Userland/Libraries/LibRegex/RegexParser.cpp
index 60d960a655..530570057c 100644
--- a/Userland/Libraries/LibRegex/RegexParser.cpp
+++ b/Userland/Libraries/LibRegex/RegexParser.cpp
@@ -956,28 +956,37 @@ bool ECMA262Parser::parse_pattern(ByteCode& stack, size_t& match_length_minimum,
bool ECMA262Parser::parse_disjunction(ByteCode& stack, size_t& match_length_minimum, bool unicode, bool named)
{
- ByteCode left_alternative_stack;
- size_t left_alternative_min_length = 0;
- auto alt_ok = parse_alternative(left_alternative_stack, left_alternative_min_length, unicode, named);
- if (!alt_ok)
- return false;
+ size_t total_match_length_minimum = NumericLimits<size_t>::max();
+ Vector<ByteCode> alternatives;
+ do {
+ ByteCode alternative_stack;
+ size_t alternative_minimum_length = 0;
+ auto alt_ok = parse_alternative(alternative_stack, alternative_minimum_length, unicode, named);
+ if (!alt_ok)
+ return false;
- if (!match(TokenType::Pipe)) {
- stack.extend(left_alternative_stack);
- match_length_minimum = left_alternative_min_length;
- return alt_ok;
- }
+ alternatives.append(move(alternative_stack));
+ total_match_length_minimum = min(alternative_minimum_length, total_match_length_minimum);
- consume();
- ByteCode right_alternative_stack;
- size_t right_alternative_min_length = 0;
- auto continuation_ok = parse_disjunction(right_alternative_stack, right_alternative_min_length, unicode, named);
- if (!continuation_ok)
- return false;
+ if (!match(TokenType::Pipe))
+ break;
+ consume();
+ } while (true);
+
+ Optional<ByteCode> alternative_stack {};
+ for (auto& alternative : alternatives) {
+ if (alternative_stack.has_value()) {
+ ByteCode target_stack;
+ target_stack.insert_bytecode_alternation(alternative_stack.release_value(), move(alternative));
+ alternative_stack = move(target_stack);
+ } else {
+ alternative_stack = move(alternative);
+ }
+ }
- stack.insert_bytecode_alternation(move(left_alternative_stack), move(right_alternative_stack));
- match_length_minimum = min(left_alternative_min_length, right_alternative_min_length);
- return continuation_ok;
+ stack.extend(alternative_stack.release_value());
+ match_length_minimum = total_match_length_minimum;
+ return true;
}
bool ECMA262Parser::parse_alternative(ByteCode& stack, size_t& match_length_minimum, bool unicode, bool named)