diff options
author | Ali Mohammad Pur <ali.mpfard@gmail.com> | 2022-02-19 17:34:55 +0330 |
---|---|---|
committer | Andreas Kling <kling@serenityos.org> | 2022-02-20 11:53:59 +0100 |
commit | 4be7239626c02709a34a825be44a5a73a2c1ea77 (patch) | |
tree | 4f676af2654edead6bc0985f99ce8d839d6d3a97 | |
parent | 627bbee05580163b869d45fa67df0a4529eb3ca5 (diff) | |
download | serenity-4be7239626c02709a34a825be44a5a73a2c1ea77.zip |
LibRegex: Make parse_disjunction() consume all disjunctions in one frame
This helps us not blow up when too many disjunctions are chained togther
in the regex we're parsing.
Fixes #12615.
-rw-r--r-- | Tests/LibRegex/Regex.cpp | 7 | ||||
-rw-r--r-- | Userland/Libraries/LibRegex/RegexParser.cpp | 47 |
2 files changed, 33 insertions, 21 deletions
diff --git a/Tests/LibRegex/Regex.cpp b/Tests/LibRegex/Regex.cpp index 2b11be4d37..97aeec48c4 100644 --- a/Tests/LibRegex/Regex.cpp +++ b/Tests/LibRegex/Regex.cpp @@ -498,6 +498,8 @@ TEST_CASE(posix_extended_nested_capture_group) EXPECT_EQ(result.capture_group_matches[0][2].view, "llo"sv); } +auto parse_test_case_long_disjunction_chain = String::repeated("a|"sv, 10000); + TEST_CASE(ECMA262_parse) { struct _test { @@ -506,7 +508,7 @@ TEST_CASE(ECMA262_parse) regex::ECMAScriptFlags flags {}; }; - constexpr _test tests[] { + _test const tests[] { { "^hello.$"sv }, { "^(hello.)$"sv }, { "^h{0,1}ello.$"sv }, @@ -599,7 +601,8 @@ TEST_CASE(ECMA262_parse) { "(?<$$_$$>a)"sv }, { "(?<ΓΏ>a)"sv }, { "(?<ππ»πΈππ·>a)"sv }, - { "((?=lg)?[vl]k\\-?\\d{3}) bui| 3\\.[-\\w; ]{10}lg?-([06cv9]{3,4})"sv, regex::Error::NoError, ECMAScriptFlags::BrowserExtended } // #12373, quantifiable assertions. + { "((?=lg)?[vl]k\\-?\\d{3}) bui| 3\\.[-\\w; ]{10}lg?-([06cv9]{3,4})"sv, regex::Error::NoError, ECMAScriptFlags::BrowserExtended }, // #12373, quantifiable assertions. + { parse_test_case_long_disjunction_chain.view() }, // A whole lot of disjunctions, should not overflow the stack. }; for (auto& test : tests) { diff --git a/Userland/Libraries/LibRegex/RegexParser.cpp b/Userland/Libraries/LibRegex/RegexParser.cpp index 60d960a655..530570057c 100644 --- a/Userland/Libraries/LibRegex/RegexParser.cpp +++ b/Userland/Libraries/LibRegex/RegexParser.cpp @@ -956,28 +956,37 @@ bool ECMA262Parser::parse_pattern(ByteCode& stack, size_t& match_length_minimum, bool ECMA262Parser::parse_disjunction(ByteCode& stack, size_t& match_length_minimum, bool unicode, bool named) { - ByteCode left_alternative_stack; - size_t left_alternative_min_length = 0; - auto alt_ok = parse_alternative(left_alternative_stack, left_alternative_min_length, unicode, named); - if (!alt_ok) - return false; + size_t total_match_length_minimum = NumericLimits<size_t>::max(); + Vector<ByteCode> alternatives; + do { + ByteCode alternative_stack; + size_t alternative_minimum_length = 0; + auto alt_ok = parse_alternative(alternative_stack, alternative_minimum_length, unicode, named); + if (!alt_ok) + return false; - if (!match(TokenType::Pipe)) { - stack.extend(left_alternative_stack); - match_length_minimum = left_alternative_min_length; - return alt_ok; - } + alternatives.append(move(alternative_stack)); + total_match_length_minimum = min(alternative_minimum_length, total_match_length_minimum); - consume(); - ByteCode right_alternative_stack; - size_t right_alternative_min_length = 0; - auto continuation_ok = parse_disjunction(right_alternative_stack, right_alternative_min_length, unicode, named); - if (!continuation_ok) - return false; + if (!match(TokenType::Pipe)) + break; + consume(); + } while (true); + + Optional<ByteCode> alternative_stack {}; + for (auto& alternative : alternatives) { + if (alternative_stack.has_value()) { + ByteCode target_stack; + target_stack.insert_bytecode_alternation(alternative_stack.release_value(), move(alternative)); + alternative_stack = move(target_stack); + } else { + alternative_stack = move(alternative); + } + } - stack.insert_bytecode_alternation(move(left_alternative_stack), move(right_alternative_stack)); - match_length_minimum = min(left_alternative_min_length, right_alternative_min_length); - return continuation_ok; + stack.extend(alternative_stack.release_value()); + match_length_minimum = total_match_length_minimum; + return true; } bool ECMA262Parser::parse_alternative(ByteCode& stack, size_t& match_length_minimum, bool unicode, bool named) |