diff options
author | Ali Mohammad Pur <ali.mpfard@gmail.com> | 2022-02-19 17:34:55 +0330 |
---|---|---|
committer | Andreas Kling <kling@serenityos.org> | 2022-02-20 11:53:59 +0100 |
commit | 4be7239626c02709a34a825be44a5a73a2c1ea77 (patch) | |
tree | 4f676af2654edead6bc0985f99ce8d839d6d3a97 /Userland | |
parent | 627bbee05580163b869d45fa67df0a4529eb3ca5 (diff) | |
download | serenity-4be7239626c02709a34a825be44a5a73a2c1ea77.zip |
LibRegex: Make parse_disjunction() consume all disjunctions in one frame
This helps us not blow up when too many disjunctions are chained togther
in the regex we're parsing.
Fixes #12615.
Diffstat (limited to 'Userland')
-rw-r--r-- | Userland/Libraries/LibRegex/RegexParser.cpp | 47 |
1 files changed, 28 insertions, 19 deletions
diff --git a/Userland/Libraries/LibRegex/RegexParser.cpp b/Userland/Libraries/LibRegex/RegexParser.cpp index 60d960a655..530570057c 100644 --- a/Userland/Libraries/LibRegex/RegexParser.cpp +++ b/Userland/Libraries/LibRegex/RegexParser.cpp @@ -956,28 +956,37 @@ bool ECMA262Parser::parse_pattern(ByteCode& stack, size_t& match_length_minimum, bool ECMA262Parser::parse_disjunction(ByteCode& stack, size_t& match_length_minimum, bool unicode, bool named) { - ByteCode left_alternative_stack; - size_t left_alternative_min_length = 0; - auto alt_ok = parse_alternative(left_alternative_stack, left_alternative_min_length, unicode, named); - if (!alt_ok) - return false; + size_t total_match_length_minimum = NumericLimits<size_t>::max(); + Vector<ByteCode> alternatives; + do { + ByteCode alternative_stack; + size_t alternative_minimum_length = 0; + auto alt_ok = parse_alternative(alternative_stack, alternative_minimum_length, unicode, named); + if (!alt_ok) + return false; - if (!match(TokenType::Pipe)) { - stack.extend(left_alternative_stack); - match_length_minimum = left_alternative_min_length; - return alt_ok; - } + alternatives.append(move(alternative_stack)); + total_match_length_minimum = min(alternative_minimum_length, total_match_length_minimum); - consume(); - ByteCode right_alternative_stack; - size_t right_alternative_min_length = 0; - auto continuation_ok = parse_disjunction(right_alternative_stack, right_alternative_min_length, unicode, named); - if (!continuation_ok) - return false; + if (!match(TokenType::Pipe)) + break; + consume(); + } while (true); + + Optional<ByteCode> alternative_stack {}; + for (auto& alternative : alternatives) { + if (alternative_stack.has_value()) { + ByteCode target_stack; + target_stack.insert_bytecode_alternation(alternative_stack.release_value(), move(alternative)); + alternative_stack = move(target_stack); + } else { + alternative_stack = move(alternative); + } + } - stack.insert_bytecode_alternation(move(left_alternative_stack), move(right_alternative_stack)); - match_length_minimum = min(left_alternative_min_length, right_alternative_min_length); - return continuation_ok; + stack.extend(alternative_stack.release_value()); + match_length_minimum = total_match_length_minimum; + return true; } bool ECMA262Parser::parse_alternative(ByteCode& stack, size_t& match_length_minimum, bool unicode, bool named) |