From 4be7239626c02709a34a825be44a5a73a2c1ea77 Mon Sep 17 00:00:00 2001 From: Ali Mohammad Pur Date: Sat, 19 Feb 2022 17:34:55 +0330 Subject: LibRegex: Make parse_disjunction() consume all disjunctions in one frame This helps us not blow up when too many disjunctions are chained togther in the regex we're parsing. Fixes #12615. --- Userland/Libraries/LibRegex/RegexParser.cpp | 47 +++++++++++++++++------------ 1 file changed, 28 insertions(+), 19 deletions(-) (limited to 'Userland/Libraries/LibRegex') diff --git a/Userland/Libraries/LibRegex/RegexParser.cpp b/Userland/Libraries/LibRegex/RegexParser.cpp index 60d960a655..530570057c 100644 --- a/Userland/Libraries/LibRegex/RegexParser.cpp +++ b/Userland/Libraries/LibRegex/RegexParser.cpp @@ -956,28 +956,37 @@ bool ECMA262Parser::parse_pattern(ByteCode& stack, size_t& match_length_minimum, bool ECMA262Parser::parse_disjunction(ByteCode& stack, size_t& match_length_minimum, bool unicode, bool named) { - ByteCode left_alternative_stack; - size_t left_alternative_min_length = 0; - auto alt_ok = parse_alternative(left_alternative_stack, left_alternative_min_length, unicode, named); - if (!alt_ok) - return false; + size_t total_match_length_minimum = NumericLimits::max(); + Vector alternatives; + do { + ByteCode alternative_stack; + size_t alternative_minimum_length = 0; + auto alt_ok = parse_alternative(alternative_stack, alternative_minimum_length, unicode, named); + if (!alt_ok) + return false; - if (!match(TokenType::Pipe)) { - stack.extend(left_alternative_stack); - match_length_minimum = left_alternative_min_length; - return alt_ok; - } + alternatives.append(move(alternative_stack)); + total_match_length_minimum = min(alternative_minimum_length, total_match_length_minimum); - consume(); - ByteCode right_alternative_stack; - size_t right_alternative_min_length = 0; - auto continuation_ok = parse_disjunction(right_alternative_stack, right_alternative_min_length, unicode, named); - if (!continuation_ok) - return false; + if (!match(TokenType::Pipe)) + break; + consume(); + } while (true); + + Optional alternative_stack {}; + for (auto& alternative : alternatives) { + if (alternative_stack.has_value()) { + ByteCode target_stack; + target_stack.insert_bytecode_alternation(alternative_stack.release_value(), move(alternative)); + alternative_stack = move(target_stack); + } else { + alternative_stack = move(alternative); + } + } - stack.insert_bytecode_alternation(move(left_alternative_stack), move(right_alternative_stack)); - match_length_minimum = min(left_alternative_min_length, right_alternative_min_length); - return continuation_ok; + stack.extend(alternative_stack.release_value()); + match_length_minimum = total_match_length_minimum; + return true; } bool ECMA262Parser::parse_alternative(ByteCode& stack, size_t& match_length_minimum, bool unicode, bool named) -- cgit v1.2.3