summaryrefslogtreecommitdiff
path: root/Userland/Libraries/LibRegex
diff options
context:
space:
mode:
authorAli Mohammad Pur <ali.mpfard@gmail.com>2022-02-19 17:34:55 +0330
committerAndreas Kling <kling@serenityos.org>2022-02-20 11:53:59 +0100
commit4be7239626c02709a34a825be44a5a73a2c1ea77 (patch)
tree4f676af2654edead6bc0985f99ce8d839d6d3a97 /Userland/Libraries/LibRegex
parent627bbee05580163b869d45fa67df0a4529eb3ca5 (diff)
downloadserenity-4be7239626c02709a34a825be44a5a73a2c1ea77.zip
LibRegex: Make parse_disjunction() consume all disjunctions in one frame
This helps us not blow up when too many disjunctions are chained togther in the regex we're parsing. Fixes #12615.
Diffstat (limited to 'Userland/Libraries/LibRegex')
-rw-r--r--Userland/Libraries/LibRegex/RegexParser.cpp47
1 files changed, 28 insertions, 19 deletions
diff --git a/Userland/Libraries/LibRegex/RegexParser.cpp b/Userland/Libraries/LibRegex/RegexParser.cpp
index 60d960a655..530570057c 100644
--- a/Userland/Libraries/LibRegex/RegexParser.cpp
+++ b/Userland/Libraries/LibRegex/RegexParser.cpp
@@ -956,28 +956,37 @@ bool ECMA262Parser::parse_pattern(ByteCode& stack, size_t& match_length_minimum,
bool ECMA262Parser::parse_disjunction(ByteCode& stack, size_t& match_length_minimum, bool unicode, bool named)
{
- ByteCode left_alternative_stack;
- size_t left_alternative_min_length = 0;
- auto alt_ok = parse_alternative(left_alternative_stack, left_alternative_min_length, unicode, named);
- if (!alt_ok)
- return false;
+ size_t total_match_length_minimum = NumericLimits<size_t>::max();
+ Vector<ByteCode> alternatives;
+ do {
+ ByteCode alternative_stack;
+ size_t alternative_minimum_length = 0;
+ auto alt_ok = parse_alternative(alternative_stack, alternative_minimum_length, unicode, named);
+ if (!alt_ok)
+ return false;
- if (!match(TokenType::Pipe)) {
- stack.extend(left_alternative_stack);
- match_length_minimum = left_alternative_min_length;
- return alt_ok;
- }
+ alternatives.append(move(alternative_stack));
+ total_match_length_minimum = min(alternative_minimum_length, total_match_length_minimum);
- consume();
- ByteCode right_alternative_stack;
- size_t right_alternative_min_length = 0;
- auto continuation_ok = parse_disjunction(right_alternative_stack, right_alternative_min_length, unicode, named);
- if (!continuation_ok)
- return false;
+ if (!match(TokenType::Pipe))
+ break;
+ consume();
+ } while (true);
+
+ Optional<ByteCode> alternative_stack {};
+ for (auto& alternative : alternatives) {
+ if (alternative_stack.has_value()) {
+ ByteCode target_stack;
+ target_stack.insert_bytecode_alternation(alternative_stack.release_value(), move(alternative));
+ alternative_stack = move(target_stack);
+ } else {
+ alternative_stack = move(alternative);
+ }
+ }
- stack.insert_bytecode_alternation(move(left_alternative_stack), move(right_alternative_stack));
- match_length_minimum = min(left_alternative_min_length, right_alternative_min_length);
- return continuation_ok;
+ stack.extend(alternative_stack.release_value());
+ match_length_minimum = total_match_length_minimum;
+ return true;
}
bool ECMA262Parser::parse_alternative(ByteCode& stack, size_t& match_length_minimum, bool unicode, bool named)