diff options
author | Ali Mohammad Pur <ali.mpfard@gmail.com> | 2021-07-31 16:03:45 +0430 |
---|---|---|
committer | Ali Mohammad Pur <Ali.mpfard@gmail.com> | 2021-08-02 17:22:50 +0430 |
commit | 5f342e4fa967e2cec84f958f3266ff6d51d591ff (patch) | |
tree | 8dffdd17d60b555da7a7ee864d754d8a713b5b0b | |
parent | a08870cc192ebf32583df61090a87af65480a82c (diff) | |
download | serenity-5f342e4fa967e2cec84f958f3266ff6d51d591ff.zip |
LibRegex: Make Fork{Jump,Stay} non-recursive
This makes very fork-heavy expressions (like `(aa)*`) not run out of
stack space when matching very long strings.
-rw-r--r-- | Userland/Libraries/LibRegex/RegexMatch.h | 1 | ||||
-rw-r--r-- | Userland/Libraries/LibRegex/RegexMatcher.cpp | 77 | ||||
-rw-r--r-- | Userland/Libraries/LibRegex/RegexMatcher.h | 3 |
3 files changed, 29 insertions, 52 deletions
diff --git a/Userland/Libraries/LibRegex/RegexMatch.h b/Userland/Libraries/LibRegex/RegexMatch.h index b6cd0e0793..28e68b6daf 100644 --- a/Userland/Libraries/LibRegex/RegexMatch.h +++ b/Userland/Libraries/LibRegex/RegexMatch.h @@ -469,6 +469,7 @@ struct MatchState { Vector<Match> matches; Vector<Vector<Match>> capture_group_matches; Vector<HashMap<String, Match>> named_capture_group_matches; + size_t recursion_level { 0 }; }; struct MatchOutput { diff --git a/Userland/Libraries/LibRegex/RegexMatcher.cpp b/Userland/Libraries/LibRegex/RegexMatcher.cpp index c980dd726c..b24ecf6b96 100644 --- a/Userland/Libraries/LibRegex/RegexMatcher.cpp +++ b/Userland/Libraries/LibRegex/RegexMatcher.cpp @@ -7,11 +7,12 @@ #include <AK/Debug.h> #include <AK/String.h> #include <AK/StringBuilder.h> +#include <LibRegex/RegexMatcher.h> +#include <LibRegex/RegexParser.h> + #if REGEX_DEBUG # include <LibRegex/RegexDebug.h> #endif -#include <LibRegex/RegexMatcher.h> -#include <LibRegex/RegexParser.h> namespace regex { @@ -210,10 +211,10 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const views, Optional state.string_position = view_index; state.instruction_position = 0; - auto success = execute(input, state, temp_output, 0); + auto success = execute(input, state, temp_output); // This success is acceptable only if it doesn't read anything from the input (input length is 0). if (state.string_position <= view_index) { - if (success.value()) { + if (success.has_value() && success.value()) { output = move(temp_output); if (!match_count) { // Nothing was *actually* matched, so append an empty match. @@ -241,7 +242,7 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const views, Optional state.string_position = view_index; state.instruction_position = 0; - auto success = execute(input, state, output, 0); + auto success = execute(input, state, output); if (!success.has_value()) return { false, 0, {}, {}, {}, output.operations }; @@ -334,14 +335,11 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const views, Optional } template<class Parser> -Optional<bool> Matcher<Parser>::execute(MatchInput const& input, MatchState& state, MatchOutput& output, size_t recursion_level) const +Optional<bool> Matcher<Parser>::execute(MatchInput const& input, MatchState& state, MatchOutput& output) const { - if (recursion_level > c_max_recursion) - return false; + state.recursion_level = 0; - Vector<MatchState, 64> reversed_fork_low_prio_states; - MatchState fork_high_prio_state; - Optional<bool> success; + Vector<MatchState, 64> states_to_try_next; auto& bytecode = m_pattern->parser_result.bytecode; @@ -350,7 +348,7 @@ Optional<bool> Matcher<Parser>::execute(MatchInput const& input, MatchState& sta auto& opcode = bytecode.get_opcode(state); #if REGEX_DEBUG - s_regex_dbg.print_opcode("VM", opcode, state, recursion_level, false); + s_regex_dbg.print_opcode("VM", opcode, state, state.recursion_level, false); #endif ExecutionResult result; @@ -369,33 +367,33 @@ Optional<bool> Matcher<Parser>::execute(MatchInput const& input, MatchState& sta switch (result) { case ExecutionResult::Fork_PrioLow: - reversed_fork_low_prio_states.append(state); + states_to_try_next.append(state); + states_to_try_next.last().instruction_position = state.fork_at_position; continue; case ExecutionResult::Fork_PrioHigh: - fork_high_prio_state = state; - fork_high_prio_state.instruction_position = fork_high_prio_state.fork_at_position; - success = execute(input, fork_high_prio_state, output, ++recursion_level); - if (!success.has_value()) - return {}; - - if (success.value()) { - state = fork_high_prio_state; - return true; - } - + states_to_try_next.append(state); + state.instruction_position = state.fork_at_position; + ++state.recursion_level; continue; case ExecutionResult::Continue: continue; case ExecutionResult::Succeeded: return true; case ExecutionResult::Failed: + if (!states_to_try_next.is_empty()) { + state = states_to_try_next.take_last(); + continue; + } return false; case ExecutionResult::Failed_ExecuteLowPrioForks: { - Vector<MatchState> fork_low_prio_states; - fork_low_prio_states.ensure_capacity(reversed_fork_low_prio_states.size()); - for (ssize_t i = reversed_fork_low_prio_states.size() - 1; i >= 0; i--) - fork_low_prio_states.unchecked_append(move(reversed_fork_low_prio_states[i])); - return execute_low_prio_forks(input, state, output, move(fork_low_prio_states), recursion_level + 1); + if (states_to_try_next.is_empty()) { + if (input.regex_options.has_flag_set(AllFlags::Internal_Stateful)) + return {}; + return false; + } + state = states_to_try_next.take_last(); + ++state.recursion_level; + continue; } } } @@ -403,27 +401,6 @@ Optional<bool> Matcher<Parser>::execute(MatchInput const& input, MatchState& sta VERIFY_NOT_REACHED(); } -template<class Parser> -ALWAYS_INLINE Optional<bool> Matcher<Parser>::execute_low_prio_forks(MatchInput const& input, MatchState& original_state, MatchOutput& output, Vector<MatchState> states, size_t recursion_level) const -{ - for (auto& state : states) { - - state.instruction_position = state.fork_at_position; - dbgln_if(REGEX_DEBUG, "Forkstay... ip = {}, sp = {}", state.instruction_position, state.string_position); - auto success = execute(input, state, output, recursion_level); - if (!success.has_value()) - return {}; - if (success.value()) { - dbgln_if(REGEX_DEBUG, "Forkstay succeeded... ip = {}, sp = {}", state.instruction_position, state.string_position); - original_state = state; - return true; - } - } - - original_state.string_position = 0; - return false; -} - template class Matcher<PosixBasicParser>; template class Regex<PosixBasicParser>; diff --git a/Userland/Libraries/LibRegex/RegexMatcher.h b/Userland/Libraries/LibRegex/RegexMatcher.h index 1229bacc94..4524fa4dc0 100644 --- a/Userland/Libraries/LibRegex/RegexMatcher.h +++ b/Userland/Libraries/LibRegex/RegexMatcher.h @@ -65,8 +65,7 @@ public: } private: - Optional<bool> execute(MatchInput const& input, MatchState& state, MatchOutput& output, size_t recursion_level) const; - ALWAYS_INLINE Optional<bool> execute_low_prio_forks(MatchInput const& input, MatchState& original_state, MatchOutput& output, Vector<MatchState> states, size_t recursion_level) const; + Optional<bool> execute(MatchInput const& input, MatchState& state, MatchOutput& output) const; Regex<Parser> const* m_pattern; typename ParserTraits<Parser>::OptionsType const m_regex_options; |