diff options
-rw-r--r-- | Userland/Libraries/LibRegex/RegexByteCode.cpp | 32 | ||||
-rw-r--r-- | Userland/Libraries/LibRegex/RegexByteCode.h | 34 | ||||
-rw-r--r-- | Userland/Libraries/LibRegex/RegexMatch.h | 7 | ||||
-rw-r--r-- | Userland/Libraries/LibRegex/RegexMatcher.cpp | 55 | ||||
-rw-r--r-- | Userland/Libraries/LibRegex/RegexMatcher.h | 2 |
5 files changed, 60 insertions, 70 deletions
diff --git a/Userland/Libraries/LibRegex/RegexByteCode.cpp b/Userland/Libraries/LibRegex/RegexByteCode.cpp index 3d9b1d3882..146a0bbd1d 100644 --- a/Userland/Libraries/LibRegex/RegexByteCode.cpp +++ b/Userland/Libraries/LibRegex/RegexByteCode.cpp @@ -202,7 +202,7 @@ OpCode& ByteCode::get_opcode(MatchState& state) const return opcode; } -ALWAYS_INLINE ExecutionResult OpCode_Exit::execute(MatchInput const& input, MatchState& state, MatchOutput&) const +ALWAYS_INLINE ExecutionResult OpCode_Exit::execute(MatchInput const& input, MatchState& state) const { if (state.string_position > input.view.length() || state.instruction_position >= m_bytecode->size()) return ExecutionResult::Succeeded; @@ -210,20 +210,20 @@ ALWAYS_INLINE ExecutionResult OpCode_Exit::execute(MatchInput const& input, Matc return ExecutionResult::Failed; } -ALWAYS_INLINE ExecutionResult OpCode_Save::execute(MatchInput const& input, MatchState& state, MatchOutput&) const +ALWAYS_INLINE ExecutionResult OpCode_Save::execute(MatchInput const& input, MatchState& state) const { save_string_position(input, state); return ExecutionResult::Continue; } -ALWAYS_INLINE ExecutionResult OpCode_Restore::execute(MatchInput const& input, MatchState& state, MatchOutput&) const +ALWAYS_INLINE ExecutionResult OpCode_Restore::execute(MatchInput const& input, MatchState& state) const { if (!restore_string_position(input, state)) return ExecutionResult::Failed; return ExecutionResult::Continue; } -ALWAYS_INLINE ExecutionResult OpCode_GoBack::execute(MatchInput const&, MatchState& state, MatchOutput&) const +ALWAYS_INLINE ExecutionResult OpCode_GoBack::execute(MatchInput const&, MatchState& state) const { if (count() > state.string_position) return ExecutionResult::Failed_ExecuteLowPrioForks; @@ -232,7 +232,7 @@ ALWAYS_INLINE ExecutionResult OpCode_GoBack::execute(MatchInput const&, MatchSta return ExecutionResult::Continue; } -ALWAYS_INLINE ExecutionResult OpCode_FailForks::execute(MatchInput const& input, MatchState&, MatchOutput&) const +ALWAYS_INLINE ExecutionResult OpCode_FailForks::execute(MatchInput const& input, MatchState&) const { VERIFY(count() > 0); @@ -240,25 +240,25 @@ ALWAYS_INLINE ExecutionResult OpCode_FailForks::execute(MatchInput const& input, return ExecutionResult::Failed_ExecuteLowPrioForks; } -ALWAYS_INLINE ExecutionResult OpCode_Jump::execute(MatchInput const&, MatchState& state, MatchOutput&) const +ALWAYS_INLINE ExecutionResult OpCode_Jump::execute(MatchInput const&, MatchState& state) const { state.instruction_position += offset(); return ExecutionResult::Continue; } -ALWAYS_INLINE ExecutionResult OpCode_ForkJump::execute(MatchInput const&, MatchState& state, MatchOutput&) const +ALWAYS_INLINE ExecutionResult OpCode_ForkJump::execute(MatchInput const&, MatchState& state) const { state.fork_at_position = state.instruction_position + size() + offset(); return ExecutionResult::Fork_PrioHigh; } -ALWAYS_INLINE ExecutionResult OpCode_ForkStay::execute(MatchInput const&, MatchState& state, MatchOutput&) const +ALWAYS_INLINE ExecutionResult OpCode_ForkStay::execute(MatchInput const&, MatchState& state) const { state.fork_at_position = state.instruction_position + size() + offset(); return ExecutionResult::Fork_PrioLow; } -ALWAYS_INLINE ExecutionResult OpCode_CheckBegin::execute(MatchInput const& input, MatchState& state, MatchOutput&) const +ALWAYS_INLINE ExecutionResult OpCode_CheckBegin::execute(MatchInput const& input, MatchState& state) const { if (0 == state.string_position && (input.regex_options & AllFlags::MatchNotBeginOfLine)) return ExecutionResult::Failed_ExecuteLowPrioForks; @@ -271,7 +271,7 @@ ALWAYS_INLINE ExecutionResult OpCode_CheckBegin::execute(MatchInput const& input return ExecutionResult::Failed_ExecuteLowPrioForks; } -ALWAYS_INLINE ExecutionResult OpCode_CheckBoundary::execute(MatchInput const& input, MatchState& state, MatchOutput&) const +ALWAYS_INLINE ExecutionResult OpCode_CheckBoundary::execute(MatchInput const& input, MatchState& state) const { auto isword = [](auto ch) { return is_ascii_alphanumeric(ch) || ch == '_'; }; auto is_word_boundary = [&] { @@ -305,7 +305,7 @@ ALWAYS_INLINE ExecutionResult OpCode_CheckBoundary::execute(MatchInput const& in VERIFY_NOT_REACHED(); } -ALWAYS_INLINE ExecutionResult OpCode_CheckEnd::execute(MatchInput const& input, MatchState& state, MatchOutput&) const +ALWAYS_INLINE ExecutionResult OpCode_CheckEnd::execute(MatchInput const& input, MatchState& state) const { if (state.string_position == input.view.length() && (input.regex_options & AllFlags::MatchNotEndOfLine)) return ExecutionResult::Failed_ExecuteLowPrioForks; @@ -317,7 +317,7 @@ ALWAYS_INLINE ExecutionResult OpCode_CheckEnd::execute(MatchInput const& input, return ExecutionResult::Failed_ExecuteLowPrioForks; } -ALWAYS_INLINE ExecutionResult OpCode_ClearCaptureGroup::execute(MatchInput const& input, MatchState& state, MatchOutput&) const +ALWAYS_INLINE ExecutionResult OpCode_ClearCaptureGroup::execute(MatchInput const& input, MatchState& state) const { if (input.match_index < state.capture_group_matches.size()) { auto& group = state.capture_group_matches[input.match_index]; @@ -327,7 +327,7 @@ ALWAYS_INLINE ExecutionResult OpCode_ClearCaptureGroup::execute(MatchInput const return ExecutionResult::Continue; } -ALWAYS_INLINE ExecutionResult OpCode_SaveLeftCaptureGroup::execute(MatchInput const& input, MatchState& state, MatchOutput&) const +ALWAYS_INLINE ExecutionResult OpCode_SaveLeftCaptureGroup::execute(MatchInput const& input, MatchState& state) const { if (input.match_index >= state.capture_group_matches.size()) { state.capture_group_matches.ensure_capacity(input.match_index); @@ -347,7 +347,7 @@ ALWAYS_INLINE ExecutionResult OpCode_SaveLeftCaptureGroup::execute(MatchInput co return ExecutionResult::Continue; } -ALWAYS_INLINE ExecutionResult OpCode_SaveRightCaptureGroup::execute(MatchInput const& input, MatchState& state, MatchOutput&) const +ALWAYS_INLINE ExecutionResult OpCode_SaveRightCaptureGroup::execute(MatchInput const& input, MatchState& state) const { auto& match = state.capture_group_matches.at(input.match_index).at(id()); auto start_position = match.left_column; @@ -372,7 +372,7 @@ ALWAYS_INLINE ExecutionResult OpCode_SaveRightCaptureGroup::execute(MatchInput c return ExecutionResult::Continue; } -ALWAYS_INLINE ExecutionResult OpCode_SaveRightNamedCaptureGroup::execute(MatchInput const& input, MatchState& state, MatchOutput&) const +ALWAYS_INLINE ExecutionResult OpCode_SaveRightNamedCaptureGroup::execute(MatchInput const& input, MatchState& state) const { auto& match = state.capture_group_matches.at(input.match_index).at(id()); auto start_position = match.left_column; @@ -397,7 +397,7 @@ ALWAYS_INLINE ExecutionResult OpCode_SaveRightNamedCaptureGroup::execute(MatchIn return ExecutionResult::Continue; } -ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, MatchState& state, MatchOutput&) const +ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, MatchState& state) const { bool inverse { false }; bool temporary_inverse { false }; diff --git a/Userland/Libraries/LibRegex/RegexByteCode.h b/Userland/Libraries/LibRegex/RegexByteCode.h index ab305785e2..475ce61bd6 100644 --- a/Userland/Libraries/LibRegex/RegexByteCode.h +++ b/Userland/Libraries/LibRegex/RegexByteCode.h @@ -471,7 +471,7 @@ public: virtual OpCodeId opcode_id() const = 0; virtual size_t size() const = 0; - virtual ExecutionResult execute(MatchInput const& input, MatchState& state, MatchOutput& output) const = 0; + virtual ExecutionResult execute(MatchInput const& input, MatchState& state) const = 0; ALWAYS_INLINE ByteCodeValueType argument(size_t offset) const { @@ -508,7 +508,7 @@ protected: class OpCode_Exit final : public OpCode { public: - ExecutionResult execute(MatchInput const& input, MatchState& state, MatchOutput& output) const override; + ExecutionResult execute(MatchInput const& input, MatchState& state) const override; ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::Exit; } ALWAYS_INLINE size_t size() const override { return 1; } String const arguments_string() const override { return ""; } @@ -516,7 +516,7 @@ public: class OpCode_FailForks final : public OpCode { public: - ExecutionResult execute(MatchInput const& input, MatchState& state, MatchOutput& output) const override; + ExecutionResult execute(MatchInput const& input, MatchState& state) const override; ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::FailForks; } ALWAYS_INLINE size_t size() const override { return 2; } ALWAYS_INLINE size_t count() const { return argument(0); } @@ -525,7 +525,7 @@ public: class OpCode_Save final : public OpCode { public: - ExecutionResult execute(MatchInput const& input, MatchState& state, MatchOutput& output) const override; + ExecutionResult execute(MatchInput const& input, MatchState& state) const override; ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::Save; } ALWAYS_INLINE size_t size() const override { return 1; } String const arguments_string() const override { return ""; } @@ -533,7 +533,7 @@ public: class OpCode_Restore final : public OpCode { public: - ExecutionResult execute(MatchInput const& input, MatchState& state, MatchOutput& output) const override; + ExecutionResult execute(MatchInput const& input, MatchState& state) const override; ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::Restore; } ALWAYS_INLINE size_t size() const override { return 1; } String const arguments_string() const override { return ""; } @@ -541,7 +541,7 @@ public: class OpCode_GoBack final : public OpCode { public: - ExecutionResult execute(MatchInput const& input, MatchState& state, MatchOutput& output) const override; + ExecutionResult execute(MatchInput const& input, MatchState& state) const override; ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::GoBack; } ALWAYS_INLINE size_t size() const override { return 2; } ALWAYS_INLINE size_t count() const { return argument(0); } @@ -550,7 +550,7 @@ public: class OpCode_Jump final : public OpCode { public: - ExecutionResult execute(MatchInput const& input, MatchState& state, MatchOutput& output) const override; + ExecutionResult execute(MatchInput const& input, MatchState& state) const override; ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::Jump; } ALWAYS_INLINE size_t size() const override { return 2; } ALWAYS_INLINE ssize_t offset() const { return argument(0); } @@ -562,7 +562,7 @@ public: class OpCode_ForkJump final : public OpCode { public: - ExecutionResult execute(MatchInput const& input, MatchState& state, MatchOutput& output) const override; + ExecutionResult execute(MatchInput const& input, MatchState& state) const override; ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::ForkJump; } ALWAYS_INLINE size_t size() const override { return 2; } ALWAYS_INLINE ssize_t offset() const { return argument(0); } @@ -574,7 +574,7 @@ public: class OpCode_ForkStay final : public OpCode { public: - ExecutionResult execute(MatchInput const& input, MatchState& state, MatchOutput& output) const override; + ExecutionResult execute(MatchInput const& input, MatchState& state) const override; ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::ForkStay; } ALWAYS_INLINE size_t size() const override { return 2; } ALWAYS_INLINE ssize_t offset() const { return argument(0); } @@ -586,7 +586,7 @@ public: class OpCode_CheckBegin final : public OpCode { public: - ExecutionResult execute(MatchInput const& input, MatchState& state, MatchOutput& output) const override; + ExecutionResult execute(MatchInput const& input, MatchState& state) const override; ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::CheckBegin; } ALWAYS_INLINE size_t size() const override { return 1; } String const arguments_string() const override { return ""; } @@ -594,7 +594,7 @@ public: class OpCode_CheckEnd final : public OpCode { public: - ExecutionResult execute(MatchInput const& input, MatchState& state, MatchOutput& output) const override; + ExecutionResult execute(MatchInput const& input, MatchState& state) const override; ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::CheckEnd; } ALWAYS_INLINE size_t size() const override { return 1; } String const arguments_string() const override { return ""; } @@ -602,7 +602,7 @@ public: class OpCode_CheckBoundary final : public OpCode { public: - ExecutionResult execute(MatchInput const& input, MatchState& state, MatchOutput& output) const override; + ExecutionResult execute(MatchInput const& input, MatchState& state) const override; ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::CheckBoundary; } ALWAYS_INLINE size_t size() const override { return 2; } ALWAYS_INLINE size_t arguments_count() const { return 1; } @@ -612,7 +612,7 @@ public: class OpCode_ClearCaptureGroup final : public OpCode { public: - ExecutionResult execute(MatchInput const& input, MatchState& state, MatchOutput& output) const override; + ExecutionResult execute(MatchInput const& input, MatchState& state) const override; ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::ClearCaptureGroup; } ALWAYS_INLINE size_t size() const override { return 2; } ALWAYS_INLINE size_t id() const { return argument(0); } @@ -621,7 +621,7 @@ public: class OpCode_SaveLeftCaptureGroup final : public OpCode { public: - ExecutionResult execute(MatchInput const& input, MatchState& state, MatchOutput& output) const override; + ExecutionResult execute(MatchInput const& input, MatchState& state) const override; ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::SaveLeftCaptureGroup; } ALWAYS_INLINE size_t size() const override { return 2; } ALWAYS_INLINE size_t id() const { return argument(0); } @@ -630,7 +630,7 @@ public: class OpCode_SaveRightCaptureGroup final : public OpCode { public: - ExecutionResult execute(MatchInput const& input, MatchState& state, MatchOutput& output) const override; + ExecutionResult execute(MatchInput const& input, MatchState& state) const override; ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::SaveRightCaptureGroup; } ALWAYS_INLINE size_t size() const override { return 2; } ALWAYS_INLINE size_t id() const { return argument(0); } @@ -639,7 +639,7 @@ public: class OpCode_SaveRightNamedCaptureGroup final : public OpCode { public: - ExecutionResult execute(MatchInput const& input, MatchState& state, MatchOutput& output) const override; + ExecutionResult execute(MatchInput const& input, MatchState& state) const override; ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::SaveRightNamedCaptureGroup; } ALWAYS_INLINE size_t size() const override { return 4; } ALWAYS_INLINE StringView name() const { return { reinterpret_cast<char*>(argument(0)), length() }; } @@ -653,7 +653,7 @@ public: class OpCode_Compare final : public OpCode { public: - ExecutionResult execute(MatchInput const& input, MatchState& state, MatchOutput& output) const override; + ExecutionResult execute(MatchInput const& input, MatchState& state) const override; ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::Compare; } ALWAYS_INLINE size_t size() const override { return arguments_size() + 3; } ALWAYS_INLINE size_t arguments_count() const { return argument(0); } diff --git a/Userland/Libraries/LibRegex/RegexMatch.h b/Userland/Libraries/LibRegex/RegexMatch.h index 98c324b359..dc0b7bb52a 100644 --- a/Userland/Libraries/LibRegex/RegexMatch.h +++ b/Userland/Libraries/LibRegex/RegexMatch.h @@ -507,13 +507,6 @@ struct MatchState { Vector<Vector<Match>> capture_group_matches; }; -struct MatchOutput { - size_t operations; - Vector<Match> matches; - Vector<Vector<Match>> capture_group_matches; - Vector<HashMap<String, Match>> named_capture_group_matches; -}; - } using regex::RegexStringView; diff --git a/Userland/Libraries/LibRegex/RegexMatcher.cpp b/Userland/Libraries/LibRegex/RegexMatcher.cpp index 0d41c9af42..4fed9ec691 100644 --- a/Userland/Libraries/LibRegex/RegexMatcher.cpp +++ b/Userland/Libraries/LibRegex/RegexMatcher.cpp @@ -121,11 +121,10 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona MatchInput input; MatchState state; - MatchOutput output; + size_t operations = 0; input.regex_options = m_regex_options | regex_options.value_or({}).value(); input.start_offset = m_pattern->start_offset; - output.operations = 0; size_t lines_to_skip = 0; bool unicode = input.regex_options.has_flag_set(AllFlags::Unicode); @@ -199,7 +198,7 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona // Run the code until it tries to consume something. // This allows non-consuming code to run on empty strings, for instance // e.g. "Exit" - MatchOutput temp_output { output }; + size_t temp_operations = operations; input.column = match_count; input.match_index = match_count; @@ -208,11 +207,11 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona state.string_position_in_code_units = view_index; state.instruction_position = 0; - auto success = execute(input, state, temp_output); + auto success = execute(input, state, temp_operations); // This success is acceptable only if it doesn't read anything from the input (input length is 0). if (state.string_position <= view_index) { if (success.has_value() && success.value()) { - output = move(temp_output); + operations = temp_operations; if (!match_count) { // Nothing was *actually* matched, so append an empty match. append_match(input, state, view_index); @@ -240,9 +239,9 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona state.string_position_in_code_units = view_index; state.instruction_position = 0; - auto success = execute(input, state, output); + auto success = execute(input, state, operations); if (!success.has_value()) - return { false, 0, {}, {}, {}, output.operations }; + return { false, 0, {}, {}, {}, operations }; if (success.value()) { succeeded = true; @@ -275,7 +274,7 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona break; } else if (state.string_position < view_length) { - return { false, 0, {}, {}, {}, output.operations }; + return { false, 0, {}, {}, {}, operations }; } append_match(input, state, view_index); @@ -296,33 +295,31 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona break; } - MatchOutput output_copy; + RegexResult result { + match_count != 0, + match_count, + move(state.matches), + move(state.capture_group_matches), + operations, + m_pattern->parser_result.capture_groups_count, + m_pattern->parser_result.named_capture_groups_count, + }; + if (match_count) { - output_copy.capture_group_matches = state.capture_group_matches; // Make sure there are as many capture matches as there are actual matches. - if (output_copy.capture_group_matches.size() < match_count) - output_copy.capture_group_matches.resize(match_count); - for (auto& matches : output_copy.capture_group_matches) + if (result.capture_group_matches.size() < match_count) + result.capture_group_matches.resize(match_count); + for (auto& matches : result.capture_group_matches) matches.resize(m_pattern->parser_result.capture_groups_count + 1); if (!input.regex_options.has_flag_set(AllFlags::SkipTrimEmptyMatches)) { - for (auto& matches : output_copy.capture_group_matches) + for (auto& matches : result.capture_group_matches) matches.template remove_all_matching([](auto& match) { return match.view.is_null(); }); } - - output_copy.matches = state.matches; } else { - output_copy.capture_group_matches.clear_with_capacity(); + result.capture_group_matches.clear_with_capacity(); } - return { - match_count != 0, - match_count, - move(output_copy.matches), - move(output_copy.capture_group_matches), - output.operations, - m_pattern->parser_result.capture_groups_count, - m_pattern->parser_result.named_capture_groups_count, - }; + return result; } template<typename T> @@ -384,7 +381,7 @@ private: }; template<class Parser> -Optional<bool> Matcher<Parser>::execute(MatchInput const& input, MatchState& state, MatchOutput& output) const +Optional<bool> Matcher<Parser>::execute(MatchInput const& input, MatchState& state, size_t& operations) const { BumpAllocatedLinkedList<MatchState> states_to_try_next; size_t recursion_level = 0; @@ -392,8 +389,8 @@ Optional<bool> Matcher<Parser>::execute(MatchInput const& input, MatchState& sta auto& bytecode = m_pattern->parser_result.bytecode; for (;;) { - ++output.operations; auto& opcode = bytecode.get_opcode(state); + ++operations; #if REGEX_DEBUG s_regex_dbg.print_opcode("VM", opcode, state, recursion_level, false); @@ -404,7 +401,7 @@ Optional<bool> Matcher<Parser>::execute(MatchInput const& input, MatchState& sta --input.fail_counter; result = ExecutionResult::Failed_ExecuteLowPrioForks; } else { - result = opcode.execute(input, state, output); + result = opcode.execute(input, state); } #if REGEX_DEBUG diff --git a/Userland/Libraries/LibRegex/RegexMatcher.h b/Userland/Libraries/LibRegex/RegexMatcher.h index 4a1e64b8f4..9b91fcff4d 100644 --- a/Userland/Libraries/LibRegex/RegexMatcher.h +++ b/Userland/Libraries/LibRegex/RegexMatcher.h @@ -64,7 +64,7 @@ public: } private: - Optional<bool> execute(MatchInput const& input, MatchState& state, MatchOutput& output) const; + Optional<bool> execute(MatchInput const& input, MatchState& state, size_t& operations) const; Regex<Parser> const* m_pattern; typename ParserTraits<Parser>::OptionsType const m_regex_options; |