diff options
-rw-r--r-- | Tests/LibRegex/Regex.cpp | 12 | ||||
-rw-r--r-- | Userland/Libraries/LibJS/Runtime/RegExpPrototype.cpp | 22 | ||||
-rw-r--r-- | Userland/Libraries/LibRegex/RegexByteCode.cpp | 80 | ||||
-rw-r--r-- | Userland/Libraries/LibRegex/RegexByteCode.h | 69 | ||||
-rw-r--r-- | Userland/Libraries/LibRegex/RegexMatch.h | 15 | ||||
-rw-r--r-- | Userland/Libraries/LibRegex/RegexMatcher.cpp | 22 | ||||
-rw-r--r-- | Userland/Libraries/LibRegex/RegexMatcher.h | 1 | ||||
-rw-r--r-- | Userland/Libraries/LibRegex/RegexParser.cpp | 39 | ||||
-rw-r--r-- | Userland/Libraries/LibRegex/RegexParser.h | 11 |
9 files changed, 69 insertions, 202 deletions
diff --git a/Tests/LibRegex/Regex.cpp b/Tests/LibRegex/Regex.cpp index 9529ce790c..ea8707ccf7 100644 --- a/Tests/LibRegex/Regex.cpp +++ b/Tests/LibRegex/Regex.cpp @@ -422,9 +422,11 @@ TEST_CASE(named_capture_group) EXPECT_EQ(re.search(haystack, result, PosixFlags::Multiline), true); EXPECT_EQ(result.count, 2u); EXPECT_EQ(result.matches.at(0).view, "Opacity=255"); - EXPECT_EQ(result.named_capture_group_matches.at(0).ensure("Test").view, "255"); + EXPECT_EQ(result.capture_group_matches.at(0).at(0).view, "255"); + EXPECT_EQ(result.capture_group_matches.at(0).at(0).capture_group_name, "Test"); EXPECT_EQ(result.matches.at(1).view, "AudibleBeep=0"); - EXPECT_EQ(result.named_capture_group_matches.at(1).ensure("Test").view, "0"); + EXPECT_EQ(result.capture_group_matches.at(1).at(0).view, "0"); + EXPECT_EQ(result.capture_group_matches.at(1).at(0).capture_group_name, "Test"); } TEST_CASE(ecma262_named_capture_group_with_dollar_sign) @@ -443,9 +445,11 @@ TEST_CASE(ecma262_named_capture_group_with_dollar_sign) EXPECT_EQ(re.search(haystack, result, ECMAScriptFlags::Multiline), true); EXPECT_EQ(result.count, 2u); EXPECT_EQ(result.matches.at(0).view, "Opacity=255"); - EXPECT_EQ(result.named_capture_group_matches.at(0).ensure("$Test$").view, "255"); + EXPECT_EQ(result.capture_group_matches.at(0).at(0).view, "255"); + EXPECT_EQ(result.capture_group_matches.at(0).at(0).capture_group_name, "$Test$"); EXPECT_EQ(result.matches.at(1).view, "AudibleBeep=0"); - EXPECT_EQ(result.named_capture_group_matches.at(1).ensure("$Test$").view, "0"); + EXPECT_EQ(result.capture_group_matches.at(1).at(0).view, "0"); + EXPECT_EQ(result.capture_group_matches.at(1).at(0).capture_group_name, "$Test$"); } TEST_CASE(a_star) diff --git a/Userland/Libraries/LibJS/Runtime/RegExpPrototype.cpp b/Userland/Libraries/LibJS/Runtime/RegExpPrototype.cpp index 7a5c4fad99..ca3f4593b5 100644 --- a/Userland/Libraries/LibJS/Runtime/RegExpPrototype.cpp +++ b/Userland/Libraries/LibJS/Runtime/RegExpPrototype.cpp @@ -263,13 +263,16 @@ static Value regexp_builtin_exec(GlobalObject& global_object, RegExpObject& rege return {}; } - auto* array = Array::create(global_object, result.n_capture_groups + 1); + auto* array = Array::create(global_object, result.n_named_capture_groups + 1); if (vm.exception()) return {}; Vector<Optional<Match>> indices { Match::create(match) }; HashMap<String, Match> group_names; + bool has_groups = result.n_named_capture_groups != 0; + Object* groups_object = has_groups ? Object::create(global_object, nullptr) : nullptr; + for (size_t i = 0; i < result.n_capture_groups; ++i) { auto capture_value = js_undefined(); auto& capture = result.capture_group_matches[0][i + 1]; @@ -280,22 +283,15 @@ static Value regexp_builtin_exec(GlobalObject& global_object, RegExpObject& rege indices.append(Match::create(capture)); } array->create_data_property_or_throw(i + 1, capture_value); - } - - bool has_groups = result.n_named_capture_groups > 0; - Value groups = js_undefined(); - if (has_groups) { - auto groups_object = Object::create(global_object, nullptr); - - for (auto& entry : result.named_capture_group_matches[0]) { - groups_object->create_data_property_or_throw(entry.key, js_string(vm, entry.value.view.u16_view())); - group_names.set(entry.key, Match::create(entry.value)); + if (capture.capture_group_name.has_value()) { + auto group_name = capture.capture_group_name->to_string(); + groups_object->create_data_property_or_throw(group_name, js_string(vm, capture.view.u16_view())); + group_names.set(move(group_name), Match::create(capture)); } - - groups = move(groups_object); } + Value groups = has_groups ? groups_object : js_undefined(); array->create_data_property_or_throw(vm.names.groups, groups); if (has_indices) { diff --git a/Userland/Libraries/LibRegex/RegexByteCode.cpp b/Userland/Libraries/LibRegex/RegexByteCode.cpp index 7cad14ba92..3d9b1d3882 100644 --- a/Userland/Libraries/LibRegex/RegexByteCode.cpp +++ b/Userland/Libraries/LibRegex/RegexByteCode.cpp @@ -166,18 +166,12 @@ void ByteCode::ensure_opcodes_initialized() case OpCodeId::ClearCaptureGroup: s_opcodes[i] = make<OpCode_ClearCaptureGroup>(); break; - case OpCodeId::ClearNamedCaptureGroup: - s_opcodes[i] = make<OpCode_ClearNamedCaptureGroup>(); - break; case OpCodeId::SaveLeftCaptureGroup: s_opcodes[i] = make<OpCode_SaveLeftCaptureGroup>(); break; case OpCodeId::SaveRightCaptureGroup: s_opcodes[i] = make<OpCode_SaveRightCaptureGroup>(); break; - case OpCodeId::SaveLeftNamedCaptureGroup: - s_opcodes[i] = make<OpCode_SaveLeftNamedCaptureGroup>(); - break; case OpCodeId::SaveRightNamedCaptureGroup: s_opcodes[i] = make<OpCode_SaveRightNamedCaptureGroup>(); break; @@ -378,52 +372,26 @@ ALWAYS_INLINE ExecutionResult OpCode_SaveRightCaptureGroup::execute(MatchInput c return ExecutionResult::Continue; } -ALWAYS_INLINE ExecutionResult OpCode_ClearNamedCaptureGroup::execute(MatchInput const& input, MatchState& state, MatchOutput&) const -{ - if (input.match_index < state.capture_group_matches.size()) { - auto& group = state.named_capture_group_matches[input.match_index]; - if (auto it = group.find(name()); it != group.end()) - it->value.reset(); - } - return ExecutionResult::Continue; -} - -ALWAYS_INLINE ExecutionResult OpCode_SaveLeftNamedCaptureGroup::execute(MatchInput const& input, MatchState& state, MatchOutput&) const -{ - if (input.match_index >= state.named_capture_group_matches.size()) { - state.named_capture_group_matches.ensure_capacity(input.match_index); - auto capacity = state.named_capture_group_matches.capacity(); - for (size_t i = state.named_capture_group_matches.size(); i <= capacity; ++i) - state.named_capture_group_matches.empend(); - } - state.named_capture_group_matches.at(input.match_index).ensure(name()).column = state.string_position; - return ExecutionResult::Continue; -} - ALWAYS_INLINE ExecutionResult OpCode_SaveRightNamedCaptureGroup::execute(MatchInput const& input, MatchState& state, MatchOutput&) const { - StringView capture_group_name = name(); + auto& match = state.capture_group_matches.at(input.match_index).at(id()); + auto start_position = match.left_column; + if (state.string_position < start_position) + return ExecutionResult::Failed_ExecuteLowPrioForks; + + auto length = state.string_position - start_position; - if (state.named_capture_group_matches.at(input.match_index).contains(capture_group_name)) { - auto start_position = state.named_capture_group_matches.at(input.match_index).ensure(capture_group_name).column; - auto length = state.string_position - start_position; + if (start_position < match.column) + return ExecutionResult::Continue; - auto& map = state.named_capture_group_matches.at(input.match_index); + VERIFY(start_position + length <= input.view.length()); - if constexpr (REGEX_DEBUG) { - VERIFY(start_position + length <= input.view.length()); - dbgln("Save named capture group with name={} and content='{}'", capture_group_name, input.view.substring_view(start_position, length)); - } + auto view = input.view.substring_view(start_position, length); - VERIFY(start_position + length <= input.view.length()); - auto view = input.view.substring_view(start_position, length); - if (input.regex_options & AllFlags::StringCopyMatches) { - map.set(capture_group_name, { view.to_string(), input.line, start_position, input.global_offset + start_position }); // create a copy of the original string - } else { - map.set(capture_group_name, { view, input.line, start_position, input.global_offset + start_position }); // take view to original string - } + if (input.regex_options & AllFlags::StringCopyMatches) { + match = { view.to_string(), name(), input.line, start_position, input.global_offset + start_position }; // create a copy of the original string } else { - warnln("Didn't find corresponding capture group match for name={}, match_index={}", capture_group_name.to_string(), input.match_index); + match = { view, name(), input.line, start_position, input.global_offset + start_position }; // take view to original string } return ExecutionResult::Continue; @@ -543,24 +511,6 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M if (!compare_string(input, state, str, had_zero_length_match)) return ExecutionResult::Failed_ExecuteLowPrioForks; - } else if (compare_type == CharacterCompareType::NamedReference) { - auto ptr = (char const*)m_bytecode->at(offset++); - auto length = (size_t)m_bytecode->at(offset++); - StringView name { ptr, length }; - - auto group = state.named_capture_group_matches.at(input.match_index).get(name); - if (!group.has_value()) - return ExecutionResult::Failed_ExecuteLowPrioForks; - - auto str = group.value().view; - - // We want to compare a string that is definitely longer than the available string - if (input.view.length() < state.string_position + str.length()) - return ExecutionResult::Failed_ExecuteLowPrioForks; - - if (!compare_string(input, state, str, had_zero_length_match)) - return ExecutionResult::Failed_ExecuteLowPrioForks; - } else if (compare_type == CharacterCompareType::Property) { auto property = static_cast<Unicode::Property>(m_bytecode->at(offset++)); compare_property(input, state, property, current_inversion_state(), inverse_matched); @@ -869,10 +819,6 @@ Vector<String> const OpCode_Compare::variable_arguments_to_string(Optional<Match buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7])); } } - } else if (compare_type == CharacterCompareType::NamedReference) { - auto ptr = (char const*)m_bytecode->at(offset++); - auto length = m_bytecode->at(offset++); - result.empend(String::formatted("name='{}'", StringView { ptr, (size_t)length })); } else if (compare_type == CharacterCompareType::Reference) { auto ref = m_bytecode->at(offset++); result.empend(String::formatted("number={}", ref)); diff --git a/Userland/Libraries/LibRegex/RegexByteCode.h b/Userland/Libraries/LibRegex/RegexByteCode.h index c1c7af235b..ab305785e2 100644 --- a/Userland/Libraries/LibRegex/RegexByteCode.h +++ b/Userland/Libraries/LibRegex/RegexByteCode.h @@ -32,7 +32,6 @@ using ByteCodeValueType = u64; __ENUMERATE_OPCODE(FailForks) \ __ENUMERATE_OPCODE(SaveLeftCaptureGroup) \ __ENUMERATE_OPCODE(SaveRightCaptureGroup) \ - __ENUMERATE_OPCODE(SaveLeftNamedCaptureGroup) \ __ENUMERATE_OPCODE(SaveRightNamedCaptureGroup) \ __ENUMERATE_OPCODE(CheckBegin) \ __ENUMERATE_OPCODE(CheckEnd) \ @@ -41,7 +40,6 @@ using ByteCodeValueType = u64; __ENUMERATE_OPCODE(Restore) \ __ENUMERATE_OPCODE(GoBack) \ __ENUMERATE_OPCODE(ClearCaptureGroup) \ - __ENUMERATE_OPCODE(ClearNamedCaptureGroup) \ __ENUMERATE_OPCODE(Exit) // clang-format off @@ -65,7 +63,6 @@ enum class OpCodeId : ByteCodeValueType { __ENUMERATE_CHARACTER_COMPARE_TYPE(CharClass) \ __ENUMERATE_CHARACTER_COMPARE_TYPE(CharRange) \ __ENUMERATE_CHARACTER_COMPARE_TYPE(Reference) \ - __ENUMERATE_CHARACTER_COMPARE_TYPE(NamedReference) \ __ENUMERATE_CHARACTER_COMPARE_TYPE(Property) \ __ENUMERATE_CHARACTER_COMPARE_TYPE(GeneralCategory) \ __ENUMERATE_CHARACTER_COMPARE_TYPE(Script) \ @@ -159,7 +156,6 @@ public: VERIFY(value.type != CharacterCompareType::RangeExpressionDummy); VERIFY(value.type != CharacterCompareType::Undefined); VERIFY(value.type != CharacterCompareType::String); - VERIFY(value.type != CharacterCompareType::NamedReference); arguments.append((ByteCodeValueType)value.type); if (value.type != CharacterCompareType::Inverse && value.type != CharacterCompareType::AnyChar && value.type != CharacterCompareType::TemporaryInverse) @@ -187,13 +183,6 @@ public: empend(index); } - void insert_bytecode_clear_named_capture_group(StringView name) - { - empend(static_cast<ByteCodeValueType>(OpCodeId::ClearNamedCaptureGroup)); - empend(reinterpret_cast<ByteCodeValueType>(name.characters_without_null_termination())); - empend(name.length()); - } - void insert_bytecode_compare_string(StringView view) { ByteCode bytecode; @@ -212,49 +201,24 @@ public: extend(move(bytecode)); } - void insert_bytecode_compare_named_reference(StringView name) - { - ByteCode bytecode; - - bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::Compare)); - bytecode.empend(static_cast<u64>(1)); // number of arguments - - ByteCode arguments; - - arguments.empend(static_cast<ByteCodeValueType>(CharacterCompareType::NamedReference)); - arguments.empend(reinterpret_cast<ByteCodeValueType>(name.characters_without_null_termination())); - arguments.empend(name.length()); - - bytecode.empend(arguments.size()); // size of arguments - bytecode.extend(move(arguments)); - - extend(move(bytecode)); - } - void insert_bytecode_group_capture_left(size_t capture_groups_count) { empend(static_cast<ByteCodeValueType>(OpCodeId::SaveLeftCaptureGroup)); empend(capture_groups_count); } - void insert_bytecode_group_capture_left(StringView const& name) - { - empend(static_cast<ByteCodeValueType>(OpCodeId::SaveLeftNamedCaptureGroup)); - empend(reinterpret_cast<ByteCodeValueType>(name.characters_without_null_termination())); - empend(name.length()); - } - void insert_bytecode_group_capture_right(size_t capture_groups_count) { empend(static_cast<ByteCodeValueType>(OpCodeId::SaveRightCaptureGroup)); empend(capture_groups_count); } - void insert_bytecode_group_capture_right(StringView const& name) + void insert_bytecode_group_capture_right(size_t capture_groups_count, StringView const& name) { empend(static_cast<ByteCodeValueType>(OpCodeId::SaveRightNamedCaptureGroup)); empend(reinterpret_cast<ByteCodeValueType>(name.characters_without_null_termination())); empend(name.length()); + empend(capture_groups_count); } enum class LookAroundType { @@ -655,19 +619,6 @@ public: String const arguments_string() const override { return String::formatted("id={}", id()); } }; -class OpCode_ClearNamedCaptureGroup final : public OpCode { -public: - ExecutionResult execute(MatchInput const& input, MatchState& state, MatchOutput& output) const override; - ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::ClearNamedCaptureGroup; } - ALWAYS_INLINE size_t size() const override { return 3; } - ALWAYS_INLINE StringView name() const { return { reinterpret_cast<char*>(argument(0)), length() }; } - ALWAYS_INLINE size_t length() const { return argument(1); } - String const arguments_string() const override - { - return String::formatted("name={}, length={}", name(), length()); - } -}; - class OpCode_SaveLeftCaptureGroup final : public OpCode { public: ExecutionResult execute(MatchInput const& input, MatchState& state, MatchOutput& output) const override; @@ -686,26 +637,14 @@ public: String const arguments_string() const override { return String::formatted("id={}", id()); } }; -class OpCode_SaveLeftNamedCaptureGroup final : public OpCode { -public: - ExecutionResult execute(MatchInput const& input, MatchState& state, MatchOutput& output) const override; - ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::SaveLeftNamedCaptureGroup; } - ALWAYS_INLINE size_t size() const override { return 3; } - ALWAYS_INLINE StringView name() const { return { reinterpret_cast<char*>(argument(0)), length() }; } - ALWAYS_INLINE size_t length() const { return argument(1); } - String const arguments_string() const override - { - return String::formatted("name={}, length={}", name(), length()); - } -}; - class OpCode_SaveRightNamedCaptureGroup final : public OpCode { public: ExecutionResult execute(MatchInput const& input, MatchState& state, MatchOutput& output) const override; ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::SaveRightNamedCaptureGroup; } - ALWAYS_INLINE size_t size() const override { return 3; } + ALWAYS_INLINE size_t size() const override { return 4; } ALWAYS_INLINE StringView name() const { return { reinterpret_cast<char*>(argument(0)), length() }; } ALWAYS_INLINE size_t length() const { return argument(1); } + ALWAYS_INLINE size_t id() const { return argument(2); } String const arguments_string() const override { return String::formatted("name={}, length={}", name(), length()); diff --git a/Userland/Libraries/LibRegex/RegexMatch.h b/Userland/Libraries/LibRegex/RegexMatch.h index 599e68451a..98c324b359 100644 --- a/Userland/Libraries/LibRegex/RegexMatch.h +++ b/Userland/Libraries/LibRegex/RegexMatch.h @@ -442,11 +442,20 @@ public: } Match(String const string_, size_t const line_, size_t const column_, size_t const global_offset_) - : string(string_) + : string(move(string_)) , view(string.value().view()) , line(line_) , column(column_) , global_offset(global_offset_) + { + } + + Match(RegexStringView const view_, StringView capture_group_name_, size_t const line_, size_t const column_, size_t const global_offset_) + : view(view_) + , capture_group_name(capture_group_name_) + , line(line_) + , column(column_) + , global_offset(global_offset_) , left_column(column_) { } @@ -454,6 +463,7 @@ public: void reset() { view = view.typed_null_view(); + capture_group_name.clear(); line = 0; column = 0; global_offset = 0; @@ -461,6 +471,7 @@ public: } RegexStringView view { nullptr }; + Optional<StringView> capture_group_name {}; size_t line { 0 }; size_t column { 0 }; size_t global_offset { 0 }; @@ -494,8 +505,6 @@ struct MatchState { size_t fork_at_position { 0 }; Vector<Match> matches; Vector<Vector<Match>> capture_group_matches; - Vector<HashMap<String, Match>> named_capture_group_matches; - size_t recursion_level { 0 }; }; struct MatchOutput { diff --git a/Userland/Libraries/LibRegex/RegexMatcher.cpp b/Userland/Libraries/LibRegex/RegexMatcher.cpp index c312ca3c81..0d41c9af42 100644 --- a/Userland/Libraries/LibRegex/RegexMatcher.cpp +++ b/Userland/Libraries/LibRegex/RegexMatcher.cpp @@ -149,10 +149,7 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona if (c_match_preallocation_count) { state.matches.ensure_capacity(c_match_preallocation_count); state.capture_group_matches.ensure_capacity(c_match_preallocation_count); - state.named_capture_group_matches.ensure_capacity(c_match_preallocation_count); - auto& capture_groups_count = m_pattern->parser_result.capture_groups_count; - auto& named_capture_groups_count = m_pattern->parser_result.named_capture_groups_count; for (size_t j = 0; j < c_match_preallocation_count; ++j) { state.matches.empend(); @@ -160,9 +157,6 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona state.capture_group_matches.at(j).ensure_capacity(capture_groups_count); for (size_t k = 0; k < capture_groups_count; ++k) state.capture_group_matches.at(j).unchecked_append({}); - - state.named_capture_group_matches.unchecked_append({}); - state.named_capture_group_matches.at(j).ensure_capacity(named_capture_groups_count); } } @@ -315,15 +309,9 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona matches.template remove_all_matching([](auto& match) { return match.view.is_null(); }); } - output_copy.named_capture_group_matches = state.named_capture_group_matches; - // Make sure there are as many capture matches as there are actual matches. - if (output_copy.named_capture_group_matches.size() < match_count) - output_copy.named_capture_group_matches.resize(match_count); - output_copy.matches = state.matches; } else { output_copy.capture_group_matches.clear_with_capacity(); - output_copy.named_capture_group_matches.clear_with_capacity(); } return { @@ -331,7 +319,6 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona match_count, move(output_copy.matches), move(output_copy.capture_group_matches), - move(output_copy.named_capture_group_matches), output.operations, m_pattern->parser_result.capture_groups_count, m_pattern->parser_result.named_capture_groups_count, @@ -399,9 +386,8 @@ private: template<class Parser> Optional<bool> Matcher<Parser>::execute(MatchInput const& input, MatchState& state, MatchOutput& output) const { - state.recursion_level = 0; - BumpAllocatedLinkedList<MatchState> states_to_try_next; + size_t recursion_level = 0; auto& bytecode = m_pattern->parser_result.bytecode; @@ -410,7 +396,7 @@ Optional<bool> Matcher<Parser>::execute(MatchInput const& input, MatchState& sta auto& opcode = bytecode.get_opcode(state); #if REGEX_DEBUG - s_regex_dbg.print_opcode("VM", opcode, state, state.recursion_level, false); + s_regex_dbg.print_opcode("VM", opcode, state, recursion_level, false); #endif ExecutionResult result; @@ -435,7 +421,7 @@ Optional<bool> Matcher<Parser>::execute(MatchInput const& input, MatchState& sta case ExecutionResult::Fork_PrioHigh: states_to_try_next.append(state); state.instruction_position = state.fork_at_position; - ++state.recursion_level; + ++recursion_level; continue; case ExecutionResult::Continue: continue; @@ -454,7 +440,7 @@ Optional<bool> Matcher<Parser>::execute(MatchInput const& input, MatchState& sta return false; } state = states_to_try_next.take_last(); - ++state.recursion_level; + ++recursion_level; continue; } } diff --git a/Userland/Libraries/LibRegex/RegexMatcher.h b/Userland/Libraries/LibRegex/RegexMatcher.h index 8e1ce64e6d..4a1e64b8f4 100644 --- a/Userland/Libraries/LibRegex/RegexMatcher.h +++ b/Userland/Libraries/LibRegex/RegexMatcher.h @@ -31,7 +31,6 @@ struct RegexResult final { size_t count { 0 }; Vector<Match> matches; Vector<Vector<Match>> capture_group_matches; - Vector<HashMap<String, Match>> named_capture_group_matches; size_t n_operations { 0 }; size_t n_capture_groups { 0 }; size_t n_named_capture_groups { 0 }; diff --git a/Userland/Libraries/LibRegex/RegexParser.cpp b/Userland/Libraries/LibRegex/RegexParser.cpp index 84768cf121..db08d836fc 100644 --- a/Userland/Libraries/LibRegex/RegexParser.cpp +++ b/Userland/Libraries/LibRegex/RegexParser.cpp @@ -150,7 +150,6 @@ ALWAYS_INLINE void Parser::reset() m_parser_state.capture_group_minimum_lengths.clear(); m_parser_state.capture_groups_count = 0; m_parser_state.named_capture_groups_count = 0; - m_parser_state.named_capture_group_minimum_lengths.clear(); m_parser_state.named_capture_groups.clear(); } @@ -780,12 +779,8 @@ ALWAYS_INLINE bool PosixExtendedParser::parse_sub_expression(ByteCode& stack, si } } - if (!(m_parser_state.regex_options & AllFlags::SkipSubExprResults || prevent_capture_group)) { - if (capture_group_name.has_value()) - bytecode.insert_bytecode_group_capture_left(capture_group_name.value()); - else - bytecode.insert_bytecode_group_capture_left(m_parser_state.capture_groups_count); - } + if (!(m_parser_state.regex_options & AllFlags::SkipSubExprResults || prevent_capture_group)) + bytecode.insert_bytecode_group_capture_left(m_parser_state.capture_groups_count); ByteCode capture_group_bytecode; @@ -814,12 +809,12 @@ ALWAYS_INLINE bool PosixExtendedParser::parse_sub_expression(ByteCode& stack, si if (!(m_parser_state.regex_options & AllFlags::SkipSubExprResults || prevent_capture_group)) { if (capture_group_name.has_value()) { - bytecode.insert_bytecode_group_capture_right(capture_group_name.value()); + bytecode.insert_bytecode_group_capture_right(m_parser_state.capture_groups_count, capture_group_name.value()); ++m_parser_state.named_capture_groups_count; } else { bytecode.insert_bytecode_group_capture_right(m_parser_state.capture_groups_count); - ++m_parser_state.capture_groups_count; } + ++m_parser_state.capture_groups_count; } should_parse_repetition_symbol = true; break; @@ -1564,14 +1559,14 @@ bool ECMA262Parser::parse_atom_escape(ByteCode& stack, size_t& match_length_mini set_error(Error::InvalidNameForCaptureGroup); return false; } - auto maybe_length = m_parser_state.named_capture_group_minimum_lengths.get(name); - if (!maybe_length.has_value()) { + auto maybe_capture_group = m_parser_state.named_capture_groups.get(name); + if (!maybe_capture_group.has_value()) { set_error(Error::InvalidNameForCaptureGroup); return false; } - match_length_minimum += maybe_length.value(); + match_length_minimum += maybe_capture_group->minimum_length; - stack.insert_bytecode_compare_named_reference(name); + stack.insert_bytecode_compare_values({ { CharacterCompareType::Reference, (ByteCodeValueType)maybe_capture_group->group_index } }); return true; } @@ -2121,15 +2116,8 @@ bool ECMA262Parser::parse_capture_group(ByteCode& stack, size_t& match_length_mi m_capture_groups_in_scope.last().empend(identifier); }; auto clear_all_capture_groups_in_scope = [&] { - for (auto& entry : m_capture_groups_in_scope.last()) { - entry.visit( - [&](size_t index) { - stack.insert_bytecode_clear_capture_group(index); - }, - [&](String const& name) { - stack.insert_bytecode_clear_named_capture_group(name); - }); - } + for (auto& index : m_capture_groups_in_scope.last()) + stack.insert_bytecode_clear_capture_group(index); }; if (match(TokenType::Questionmark)) { @@ -2172,21 +2160,18 @@ bool ECMA262Parser::parse_capture_group(ByteCode& stack, size_t& match_length_mi clear_all_capture_groups_in_scope(); exit_capture_group_scope(); - register_capture_group_in_current_scope(name); register_capture_group_in_current_scope(group_index); consume(TokenType::RightParen, Error::MismatchingParen); - stack.insert_bytecode_group_capture_left(name); stack.insert_bytecode_group_capture_left(group_index); stack.extend(move(capture_group_bytecode)); - stack.insert_bytecode_group_capture_right(name); - stack.insert_bytecode_group_capture_right(group_index); + stack.insert_bytecode_group_capture_right(group_index, name); match_length_minimum += length; - m_parser_state.named_capture_group_minimum_lengths.set(name, length); m_parser_state.capture_group_minimum_lengths.set(group_index, length); + m_parser_state.named_capture_groups.set(name, { group_index, length }); return true; } diff --git a/Userland/Libraries/LibRegex/RegexParser.h b/Userland/Libraries/LibRegex/RegexParser.h index 25a9677189..c1f54be69c 100644 --- a/Userland/Libraries/LibRegex/RegexParser.h +++ b/Userland/Libraries/LibRegex/RegexParser.h @@ -88,6 +88,11 @@ protected: ALWAYS_INLINE bool done() const; ALWAYS_INLINE bool set_error(Error error); + struct NamedCaptureGroup { + size_t group_index { 0 }; + size_t minimum_length { 0 }; + }; + struct ParserState { Lexer& lexer; Token current_token; @@ -99,8 +104,7 @@ protected: size_t match_length_minimum { 0 }; AllOptions regex_options; HashMap<int, size_t> capture_group_minimum_lengths; - HashMap<FlyString, size_t> named_capture_group_minimum_lengths; - HashMap<size_t, FlyString> named_capture_groups; + HashMap<FlyString, NamedCaptureGroup> named_capture_groups; explicit ParserState(Lexer& lexer) : lexer(lexer) @@ -258,8 +262,7 @@ private: // ECMA-262 basically requires that we clear the inner captures of a capture group before trying to match it, // by requiring that (...)+ only contain the matches for the last iteration. // To do that, we have to keep track of which capture groups are "in scope", so we can clear them as needed. - using CaptureGroup = Variant<size_t, String>; - Vector<Vector<CaptureGroup>> m_capture_groups_in_scope; + Vector<Vector<size_t>> m_capture_groups_in_scope; }; using PosixExtended = PosixExtendedParser; |