summaryrefslogtreecommitdiff
path: root/Userland/Libraries/LibRegex
diff options
context:
space:
mode:
authorTimothy Flynn <trflynn89@pm.me>2021-08-14 16:28:54 -0400
committerLinus Groh <mail@linusgroh.de>2021-08-15 11:43:45 +0100
commitf1ce998d73979b80352675d6d3170399b2311913 (patch)
treeab15c8d1713571bbbf87591c7d3a8e1b0e7b5f58 /Userland/Libraries/LibRegex
parentfea181bde35b115de33adb5f80c3f6abe46cec72 (diff)
downloadserenity-f1ce998d73979b80352675d6d3170399b2311913.zip
LibRegex+LibJS: Combine named and unnamed capture groups in MatchState
Combining these into one list helps reduce the size of MatchState, and as a result, reduces the amount of memory consumed during execution of very large regex matches. Doing this also allows us to remove a few regex byte code instructions: ClearNamedCaptureGroup, SaveLeftNamedCaptureGroup, and NamedReference. Named groups now behave the same as unnamed groups for these operations. Note that SaveRightNamedCaptureGroup still exists to cache the matched group name. This also removes the recursion level from the MatchState, as it can exist as a local variable in Matcher::execute instead.
Diffstat (limited to 'Userland/Libraries/LibRegex')
-rw-r--r--Userland/Libraries/LibRegex/RegexByteCode.cpp80
-rw-r--r--Userland/Libraries/LibRegex/RegexByteCode.h69
-rw-r--r--Userland/Libraries/LibRegex/RegexMatch.h15
-rw-r--r--Userland/Libraries/LibRegex/RegexMatcher.cpp22
-rw-r--r--Userland/Libraries/LibRegex/RegexMatcher.h1
-rw-r--r--Userland/Libraries/LibRegex/RegexParser.cpp39
-rw-r--r--Userland/Libraries/LibRegex/RegexParser.h11
7 files changed, 52 insertions, 185 deletions
diff --git a/Userland/Libraries/LibRegex/RegexByteCode.cpp b/Userland/Libraries/LibRegex/RegexByteCode.cpp
index 7cad14ba92..3d9b1d3882 100644
--- a/Userland/Libraries/LibRegex/RegexByteCode.cpp
+++ b/Userland/Libraries/LibRegex/RegexByteCode.cpp
@@ -166,18 +166,12 @@ void ByteCode::ensure_opcodes_initialized()
case OpCodeId::ClearCaptureGroup:
s_opcodes[i] = make<OpCode_ClearCaptureGroup>();
break;
- case OpCodeId::ClearNamedCaptureGroup:
- s_opcodes[i] = make<OpCode_ClearNamedCaptureGroup>();
- break;
case OpCodeId::SaveLeftCaptureGroup:
s_opcodes[i] = make<OpCode_SaveLeftCaptureGroup>();
break;
case OpCodeId::SaveRightCaptureGroup:
s_opcodes[i] = make<OpCode_SaveRightCaptureGroup>();
break;
- case OpCodeId::SaveLeftNamedCaptureGroup:
- s_opcodes[i] = make<OpCode_SaveLeftNamedCaptureGroup>();
- break;
case OpCodeId::SaveRightNamedCaptureGroup:
s_opcodes[i] = make<OpCode_SaveRightNamedCaptureGroup>();
break;
@@ -378,52 +372,26 @@ ALWAYS_INLINE ExecutionResult OpCode_SaveRightCaptureGroup::execute(MatchInput c
return ExecutionResult::Continue;
}
-ALWAYS_INLINE ExecutionResult OpCode_ClearNamedCaptureGroup::execute(MatchInput const& input, MatchState& state, MatchOutput&) const
-{
- if (input.match_index < state.capture_group_matches.size()) {
- auto& group = state.named_capture_group_matches[input.match_index];
- if (auto it = group.find(name()); it != group.end())
- it->value.reset();
- }
- return ExecutionResult::Continue;
-}
-
-ALWAYS_INLINE ExecutionResult OpCode_SaveLeftNamedCaptureGroup::execute(MatchInput const& input, MatchState& state, MatchOutput&) const
-{
- if (input.match_index >= state.named_capture_group_matches.size()) {
- state.named_capture_group_matches.ensure_capacity(input.match_index);
- auto capacity = state.named_capture_group_matches.capacity();
- for (size_t i = state.named_capture_group_matches.size(); i <= capacity; ++i)
- state.named_capture_group_matches.empend();
- }
- state.named_capture_group_matches.at(input.match_index).ensure(name()).column = state.string_position;
- return ExecutionResult::Continue;
-}
-
ALWAYS_INLINE ExecutionResult OpCode_SaveRightNamedCaptureGroup::execute(MatchInput const& input, MatchState& state, MatchOutput&) const
{
- StringView capture_group_name = name();
+ auto& match = state.capture_group_matches.at(input.match_index).at(id());
+ auto start_position = match.left_column;
+ if (state.string_position < start_position)
+ return ExecutionResult::Failed_ExecuteLowPrioForks;
+
+ auto length = state.string_position - start_position;
- if (state.named_capture_group_matches.at(input.match_index).contains(capture_group_name)) {
- auto start_position = state.named_capture_group_matches.at(input.match_index).ensure(capture_group_name).column;
- auto length = state.string_position - start_position;
+ if (start_position < match.column)
+ return ExecutionResult::Continue;
- auto& map = state.named_capture_group_matches.at(input.match_index);
+ VERIFY(start_position + length <= input.view.length());
- if constexpr (REGEX_DEBUG) {
- VERIFY(start_position + length <= input.view.length());
- dbgln("Save named capture group with name={} and content='{}'", capture_group_name, input.view.substring_view(start_position, length));
- }
+ auto view = input.view.substring_view(start_position, length);
- VERIFY(start_position + length <= input.view.length());
- auto view = input.view.substring_view(start_position, length);
- if (input.regex_options & AllFlags::StringCopyMatches) {
- map.set(capture_group_name, { view.to_string(), input.line, start_position, input.global_offset + start_position }); // create a copy of the original string
- } else {
- map.set(capture_group_name, { view, input.line, start_position, input.global_offset + start_position }); // take view to original string
- }
+ if (input.regex_options & AllFlags::StringCopyMatches) {
+ match = { view.to_string(), name(), input.line, start_position, input.global_offset + start_position }; // create a copy of the original string
} else {
- warnln("Didn't find corresponding capture group match for name={}, match_index={}", capture_group_name.to_string(), input.match_index);
+ match = { view, name(), input.line, start_position, input.global_offset + start_position }; // take view to original string
}
return ExecutionResult::Continue;
@@ -543,24 +511,6 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M
if (!compare_string(input, state, str, had_zero_length_match))
return ExecutionResult::Failed_ExecuteLowPrioForks;
- } else if (compare_type == CharacterCompareType::NamedReference) {
- auto ptr = (char const*)m_bytecode->at(offset++);
- auto length = (size_t)m_bytecode->at(offset++);
- StringView name { ptr, length };
-
- auto group = state.named_capture_group_matches.at(input.match_index).get(name);
- if (!group.has_value())
- return ExecutionResult::Failed_ExecuteLowPrioForks;
-
- auto str = group.value().view;
-
- // We want to compare a string that is definitely longer than the available string
- if (input.view.length() < state.string_position + str.length())
- return ExecutionResult::Failed_ExecuteLowPrioForks;
-
- if (!compare_string(input, state, str, had_zero_length_match))
- return ExecutionResult::Failed_ExecuteLowPrioForks;
-
} else if (compare_type == CharacterCompareType::Property) {
auto property = static_cast<Unicode::Property>(m_bytecode->at(offset++));
compare_property(input, state, property, current_inversion_state(), inverse_matched);
@@ -869,10 +819,6 @@ Vector<String> const OpCode_Compare::variable_arguments_to_string(Optional<Match
buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7]));
}
}
- } else if (compare_type == CharacterCompareType::NamedReference) {
- auto ptr = (char const*)m_bytecode->at(offset++);
- auto length = m_bytecode->at(offset++);
- result.empend(String::formatted("name='{}'", StringView { ptr, (size_t)length }));
} else if (compare_type == CharacterCompareType::Reference) {
auto ref = m_bytecode->at(offset++);
result.empend(String::formatted("number={}", ref));
diff --git a/Userland/Libraries/LibRegex/RegexByteCode.h b/Userland/Libraries/LibRegex/RegexByteCode.h
index c1c7af235b..ab305785e2 100644
--- a/Userland/Libraries/LibRegex/RegexByteCode.h
+++ b/Userland/Libraries/LibRegex/RegexByteCode.h
@@ -32,7 +32,6 @@ using ByteCodeValueType = u64;
__ENUMERATE_OPCODE(FailForks) \
__ENUMERATE_OPCODE(SaveLeftCaptureGroup) \
__ENUMERATE_OPCODE(SaveRightCaptureGroup) \
- __ENUMERATE_OPCODE(SaveLeftNamedCaptureGroup) \
__ENUMERATE_OPCODE(SaveRightNamedCaptureGroup) \
__ENUMERATE_OPCODE(CheckBegin) \
__ENUMERATE_OPCODE(CheckEnd) \
@@ -41,7 +40,6 @@ using ByteCodeValueType = u64;
__ENUMERATE_OPCODE(Restore) \
__ENUMERATE_OPCODE(GoBack) \
__ENUMERATE_OPCODE(ClearCaptureGroup) \
- __ENUMERATE_OPCODE(ClearNamedCaptureGroup) \
__ENUMERATE_OPCODE(Exit)
// clang-format off
@@ -65,7 +63,6 @@ enum class OpCodeId : ByteCodeValueType {
__ENUMERATE_CHARACTER_COMPARE_TYPE(CharClass) \
__ENUMERATE_CHARACTER_COMPARE_TYPE(CharRange) \
__ENUMERATE_CHARACTER_COMPARE_TYPE(Reference) \
- __ENUMERATE_CHARACTER_COMPARE_TYPE(NamedReference) \
__ENUMERATE_CHARACTER_COMPARE_TYPE(Property) \
__ENUMERATE_CHARACTER_COMPARE_TYPE(GeneralCategory) \
__ENUMERATE_CHARACTER_COMPARE_TYPE(Script) \
@@ -159,7 +156,6 @@ public:
VERIFY(value.type != CharacterCompareType::RangeExpressionDummy);
VERIFY(value.type != CharacterCompareType::Undefined);
VERIFY(value.type != CharacterCompareType::String);
- VERIFY(value.type != CharacterCompareType::NamedReference);
arguments.append((ByteCodeValueType)value.type);
if (value.type != CharacterCompareType::Inverse && value.type != CharacterCompareType::AnyChar && value.type != CharacterCompareType::TemporaryInverse)
@@ -187,13 +183,6 @@ public:
empend(index);
}
- void insert_bytecode_clear_named_capture_group(StringView name)
- {
- empend(static_cast<ByteCodeValueType>(OpCodeId::ClearNamedCaptureGroup));
- empend(reinterpret_cast<ByteCodeValueType>(name.characters_without_null_termination()));
- empend(name.length());
- }
-
void insert_bytecode_compare_string(StringView view)
{
ByteCode bytecode;
@@ -212,49 +201,24 @@ public:
extend(move(bytecode));
}
- void insert_bytecode_compare_named_reference(StringView name)
- {
- ByteCode bytecode;
-
- bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::Compare));
- bytecode.empend(static_cast<u64>(1)); // number of arguments
-
- ByteCode arguments;
-
- arguments.empend(static_cast<ByteCodeValueType>(CharacterCompareType::NamedReference));
- arguments.empend(reinterpret_cast<ByteCodeValueType>(name.characters_without_null_termination()));
- arguments.empend(name.length());
-
- bytecode.empend(arguments.size()); // size of arguments
- bytecode.extend(move(arguments));
-
- extend(move(bytecode));
- }
-
void insert_bytecode_group_capture_left(size_t capture_groups_count)
{
empend(static_cast<ByteCodeValueType>(OpCodeId::SaveLeftCaptureGroup));
empend(capture_groups_count);
}
- void insert_bytecode_group_capture_left(StringView const& name)
- {
- empend(static_cast<ByteCodeValueType>(OpCodeId::SaveLeftNamedCaptureGroup));
- empend(reinterpret_cast<ByteCodeValueType>(name.characters_without_null_termination()));
- empend(name.length());
- }
-
void insert_bytecode_group_capture_right(size_t capture_groups_count)
{
empend(static_cast<ByteCodeValueType>(OpCodeId::SaveRightCaptureGroup));
empend(capture_groups_count);
}
- void insert_bytecode_group_capture_right(StringView const& name)
+ void insert_bytecode_group_capture_right(size_t capture_groups_count, StringView const& name)
{
empend(static_cast<ByteCodeValueType>(OpCodeId::SaveRightNamedCaptureGroup));
empend(reinterpret_cast<ByteCodeValueType>(name.characters_without_null_termination()));
empend(name.length());
+ empend(capture_groups_count);
}
enum class LookAroundType {
@@ -655,19 +619,6 @@ public:
String const arguments_string() const override { return String::formatted("id={}", id()); }
};
-class OpCode_ClearNamedCaptureGroup final : public OpCode {
-public:
- ExecutionResult execute(MatchInput const& input, MatchState& state, MatchOutput& output) const override;
- ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::ClearNamedCaptureGroup; }
- ALWAYS_INLINE size_t size() const override { return 3; }
- ALWAYS_INLINE StringView name() const { return { reinterpret_cast<char*>(argument(0)), length() }; }
- ALWAYS_INLINE size_t length() const { return argument(1); }
- String const arguments_string() const override
- {
- return String::formatted("name={}, length={}", name(), length());
- }
-};
-
class OpCode_SaveLeftCaptureGroup final : public OpCode {
public:
ExecutionResult execute(MatchInput const& input, MatchState& state, MatchOutput& output) const override;
@@ -686,26 +637,14 @@ public:
String const arguments_string() const override { return String::formatted("id={}", id()); }
};
-class OpCode_SaveLeftNamedCaptureGroup final : public OpCode {
-public:
- ExecutionResult execute(MatchInput const& input, MatchState& state, MatchOutput& output) const override;
- ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::SaveLeftNamedCaptureGroup; }
- ALWAYS_INLINE size_t size() const override { return 3; }
- ALWAYS_INLINE StringView name() const { return { reinterpret_cast<char*>(argument(0)), length() }; }
- ALWAYS_INLINE size_t length() const { return argument(1); }
- String const arguments_string() const override
- {
- return String::formatted("name={}, length={}", name(), length());
- }
-};
-
class OpCode_SaveRightNamedCaptureGroup final : public OpCode {
public:
ExecutionResult execute(MatchInput const& input, MatchState& state, MatchOutput& output) const override;
ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::SaveRightNamedCaptureGroup; }
- ALWAYS_INLINE size_t size() const override { return 3; }
+ ALWAYS_INLINE size_t size() const override { return 4; }
ALWAYS_INLINE StringView name() const { return { reinterpret_cast<char*>(argument(0)), length() }; }
ALWAYS_INLINE size_t length() const { return argument(1); }
+ ALWAYS_INLINE size_t id() const { return argument(2); }
String const arguments_string() const override
{
return String::formatted("name={}, length={}", name(), length());
diff --git a/Userland/Libraries/LibRegex/RegexMatch.h b/Userland/Libraries/LibRegex/RegexMatch.h
index 599e68451a..98c324b359 100644
--- a/Userland/Libraries/LibRegex/RegexMatch.h
+++ b/Userland/Libraries/LibRegex/RegexMatch.h
@@ -442,11 +442,20 @@ public:
}
Match(String const string_, size_t const line_, size_t const column_, size_t const global_offset_)
- : string(string_)
+ : string(move(string_))
, view(string.value().view())
, line(line_)
, column(column_)
, global_offset(global_offset_)
+ {
+ }
+
+ Match(RegexStringView const view_, StringView capture_group_name_, size_t const line_, size_t const column_, size_t const global_offset_)
+ : view(view_)
+ , capture_group_name(capture_group_name_)
+ , line(line_)
+ , column(column_)
+ , global_offset(global_offset_)
, left_column(column_)
{
}
@@ -454,6 +463,7 @@ public:
void reset()
{
view = view.typed_null_view();
+ capture_group_name.clear();
line = 0;
column = 0;
global_offset = 0;
@@ -461,6 +471,7 @@ public:
}
RegexStringView view { nullptr };
+ Optional<StringView> capture_group_name {};
size_t line { 0 };
size_t column { 0 };
size_t global_offset { 0 };
@@ -494,8 +505,6 @@ struct MatchState {
size_t fork_at_position { 0 };
Vector<Match> matches;
Vector<Vector<Match>> capture_group_matches;
- Vector<HashMap<String, Match>> named_capture_group_matches;
- size_t recursion_level { 0 };
};
struct MatchOutput {
diff --git a/Userland/Libraries/LibRegex/RegexMatcher.cpp b/Userland/Libraries/LibRegex/RegexMatcher.cpp
index c312ca3c81..0d41c9af42 100644
--- a/Userland/Libraries/LibRegex/RegexMatcher.cpp
+++ b/Userland/Libraries/LibRegex/RegexMatcher.cpp
@@ -149,10 +149,7 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona
if (c_match_preallocation_count) {
state.matches.ensure_capacity(c_match_preallocation_count);
state.capture_group_matches.ensure_capacity(c_match_preallocation_count);
- state.named_capture_group_matches.ensure_capacity(c_match_preallocation_count);
-
auto& capture_groups_count = m_pattern->parser_result.capture_groups_count;
- auto& named_capture_groups_count = m_pattern->parser_result.named_capture_groups_count;
for (size_t j = 0; j < c_match_preallocation_count; ++j) {
state.matches.empend();
@@ -160,9 +157,6 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona
state.capture_group_matches.at(j).ensure_capacity(capture_groups_count);
for (size_t k = 0; k < capture_groups_count; ++k)
state.capture_group_matches.at(j).unchecked_append({});
-
- state.named_capture_group_matches.unchecked_append({});
- state.named_capture_group_matches.at(j).ensure_capacity(named_capture_groups_count);
}
}
@@ -315,15 +309,9 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona
matches.template remove_all_matching([](auto& match) { return match.view.is_null(); });
}
- output_copy.named_capture_group_matches = state.named_capture_group_matches;
- // Make sure there are as many capture matches as there are actual matches.
- if (output_copy.named_capture_group_matches.size() < match_count)
- output_copy.named_capture_group_matches.resize(match_count);
-
output_copy.matches = state.matches;
} else {
output_copy.capture_group_matches.clear_with_capacity();
- output_copy.named_capture_group_matches.clear_with_capacity();
}
return {
@@ -331,7 +319,6 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona
match_count,
move(output_copy.matches),
move(output_copy.capture_group_matches),
- move(output_copy.named_capture_group_matches),
output.operations,
m_pattern->parser_result.capture_groups_count,
m_pattern->parser_result.named_capture_groups_count,
@@ -399,9 +386,8 @@ private:
template<class Parser>
Optional<bool> Matcher<Parser>::execute(MatchInput const& input, MatchState& state, MatchOutput& output) const
{
- state.recursion_level = 0;
-
BumpAllocatedLinkedList<MatchState> states_to_try_next;
+ size_t recursion_level = 0;
auto& bytecode = m_pattern->parser_result.bytecode;
@@ -410,7 +396,7 @@ Optional<bool> Matcher<Parser>::execute(MatchInput const& input, MatchState& sta
auto& opcode = bytecode.get_opcode(state);
#if REGEX_DEBUG
- s_regex_dbg.print_opcode("VM", opcode, state, state.recursion_level, false);
+ s_regex_dbg.print_opcode("VM", opcode, state, recursion_level, false);
#endif
ExecutionResult result;
@@ -435,7 +421,7 @@ Optional<bool> Matcher<Parser>::execute(MatchInput const& input, MatchState& sta
case ExecutionResult::Fork_PrioHigh:
states_to_try_next.append(state);
state.instruction_position = state.fork_at_position;
- ++state.recursion_level;
+ ++recursion_level;
continue;
case ExecutionResult::Continue:
continue;
@@ -454,7 +440,7 @@ Optional<bool> Matcher<Parser>::execute(MatchInput const& input, MatchState& sta
return false;
}
state = states_to_try_next.take_last();
- ++state.recursion_level;
+ ++recursion_level;
continue;
}
}
diff --git a/Userland/Libraries/LibRegex/RegexMatcher.h b/Userland/Libraries/LibRegex/RegexMatcher.h
index 8e1ce64e6d..4a1e64b8f4 100644
--- a/Userland/Libraries/LibRegex/RegexMatcher.h
+++ b/Userland/Libraries/LibRegex/RegexMatcher.h
@@ -31,7 +31,6 @@ struct RegexResult final {
size_t count { 0 };
Vector<Match> matches;
Vector<Vector<Match>> capture_group_matches;
- Vector<HashMap<String, Match>> named_capture_group_matches;
size_t n_operations { 0 };
size_t n_capture_groups { 0 };
size_t n_named_capture_groups { 0 };
diff --git a/Userland/Libraries/LibRegex/RegexParser.cpp b/Userland/Libraries/LibRegex/RegexParser.cpp
index 84768cf121..db08d836fc 100644
--- a/Userland/Libraries/LibRegex/RegexParser.cpp
+++ b/Userland/Libraries/LibRegex/RegexParser.cpp
@@ -150,7 +150,6 @@ ALWAYS_INLINE void Parser::reset()
m_parser_state.capture_group_minimum_lengths.clear();
m_parser_state.capture_groups_count = 0;
m_parser_state.named_capture_groups_count = 0;
- m_parser_state.named_capture_group_minimum_lengths.clear();
m_parser_state.named_capture_groups.clear();
}
@@ -780,12 +779,8 @@ ALWAYS_INLINE bool PosixExtendedParser::parse_sub_expression(ByteCode& stack, si
}
}
- if (!(m_parser_state.regex_options & AllFlags::SkipSubExprResults || prevent_capture_group)) {
- if (capture_group_name.has_value())
- bytecode.insert_bytecode_group_capture_left(capture_group_name.value());
- else
- bytecode.insert_bytecode_group_capture_left(m_parser_state.capture_groups_count);
- }
+ if (!(m_parser_state.regex_options & AllFlags::SkipSubExprResults || prevent_capture_group))
+ bytecode.insert_bytecode_group_capture_left(m_parser_state.capture_groups_count);
ByteCode capture_group_bytecode;
@@ -814,12 +809,12 @@ ALWAYS_INLINE bool PosixExtendedParser::parse_sub_expression(ByteCode& stack, si
if (!(m_parser_state.regex_options & AllFlags::SkipSubExprResults || prevent_capture_group)) {
if (capture_group_name.has_value()) {
- bytecode.insert_bytecode_group_capture_right(capture_group_name.value());
+ bytecode.insert_bytecode_group_capture_right(m_parser_state.capture_groups_count, capture_group_name.value());
++m_parser_state.named_capture_groups_count;
} else {
bytecode.insert_bytecode_group_capture_right(m_parser_state.capture_groups_count);
- ++m_parser_state.capture_groups_count;
}
+ ++m_parser_state.capture_groups_count;
}
should_parse_repetition_symbol = true;
break;
@@ -1564,14 +1559,14 @@ bool ECMA262Parser::parse_atom_escape(ByteCode& stack, size_t& match_length_mini
set_error(Error::InvalidNameForCaptureGroup);
return false;
}
- auto maybe_length = m_parser_state.named_capture_group_minimum_lengths.get(name);
- if (!maybe_length.has_value()) {
+ auto maybe_capture_group = m_parser_state.named_capture_groups.get(name);
+ if (!maybe_capture_group.has_value()) {
set_error(Error::InvalidNameForCaptureGroup);
return false;
}
- match_length_minimum += maybe_length.value();
+ match_length_minimum += maybe_capture_group->minimum_length;
- stack.insert_bytecode_compare_named_reference(name);
+ stack.insert_bytecode_compare_values({ { CharacterCompareType::Reference, (ByteCodeValueType)maybe_capture_group->group_index } });
return true;
}
@@ -2121,15 +2116,8 @@ bool ECMA262Parser::parse_capture_group(ByteCode& stack, size_t& match_length_mi
m_capture_groups_in_scope.last().empend(identifier);
};
auto clear_all_capture_groups_in_scope = [&] {
- for (auto& entry : m_capture_groups_in_scope.last()) {
- entry.visit(
- [&](size_t index) {
- stack.insert_bytecode_clear_capture_group(index);
- },
- [&](String const& name) {
- stack.insert_bytecode_clear_named_capture_group(name);
- });
- }
+ for (auto& index : m_capture_groups_in_scope.last())
+ stack.insert_bytecode_clear_capture_group(index);
};
if (match(TokenType::Questionmark)) {
@@ -2172,21 +2160,18 @@ bool ECMA262Parser::parse_capture_group(ByteCode& stack, size_t& match_length_mi
clear_all_capture_groups_in_scope();
exit_capture_group_scope();
- register_capture_group_in_current_scope(name);
register_capture_group_in_current_scope(group_index);
consume(TokenType::RightParen, Error::MismatchingParen);
- stack.insert_bytecode_group_capture_left(name);
stack.insert_bytecode_group_capture_left(group_index);
stack.extend(move(capture_group_bytecode));
- stack.insert_bytecode_group_capture_right(name);
- stack.insert_bytecode_group_capture_right(group_index);
+ stack.insert_bytecode_group_capture_right(group_index, name);
match_length_minimum += length;
- m_parser_state.named_capture_group_minimum_lengths.set(name, length);
m_parser_state.capture_group_minimum_lengths.set(group_index, length);
+ m_parser_state.named_capture_groups.set(name, { group_index, length });
return true;
}
diff --git a/Userland/Libraries/LibRegex/RegexParser.h b/Userland/Libraries/LibRegex/RegexParser.h
index 25a9677189..c1f54be69c 100644
--- a/Userland/Libraries/LibRegex/RegexParser.h
+++ b/Userland/Libraries/LibRegex/RegexParser.h
@@ -88,6 +88,11 @@ protected:
ALWAYS_INLINE bool done() const;
ALWAYS_INLINE bool set_error(Error error);
+ struct NamedCaptureGroup {
+ size_t group_index { 0 };
+ size_t minimum_length { 0 };
+ };
+
struct ParserState {
Lexer& lexer;
Token current_token;
@@ -99,8 +104,7 @@ protected:
size_t match_length_minimum { 0 };
AllOptions regex_options;
HashMap<int, size_t> capture_group_minimum_lengths;
- HashMap<FlyString, size_t> named_capture_group_minimum_lengths;
- HashMap<size_t, FlyString> named_capture_groups;
+ HashMap<FlyString, NamedCaptureGroup> named_capture_groups;
explicit ParserState(Lexer& lexer)
: lexer(lexer)
@@ -258,8 +262,7 @@ private:
// ECMA-262 basically requires that we clear the inner captures of a capture group before trying to match it,
// by requiring that (...)+ only contain the matches for the last iteration.
// To do that, we have to keep track of which capture groups are "in scope", so we can clear them as needed.
- using CaptureGroup = Variant<size_t, String>;
- Vector<Vector<CaptureGroup>> m_capture_groups_in_scope;
+ Vector<Vector<size_t>> m_capture_groups_in_scope;
};
using PosixExtended = PosixExtendedParser;