summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Tests/LibRegex/Regex.cpp12
-rw-r--r--Userland/Libraries/LibJS/Runtime/RegExpPrototype.cpp22
-rw-r--r--Userland/Libraries/LibRegex/RegexByteCode.cpp80
-rw-r--r--Userland/Libraries/LibRegex/RegexByteCode.h69
-rw-r--r--Userland/Libraries/LibRegex/RegexMatch.h15
-rw-r--r--Userland/Libraries/LibRegex/RegexMatcher.cpp22
-rw-r--r--Userland/Libraries/LibRegex/RegexMatcher.h1
-rw-r--r--Userland/Libraries/LibRegex/RegexParser.cpp39
-rw-r--r--Userland/Libraries/LibRegex/RegexParser.h11
9 files changed, 69 insertions, 202 deletions
diff --git a/Tests/LibRegex/Regex.cpp b/Tests/LibRegex/Regex.cpp
index 9529ce790c..ea8707ccf7 100644
--- a/Tests/LibRegex/Regex.cpp
+++ b/Tests/LibRegex/Regex.cpp
@@ -422,9 +422,11 @@ TEST_CASE(named_capture_group)
EXPECT_EQ(re.search(haystack, result, PosixFlags::Multiline), true);
EXPECT_EQ(result.count, 2u);
EXPECT_EQ(result.matches.at(0).view, "Opacity=255");
- EXPECT_EQ(result.named_capture_group_matches.at(0).ensure("Test").view, "255");
+ EXPECT_EQ(result.capture_group_matches.at(0).at(0).view, "255");
+ EXPECT_EQ(result.capture_group_matches.at(0).at(0).capture_group_name, "Test");
EXPECT_EQ(result.matches.at(1).view, "AudibleBeep=0");
- EXPECT_EQ(result.named_capture_group_matches.at(1).ensure("Test").view, "0");
+ EXPECT_EQ(result.capture_group_matches.at(1).at(0).view, "0");
+ EXPECT_EQ(result.capture_group_matches.at(1).at(0).capture_group_name, "Test");
}
TEST_CASE(ecma262_named_capture_group_with_dollar_sign)
@@ -443,9 +445,11 @@ TEST_CASE(ecma262_named_capture_group_with_dollar_sign)
EXPECT_EQ(re.search(haystack, result, ECMAScriptFlags::Multiline), true);
EXPECT_EQ(result.count, 2u);
EXPECT_EQ(result.matches.at(0).view, "Opacity=255");
- EXPECT_EQ(result.named_capture_group_matches.at(0).ensure("$Test$").view, "255");
+ EXPECT_EQ(result.capture_group_matches.at(0).at(0).view, "255");
+ EXPECT_EQ(result.capture_group_matches.at(0).at(0).capture_group_name, "$Test$");
EXPECT_EQ(result.matches.at(1).view, "AudibleBeep=0");
- EXPECT_EQ(result.named_capture_group_matches.at(1).ensure("$Test$").view, "0");
+ EXPECT_EQ(result.capture_group_matches.at(1).at(0).view, "0");
+ EXPECT_EQ(result.capture_group_matches.at(1).at(0).capture_group_name, "$Test$");
}
TEST_CASE(a_star)
diff --git a/Userland/Libraries/LibJS/Runtime/RegExpPrototype.cpp b/Userland/Libraries/LibJS/Runtime/RegExpPrototype.cpp
index 7a5c4fad99..ca3f4593b5 100644
--- a/Userland/Libraries/LibJS/Runtime/RegExpPrototype.cpp
+++ b/Userland/Libraries/LibJS/Runtime/RegExpPrototype.cpp
@@ -263,13 +263,16 @@ static Value regexp_builtin_exec(GlobalObject& global_object, RegExpObject& rege
return {};
}
- auto* array = Array::create(global_object, result.n_capture_groups + 1);
+ auto* array = Array::create(global_object, result.n_named_capture_groups + 1);
if (vm.exception())
return {};
Vector<Optional<Match>> indices { Match::create(match) };
HashMap<String, Match> group_names;
+ bool has_groups = result.n_named_capture_groups != 0;
+ Object* groups_object = has_groups ? Object::create(global_object, nullptr) : nullptr;
+
for (size_t i = 0; i < result.n_capture_groups; ++i) {
auto capture_value = js_undefined();
auto& capture = result.capture_group_matches[0][i + 1];
@@ -280,22 +283,15 @@ static Value regexp_builtin_exec(GlobalObject& global_object, RegExpObject& rege
indices.append(Match::create(capture));
}
array->create_data_property_or_throw(i + 1, capture_value);
- }
-
- bool has_groups = result.n_named_capture_groups > 0;
- Value groups = js_undefined();
- if (has_groups) {
- auto groups_object = Object::create(global_object, nullptr);
-
- for (auto& entry : result.named_capture_group_matches[0]) {
- groups_object->create_data_property_or_throw(entry.key, js_string(vm, entry.value.view.u16_view()));
- group_names.set(entry.key, Match::create(entry.value));
+ if (capture.capture_group_name.has_value()) {
+ auto group_name = capture.capture_group_name->to_string();
+ groups_object->create_data_property_or_throw(group_name, js_string(vm, capture.view.u16_view()));
+ group_names.set(move(group_name), Match::create(capture));
}
-
- groups = move(groups_object);
}
+ Value groups = has_groups ? groups_object : js_undefined();
array->create_data_property_or_throw(vm.names.groups, groups);
if (has_indices) {
diff --git a/Userland/Libraries/LibRegex/RegexByteCode.cpp b/Userland/Libraries/LibRegex/RegexByteCode.cpp
index 7cad14ba92..3d9b1d3882 100644
--- a/Userland/Libraries/LibRegex/RegexByteCode.cpp
+++ b/Userland/Libraries/LibRegex/RegexByteCode.cpp
@@ -166,18 +166,12 @@ void ByteCode::ensure_opcodes_initialized()
case OpCodeId::ClearCaptureGroup:
s_opcodes[i] = make<OpCode_ClearCaptureGroup>();
break;
- case OpCodeId::ClearNamedCaptureGroup:
- s_opcodes[i] = make<OpCode_ClearNamedCaptureGroup>();
- break;
case OpCodeId::SaveLeftCaptureGroup:
s_opcodes[i] = make<OpCode_SaveLeftCaptureGroup>();
break;
case OpCodeId::SaveRightCaptureGroup:
s_opcodes[i] = make<OpCode_SaveRightCaptureGroup>();
break;
- case OpCodeId::SaveLeftNamedCaptureGroup:
- s_opcodes[i] = make<OpCode_SaveLeftNamedCaptureGroup>();
- break;
case OpCodeId::SaveRightNamedCaptureGroup:
s_opcodes[i] = make<OpCode_SaveRightNamedCaptureGroup>();
break;
@@ -378,52 +372,26 @@ ALWAYS_INLINE ExecutionResult OpCode_SaveRightCaptureGroup::execute(MatchInput c
return ExecutionResult::Continue;
}
-ALWAYS_INLINE ExecutionResult OpCode_ClearNamedCaptureGroup::execute(MatchInput const& input, MatchState& state, MatchOutput&) const
-{
- if (input.match_index < state.capture_group_matches.size()) {
- auto& group = state.named_capture_group_matches[input.match_index];
- if (auto it = group.find(name()); it != group.end())
- it->value.reset();
- }
- return ExecutionResult::Continue;
-}
-
-ALWAYS_INLINE ExecutionResult OpCode_SaveLeftNamedCaptureGroup::execute(MatchInput const& input, MatchState& state, MatchOutput&) const
-{
- if (input.match_index >= state.named_capture_group_matches.size()) {
- state.named_capture_group_matches.ensure_capacity(input.match_index);
- auto capacity = state.named_capture_group_matches.capacity();
- for (size_t i = state.named_capture_group_matches.size(); i <= capacity; ++i)
- state.named_capture_group_matches.empend();
- }
- state.named_capture_group_matches.at(input.match_index).ensure(name()).column = state.string_position;
- return ExecutionResult::Continue;
-}
-
ALWAYS_INLINE ExecutionResult OpCode_SaveRightNamedCaptureGroup::execute(MatchInput const& input, MatchState& state, MatchOutput&) const
{
- StringView capture_group_name = name();
+ auto& match = state.capture_group_matches.at(input.match_index).at(id());
+ auto start_position = match.left_column;
+ if (state.string_position < start_position)
+ return ExecutionResult::Failed_ExecuteLowPrioForks;
+
+ auto length = state.string_position - start_position;
- if (state.named_capture_group_matches.at(input.match_index).contains(capture_group_name)) {
- auto start_position = state.named_capture_group_matches.at(input.match_index).ensure(capture_group_name).column;
- auto length = state.string_position - start_position;
+ if (start_position < match.column)
+ return ExecutionResult::Continue;
- auto& map = state.named_capture_group_matches.at(input.match_index);
+ VERIFY(start_position + length <= input.view.length());
- if constexpr (REGEX_DEBUG) {
- VERIFY(start_position + length <= input.view.length());
- dbgln("Save named capture group with name={} and content='{}'", capture_group_name, input.view.substring_view(start_position, length));
- }
+ auto view = input.view.substring_view(start_position, length);
- VERIFY(start_position + length <= input.view.length());
- auto view = input.view.substring_view(start_position, length);
- if (input.regex_options & AllFlags::StringCopyMatches) {
- map.set(capture_group_name, { view.to_string(), input.line, start_position, input.global_offset + start_position }); // create a copy of the original string
- } else {
- map.set(capture_group_name, { view, input.line, start_position, input.global_offset + start_position }); // take view to original string
- }
+ if (input.regex_options & AllFlags::StringCopyMatches) {
+ match = { view.to_string(), name(), input.line, start_position, input.global_offset + start_position }; // create a copy of the original string
} else {
- warnln("Didn't find corresponding capture group match for name={}, match_index={}", capture_group_name.to_string(), input.match_index);
+ match = { view, name(), input.line, start_position, input.global_offset + start_position }; // take view to original string
}
return ExecutionResult::Continue;
@@ -543,24 +511,6 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M
if (!compare_string(input, state, str, had_zero_length_match))
return ExecutionResult::Failed_ExecuteLowPrioForks;
- } else if (compare_type == CharacterCompareType::NamedReference) {
- auto ptr = (char const*)m_bytecode->at(offset++);
- auto length = (size_t)m_bytecode->at(offset++);
- StringView name { ptr, length };
-
- auto group = state.named_capture_group_matches.at(input.match_index).get(name);
- if (!group.has_value())
- return ExecutionResult::Failed_ExecuteLowPrioForks;
-
- auto str = group.value().view;
-
- // We want to compare a string that is definitely longer than the available string
- if (input.view.length() < state.string_position + str.length())
- return ExecutionResult::Failed_ExecuteLowPrioForks;
-
- if (!compare_string(input, state, str, had_zero_length_match))
- return ExecutionResult::Failed_ExecuteLowPrioForks;
-
} else if (compare_type == CharacterCompareType::Property) {
auto property = static_cast<Unicode::Property>(m_bytecode->at(offset++));
compare_property(input, state, property, current_inversion_state(), inverse_matched);
@@ -869,10 +819,6 @@ Vector<String> const OpCode_Compare::variable_arguments_to_string(Optional<Match
buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7]));
}
}
- } else if (compare_type == CharacterCompareType::NamedReference) {
- auto ptr = (char const*)m_bytecode->at(offset++);
- auto length = m_bytecode->at(offset++);
- result.empend(String::formatted("name='{}'", StringView { ptr, (size_t)length }));
} else if (compare_type == CharacterCompareType::Reference) {
auto ref = m_bytecode->at(offset++);
result.empend(String::formatted("number={}", ref));
diff --git a/Userland/Libraries/LibRegex/RegexByteCode.h b/Userland/Libraries/LibRegex/RegexByteCode.h
index c1c7af235b..ab305785e2 100644
--- a/Userland/Libraries/LibRegex/RegexByteCode.h
+++ b/Userland/Libraries/LibRegex/RegexByteCode.h
@@ -32,7 +32,6 @@ using ByteCodeValueType = u64;
__ENUMERATE_OPCODE(FailForks) \
__ENUMERATE_OPCODE(SaveLeftCaptureGroup) \
__ENUMERATE_OPCODE(SaveRightCaptureGroup) \
- __ENUMERATE_OPCODE(SaveLeftNamedCaptureGroup) \
__ENUMERATE_OPCODE(SaveRightNamedCaptureGroup) \
__ENUMERATE_OPCODE(CheckBegin) \
__ENUMERATE_OPCODE(CheckEnd) \
@@ -41,7 +40,6 @@ using ByteCodeValueType = u64;
__ENUMERATE_OPCODE(Restore) \
__ENUMERATE_OPCODE(GoBack) \
__ENUMERATE_OPCODE(ClearCaptureGroup) \
- __ENUMERATE_OPCODE(ClearNamedCaptureGroup) \
__ENUMERATE_OPCODE(Exit)
// clang-format off
@@ -65,7 +63,6 @@ enum class OpCodeId : ByteCodeValueType {
__ENUMERATE_CHARACTER_COMPARE_TYPE(CharClass) \
__ENUMERATE_CHARACTER_COMPARE_TYPE(CharRange) \
__ENUMERATE_CHARACTER_COMPARE_TYPE(Reference) \
- __ENUMERATE_CHARACTER_COMPARE_TYPE(NamedReference) \
__ENUMERATE_CHARACTER_COMPARE_TYPE(Property) \
__ENUMERATE_CHARACTER_COMPARE_TYPE(GeneralCategory) \
__ENUMERATE_CHARACTER_COMPARE_TYPE(Script) \
@@ -159,7 +156,6 @@ public:
VERIFY(value.type != CharacterCompareType::RangeExpressionDummy);
VERIFY(value.type != CharacterCompareType::Undefined);
VERIFY(value.type != CharacterCompareType::String);
- VERIFY(value.type != CharacterCompareType::NamedReference);
arguments.append((ByteCodeValueType)value.type);
if (value.type != CharacterCompareType::Inverse && value.type != CharacterCompareType::AnyChar && value.type != CharacterCompareType::TemporaryInverse)
@@ -187,13 +183,6 @@ public:
empend(index);
}
- void insert_bytecode_clear_named_capture_group(StringView name)
- {
- empend(static_cast<ByteCodeValueType>(OpCodeId::ClearNamedCaptureGroup));
- empend(reinterpret_cast<ByteCodeValueType>(name.characters_without_null_termination()));
- empend(name.length());
- }
-
void insert_bytecode_compare_string(StringView view)
{
ByteCode bytecode;
@@ -212,49 +201,24 @@ public:
extend(move(bytecode));
}
- void insert_bytecode_compare_named_reference(StringView name)
- {
- ByteCode bytecode;
-
- bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::Compare));
- bytecode.empend(static_cast<u64>(1)); // number of arguments
-
- ByteCode arguments;
-
- arguments.empend(static_cast<ByteCodeValueType>(CharacterCompareType::NamedReference));
- arguments.empend(reinterpret_cast<ByteCodeValueType>(name.characters_without_null_termination()));
- arguments.empend(name.length());
-
- bytecode.empend(arguments.size()); // size of arguments
- bytecode.extend(move(arguments));
-
- extend(move(bytecode));
- }
-
void insert_bytecode_group_capture_left(size_t capture_groups_count)
{
empend(static_cast<ByteCodeValueType>(OpCodeId::SaveLeftCaptureGroup));
empend(capture_groups_count);
}
- void insert_bytecode_group_capture_left(StringView const& name)
- {
- empend(static_cast<ByteCodeValueType>(OpCodeId::SaveLeftNamedCaptureGroup));
- empend(reinterpret_cast<ByteCodeValueType>(name.characters_without_null_termination()));
- empend(name.length());
- }
-
void insert_bytecode_group_capture_right(size_t capture_groups_count)
{
empend(static_cast<ByteCodeValueType>(OpCodeId::SaveRightCaptureGroup));
empend(capture_groups_count);
}
- void insert_bytecode_group_capture_right(StringView const& name)
+ void insert_bytecode_group_capture_right(size_t capture_groups_count, StringView const& name)
{
empend(static_cast<ByteCodeValueType>(OpCodeId::SaveRightNamedCaptureGroup));
empend(reinterpret_cast<ByteCodeValueType>(name.characters_without_null_termination()));
empend(name.length());
+ empend(capture_groups_count);
}
enum class LookAroundType {
@@ -655,19 +619,6 @@ public:
String const arguments_string() const override { return String::formatted("id={}", id()); }
};
-class OpCode_ClearNamedCaptureGroup final : public OpCode {
-public:
- ExecutionResult execute(MatchInput const& input, MatchState& state, MatchOutput& output) const override;
- ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::ClearNamedCaptureGroup; }
- ALWAYS_INLINE size_t size() const override { return 3; }
- ALWAYS_INLINE StringView name() const { return { reinterpret_cast<char*>(argument(0)), length() }; }
- ALWAYS_INLINE size_t length() const { return argument(1); }
- String const arguments_string() const override
- {
- return String::formatted("name={}, length={}", name(), length());
- }
-};
-
class OpCode_SaveLeftCaptureGroup final : public OpCode {
public:
ExecutionResult execute(MatchInput const& input, MatchState& state, MatchOutput& output) const override;
@@ -686,26 +637,14 @@ public:
String const arguments_string() const override { return String::formatted("id={}", id()); }
};
-class OpCode_SaveLeftNamedCaptureGroup final : public OpCode {
-public:
- ExecutionResult execute(MatchInput const& input, MatchState& state, MatchOutput& output) const override;
- ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::SaveLeftNamedCaptureGroup; }
- ALWAYS_INLINE size_t size() const override { return 3; }
- ALWAYS_INLINE StringView name() const { return { reinterpret_cast<char*>(argument(0)), length() }; }
- ALWAYS_INLINE size_t length() const { return argument(1); }
- String const arguments_string() const override
- {
- return String::formatted("name={}, length={}", name(), length());
- }
-};
-
class OpCode_SaveRightNamedCaptureGroup final : public OpCode {
public:
ExecutionResult execute(MatchInput const& input, MatchState& state, MatchOutput& output) const override;
ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::SaveRightNamedCaptureGroup; }
- ALWAYS_INLINE size_t size() const override { return 3; }
+ ALWAYS_INLINE size_t size() const override { return 4; }
ALWAYS_INLINE StringView name() const { return { reinterpret_cast<char*>(argument(0)), length() }; }
ALWAYS_INLINE size_t length() const { return argument(1); }
+ ALWAYS_INLINE size_t id() const { return argument(2); }
String const arguments_string() const override
{
return String::formatted("name={}, length={}", name(), length());
diff --git a/Userland/Libraries/LibRegex/RegexMatch.h b/Userland/Libraries/LibRegex/RegexMatch.h
index 599e68451a..98c324b359 100644
--- a/Userland/Libraries/LibRegex/RegexMatch.h
+++ b/Userland/Libraries/LibRegex/RegexMatch.h
@@ -442,11 +442,20 @@ public:
}
Match(String const string_, size_t const line_, size_t const column_, size_t const global_offset_)
- : string(string_)
+ : string(move(string_))
, view(string.value().view())
, line(line_)
, column(column_)
, global_offset(global_offset_)
+ {
+ }
+
+ Match(RegexStringView const view_, StringView capture_group_name_, size_t const line_, size_t const column_, size_t const global_offset_)
+ : view(view_)
+ , capture_group_name(capture_group_name_)
+ , line(line_)
+ , column(column_)
+ , global_offset(global_offset_)
, left_column(column_)
{
}
@@ -454,6 +463,7 @@ public:
void reset()
{
view = view.typed_null_view();
+ capture_group_name.clear();
line = 0;
column = 0;
global_offset = 0;
@@ -461,6 +471,7 @@ public:
}
RegexStringView view { nullptr };
+ Optional<StringView> capture_group_name {};
size_t line { 0 };
size_t column { 0 };
size_t global_offset { 0 };
@@ -494,8 +505,6 @@ struct MatchState {
size_t fork_at_position { 0 };
Vector<Match> matches;
Vector<Vector<Match>> capture_group_matches;
- Vector<HashMap<String, Match>> named_capture_group_matches;
- size_t recursion_level { 0 };
};
struct MatchOutput {
diff --git a/Userland/Libraries/LibRegex/RegexMatcher.cpp b/Userland/Libraries/LibRegex/RegexMatcher.cpp
index c312ca3c81..0d41c9af42 100644
--- a/Userland/Libraries/LibRegex/RegexMatcher.cpp
+++ b/Userland/Libraries/LibRegex/RegexMatcher.cpp
@@ -149,10 +149,7 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona
if (c_match_preallocation_count) {
state.matches.ensure_capacity(c_match_preallocation_count);
state.capture_group_matches.ensure_capacity(c_match_preallocation_count);
- state.named_capture_group_matches.ensure_capacity(c_match_preallocation_count);
-
auto& capture_groups_count = m_pattern->parser_result.capture_groups_count;
- auto& named_capture_groups_count = m_pattern->parser_result.named_capture_groups_count;
for (size_t j = 0; j < c_match_preallocation_count; ++j) {
state.matches.empend();
@@ -160,9 +157,6 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona
state.capture_group_matches.at(j).ensure_capacity(capture_groups_count);
for (size_t k = 0; k < capture_groups_count; ++k)
state.capture_group_matches.at(j).unchecked_append({});
-
- state.named_capture_group_matches.unchecked_append({});
- state.named_capture_group_matches.at(j).ensure_capacity(named_capture_groups_count);
}
}
@@ -315,15 +309,9 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona
matches.template remove_all_matching([](auto& match) { return match.view.is_null(); });
}
- output_copy.named_capture_group_matches = state.named_capture_group_matches;
- // Make sure there are as many capture matches as there are actual matches.
- if (output_copy.named_capture_group_matches.size() < match_count)
- output_copy.named_capture_group_matches.resize(match_count);
-
output_copy.matches = state.matches;
} else {
output_copy.capture_group_matches.clear_with_capacity();
- output_copy.named_capture_group_matches.clear_with_capacity();
}
return {
@@ -331,7 +319,6 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona
match_count,
move(output_copy.matches),
move(output_copy.capture_group_matches),
- move(output_copy.named_capture_group_matches),
output.operations,
m_pattern->parser_result.capture_groups_count,
m_pattern->parser_result.named_capture_groups_count,
@@ -399,9 +386,8 @@ private:
template<class Parser>
Optional<bool> Matcher<Parser>::execute(MatchInput const& input, MatchState& state, MatchOutput& output) const
{
- state.recursion_level = 0;
-
BumpAllocatedLinkedList<MatchState> states_to_try_next;
+ size_t recursion_level = 0;
auto& bytecode = m_pattern->parser_result.bytecode;
@@ -410,7 +396,7 @@ Optional<bool> Matcher<Parser>::execute(MatchInput const& input, MatchState& sta
auto& opcode = bytecode.get_opcode(state);
#if REGEX_DEBUG
- s_regex_dbg.print_opcode("VM", opcode, state, state.recursion_level, false);
+ s_regex_dbg.print_opcode("VM", opcode, state, recursion_level, false);
#endif
ExecutionResult result;
@@ -435,7 +421,7 @@ Optional<bool> Matcher<Parser>::execute(MatchInput const& input, MatchState& sta
case ExecutionResult::Fork_PrioHigh:
states_to_try_next.append(state);
state.instruction_position = state.fork_at_position;
- ++state.recursion_level;
+ ++recursion_level;
continue;
case ExecutionResult::Continue:
continue;
@@ -454,7 +440,7 @@ Optional<bool> Matcher<Parser>::execute(MatchInput const& input, MatchState& sta
return false;
}
state = states_to_try_next.take_last();
- ++state.recursion_level;
+ ++recursion_level;
continue;
}
}
diff --git a/Userland/Libraries/LibRegex/RegexMatcher.h b/Userland/Libraries/LibRegex/RegexMatcher.h
index 8e1ce64e6d..4a1e64b8f4 100644
--- a/Userland/Libraries/LibRegex/RegexMatcher.h
+++ b/Userland/Libraries/LibRegex/RegexMatcher.h
@@ -31,7 +31,6 @@ struct RegexResult final {
size_t count { 0 };
Vector<Match> matches;
Vector<Vector<Match>> capture_group_matches;
- Vector<HashMap<String, Match>> named_capture_group_matches;
size_t n_operations { 0 };
size_t n_capture_groups { 0 };
size_t n_named_capture_groups { 0 };
diff --git a/Userland/Libraries/LibRegex/RegexParser.cpp b/Userland/Libraries/LibRegex/RegexParser.cpp
index 84768cf121..db08d836fc 100644
--- a/Userland/Libraries/LibRegex/RegexParser.cpp
+++ b/Userland/Libraries/LibRegex/RegexParser.cpp
@@ -150,7 +150,6 @@ ALWAYS_INLINE void Parser::reset()
m_parser_state.capture_group_minimum_lengths.clear();
m_parser_state.capture_groups_count = 0;
m_parser_state.named_capture_groups_count = 0;
- m_parser_state.named_capture_group_minimum_lengths.clear();
m_parser_state.named_capture_groups.clear();
}
@@ -780,12 +779,8 @@ ALWAYS_INLINE bool PosixExtendedParser::parse_sub_expression(ByteCode& stack, si
}
}
- if (!(m_parser_state.regex_options & AllFlags::SkipSubExprResults || prevent_capture_group)) {
- if (capture_group_name.has_value())
- bytecode.insert_bytecode_group_capture_left(capture_group_name.value());
- else
- bytecode.insert_bytecode_group_capture_left(m_parser_state.capture_groups_count);
- }
+ if (!(m_parser_state.regex_options & AllFlags::SkipSubExprResults || prevent_capture_group))
+ bytecode.insert_bytecode_group_capture_left(m_parser_state.capture_groups_count);
ByteCode capture_group_bytecode;
@@ -814,12 +809,12 @@ ALWAYS_INLINE bool PosixExtendedParser::parse_sub_expression(ByteCode& stack, si
if (!(m_parser_state.regex_options & AllFlags::SkipSubExprResults || prevent_capture_group)) {
if (capture_group_name.has_value()) {
- bytecode.insert_bytecode_group_capture_right(capture_group_name.value());
+ bytecode.insert_bytecode_group_capture_right(m_parser_state.capture_groups_count, capture_group_name.value());
++m_parser_state.named_capture_groups_count;
} else {
bytecode.insert_bytecode_group_capture_right(m_parser_state.capture_groups_count);
- ++m_parser_state.capture_groups_count;
}
+ ++m_parser_state.capture_groups_count;
}
should_parse_repetition_symbol = true;
break;
@@ -1564,14 +1559,14 @@ bool ECMA262Parser::parse_atom_escape(ByteCode& stack, size_t& match_length_mini
set_error(Error::InvalidNameForCaptureGroup);
return false;
}
- auto maybe_length = m_parser_state.named_capture_group_minimum_lengths.get(name);
- if (!maybe_length.has_value()) {
+ auto maybe_capture_group = m_parser_state.named_capture_groups.get(name);
+ if (!maybe_capture_group.has_value()) {
set_error(Error::InvalidNameForCaptureGroup);
return false;
}
- match_length_minimum += maybe_length.value();
+ match_length_minimum += maybe_capture_group->minimum_length;
- stack.insert_bytecode_compare_named_reference(name);
+ stack.insert_bytecode_compare_values({ { CharacterCompareType::Reference, (ByteCodeValueType)maybe_capture_group->group_index } });
return true;
}
@@ -2121,15 +2116,8 @@ bool ECMA262Parser::parse_capture_group(ByteCode& stack, size_t& match_length_mi
m_capture_groups_in_scope.last().empend(identifier);
};
auto clear_all_capture_groups_in_scope = [&] {
- for (auto& entry : m_capture_groups_in_scope.last()) {
- entry.visit(
- [&](size_t index) {
- stack.insert_bytecode_clear_capture_group(index);
- },
- [&](String const& name) {
- stack.insert_bytecode_clear_named_capture_group(name);
- });
- }
+ for (auto& index : m_capture_groups_in_scope.last())
+ stack.insert_bytecode_clear_capture_group(index);
};
if (match(TokenType::Questionmark)) {
@@ -2172,21 +2160,18 @@ bool ECMA262Parser::parse_capture_group(ByteCode& stack, size_t& match_length_mi
clear_all_capture_groups_in_scope();
exit_capture_group_scope();
- register_capture_group_in_current_scope(name);
register_capture_group_in_current_scope(group_index);
consume(TokenType::RightParen, Error::MismatchingParen);
- stack.insert_bytecode_group_capture_left(name);
stack.insert_bytecode_group_capture_left(group_index);
stack.extend(move(capture_group_bytecode));
- stack.insert_bytecode_group_capture_right(name);
- stack.insert_bytecode_group_capture_right(group_index);
+ stack.insert_bytecode_group_capture_right(group_index, name);
match_length_minimum += length;
- m_parser_state.named_capture_group_minimum_lengths.set(name, length);
m_parser_state.capture_group_minimum_lengths.set(group_index, length);
+ m_parser_state.named_capture_groups.set(name, { group_index, length });
return true;
}
diff --git a/Userland/Libraries/LibRegex/RegexParser.h b/Userland/Libraries/LibRegex/RegexParser.h
index 25a9677189..c1f54be69c 100644
--- a/Userland/Libraries/LibRegex/RegexParser.h
+++ b/Userland/Libraries/LibRegex/RegexParser.h
@@ -88,6 +88,11 @@ protected:
ALWAYS_INLINE bool done() const;
ALWAYS_INLINE bool set_error(Error error);
+ struct NamedCaptureGroup {
+ size_t group_index { 0 };
+ size_t minimum_length { 0 };
+ };
+
struct ParserState {
Lexer& lexer;
Token current_token;
@@ -99,8 +104,7 @@ protected:
size_t match_length_minimum { 0 };
AllOptions regex_options;
HashMap<int, size_t> capture_group_minimum_lengths;
- HashMap<FlyString, size_t> named_capture_group_minimum_lengths;
- HashMap<size_t, FlyString> named_capture_groups;
+ HashMap<FlyString, NamedCaptureGroup> named_capture_groups;
explicit ParserState(Lexer& lexer)
: lexer(lexer)
@@ -258,8 +262,7 @@ private:
// ECMA-262 basically requires that we clear the inner captures of a capture group before trying to match it,
// by requiring that (...)+ only contain the matches for the last iteration.
// To do that, we have to keep track of which capture groups are "in scope", so we can clear them as needed.
- using CaptureGroup = Variant<size_t, String>;
- Vector<Vector<CaptureGroup>> m_capture_groups_in_scope;
+ Vector<Vector<size_t>> m_capture_groups_in_scope;
};
using PosixExtended = PosixExtendedParser;