summaryrefslogtreecommitdiff
path: root/Userland/Libraries/LibRegex
diff options
context:
space:
mode:
authorAli Mohammad Pur <ali.mpfard@gmail.com>2023-02-15 10:14:13 +0330
committerAndreas Kling <kling@serenityos.org>2023-02-15 10:14:26 +0100
commit7f530c075384a7d868736cec67bf6853586a7752 (patch)
tree3c8adef6c13e92b9dd369c12fc5825c9eeacb915 /Userland/Libraries/LibRegex
parentaf441bb939493671aa89db2f8f1ee6daf6868acc (diff)
downloadserenity-7f530c075384a7d868736cec67bf6853586a7752.zip
LibRegex: Bail out of atomic rewrite if a block doesn't contain compares
If a block jumps before performing a compare, we'd need to recursively find the first of the jumped-to block. While this is doable, it's not really worth spending the time as most such cases won't actually qualify for atomic loop rewrite anyway. Fixes an invalid rewrite when `.+` is followed by an alternation, e.g. /.+(a|b|c)/.
Diffstat (limited to 'Userland/Libraries/LibRegex')
-rw-r--r--Userland/Libraries/LibRegex/RegexOptimizer.cpp20
1 files changed, 19 insertions, 1 deletions
diff --git a/Userland/Libraries/LibRegex/RegexOptimizer.cpp b/Userland/Libraries/LibRegex/RegexOptimizer.cpp
index fbb55e4ac4..337e3d3529 100644
--- a/Userland/Libraries/LibRegex/RegexOptimizer.cpp
+++ b/Userland/Libraries/LibRegex/RegexOptimizer.cpp
@@ -303,10 +303,12 @@ static AtomicRewritePreconditionResult block_satisfies_atomic_rewrite_preconditi
Vector<Vector<CompareTypeAndValuePair>> repeated_values;
HashTable<size_t> active_capture_groups;
MatchState state;
+ auto has_seen_actionable_opcode = false;
for (state.instruction_position = repeated_block.start; state.instruction_position < repeated_block.end;) {
auto& opcode = bytecode.get_opcode(state);
switch (opcode.opcode_id()) {
case OpCodeId::Compare: {
+ has_seen_actionable_opcode = true;
auto compares = static_cast<OpCode_Compare const&>(opcode).flat_compares();
if (repeated_values.is_empty() && any_of(compares, [](auto& compare) { return compare.type == CharacterCompareType::AnyChar; }))
return AtomicRewritePreconditionResult::NotSatisfied;
@@ -315,6 +317,7 @@ static AtomicRewritePreconditionResult block_satisfies_atomic_rewrite_preconditi
}
case OpCodeId::CheckBegin:
case OpCodeId::CheckEnd:
+ has_seen_actionable_opcode = true;
if (repeated_values.is_empty())
return AtomicRewritePreconditionResult::SatisfiedWithProperHeader;
break;
@@ -330,6 +333,13 @@ static AtomicRewritePreconditionResult block_satisfies_atomic_rewrite_preconditi
case OpCodeId::SaveLeftCaptureGroup:
active_capture_groups.set(static_cast<OpCode_SaveLeftCaptureGroup const&>(opcode).id());
break;
+ case OpCodeId::ForkJump:
+ case OpCodeId::ForkReplaceJump:
+ case OpCodeId::JumpNonEmpty:
+ // We could attempt to recursively resolve the follow set, but pretending that this just goes nowhere is faster.
+ if (!has_seen_actionable_opcode)
+ return AtomicRewritePreconditionResult::NotSatisfied;
+ break;
default:
break;
}
@@ -375,6 +385,13 @@ static AtomicRewritePreconditionResult block_satisfies_atomic_rewrite_preconditi
case OpCodeId::CheckBoundary:
// FIXME: What should we do with these? For now, consider them a failure.
return AtomicRewritePreconditionResult::NotSatisfied;
+ case OpCodeId::ForkJump:
+ case OpCodeId::ForkReplaceJump:
+ case OpCodeId::JumpNonEmpty:
+ // See note in the previous switch, same cases.
+ if (!following_block_has_at_least_one_compare)
+ return AtomicRewritePreconditionResult::NotSatisfied;
+ break;
default:
break;
}
@@ -484,7 +501,8 @@ void Regex<Parser>::attempt_rewrite_loops_as_atomic_groups(BasicBlockList const&
if (is_an_eligible_jump(opcode, state.instruction_position, forking_block.start, AlternateForm::DirectLoopWithoutHeader)) {
// We've found RE0 (and RE1 is just the following block, if any), let's see if the precondition applies.
// if RE1 is empty, there's no first(RE1), so this is an automatic pass.
- if (!fork_fallback_block.has_value() || fork_fallback_block->end == fork_fallback_block->start) {
+ if (!fork_fallback_block.has_value()
+ || (fork_fallback_block->end == fork_fallback_block->start && block_satisfies_atomic_rewrite_precondition(bytecode, forking_block, *fork_fallback_block) != AtomicRewritePreconditionResult::NotSatisfied)) {
candidate_blocks.append({ forking_block, fork_fallback_block, AlternateForm::DirectLoopWithoutHeader });
break;
}