summaryrefslogtreecommitdiff
path: root/Userland/Libraries/LibRegex/RegexByteCode.h
diff options
context:
space:
mode:
authorAli Mohammad Pur <ali.mpfard@gmail.com>2021-10-03 19:01:25 +0330
committerAndreas Kling <kling@serenityos.org>2021-10-03 19:16:36 +0200
commit8f722302d9c038ae9c307bc553e55b8dbbcff7ed (patch)
tree0fa7cafcc822db448c6ffbe422d824f17a8e4989 /Userland/Libraries/LibRegex/RegexByteCode.h
parent478b36c37b887d6d33915738111e9063778ceab2 (diff)
downloadserenity-8f722302d9c038ae9c307bc553e55b8dbbcff7ed.zip
LibRegex: Use a match table for character classes
Generate a sorted, compressed series of ranges in a match table for character classes, and use a binary search to find the matches. This is about a 3-4x speedup for character class match performance. :^)
Diffstat (limited to 'Userland/Libraries/LibRegex/RegexByteCode.h')
-rw-r--r--Userland/Libraries/LibRegex/RegexByteCode.h52
1 files changed, 17 insertions, 35 deletions
diff --git a/Userland/Libraries/LibRegex/RegexByteCode.h b/Userland/Libraries/LibRegex/RegexByteCode.h
index cea60b9fcf..950f668645 100644
--- a/Userland/Libraries/LibRegex/RegexByteCode.h
+++ b/Userland/Libraries/LibRegex/RegexByteCode.h
@@ -61,21 +61,22 @@ enum class OpCodeId : ByteCodeValueType {
};
// clang-format on
-#define ENUMERATE_CHARACTER_COMPARE_TYPES \
- __ENUMERATE_CHARACTER_COMPARE_TYPE(Undefined) \
- __ENUMERATE_CHARACTER_COMPARE_TYPE(Inverse) \
- __ENUMERATE_CHARACTER_COMPARE_TYPE(TemporaryInverse) \
- __ENUMERATE_CHARACTER_COMPARE_TYPE(AnyChar) \
- __ENUMERATE_CHARACTER_COMPARE_TYPE(Char) \
- __ENUMERATE_CHARACTER_COMPARE_TYPE(String) \
- __ENUMERATE_CHARACTER_COMPARE_TYPE(CharClass) \
- __ENUMERATE_CHARACTER_COMPARE_TYPE(CharRange) \
- __ENUMERATE_CHARACTER_COMPARE_TYPE(Reference) \
- __ENUMERATE_CHARACTER_COMPARE_TYPE(Property) \
- __ENUMERATE_CHARACTER_COMPARE_TYPE(GeneralCategory) \
- __ENUMERATE_CHARACTER_COMPARE_TYPE(Script) \
- __ENUMERATE_CHARACTER_COMPARE_TYPE(ScriptExtension) \
- __ENUMERATE_CHARACTER_COMPARE_TYPE(RangeExpressionDummy)
+#define ENUMERATE_CHARACTER_COMPARE_TYPES \
+ __ENUMERATE_CHARACTER_COMPARE_TYPE(Undefined) \
+ __ENUMERATE_CHARACTER_COMPARE_TYPE(Inverse) \
+ __ENUMERATE_CHARACTER_COMPARE_TYPE(TemporaryInverse) \
+ __ENUMERATE_CHARACTER_COMPARE_TYPE(AnyChar) \
+ __ENUMERATE_CHARACTER_COMPARE_TYPE(Char) \
+ __ENUMERATE_CHARACTER_COMPARE_TYPE(String) \
+ __ENUMERATE_CHARACTER_COMPARE_TYPE(CharClass) \
+ __ENUMERATE_CHARACTER_COMPARE_TYPE(CharRange) \
+ __ENUMERATE_CHARACTER_COMPARE_TYPE(Reference) \
+ __ENUMERATE_CHARACTER_COMPARE_TYPE(Property) \
+ __ENUMERATE_CHARACTER_COMPARE_TYPE(GeneralCategory) \
+ __ENUMERATE_CHARACTER_COMPARE_TYPE(Script) \
+ __ENUMERATE_CHARACTER_COMPARE_TYPE(ScriptExtension) \
+ __ENUMERATE_CHARACTER_COMPARE_TYPE(RangeExpressionDummy) \
+ __ENUMERATE_CHARACTER_COMPARE_TYPE(LookupTable)
enum class CharacterCompareType : ByteCodeValueType {
#define __ENUMERATE_CHARACTER_COMPARE_TYPE(x) x,
@@ -186,26 +187,7 @@ public:
void insert_bytecode_compare_values(Vector<CompareTypeAndValuePair>&& pairs)
{
- ByteCode bytecode;
-
- bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::Compare));
- bytecode.empend(pairs.size()); // number of arguments
-
- ByteCode arguments;
- for (auto& value : pairs) {
- VERIFY(value.type != CharacterCompareType::RangeExpressionDummy);
- VERIFY(value.type != CharacterCompareType::Undefined);
- VERIFY(value.type != CharacterCompareType::String);
-
- arguments.append((ByteCodeValueType)value.type);
- if (value.type != CharacterCompareType::Inverse && value.type != CharacterCompareType::AnyChar && value.type != CharacterCompareType::TemporaryInverse)
- arguments.append(move(value.value));
- }
-
- bytecode.empend(arguments.size()); // size of arguments
- bytecode.extend(move(arguments));
-
- extend(move(bytecode));
+ Optimizer::append_character_class(*this, move(pairs));
}
void insert_bytecode_check_boundary(BoundaryCheckType type)