diff options
author | Ali Mohammad Pur <ali.mpfard@gmail.com> | 2021-10-03 19:01:25 +0330 |
---|---|---|
committer | Andreas Kling <kling@serenityos.org> | 2021-10-03 19:16:36 +0200 |
commit | 8f722302d9c038ae9c307bc553e55b8dbbcff7ed (patch) | |
tree | 0fa7cafcc822db448c6ffbe422d824f17a8e4989 /Userland/Libraries/LibRegex/RegexByteCode.h | |
parent | 478b36c37b887d6d33915738111e9063778ceab2 (diff) | |
download | serenity-8f722302d9c038ae9c307bc553e55b8dbbcff7ed.zip |
LibRegex: Use a match table for character classes
Generate a sorted, compressed series of ranges in a match table for
character classes, and use a binary search to find the matches.
This is about a 3-4x speedup for character class match performance. :^)
Diffstat (limited to 'Userland/Libraries/LibRegex/RegexByteCode.h')
-rw-r--r-- | Userland/Libraries/LibRegex/RegexByteCode.h | 52 |
1 files changed, 17 insertions, 35 deletions
diff --git a/Userland/Libraries/LibRegex/RegexByteCode.h b/Userland/Libraries/LibRegex/RegexByteCode.h index cea60b9fcf..950f668645 100644 --- a/Userland/Libraries/LibRegex/RegexByteCode.h +++ b/Userland/Libraries/LibRegex/RegexByteCode.h @@ -61,21 +61,22 @@ enum class OpCodeId : ByteCodeValueType { }; // clang-format on -#define ENUMERATE_CHARACTER_COMPARE_TYPES \ - __ENUMERATE_CHARACTER_COMPARE_TYPE(Undefined) \ - __ENUMERATE_CHARACTER_COMPARE_TYPE(Inverse) \ - __ENUMERATE_CHARACTER_COMPARE_TYPE(TemporaryInverse) \ - __ENUMERATE_CHARACTER_COMPARE_TYPE(AnyChar) \ - __ENUMERATE_CHARACTER_COMPARE_TYPE(Char) \ - __ENUMERATE_CHARACTER_COMPARE_TYPE(String) \ - __ENUMERATE_CHARACTER_COMPARE_TYPE(CharClass) \ - __ENUMERATE_CHARACTER_COMPARE_TYPE(CharRange) \ - __ENUMERATE_CHARACTER_COMPARE_TYPE(Reference) \ - __ENUMERATE_CHARACTER_COMPARE_TYPE(Property) \ - __ENUMERATE_CHARACTER_COMPARE_TYPE(GeneralCategory) \ - __ENUMERATE_CHARACTER_COMPARE_TYPE(Script) \ - __ENUMERATE_CHARACTER_COMPARE_TYPE(ScriptExtension) \ - __ENUMERATE_CHARACTER_COMPARE_TYPE(RangeExpressionDummy) +#define ENUMERATE_CHARACTER_COMPARE_TYPES \ + __ENUMERATE_CHARACTER_COMPARE_TYPE(Undefined) \ + __ENUMERATE_CHARACTER_COMPARE_TYPE(Inverse) \ + __ENUMERATE_CHARACTER_COMPARE_TYPE(TemporaryInverse) \ + __ENUMERATE_CHARACTER_COMPARE_TYPE(AnyChar) \ + __ENUMERATE_CHARACTER_COMPARE_TYPE(Char) \ + __ENUMERATE_CHARACTER_COMPARE_TYPE(String) \ + __ENUMERATE_CHARACTER_COMPARE_TYPE(CharClass) \ + __ENUMERATE_CHARACTER_COMPARE_TYPE(CharRange) \ + __ENUMERATE_CHARACTER_COMPARE_TYPE(Reference) \ + __ENUMERATE_CHARACTER_COMPARE_TYPE(Property) \ + __ENUMERATE_CHARACTER_COMPARE_TYPE(GeneralCategory) \ + __ENUMERATE_CHARACTER_COMPARE_TYPE(Script) \ + __ENUMERATE_CHARACTER_COMPARE_TYPE(ScriptExtension) \ + __ENUMERATE_CHARACTER_COMPARE_TYPE(RangeExpressionDummy) \ + __ENUMERATE_CHARACTER_COMPARE_TYPE(LookupTable) enum class CharacterCompareType : ByteCodeValueType { #define __ENUMERATE_CHARACTER_COMPARE_TYPE(x) x, @@ -186,26 +187,7 @@ public: void insert_bytecode_compare_values(Vector<CompareTypeAndValuePair>&& pairs) { - ByteCode bytecode; - - bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::Compare)); - bytecode.empend(pairs.size()); // number of arguments - - ByteCode arguments; - for (auto& value : pairs) { - VERIFY(value.type != CharacterCompareType::RangeExpressionDummy); - VERIFY(value.type != CharacterCompareType::Undefined); - VERIFY(value.type != CharacterCompareType::String); - - arguments.append((ByteCodeValueType)value.type); - if (value.type != CharacterCompareType::Inverse && value.type != CharacterCompareType::AnyChar && value.type != CharacterCompareType::TemporaryInverse) - arguments.append(move(value.value)); - } - - bytecode.empend(arguments.size()); // size of arguments - bytecode.extend(move(arguments)); - - extend(move(bytecode)); + Optimizer::append_character_class(*this, move(pairs)); } void insert_bytecode_check_boundary(BoundaryCheckType type) |