summary refs log tree commit diff
path: root/Userland/Libraries/LibRegex/RegexByteCode.cpp
diff options
context:
space:
mode:
author Timothy Flynn <trflynn89@pm.me> 2021-07-31 17:46:05 -0400
committer Ali Mohammad Pur <Ali.mpfard@gmail.com> 2021-08-02 21:02:09 +0430
commit 1e10d6d7ce70dafd638747fadc6874a47d0448db (patch)
tree ca46edbf4c9e18c6a600bc7649f59a2da32b5327 /Userland/Libraries/LibRegex/RegexByteCode.cpp
parent 5de6d3dd9050fca8c68851d125efb4737aba18cd (diff)
download serenity-1e10d6d7ce70dafd638747fadc6874a47d0448db.zip
LibRegex: Support property escapes of Unicode General Categories
This changes LibRegex to parse the property escape as a Variant of Unicode Property & General Category values. A byte code instruction is added to perform matching based on General Category values.
Diffstat (limited to 'Userland/Libraries/LibRegex/RegexByteCode.cpp')
-rw-r--r-- Userland/Libraries/LibRegex/RegexByteCode.cpp 20
1 files changed, 20 insertions, 0 deletions
diff --git a/Userland/Libraries/LibRegex/RegexByteCode.cpp b/Userland/Libraries/LibRegex/RegexByteCode.cpp
index 329afb1456..20a65b3d0c 100644
--- a/Userland/Libraries/LibRegex/RegexByteCode.cpp
+++ b/Userland/Libraries/LibRegex/RegexByteCode.cpp
@@ -537,6 +537,10 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M
auto property = static_cast<Unicode::Property>(m_bytecode->at(offset++));
compare_property(input, state, property, current_inversion_state(), inverse_matched);
+ } else if (compare_type == CharacterCompareType::GeneralCategory) {
+ auto general_category = static_cast<Unicode::GeneralCategory>(m_bytecode->at(offset++));
+ compare_general_category(input, state, general_category, current_inversion_state(), inverse_matched);
+
} else {
warnln("Undefined comparison: {}", (int)compare_type);
VERIFY_NOT_REACHED();
@@ -742,6 +746,22 @@ ALWAYS_INLINE void OpCode_Compare::compare_property(MatchInput const& input, Mat
}
}
+ALWAYS_INLINE void OpCode_Compare::compare_general_category(MatchInput const& input, MatchState& state, Unicode::GeneralCategory general_category, bool inverse, bool& inverse_matched)
+{ // Tests the input element at state.string_position against general_category, then either advances the position or flags inverse_matched.
+ if (state.string_position == input.view.length()) // Position is at the end of the view: there is no element to read.
+ return; // Neither state nor inverse_matched is modified in this case.
+ // Read the element at the current position as a u32 and query Unicode::code_point_has_general_category for it.
+ u32 code_point = input.view[state.string_position];
+ bool equal = Unicode::code_point_has_general_category(code_point, general_category);
+ // When the code point does not have the category, nothing below runs and both outputs are left unchanged.
+ if (equal) {
+ if (inverse)
+ inverse_matched = true; // Inverted comparison: only record the hit; the position is not advanced here.
+ else
+ ++state.string_position; // Non-inverted comparison: step the position forward by one index.
+ }
+}
+
String const OpCode_Compare::arguments_string() const
{
return String::formatted("argc={}, args={} ", arguments_count(), arguments_size());