diff options
author | Timothy Flynn <trflynn89@pm.me> | 2021-07-31 17:46:05 -0400 |
---|---|---|
committer | Ali Mohammad Pur <Ali.mpfard@gmail.com> | 2021-08-02 21:02:09 +0430 |
commit | 1e10d6d7ce70dafd638747fadc6874a47d0448db (patch) | |
tree | ca46edbf4c9e18c6a600bc7649f59a2da32b5327 /Userland/Libraries/LibRegex/RegexByteCode.cpp | |
parent | 5de6d3dd9050fca8c68851d125efb4737aba18cd (diff) | |
download | serenity-1e10d6d7ce70dafd638747fadc6874a47d0448db.zip |
LibRegex: Support property escapes of Unicode General Categories
This changes LibRegex to parse the property escape as a Variant of
Unicode Property & General Category values. A byte code instruction is
added to perform matching based on General Category values.
Diffstat (limited to 'Userland/Libraries/LibRegex/RegexByteCode.cpp')
-rw-r--r-- | Userland/Libraries/LibRegex/RegexByteCode.cpp | 20 |
1 file changed, 20 insertions, 0 deletions
diff --git a/Userland/Libraries/LibRegex/RegexByteCode.cpp b/Userland/Libraries/LibRegex/RegexByteCode.cpp
index 329afb1456..20a65b3d0c 100644
--- a/Userland/Libraries/LibRegex/RegexByteCode.cpp
+++ b/Userland/Libraries/LibRegex/RegexByteCode.cpp
@@ -537,6 +537,10 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M
             auto property = static_cast<Unicode::Property>(m_bytecode->at(offset++));
             compare_property(input, state, property, current_inversion_state(), inverse_matched);
 
+        } else if (compare_type == CharacterCompareType::GeneralCategory) {
+            auto general_category = static_cast<Unicode::GeneralCategory>(m_bytecode->at(offset++));
+            compare_general_category(input, state, general_category, current_inversion_state(), inverse_matched);
+
         } else {
             warnln("Undefined comparison: {}", (int)compare_type);
             VERIFY_NOT_REACHED();
@@ -742,6 +746,22 @@ ALWAYS_INLINE void OpCode_Compare::compare_property(MatchInput const& input, Mat
     }
 }
 
+ALWAYS_INLINE void OpCode_Compare::compare_general_category(MatchInput const& input, MatchState& state, Unicode::GeneralCategory general_category, bool inverse, bool& inverse_matched)
+{
+    if (state.string_position == input.view.length())
+        return;
+
+    u32 code_point = input.view[state.string_position];
+    bool equal = Unicode::code_point_has_general_category(code_point, general_category);
+
+    if (equal) {
+        if (inverse)
+            inverse_matched = true;
+        else
+            ++state.string_position;
+    }
+}
+
 String const OpCode_Compare::arguments_string() const
 {
     return String::formatted("argc={}, args={} ", arguments_count(), arguments_size());