diff options
author | Timothy Flynn <trflynn89@pm.me> | 2021-09-05 15:03:56 -0400 |
---|---|---|
committer | Linus Groh <mail@linusgroh.de> | 2021-09-06 15:24:27 +0100 |
commit | 436faf9fd97c4d595bb1dc7a47585cc876b19da8 (patch) | |
tree | 0b4a9d972914c388344e5f72f219013d9e7641b4 /Userland/Libraries/LibUnicode | |
parent | 1427ebc622b775885ff45d2f2b684534c2b3350a (diff) | |
download | serenity-436faf9fd97c4d595bb1dc7a47585cc876b19da8.zip |
LibUnicode: Implement locale-aware MORE_ABOVE special casing
Diffstat (limited to 'Userland/Libraries/LibUnicode')
-rw-r--r-- | Userland/Libraries/LibUnicode/CharacterTypes.cpp | 25 |
1 files changed, 25 insertions, 0 deletions
```diff
diff --git a/Userland/Libraries/LibUnicode/CharacterTypes.cpp b/Userland/Libraries/LibUnicode/CharacterTypes.cpp
index b6f3555632..347d2f0c04 100644
--- a/Userland/Libraries/LibUnicode/CharacterTypes.cpp
+++ b/Userland/Libraries/LibUnicode/CharacterTypes.cpp
@@ -115,6 +115,26 @@ static bool is_final_code_point(Utf8View const& string, size_t index, size_t byt
     return true;
 }
 
+static bool is_followed_by_combining_class_above(Utf8View const& string, size_t index, size_t byte_length)
+{
+    // C is followed by a character of combining class 230 (Above) with no intervening character of combining class 0 or 230 (Above).
+    auto following_view = ((index + byte_length) < string.byte_length())
+        ? string.substring_view(index + byte_length)
+        : Utf8View {};
+
+    for (auto code_point : following_view) {
+        auto unicode_data = Detail::unicode_data_for_code_point(code_point);
+        if (!unicode_data.has_value())
+            return false;
+        if (unicode_data->canonical_combining_class == 0)
+            return false;
+        if (unicode_data->canonical_combining_class == 230)
+            return true;
+    }
+
+    return false;
+}
+
 static SpecialCasing const* find_matching_special_case(Utf8View const& string, Optional<StringView> locale, size_t index, size_t byte_length, UnicodeData const& unicode_data)
 {
     auto requested_locale = Locale::None;
@@ -149,6 +169,11 @@ static SpecialCasing const* find_matching_special_case(Utf8View const& string, O
                 return special_casing;
             break;
 
+        case Condition::MoreAbove:
+            if (is_followed_by_combining_class_above(string, index, byte_length))
+                return special_casing;
+            break;
+
         default:
             break;
         }
```