diff options
author | Timothy Flynn <trflynn89@pm.me> | 2021-07-27 17:47:09 -0400 |
---|---|---|
committer | Andreas Kling <kling@serenityos.org> | 2021-07-28 23:42:29 +0200 |
commit | c45a014645649380e4f0928fba52a07cc1a147a8 (patch) | |
tree | 421188970149a9a6d8783030703d1520b7f4e50d | |
parent | 38adfd8874354a077f359de5fa272dd56bc78984 (diff) | |
download | serenity-c45a014645649380e4f0928fba52a07cc1a147a8.zip |
LibUnicode: Check property list when deciding if a code point is cased
-rw-r--r-- | Tests/LibUnicode/TestUnicodeCharacterTypes.cpp | 8 |
-rw-r--r-- | Userland/Libraries/LibUnicode/CharacterTypes.cpp | 18 |
2 files changed, 23 insertions, 3 deletions
diff --git a/Tests/LibUnicode/TestUnicodeCharacterTypes.cpp b/Tests/LibUnicode/TestUnicodeCharacterTypes.cpp
index 7badcf9e86..ce57b67ac7 100644
--- a/Tests/LibUnicode/TestUnicodeCharacterTypes.cpp
+++ b/Tests/LibUnicode/TestUnicodeCharacterTypes.cpp
@@ -110,6 +110,14 @@ TEST_CASE(to_unicode_lowercase_special_casing_sigma)
     result = Unicode::to_unicode_lowercase_full("A\u03A3"sv);
     EXPECT_EQ(result, "a\u03C2");
 
+    // Sigma preceded by FEMININE ORDINAL INDICATOR
+    result = Unicode::to_unicode_lowercase_full("\u00AA\u03A3"sv);
+    EXPECT_EQ(result, "\u00AA\u03C2");
+
+    // Sigma preceded by ROMAN NUMERAL ONE
+    result = Unicode::to_unicode_lowercase_full("\u2160\u03A3"sv);
+    EXPECT_EQ(result, "\u2170\u03C2");
+
     // Sigma preceded by A and MONGOLIAN VOWEL SEPARATOR
     result = Unicode::to_unicode_lowercase_full("A\u180E\u03A3"sv);
     EXPECT_EQ(result, "a\u180E\u03C2");
diff --git a/Userland/Libraries/LibUnicode/CharacterTypes.cpp b/Userland/Libraries/LibUnicode/CharacterTypes.cpp
index 768f2b874d..b68877b46c 100644
--- a/Userland/Libraries/LibUnicode/CharacterTypes.cpp
+++ b/Userland/Libraries/LibUnicode/CharacterTypes.cpp
@@ -23,18 +23,30 @@ namespace Unicode {
 
 #if ENABLE_UNICODE_DATA
 
+static bool has_property(UnicodeData const& unicode_data, Property property)
+{
+    for (u32 i = 0; i < unicode_data.prop_list_size; ++i) {
+        if (unicode_data.prop_list[i] == property)
+            return true;
+    }
+
+    return false;
+}
+
 static bool is_cased_letter(UnicodeData const& unicode_data)
 {
     // A character C is defined to be cased if and only if C has the Lowercase or Uppercase property
     // or has a General_Category value of Titlecase_Letter.
     switch (unicode_data.general_category) {
-    case GeneralCategory::Ll: // FIXME: Should be Ll + Other_Lowercase (PropList.txt).
-    case GeneralCategory::Lu: // FIXME: Should be Lu + Other_Uppercase (PropList.txt).
+    case GeneralCategory::Ll:
+    case GeneralCategory::Lu:
     case GeneralCategory::Lt:
         return true;
     default:
-        return false;
+        break;
     }
+
+    return has_property(unicode_data, Property::OtherLowercase) || has_property(unicode_data, Property::OtherUppercase);
 }
 
 static bool is_case_ignorable(UnicodeData const& unicode_data)