summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Timothy Flynn <trflynn89@pm.me> 2021-07-27 17:47:09 -0400
committer Andreas Kling <kling@serenityos.org> 2021-07-28 23:42:29 +0200
commit c45a014645649380e4f0928fba52a07cc1a147a8 (patch)
tree 421188970149a9a6d8783030703d1520b7f4e50d
parent 38adfd8874354a077f359de5fa272dd56bc78984 (diff)
download serenity-c45a014645649380e4f0928fba52a07cc1a147a8.zip
LibUnicode: Check property list when deciding if a code point is cased
-rw-r--r-- Tests/LibUnicode/TestUnicodeCharacterTypes.cpp 8
-rw-r--r-- Userland/Libraries/LibUnicode/CharacterTypes.cpp 18
2 files changed, 23 insertions, 3 deletions
diff --git a/Tests/LibUnicode/TestUnicodeCharacterTypes.cpp b/Tests/LibUnicode/TestUnicodeCharacterTypes.cpp
index 7badcf9e86..ce57b67ac7 100644
--- a/Tests/LibUnicode/TestUnicodeCharacterTypes.cpp
+++ b/Tests/LibUnicode/TestUnicodeCharacterTypes.cpp
@@ -110,6 +110,14 @@ TEST_CASE(to_unicode_lowercase_special_casing_sigma)
result = Unicode::to_unicode_lowercase_full("A\u03A3"sv);
EXPECT_EQ(result, "a\u03C2");
+ // Sigma preceded by FEMININE ORDINAL INDICATOR
+ result = Unicode::to_unicode_lowercase_full("\u00AA\u03A3"sv);
+ EXPECT_EQ(result, "\u00AA\u03C2");
+
+ // Sigma preceded by ROMAN NUMERAL ONE
+ result = Unicode::to_unicode_lowercase_full("\u2160\u03A3"sv);
+ EXPECT_EQ(result, "\u2170\u03C2");
+
// Sigma preceded by A and MONGOLIAN VOWEL SEPARATOR
result = Unicode::to_unicode_lowercase_full("A\u180E\u03A3"sv);
EXPECT_EQ(result, "a\u180E\u03C2");
diff --git a/Userland/Libraries/LibUnicode/CharacterTypes.cpp b/Userland/Libraries/LibUnicode/CharacterTypes.cpp
index 768f2b874d..b68877b46c 100644
--- a/Userland/Libraries/LibUnicode/CharacterTypes.cpp
+++ b/Userland/Libraries/LibUnicode/CharacterTypes.cpp
@@ -23,18 +23,30 @@ namespace Unicode {
#if ENABLE_UNICODE_DATA
+static bool has_property(UnicodeData const& unicode_data, Property property)
+{
+ for (u32 i = 0; i < unicode_data.prop_list_size; ++i) {
+ if (unicode_data.prop_list[i] == property)
+ return true;
+ }
+
+ return false;
+}
+
static bool is_cased_letter(UnicodeData const& unicode_data)
{
// A character C is defined to be cased if and only if C has the Lowercase or Uppercase property
// or has a General_Category value of Titlecase_Letter.
switch (unicode_data.general_category) {
- case GeneralCategory::Ll: // FIXME: Should be Ll + Other_Lowercase (PropList.txt).
- case GeneralCategory::Lu: // FIXME: Should be Lu + Other_Uppercase (PropList.txt).
+ case GeneralCategory::Ll:
+ case GeneralCategory::Lu:
case GeneralCategory::Lt:
return true;
default:
- return false;
+ break;
}
+
+ return has_property(unicode_data, Property::OtherLowercase) || has_property(unicode_data, Property::OtherUppercase);
}
static bool is_case_ignorable(UnicodeData const& unicode_data)