2 files changed, 62 insertions, 0 deletions
diff --git a/Tests/LibUnicode/TestUnicodeCharacterTypes.cpp b/Tests/LibUnicode/TestUnicodeCharacterTypes.cpp
index eff0b89286..18cff2bdcb 100644
--- a/Tests/LibUnicode/TestUnicodeCharacterTypes.cpp
+++ b/Tests/LibUnicode/TestUnicodeCharacterTypes.cpp
@@ -257,6 +257,37 @@ TEST_CASE(to_unicode_uppercase_unconditional_special_casing)
     EXPECT_EQ(result, "\u03A9\u0342\u0399");
 }
 
+TEST_CASE(to_unicode_uppercase_special_casing_soft_dotted)
+{
+    // A lone LATIN SMALL LETTER I uppercases to "I" under both the "en" and "lt" locales.
+    auto const plain_i_en = Unicode::to_unicode_uppercase_full("i"sv, "en"sv);
+    EXPECT_EQ(plain_i_en, "I"sv);
+
+    auto const plain_i_lt = Unicode::to_unicode_uppercase_full("i"sv, "lt"sv);
+    EXPECT_EQ(plain_i_lt, "I"sv);
+
+    // A lone LATIN SMALL LETTER J uppercases to "J" under both locales as well.
+    auto const plain_j_en = Unicode::to_unicode_uppercase_full("j"sv, "en"sv);
+    EXPECT_EQ(plain_j_en, "J"sv);
+
+    auto const plain_j_lt = Unicode::to_unicode_uppercase_full("j"sv, "lt"sv);
+    EXPECT_EQ(plain_j_lt, "J"sv);
+
+    // LATIN SMALL LETTER I + COMBINING DOT ABOVE: "en" keeps the combining mark, "lt" is expected to drop it.
+    auto const dotted_i_en = Unicode::to_unicode_uppercase_full("i\u0307"sv, "en"sv);
+    EXPECT_EQ(dotted_i_en, "I\u0307"sv);
+
+    auto const dotted_i_lt = Unicode::to_unicode_uppercase_full("i\u0307"sv, "lt"sv);
+    EXPECT_EQ(dotted_i_lt, "I"sv);
+
+    // LATIN SMALL LETTER J + COMBINING DOT ABOVE: "en" keeps the combining mark, "lt" is expected to drop it.
+    auto const dotted_j_en = Unicode::to_unicode_uppercase_full("j\u0307"sv, "en"sv);
+    EXPECT_EQ(dotted_j_en, "J\u0307"sv);
+
+    auto const dotted_j_lt = Unicode::to_unicode_uppercase_full("j\u0307"sv, "lt"sv);
+    EXPECT_EQ(dotted_j_lt, "J"sv);
+}
+
 TEST_CASE(general_category)
 {
     auto general_category = [](StringView name) {
diff --git a/Userland/Libraries/LibUnicode/CharacterTypes.cpp b/Userland/Libraries/LibUnicode/CharacterTypes.cpp
index b566416c08..b6f3555632 100644
--- a/Userland/Libraries/LibUnicode/CharacterTypes.cpp
+++ b/Userland/Libraries/LibUnicode/CharacterTypes.cpp
@@ -49,6 +49,32 @@ static bool is_after_uppercase_i(Utf8View const& string, size_t index)
     return found_uppercase_i;
 }
 
+static bool is_after_soft_dotted_code_point(Utf8View const& string, size_t index)
+{
+    // There is a Soft_Dotted character before C, with no intervening character of combining class 0 or 230 (Above).
+    // Only the part of `string` before `index` (where C is expected to sit) is examined.
+    auto preceding_view = string.substring_view(0, index);
+    bool found_soft_dotted_code_point = false;
+
+    // FIXME: Would be better if Utf8View supported reverse iteration.
+    for (auto code_point : preceding_view) {
+        if (code_point_has_property(code_point, Property::Soft_Dotted)) {
+            found_soft_dotted_code_point = true;
+            continue;
+        }
+
+        // When no data is available for a code point, treat it as combining class 0 (the Unicode default). It then
+        // only cancels a Soft_Dotted code point seen before it; one that appears later, closer to C, still counts.
+        auto unicode_data = Detail::unicode_data_for_code_point(code_point);
+        auto combining_class = unicode_data.has_value() ? unicode_data->canonical_combining_class : 0;
+
+        if (combining_class == 0 || combining_class == 230)
+            found_soft_dotted_code_point = false;
+    }
+
+    return found_soft_dotted_code_point;
+}
+
 static bool is_final_code_point(Utf8View const& string, size_t index, size_t byte_length)
 {
     // C is preceded by a sequence consisting of a cased letter and then zero or more case-ignorable
@@ -113,6 +139,11 @@ static SpecialCasing const* find_matching_special_case(Utf8View const& string, O
                 return special_casing;
             break;
 
+        case Condition::AfterSoftDotted:
+            if (is_after_soft_dotted_code_point(string, index))
+                return special_casing;
+            break;
+
         case Condition::FinalSigma:
             if (is_final_code_point(string, index, byte_length))
                 return special_casing;