diff options
author | Timothy Flynn <trflynn89@pm.me> | 2023-01-16 11:22:01 -0500 |
---|---|---|
committer | Tim Flynn <trflynn89@pm.me> | 2023-01-16 18:33:44 -0500 |
commit | bc51017a03087057dc8e8f437b4049f2ab7ebba1 (patch) | |
tree | 043d1b7c361016bdd7fb86d1669252312fef261b /Tests | |
parent | b562348d316cbd3a646f33490809c356c591265d (diff) | |
download | serenity-bc51017a03087057dc8e8f437b4049f2ab7ebba1.zip |
LibUnicode: Support full case folding for titlecasing a string
Unicode declares that to titlecase a string, the first cased code point
after each word boundary should be transformed to its titlecase mapping.
All other code points are transformed to their lowercase mapping.
Diffstat (limited to 'Tests')
-rw-r--r-- | Tests/LibUnicode/TestUnicodeCharacterTypes.cpp | 93 |
1 files changed, 93 insertions, 0 deletions
diff --git a/Tests/LibUnicode/TestUnicodeCharacterTypes.cpp b/Tests/LibUnicode/TestUnicodeCharacterTypes.cpp index dd6f5c4654..971461ba8e 100644 --- a/Tests/LibUnicode/TestUnicodeCharacterTypes.cpp +++ b/Tests/LibUnicode/TestUnicodeCharacterTypes.cpp @@ -74,6 +74,27 @@ TEST_CASE(to_unicode_titlecase) EXPECT_EQ(Unicode::to_unicode_titlecase(0x01c9u), 0x01c8u); // "lj" to "Lj" EXPECT_EQ(Unicode::to_unicode_titlecase(0x01ccu), 0x01cbu); // "nj" to "Nj" EXPECT_EQ(Unicode::to_unicode_titlecase(0x01f3u), 0x01f2u); // "dz" to "Dz" + + EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full(""sv)), ""sv); + EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full(" "sv)), " "sv); + EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full(" - "sv)), " - "sv); + + EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("a"sv)), "A"sv); + EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("A"sv)), "A"sv); + EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full(" a"sv)), " A"sv); + EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("a "sv)), "A "sv); + + EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("ab"sv)), "Ab"sv); + EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("Ab"sv)), "Ab"sv); + EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("aB"sv)), "Ab"sv); + EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("AB"sv)), "Ab"sv); + EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full(" ab"sv)), " Ab"sv); + EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("ab "sv)), "Ab "sv); + + EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("foo bar baz"sv)), "Foo Bar Baz"sv); + EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("foo \n \r bar \t baz"sv)), "Foo \n \r Bar \t Baz"sv); + EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("f\"oo\" b'ar'"sv)), "F\"Oo\" B'Ar'"sv); + EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("123dollars"sv)), "123Dollars"sv); } TEST_CASE(to_unicode_lowercase_unconditional_special_casing) @@ -382,6 +403,78 @@ TEST_CASE(to_unicode_uppercase_special_casing_soft_dotted) EXPECT_EQ(result, 
"J"sv); } +TEST_CASE(to_unicode_titlecase_unconditional_special_casing) +{ + // LATIN SMALL LETTER SHARP S + auto result = MUST(Unicode::to_unicode_titlecase_full("\u00DF"sv)); + EXPECT_EQ(result, "\u0053\u0073"sv); + + // LATIN CAPITAL LETTER I WITH DOT ABOVE + result = MUST(Unicode::to_unicode_titlecase_full("\u0130"sv)); + EXPECT_EQ(result, "\u0130"sv); + + // LATIN SMALL LIGATURE FF + result = MUST(Unicode::to_unicode_titlecase_full("\uFB00"sv)); + EXPECT_EQ(result, "\u0046\u0066"sv); + + // LATIN SMALL LIGATURE FI + result = MUST(Unicode::to_unicode_titlecase_full("\uFB01"sv)); + EXPECT_EQ(result, "\u0046\u0069"sv); + + // LATIN SMALL LIGATURE FL + result = MUST(Unicode::to_unicode_titlecase_full("\uFB02"sv)); + EXPECT_EQ(result, "\u0046\u006C"sv); + + // LATIN SMALL LIGATURE FFI + result = MUST(Unicode::to_unicode_titlecase_full("\uFB03"sv)); + EXPECT_EQ(result, "\u0046\u0066\u0069"sv); + + // LATIN SMALL LIGATURE FFL + result = MUST(Unicode::to_unicode_titlecase_full("\uFB04"sv)); + EXPECT_EQ(result, "\u0046\u0066\u006C"sv); + + // LATIN SMALL LIGATURE LONG S T + result = MUST(Unicode::to_unicode_titlecase_full("\uFB05"sv)); + EXPECT_EQ(result, "\u0053\u0074"sv); + + // LATIN SMALL LIGATURE ST + result = MUST(Unicode::to_unicode_titlecase_full("\uFB06"sv)); + EXPECT_EQ(result, "\u0053\u0074"sv); + + // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS + result = MUST(Unicode::to_unicode_titlecase_full("\u0390"sv)); + EXPECT_EQ(result, "\u0399\u0308\u0301"sv); + + // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + result = MUST(Unicode::to_unicode_titlecase_full("\u03B0"sv)); + EXPECT_EQ(result, "\u03A5\u0308\u0301"sv); + + // GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI + result = MUST(Unicode::to_unicode_titlecase_full("\u1FB7"sv)); + EXPECT_EQ(result, "\u0391\u0342\u0345"sv); + + // GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI + result = MUST(Unicode::to_unicode_titlecase_full("\u1FC7"sv)); + EXPECT_EQ(result, 
"\u0397\u0342\u0345"sv); + + // GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI + result = MUST(Unicode::to_unicode_titlecase_full("\u1FF7"sv)); + EXPECT_EQ(result, "\u03A9\u0342\u0345"sv); +} + +TEST_CASE(to_unicode_titlecase_special_casing_i) +{ + // LATIN SMALL LETTER I + auto result = MUST(Unicode::to_unicode_titlecase_full("i"sv, "en"sv)); + EXPECT_EQ(result, "I"sv); + + result = MUST(Unicode::to_unicode_titlecase_full("i"sv, "az"sv)); + EXPECT_EQ(result, "\u0130"sv); + + result = MUST(Unicode::to_unicode_titlecase_full("i"sv, "tr"sv)); + EXPECT_EQ(result, "\u0130"sv); +} + TEST_CASE(general_category) { auto general_category = [](StringView name) { |