diff options
author | Timothy Flynn <trflynn89@pm.me> | 2023-01-17 08:34:38 -0500 |
---|---|---|
committer | Linus Groh <mail@linusgroh.de> | 2023-01-18 14:43:40 +0000 |
commit | 8f2589b3b07470ebe5773c6e719ede169d894a93 (patch) | |
tree | b044d9ab374db0ff09d6357bca0c978822815ade /Tests | |
parent | 9226cf7272c57d57379a9bfdd1a9a6c6426818be (diff) | |
download | serenity-8f2589b3b07470ebe5773c6e719ede169d894a93.zip |
LibUnicode: Parse and generate case folding code point data
Case folding rules use a mapping style similar to that of special casing rules:
one code point may map to zero or more case folding rules. These rules
will be used for case-insensitive string comparisons. To see how case
folding can differ from the other casing rules, consider how Python treats "ß" (U+00DF):
>>> "ß".lower()
'ß'
>>> "ß".upper()
'SS'
>>> "ß".title()
'Ss'
>>> "ß".casefold()
'ss'
Diffstat (limited to 'Tests')
-rw-r--r-- | Tests/LibUnicode/TestUnicodeCharacterTypes.cpp | 27 |
1 files changed, 27 insertions, 0 deletions
```diff
diff --git a/Tests/LibUnicode/TestUnicodeCharacterTypes.cpp b/Tests/LibUnicode/TestUnicodeCharacterTypes.cpp
index 971461ba8e..cc13963e27 100644
--- a/Tests/LibUnicode/TestUnicodeCharacterTypes.cpp
+++ b/Tests/LibUnicode/TestUnicodeCharacterTypes.cpp
@@ -97,6 +97,33 @@ TEST_CASE(to_unicode_titlecase)
     EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("123dollars"sv)), "123Dollars"sv);
 }
 
+TEST_CASE(to_unicode_casefold)
+{
+    for (u8 code_point = 0; code_point < 0x80; ++code_point) {
+        auto ascii = tolower(code_point);
+        auto unicode = MUST(Unicode::to_unicode_casefold_full({ reinterpret_cast<char const*>(&code_point), 1 }));
+
+        EXPECT_EQ(unicode.bytes_as_string_view().length(), 1u);
+        EXPECT_EQ(unicode.bytes_as_string_view()[0], ascii);
+    }
+
+    // LATIN SMALL LETTER SHARP S
+    auto result = MUST(Unicode::to_unicode_casefold_full("\u00DF"sv));
+    EXPECT_EQ(result, "\u0073\u0073"sv);
+
+    // GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI
+    result = MUST(Unicode::to_unicode_casefold_full("\u1FB3"sv));
+    EXPECT_EQ(result, "\u03B1\u03B9"sv);
+
+    // GREEK SMALL LETTER ALPHA WITH PERISPOMENI
+    result = MUST(Unicode::to_unicode_casefold_full("\u1FB6"sv));
+    EXPECT_EQ(result, "\u03B1\u0342"sv);
+
+    // GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
+    result = MUST(Unicode::to_unicode_casefold_full("\u1FB7"sv));
+    EXPECT_EQ(result, "\u03B1\u0342\u03B9"sv);
+}
+
 TEST_CASE(to_unicode_lowercase_unconditional_special_casing)
 {
     // LATIN SMALL LETTER SHARP S
```