diff options
author | Timothy Flynn <trflynn89@pm.me> | 2021-08-10 15:00:05 -0400 |
---|---|---|
committer | Andreas Kling <kling@serenityos.org> | 2021-08-11 13:11:01 +0200 |
commit | 47bb350ebde09a11a594a605d3faea4167392048 (patch) | |
tree | 4aebf5505c037dc1500d4b50a5efa2f4eced949a /Tests/LibUnicode | |
parent | e6e462249fbc000d78eee27a2ed809d483ea9433 (diff) | |
download | serenity-47bb350ebde09a11a594a605d3faea4167392048.zip |
LibUnicode: Generate separate tables for scripts and script extensions
Note that, contrary to the note in populate_general_category_unions(),
script extensions do indeed have code point ranges which overlap. Thus,
this commit adds code to handle that, and hooks it into the GC unions.
Diffstat (limited to 'Tests/LibUnicode')
-rw-r--r-- | Tests/LibUnicode/TestUnicodeCharacterTypes.cpp | 90 |
1 files changed, 90 insertions, 0 deletions
```diff
diff --git a/Tests/LibUnicode/TestUnicodeCharacterTypes.cpp b/Tests/LibUnicode/TestUnicodeCharacterTypes.cpp
index 24c9e0c881..8afbbdc5b4 100644
--- a/Tests/LibUnicode/TestUnicodeCharacterTypes.cpp
+++ b/Tests/LibUnicode/TestUnicodeCharacterTypes.cpp
@@ -375,3 +375,93 @@ TEST_CASE(property)
         EXPECT(!Unicode::code_point_has_property(code_point, property_white_space));
     }
 }
+
+TEST_CASE(script)
+{
+    auto script = [](StringView name) {
+        auto script = Unicode::script_from_string(name);
+        VERIFY(script.has_value());
+        return *script;
+    };
+
+    auto script_latin = script("Latin"sv);
+    auto script_latn = script("Latn"sv);
+    EXPECT_EQ(script_latin, script_latn);
+
+    auto script_cyrillic = script("Cyrillic"sv);
+    auto script_cyrl = script("Cyrl"sv);
+    EXPECT_EQ(script_cyrillic, script_cyrl);
+
+    auto script_greek = script("Greek"sv);
+    auto script_grek = script("Grek"sv);
+    EXPECT_EQ(script_greek, script_grek);
+
+    for (u32 code_point = 0x41; code_point <= 0x5a; ++code_point) {
+        EXPECT(Unicode::code_point_has_script(code_point, script_latin));
+        EXPECT(Unicode::code_point_has_script_extension(code_point, script_latin));
+
+        EXPECT(!Unicode::code_point_has_script(code_point, script_cyrillic));
+        EXPECT(!Unicode::code_point_has_script(code_point, script_greek));
+    }
+
+    for (u32 code_point = 0x61; code_point <= 0x7a; ++code_point) {
+        EXPECT(Unicode::code_point_has_script(code_point, script_latin));
+        EXPECT(Unicode::code_point_has_script_extension(code_point, script_latin));
+
+        EXPECT(!Unicode::code_point_has_script(code_point, script_cyrillic));
+        EXPECT(!Unicode::code_point_has_script(code_point, script_greek));
+    }
+
+    for (u32 code_point = 0x400; code_point <= 0x481; ++code_point) {
+        EXPECT(Unicode::code_point_has_script(code_point, script_cyrillic));
+        EXPECT(Unicode::code_point_has_script_extension(code_point, script_cyrillic));
+
+        EXPECT(!Unicode::code_point_has_script(code_point, script_latin));
+        EXPECT(!Unicode::code_point_has_script(code_point, script_greek));
+    }
+
+    for (u32 code_point = 0x400; code_point <= 0x481; ++code_point) {
+        EXPECT(Unicode::code_point_has_script(code_point, script_cyrillic));
+        EXPECT(Unicode::code_point_has_script_extension(code_point, script_cyrillic));
+
+        EXPECT(!Unicode::code_point_has_script(code_point, script_latin));
+        EXPECT(!Unicode::code_point_has_script(code_point, script_greek));
+    }
+
+    for (u32 code_point = 0x1f80; code_point <= 0x1fb4; ++code_point) {
+        EXPECT(Unicode::code_point_has_script(code_point, script_greek));
+        EXPECT(Unicode::code_point_has_script_extension(code_point, script_greek));
+
+        EXPECT(!Unicode::code_point_has_script(code_point, script_latin));
+        EXPECT(!Unicode::code_point_has_script(code_point, script_cyrillic));
+    }
+}
+
+TEST_CASE(script_extension)
+{
+    auto script = [](StringView name) {
+        auto script = Unicode::script_from_string(name);
+        VERIFY(script.has_value());
+        return *script;
+    };
+
+    auto script_latin = script("Latin"sv);
+    auto script_greek = script("Greek"sv);
+
+    for (u32 code_point = 0x363; code_point <= 0x36f; ++code_point) {
+        EXPECT(!Unicode::code_point_has_script(code_point, script_latin));
+        EXPECT(Unicode::code_point_has_script_extension(code_point, script_latin));
+    }
+
+    EXPECT(!Unicode::code_point_has_script(0x342, script_greek));
+    EXPECT(Unicode::code_point_has_script_extension(0x342, script_greek));
+
+    EXPECT(!Unicode::code_point_has_script(0x345, script_greek));
+    EXPECT(Unicode::code_point_has_script_extension(0x345, script_greek));
+
+    EXPECT(!Unicode::code_point_has_script(0x1dc0, script_greek));
+    EXPECT(Unicode::code_point_has_script_extension(0x1dc0, script_greek));
+
+    EXPECT(!Unicode::code_point_has_script(0x1dc1, script_greek));
+    EXPECT(Unicode::code_point_has_script_extension(0x1dc1, script_greek));
+}
```