summaryrefslogtreecommitdiff
path: root/Tests/LibUnicode
diff options
context:
space:
mode:
authorTimothy Flynn <trflynn89@pm.me>2021-08-10 15:00:05 -0400
committerAndreas Kling <kling@serenityos.org>2021-08-11 13:11:01 +0200
commit47bb350ebde09a11a594a605d3faea4167392048 (patch)
tree4aebf5505c037dc1500d4b50a5efa2f4eced949a /Tests/LibUnicode
parente6e462249fbc000d78eee27a2ed809d483ea9433 (diff)
downloadserenity-47bb350ebde09a11a594a605d3faea4167392048.zip
LibUnicode: Generate separate tables for scripts and script extensions
Notice that, unlike what the note in populate_general_category_unions() says, script extensions do indeed have code point ranges which overlap. Thus, this commit adds code to handle that, and hooks it into the GC unions.
Diffstat (limited to 'Tests/LibUnicode')
-rw-r--r--Tests/LibUnicode/TestUnicodeCharacterTypes.cpp90
1 files changed, 90 insertions, 0 deletions
diff --git a/Tests/LibUnicode/TestUnicodeCharacterTypes.cpp b/Tests/LibUnicode/TestUnicodeCharacterTypes.cpp
index 24c9e0c881..8afbbdc5b4 100644
--- a/Tests/LibUnicode/TestUnicodeCharacterTypes.cpp
+++ b/Tests/LibUnicode/TestUnicodeCharacterTypes.cpp
@@ -375,3 +375,93 @@ TEST_CASE(property)
EXPECT(!Unicode::code_point_has_property(code_point, property_white_space));
}
}
+
+// Verifies script name lookup (long names and their four-letter aliases) and that sample
+// Latin, Cyrillic and Greek ranges are reported as belonging only to their own script.
+TEST_CASE(script)
+{
+    // Resolve a script by name; VERIFY trips if the lookup returns no value.
+    auto script = [](StringView name) {
+        auto maybe_script = Unicode::script_from_string(name);
+        VERIFY(maybe_script.has_value());
+        return *maybe_script;
+    };
+
+    // A long name and its short alias must resolve to the same script value.
+    auto script_latin = script("Latin"sv);
+    auto script_latn = script("Latn"sv);
+    EXPECT_EQ(script_latin, script_latn);
+
+    auto script_cyrillic = script("Cyrillic"sv);
+    auto script_cyrl = script("Cyrl"sv);
+    EXPECT_EQ(script_cyrillic, script_cyrl);
+
+    auto script_greek = script("Greek"sv);
+    auto script_grek = script("Grek"sv);
+    EXPECT_EQ(script_greek, script_grek);
+
+    // U+0041..U+005A (ASCII 'A'..'Z'): Latin only.
+    for (u32 code_point = 0x41; code_point <= 0x5a; ++code_point) {
+        EXPECT(Unicode::code_point_has_script(code_point, script_latin));
+        EXPECT(Unicode::code_point_has_script_extension(code_point, script_latin));
+
+        EXPECT(!Unicode::code_point_has_script(code_point, script_cyrillic));
+        EXPECT(!Unicode::code_point_has_script(code_point, script_greek));
+    }
+
+    // U+0061..U+007A (ASCII 'a'..'z'): Latin only.
+    for (u32 code_point = 0x61; code_point <= 0x7a; ++code_point) {
+        EXPECT(Unicode::code_point_has_script(code_point, script_latin));
+        EXPECT(Unicode::code_point_has_script_extension(code_point, script_latin));
+
+        EXPECT(!Unicode::code_point_has_script(code_point, script_cyrillic));
+        EXPECT(!Unicode::code_point_has_script(code_point, script_greek));
+    }
+
+    // U+0400..U+0481 (a run at the start of the Cyrillic block): Cyrillic only.
+    for (u32 code_point = 0x400; code_point <= 0x481; ++code_point) {
+        EXPECT(Unicode::code_point_has_script(code_point, script_cyrillic));
+        EXPECT(Unicode::code_point_has_script_extension(code_point, script_cyrillic));
+
+        EXPECT(!Unicode::code_point_has_script(code_point, script_latin));
+        EXPECT(!Unicode::code_point_has_script(code_point, script_greek));
+    }
+
+    // U+1F80..U+1FB4 (a run inside the Greek Extended block): Greek only.
+    for (u32 code_point = 0x1f80; code_point <= 0x1fb4; ++code_point) {
+        EXPECT(Unicode::code_point_has_script(code_point, script_greek));
+        EXPECT(Unicode::code_point_has_script_extension(code_point, script_greek));
+
+        EXPECT(!Unicode::code_point_has_script(code_point, script_latin));
+        EXPECT(!Unicode::code_point_has_script(code_point, script_cyrillic));
+    }
+}
+
+// Exercises code points whose own script is expected NOT to be Latin/Greek, while their
+// script extensions are expected to include that script.
+TEST_CASE(script_extension)
+{
+    // Name-to-script helper; VERIFY trips if the lookup returns no value.
+    auto lookup = [](StringView script_name) {
+        auto result = Unicode::script_from_string(script_name);
+        VERIFY(result.has_value());
+        return result.value();
+    };
+
+    auto latin = lookup("Latin"sv);
+    auto greek = lookup("Greek"sv);
+
+    // U+0363..U+036F: Latin must appear only as a script extension.
+    for (u32 latin_candidate = 0x363; latin_candidate <= 0x36f; ++latin_candidate) {
+        EXPECT(!Unicode::code_point_has_script(latin_candidate, latin));
+        EXPECT(Unicode::code_point_has_script_extension(latin_candidate, latin));
+    }
+
+    // Individual code points for which Greek must appear only as a script extension.
+    // They are checked in this order, with the script check first for each one.
+    constexpr u32 greek_extension_only[] = { 0x342, 0x345, 0x1dc0, 0x1dc1 };
+    for (auto greek_candidate : greek_extension_only) {
+        EXPECT(!Unicode::code_point_has_script(greek_candidate, greek));
+        EXPECT(Unicode::code_point_has_script_extension(greek_candidate, greek));
+    }
+}