diff options
author | Sergey Bugaev <bugaevc@gmail.com> | 2019-08-28 00:57:15 +0300 |
---|---|---|
committer | Andreas Kling <awesomekling@gmail.com> | 2019-08-28 13:46:02 +0200 |
commit | 5d3696174be8ab88ddad8397753b307a3a3ef94d (patch) | |
tree | 20c05b1bb06a3649c0e68193422240d133d90545 /AK/Tests | |
parent | 970e0147f7f73ada908563be71159fdaba09f7f8 (diff) | |
download | serenity-5d3696174be8ab88ddad8397753b307a3a3ef94d.zip |
AK: Add a Utf8View type for iterating over UTF-8 codepoints
Utf8View wraps a StringView and implements begin() and end() that
return a Utf8CodepointIterator, which parses UTF-8-encoded Unicode
codepoints and returns them as 32-bit integers.
This is the first step towards supporting emojis in Serenity :^)
https://github.com/SerenityOS/serenity/issues/490
Diffstat (limited to 'AK/Tests')
-rw-r--r-- | AK/Tests/Makefile | 6 | ||||
-rw-r--r-- | AK/Tests/TestUtf8.cpp | 58 |
2 files changed, 63 insertions, 1 deletions
diff --git a/AK/Tests/Makefile b/AK/Tests/Makefile
index a87819f174..a20db0fbf8 100644
--- a/AK/Tests/Makefile
+++ b/AK/Tests/Makefile
@@ -1,4 +1,4 @@
-PROGRAMS = TestString TestQueue TestVector TestHashMap TestJSON TestWeakPtr TestNonnullRefPtr TestRefPtr TestFixedArray TestFileSystemPath TestURL TestStringView
+PROGRAMS = TestString TestQueue TestVector TestHashMap TestJSON TestWeakPtr TestNonnullRefPtr TestRefPtr TestFixedArray TestFileSystemPath TestURL TestStringView TestUtf8
 
 CXXFLAGS = -std=c++17 -Wall -Wextra -ggdb3 -O2 -I../ -I../../
 
@@ -14,6 +14,7 @@ SHARED_TEST_OBJS = \
     ../JsonParser.o \
     ../FileSystemPath.o \
     ../URL.o \
+    ../Utf8View.o \
 
 .cpp.o:
 	@echo "HOST_CXX $<"; $(PRE_CXX) $(CXX) $(CXXFLAGS) -o $@ -c $<
@@ -65,6 +66,9 @@ TestURL: TestURL.o $(SHARED_TEST_OBJS)
 TestStringView: TestStringView.o $(SHARED_TEST_OBJS)
 	$(PRE_CXX) $(CXX) $(CXXFLAGS) -o $@ TestStringView.o $(SHARED_TEST_OBJS)
 
+TestUtf8: TestUtf8.o $(SHARED_TEST_OBJS)
+	$(PRE_CXX) $(CXX) $(CXXFLAGS) -o $@ TestUtf8.o $(SHARED_TEST_OBJS)
+
 clean:
 	rm -f $(SHARED_TEST_OBJS)
 	rm -f $(PROGRAMS)
diff --git a/AK/Tests/TestUtf8.cpp b/AK/Tests/TestUtf8.cpp
new file mode 100644
index 0000000000..8107ba9bac
--- /dev/null
+++ b/AK/Tests/TestUtf8.cpp
@@ -0,0 +1,58 @@
+#include <AK/TestSuite.h>
+
+#include <AK/Utf8View.h>
+
+TEST_CASE(decode_ascii)
+{
+    Utf8View utf8 { "Hello World!11" };
+    EXPECT(utf8.validate());
+
+    u32 expected[] = { 72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100, 33, 49, 49 };
+    size_t expected_size = sizeof(expected) / sizeof(expected[0]);
+
+    size_t i = 0;
+    for (u32 codepoint : utf8) {
+        ASSERT(i < expected_size);
+        EXPECT_EQ(codepoint, expected[i]);
+        i++;
+    }
+    EXPECT_EQ(i, expected_size);
+}
+
+TEST_CASE(decode_utf8)
+{
+    Utf8View utf8 { "Привет, мир! 😀 γειά σου κόσμος こんにちは世界" };
+    EXPECT(utf8.validate());
+
+    u32 expected[] = { 1055, 1088, 1080, 1074, 1077, 1090, 44, 32, 1084, 1080, 1088, 33, 32, 128512, 32, 947, 949, 953, 940, 32, 963, 959, 965, 32, 954, 972, 963, 956, 959, 962, 32, 12371, 12435, 12395, 12385, 12399, 19990, 30028 };
+    size_t expected_size = sizeof(expected) / sizeof(expected[0]);
+
+    size_t i = 0;
+    for (u32 codepoint : utf8) {
+        ASSERT(i < expected_size);
+        EXPECT_EQ(codepoint, expected[i]);
+        i++;
+    }
+    EXPECT_EQ(i, expected_size);
+}
+
+TEST_CASE(validate_invalid_ut8)
+{
+    char invalid_utf8_1[] = { 42, 35, (char)182, 9, 0 };
+    Utf8View utf8_1 { invalid_utf8_1 };
+    EXPECT(!utf8_1.validate());
+
+    char invalid_utf8_2[] = { 42, 35, (char)208, (char)208, 0 };
+    Utf8View utf8_2 { invalid_utf8_2 };
+    EXPECT(!utf8_2.validate());
+
+    char invalid_utf8_3[] = { (char)208, 0 };
+    Utf8View utf8_3 { invalid_utf8_3 };
+    EXPECT(!utf8_3.validate());
+
+    char invalid_utf8_4[] = { (char)208, 35, 0 };
+    Utf8View utf8_4 { invalid_utf8_4 };
+    EXPECT(!utf8_4.validate());
+}
+
+TEST_MAIN(UTF8)