diff options
author | Nico Weber <thakis@chromium.org> | 2023-01-22 12:59:00 -0500 |
---|---|---|
committer | Linus Groh <mail@linusgroh.de> | 2023-01-22 21:30:44 +0000 |
commit | 3423b54eb918eac7abf8a0be5a58a00088cec4ae (patch) | |
tree | 6c0a4889050a1db870f44134f6000dded7f5cad3 /Tests | |
parent | aa9037eed4c1af9f8c8555c552a4488e97f107db (diff) | |
download | serenity-3423b54eb918eac7abf8a0be5a58a00088cec4ae.zip |
LibTextCodec: Make utf-16be and utf-16le codecs actually work
There were two problems:
1. They didn't handle surrogates
2. They used signed chars, leading to e.g. 0x00e4 being treated as 0xffe4
Also add a basic test that catches both issues.
There's some code duplication with Utf16CodePointIterator::operator*(),
but let's get things working first.
Diffstat (limited to 'Tests')
-rw-r--r-- | Tests/LibTextCodec/TestTextDecoders.cpp | 34 |
1 files changed, 34 insertions, 0 deletions
```diff
diff --git a/Tests/LibTextCodec/TestTextDecoders.cpp b/Tests/LibTextCodec/TestTextDecoders.cpp
index c5964695a6..fe873bf7ff 100644
--- a/Tests/LibTextCodec/TestTextDecoders.cpp
+++ b/Tests/LibTextCodec/TestTextDecoders.cpp
@@ -23,3 +23,37 @@ TEST_CASE(test_utf8_decode)
 
     EXPECT(decoder.to_utf8(test_string) == test_string);
 }
+
+TEST_CASE(test_utf16be_decode)
+{
+    auto decoder = TextCodec::UTF16BEDecoder();
+    // This is the output of `python3 -c "print('säk😀'.encode('utf-16be'))"`.
+    auto test_string = "\x00s\x00\xe4\x00k\xd8=\xde\x00"sv;
+
+    Vector<u32> processed_code_points;
+    decoder.process(test_string, [&](u32 code_point) {
+        processed_code_points.append(code_point);
+    });
+    EXPECT(processed_code_points.size() == 4);
+    EXPECT(processed_code_points[0] == 0x73);
+    EXPECT(processed_code_points[1] == 0xE4);
+    EXPECT(processed_code_points[2] == 0x6B);
+    EXPECT(processed_code_points[3] == 0x1F600);
+}
+
+TEST_CASE(test_utf16le_decode)
+{
+    auto decoder = TextCodec::UTF16LEDecoder();
+    // This is the output of `python3 -c "print('säk😀'.encode('utf-16le'))"`.
+    auto test_string = "s\x00\xe4\x00k\x00=\xd8\x00\xde"sv;
+
+    Vector<u32> processed_code_points;
+    decoder.process(test_string, [&](u32 code_point) {
+        processed_code_points.append(code_point);
+    });
+    EXPECT(processed_code_points.size() == 4);
+    EXPECT(processed_code_points[0] == 0x73);
+    EXPECT(processed_code_points[1] == 0xE4);
+    EXPECT(processed_code_points[2] == 0x6B);
+    EXPECT(processed_code_points[3] == 0x1F600);
+}
```