diff options
author | Sébastien Helleu <flashcode@flashtux.org> | 2022-11-26 16:38:25 +0100 |
---|---|---|
committer | Sébastien Helleu <flashcode@flashtux.org> | 2022-11-27 12:29:36 +0100 |
commit | bbd0f5d26bcbbeb7b88036f2a8b1f8a62cec4cf1 (patch) | |
tree | f88c9b4a679c1eb9dad8b510687dbf6e5d2bc0bc /tests | |
parent | e08fcdd04c8233e48b3b076975dce1aee9723878 (diff) | |
download | weechat-bbd0f5d26bcbbeb7b88036f2a8b1f8a62cec4cf1.zip |
tests: define constants with UTF-8 strings, add tests on functions returning size on screen
Diffstat (limited to 'tests')
-rw-r--r-- | tests/unit/core/test-core-utf8.cpp | 220 |
1 files changed, 150 insertions, 70 deletions
diff --git a/tests/unit/core/test-core-utf8.cpp b/tests/unit/core/test-core-utf8.cpp index 63138cbaf..14e397402 100644 --- a/tests/unit/core/test-core-utf8.cpp +++ b/tests/unit/core/test-core-utf8.cpp @@ -32,22 +32,57 @@ extern "C" #include "src/core/wee-config.h" } -const char *empty_string = ""; -const char *utf_4bytes_invalid = "\xf0\x03\x02\x01"; -const char *utf_2bytes_truncated_1 = "\xc0"; -const char *utf_3bytes_truncated_1 = "\xe2"; -const char *utf_3bytes_truncated_2 = "\xe2\xbb"; -const char *utf_4bytes_truncated_1 = "\xf0"; -const char *utf_4bytes_truncated_2 = "\xf0\xa4"; -const char *utf_4bytes_truncated_3 = "\xf0\xa4\xad"; -const char *noel_valid = "no\xc3\xabl"; /* noël */ -const char *noel_invalid = "no\xc3l"; -const char *noel_invalid2 = "no\xff\xffl"; -const char *noel_invalid_norm = "no?l"; -const char *noel_invalid2_norm = "no??l"; -const char *cjk_yellow = "\xe2\xbb\xa9"; /* U+2EE9 */ -const char *han_char = "\xf0\xa4\xad\xa2"; /* U+24B62 */ -const char *han_char_z = "\xf0\xa4\xad\xa2Z"; +/* + * soft hyphen: + * [] + * U+00AD (173) + * UTF-8: 2 bytes = 0xC2 0xAD + */ +#define UNICODE_SOFT_HYPHEN "\u00ad" + +/* zero width space: + * [] + * U+200B (8203) + * UTF-8: 3 bytes = 0xE2 0x80 0x8B + */ +#define UNICODE_ZERO_WIDTH_SPACE "\u200b" + +/* snowman without snow: + * [⛄] + * U+26C4 (9924) + * UTF-8: 3 bytes = 0xE2 0x9B 0x84 + */ +#define UNICODE_SNOWMAN "\u26c4" + +/* cjk yellow: + * [⻩] + * U+2EE9 (12009) + * UTF-8: 3 bytes = 0xE2 0xBB 0xA9 + */ +#define UNICODE_CJK_YELLOW "\u2ee9" + +/* han char: + * [𤭢] + * U+24B62 (150370) + * UTF-8: 4 bytes = 0xF0 0xA4 0xAD 0xA2 + */ +#define UNICODE_HAN_CHAR "\U00024b62" + +/* various invalid or incomplete UTF-8 sequences */ +#define UTF8_4BYTES_INVALID "\xf0\x03\x02\x01" +#define UTF8_2BYTES_TRUNCATED_1 "\xc0" +#define UTF8_3BYTES_TRUNCATED_1 "\xe2" +#define UTF8_3BYTES_TRUNCATED_2 "\xe2\xbb" +#define UTF8_4BYTES_TRUNCATED_1 "\xf0" +#define UTF8_4BYTES_TRUNCATED_2 "\xf0\xa4" +#define UTF8_4BYTES_TRUNCATED_3 "\xf0\xa4\xad" + +/* "noël" */ +#define UTF8_NOEL_VALID "no\xc3\xabl" +#define UTF8_NOEL_INVALID "no\xc3l" +#define UTF8_NOEL_INVALID2 "no\xff\xffl" +#define UTF8_NOEL_INVALID_NORM "no?l" +#define UTF8_NOEL_INVALID2_NORM "no??l" TEST_GROUP(CoreUtf8) { @@ -61,6 +96,9 @@ TEST_GROUP(CoreUtf8) TEST(CoreUtf8, Validity) { + const char *noel_valid = UTF8_NOEL_VALID; + const char *noel_invalid = UTF8_NOEL_INVALID; + const char *utf8_4bytes_invalid = UTF8_4BYTES_INVALID; char *error; /* check 8 bits */ @@ -94,18 +132,18 @@ TEST(CoreUtf8, Validity) POINTERS_EQUAL(NULL, error); LONGS_EQUAL(1, utf8_is_valid (noel_valid, 1, &error)); POINTERS_EQUAL(NULL, error); - LONGS_EQUAL(0, utf8_is_valid (utf_4bytes_invalid, -1, &error)); - POINTERS_EQUAL(utf_4bytes_invalid, error); - LONGS_EQUAL(0, utf8_is_valid (utf_4bytes_invalid, 0, &error)); - POINTERS_EQUAL(utf_4bytes_invalid, error); - LONGS_EQUAL(0, utf8_is_valid (utf_4bytes_invalid, 1, &error)); - POINTERS_EQUAL(utf_4bytes_invalid, error); - LONGS_EQUAL(0, utf8_is_valid (utf_4bytes_invalid, 2, &error)); - POINTERS_EQUAL(utf_4bytes_invalid, error); - LONGS_EQUAL(0, utf8_is_valid (utf_4bytes_invalid, 3, &error)); - POINTERS_EQUAL(utf_4bytes_invalid, error); - LONGS_EQUAL(0, utf8_is_valid (utf_4bytes_invalid, 4, &error)); - POINTERS_EQUAL(utf_4bytes_invalid, error); + LONGS_EQUAL(0, utf8_is_valid (utf8_4bytes_invalid, -1, &error)); + POINTERS_EQUAL(utf8_4bytes_invalid, error); + LONGS_EQUAL(0, utf8_is_valid (utf8_4bytes_invalid, 0, &error)); + POINTERS_EQUAL(utf8_4bytes_invalid, error); + LONGS_EQUAL(0, utf8_is_valid (utf8_4bytes_invalid, 1, &error)); + POINTERS_EQUAL(utf8_4bytes_invalid, error); + LONGS_EQUAL(0, utf8_is_valid (utf8_4bytes_invalid, 2, &error)); + POINTERS_EQUAL(utf8_4bytes_invalid, error); + LONGS_EQUAL(0, utf8_is_valid (utf8_4bytes_invalid, 3, &error)); + POINTERS_EQUAL(utf8_4bytes_invalid, error); + LONGS_EQUAL(0, utf8_is_valid (utf8_4bytes_invalid, 4, &error)); + POINTERS_EQUAL(utf8_4bytes_invalid, error); LONGS_EQUAL(0, utf8_is_valid (noel_invalid, -1, &error)); POINTERS_EQUAL(noel_invalid + 2, error); LONGS_EQUAL(0, utf8_is_valid (noel_invalid, 0, &error)); @@ -233,15 +271,15 @@ TEST(CoreUtf8, Normalize) { char *str; - str = strdup (noel_invalid); + str = strdup (UTF8_NOEL_INVALID); utf8_normalize (NULL, '?'); utf8_normalize (str, '?'); - STRCMP_EQUAL(noel_invalid_norm, str); + STRCMP_EQUAL(UTF8_NOEL_INVALID_NORM, str); free (str); - str = strdup (noel_invalid2); + str = strdup (UTF8_NOEL_INVALID2); utf8_normalize (str, '?'); - STRCMP_EQUAL(noel_invalid2_norm, str); + STRCMP_EQUAL(UTF8_NOEL_INVALID2_NORM, str); free (str); } @@ -257,6 +295,16 @@ TEST(CoreUtf8, Normalize) TEST(CoreUtf8, Move) { const char *ptr; + const char *empty_string = ""; + const char *noel_valid = UTF8_NOEL_VALID; + const char *utf8_2bytes_truncated_1 = UTF8_2BYTES_TRUNCATED_1; + const char *utf8_3bytes_truncated_1 = UTF8_3BYTES_TRUNCATED_1; + const char *utf8_3bytes_truncated_2 = UTF8_3BYTES_TRUNCATED_2; + const char *utf8_4bytes_truncated_1 = UTF8_4BYTES_TRUNCATED_1; + const char *utf8_4bytes_truncated_2 = UTF8_4BYTES_TRUNCATED_2; + const char *utf8_4bytes_truncated_3 = UTF8_4BYTES_TRUNCATED_3; + + const char *han_char = UNICODE_HAN_CHAR; /* previous/next char */ POINTERS_EQUAL(NULL, utf8_prev_char (NULL, NULL)); @@ -281,18 +329,18 @@ TEST(CoreUtf8, Move) POINTERS_EQUAL(han_char, ptr); ptr = utf8_prev_char (noel_valid + 3, noel_valid + 4); POINTERS_EQUAL(noel_valid + 3, ptr); - POINTERS_EQUAL(utf_2bytes_truncated_1 + 1, - utf8_next_char (utf_2bytes_truncated_1)); - POINTERS_EQUAL(utf_3bytes_truncated_1 + 1, - utf8_next_char (utf_3bytes_truncated_1)); - POINTERS_EQUAL(utf_3bytes_truncated_2 + 2, - utf8_next_char (utf_3bytes_truncated_2)); - POINTERS_EQUAL(utf_4bytes_truncated_1 + 1, - utf8_next_char (utf_4bytes_truncated_1)); - POINTERS_EQUAL(utf_4bytes_truncated_2 + 2, - utf8_next_char (utf_4bytes_truncated_2)); - POINTERS_EQUAL(utf_4bytes_truncated_3 + 3, - utf8_next_char (utf_4bytes_truncated_3)); + POINTERS_EQUAL(utf8_2bytes_truncated_1 + 1, + utf8_next_char (utf8_2bytes_truncated_1)); + POINTERS_EQUAL(utf8_3bytes_truncated_1 + 1, + utf8_next_char (utf8_3bytes_truncated_1)); + POINTERS_EQUAL(utf8_3bytes_truncated_2 + 2, + utf8_next_char (utf8_3bytes_truncated_2)); + POINTERS_EQUAL(utf8_4bytes_truncated_1 + 1, + utf8_next_char (utf8_4bytes_truncated_1)); + POINTERS_EQUAL(utf8_4bytes_truncated_2 + 2, + utf8_next_char (utf8_4bytes_truncated_2)); + POINTERS_EQUAL(utf8_4bytes_truncated_3 + 3, + utf8_next_char (utf8_4bytes_truncated_3)); /* add offset */ POINTERS_EQUAL(NULL, utf8_add_offset (NULL, 0)); @@ -331,6 +379,13 @@ TEST(CoreUtf8, Move) TEST(CoreUtf8, Convert) { + const char *utf8_2bytes_truncated_1 = UTF8_2BYTES_TRUNCATED_1; + const char *utf8_3bytes_truncated_1 = UTF8_3BYTES_TRUNCATED_1; + const char *utf8_3bytes_truncated_2 = UTF8_3BYTES_TRUNCATED_2; + const char *utf8_4bytes_truncated_1 = UTF8_4BYTES_TRUNCATED_1; + const char *utf8_4bytes_truncated_2 = UTF8_4BYTES_TRUNCATED_2; + const char *utf8_4bytes_truncated_3 = UTF8_4BYTES_TRUNCATED_3; + char result[5]; /* get UTF-8 char as integer */ @@ -339,8 +394,8 @@ TEST(CoreUtf8, Convert) LONGS_EQUAL(65, utf8_char_int ("ABC")); LONGS_EQUAL(235, utf8_char_int ("ë")); LONGS_EQUAL(0x20ac, utf8_char_int ("€")); - LONGS_EQUAL(0x2ee9, utf8_char_int (cjk_yellow)); - LONGS_EQUAL(0x24b62, utf8_char_int (han_char)); + LONGS_EQUAL(0x2ee9, utf8_char_int (UNICODE_CJK_YELLOW)); + LONGS_EQUAL(0x24b62, utf8_char_int (UNICODE_HAN_CHAR)); LONGS_EQUAL(0x0, utf8_char_int ("\xc0\x80")); /* invalid */ LONGS_EQUAL(0x7f, utf8_char_int ("\xc1\xbf")); /* invalid */ @@ -361,12 +416,12 @@ TEST(CoreUtf8, Convert) LONGS_EQUAL(0x10000, utf8_char_int ("\xf0\x90\x80\x80")); LONGS_EQUAL(0x1fffff, utf8_char_int ("\xf7\xbf\xbf\xbf")); - LONGS_EQUAL(0x0, utf8_char_int (utf_2bytes_truncated_1)); - LONGS_EQUAL(0x02, utf8_char_int (utf_3bytes_truncated_1)); - LONGS_EQUAL(0xbb, utf8_char_int (utf_3bytes_truncated_2)); - LONGS_EQUAL(0x0, utf8_char_int (utf_4bytes_truncated_1)); - LONGS_EQUAL(0x24, utf8_char_int (utf_4bytes_truncated_2)); - LONGS_EQUAL(0x92d, utf8_char_int (utf_4bytes_truncated_3)); + LONGS_EQUAL(0x0, utf8_char_int (utf8_2bytes_truncated_1)); + LONGS_EQUAL(0x02, utf8_char_int (utf8_3bytes_truncated_1)); + LONGS_EQUAL(0xbb, utf8_char_int (utf8_3bytes_truncated_2)); + LONGS_EQUAL(0x0, utf8_char_int (utf8_4bytes_truncated_1)); + LONGS_EQUAL(0x24, utf8_char_int (utf8_4bytes_truncated_2)); + LONGS_EQUAL(0x92d, utf8_char_int (utf8_4bytes_truncated_3)); /* convert unicode char to a string */ utf8_int_string (0, NULL); @@ -377,9 +432,9 @@ TEST(CoreUtf8, Convert) utf8_int_string (0x20ac, result); STRCMP_EQUAL("€", result); utf8_int_string (0x2ee9, result); - STRCMP_EQUAL(cjk_yellow, result); + STRCMP_EQUAL(UNICODE_CJK_YELLOW, result); utf8_int_string (0x24b62, result); - STRCMP_EQUAL(han_char, result); + STRCMP_EQUAL(UNICODE_HAN_CHAR, result); /* get wide char */ LONGS_EQUAL(WEOF, utf8_wide_char (NULL)); @@ -387,8 +442,8 @@ TEST(CoreUtf8, Convert) LONGS_EQUAL(65, utf8_wide_char ("A")); LONGS_EQUAL(0xc3ab, utf8_wide_char ("ë")); LONGS_EQUAL(0xe282ac, utf8_wide_char ("€")); - LONGS_EQUAL(0xe2bba9, utf8_wide_char (cjk_yellow)); - LONGS_EQUAL(0xf0a4ada2, utf8_wide_char (han_char)); + LONGS_EQUAL(0xe2bba9, utf8_wide_char (UNICODE_CJK_YELLOW)); + LONGS_EQUAL(0xf0a4ada2, utf8_wide_char (UNICODE_HAN_CHAR)); } /* @@ -402,14 +457,18 @@ TEST(CoreUtf8, Convert) TEST(CoreUtf8, Size) { - /* char size */ + /* char size (in bytes) */ LONGS_EQUAL(0, utf8_char_size (NULL)); LONGS_EQUAL(1, utf8_char_size ("")); LONGS_EQUAL(1, utf8_char_size ("A")); LONGS_EQUAL(2, utf8_char_size ("ë")); LONGS_EQUAL(3, utf8_char_size ("€")); - LONGS_EQUAL(3, utf8_char_size (cjk_yellow)); - LONGS_EQUAL(4, utf8_char_size (han_char)); + LONGS_EQUAL(1, utf8_char_size ("\x01")); + LONGS_EQUAL(2, utf8_char_size (UNICODE_SOFT_HYPHEN)); + LONGS_EQUAL(3, utf8_char_size (UNICODE_ZERO_WIDTH_SPACE)); + LONGS_EQUAL(3, utf8_char_size (UNICODE_SNOWMAN)); + LONGS_EQUAL(3, utf8_char_size (UNICODE_CJK_YELLOW)); + LONGS_EQUAL(4, utf8_char_size (UNICODE_HAN_CHAR)); /* ë as iso-8859-15: invalid UTF-8 */ LONGS_EQUAL(1, utf8_char_size ("\xeb")); /* ël as iso-8859-15: invalid UTF-8 */ @@ -425,7 +484,12 @@ TEST(CoreUtf8, Size) LONGS_EQUAL(1, utf8_char_size_screen ("A")); LONGS_EQUAL(1, utf8_char_size_screen ("ë")); LONGS_EQUAL(1, utf8_char_size_screen ("€")); - LONGS_EQUAL(2, utf8_char_size_screen (cjk_yellow)); + LONGS_EQUAL(1, utf8_char_size_screen ("\x01")); + LONGS_EQUAL(1, utf8_char_size_screen (UNICODE_SOFT_HYPHEN)); + LONGS_EQUAL(0, utf8_char_size_screen (UNICODE_ZERO_WIDTH_SPACE)); + LONGS_EQUAL(2, utf8_char_size_screen (UNICODE_SNOWMAN)); + LONGS_EQUAL(2, utf8_char_size_screen (UNICODE_CJK_YELLOW)); + LONGS_EQUAL(2, utf8_char_size_screen (UNICODE_HAN_CHAR)); /* ë as iso-8859-15: invalid UTF-8 */ LONGS_EQUAL(1, utf8_char_size_screen ("\xeb")); /* ël as iso-8859-15: invalid UTF-8 */ @@ -441,8 +505,13 @@ TEST(CoreUtf8, Size) LONGS_EQUAL(1, utf8_strlen ("A")); LONGS_EQUAL(1, utf8_strlen ("ë")); LONGS_EQUAL(1, utf8_strlen ("€")); - LONGS_EQUAL(1, utf8_strlen (cjk_yellow)); - LONGS_EQUAL(1, utf8_strlen (han_char)); + LONGS_EQUAL(1, utf8_strlen ("\x01")); + LONGS_EQUAL(4, utf8_strlen (UTF8_NOEL_VALID)); + LONGS_EQUAL(1, utf8_strlen (UNICODE_SOFT_HYPHEN)); + LONGS_EQUAL(1, utf8_strlen (UNICODE_ZERO_WIDTH_SPACE)); + LONGS_EQUAL(1, utf8_strlen (UNICODE_SNOWMAN)); + LONGS_EQUAL(1, utf8_strlen (UNICODE_CJK_YELLOW)); + LONGS_EQUAL(1, utf8_strlen (UNICODE_HAN_CHAR)); /* length of string (in chars, for max N bytes) */ LONGS_EQUAL(0, utf8_strnlen (NULL, 0)); @@ -450,7 +519,7 @@ TEST(CoreUtf8, Size) LONGS_EQUAL(1, utf8_strnlen ("AZ", 1)); LONGS_EQUAL(1, utf8_strnlen ("ëZ", 2)); LONGS_EQUAL(1, utf8_strnlen ("€Z", 3)); - LONGS_EQUAL(1, utf8_strnlen (han_char_z, 4)); + LONGS_EQUAL(1, utf8_strnlen (UNICODE_HAN_CHAR "Z", 4)); /* length of string on screen (in chars) */ LONGS_EQUAL(0, utf8_strlen_screen (NULL)); @@ -459,7 +528,18 @@ TEST(CoreUtf8, Size) LONGS_EQUAL(1, utf8_strlen_screen ("ë")); LONGS_EQUAL(1, utf8_strlen_screen ("€")); LONGS_EQUAL(1, utf8_strlen_screen ("\x7f")); - LONGS_EQUAL(2, utf8_strlen_screen (cjk_yellow)); + LONGS_EQUAL(1, utf8_strlen_screen ("\x01")); + LONGS_EQUAL(4, utf8_strlen_screen (UTF8_NOEL_VALID)); + LONGS_EQUAL(1, utf8_strlen_screen (UNICODE_SOFT_HYPHEN)); + LONGS_EQUAL(3, utf8_strlen_screen ("a" UNICODE_SOFT_HYPHEN "b")); + LONGS_EQUAL(0, utf8_strlen_screen (UNICODE_ZERO_WIDTH_SPACE)); + LONGS_EQUAL(2, utf8_strlen_screen ("a" UNICODE_ZERO_WIDTH_SPACE "b")); + LONGS_EQUAL(2, utf8_strlen_screen (UNICODE_SNOWMAN)); + LONGS_EQUAL(4, utf8_strlen_screen ("a" UNICODE_SNOWMAN "b")); + LONGS_EQUAL(2, utf8_strlen_screen (UNICODE_CJK_YELLOW)); + LONGS_EQUAL(4, utf8_strlen_screen ("a" UNICODE_CJK_YELLOW "b")); + LONGS_EQUAL(2, utf8_strlen_screen (UNICODE_HAN_CHAR)); + LONGS_EQUAL(4, utf8_strlen_screen ("a" UNICODE_HAN_CHAR "b")); /* length of Tabulation */ LONGS_EQUAL(1, utf8_strlen_screen ("\t")); @@ -530,10 +610,10 @@ TEST(CoreUtf8, Duplicate) char *str; WEE_TEST_STR(NULL, utf8_strndup (NULL, 0)); - WEE_TEST_STR("", utf8_strndup (noel_valid, 0)); - WEE_TEST_STR("n", utf8_strndup (noel_valid, 1)); - WEE_TEST_STR("no", utf8_strndup (noel_valid, 2)); - WEE_TEST_STR("noë", utf8_strndup (noel_valid, 3)); - WEE_TEST_STR("noël", utf8_strndup (noel_valid, 4)); - WEE_TEST_STR("noël", utf8_strndup (noel_valid, 5)); + WEE_TEST_STR("", utf8_strndup (UTF8_NOEL_VALID, 0)); + WEE_TEST_STR("n", utf8_strndup (UTF8_NOEL_VALID, 1)); + WEE_TEST_STR("no", utf8_strndup (UTF8_NOEL_VALID, 2)); + WEE_TEST_STR("noë", utf8_strndup (UTF8_NOEL_VALID, 3)); + WEE_TEST_STR("noël", utf8_strndup (UTF8_NOEL_VALID, 4)); + WEE_TEST_STR("noël", utf8_strndup (UTF8_NOEL_VALID, 5)); } |