diff options
-rw-r--r-- | src/core/wee-string.c | 3 | ||||
-rw-r--r-- | src/core/wee-utf8.c | 22 | ||||
-rw-r--r-- | src/core/wee-utf8.h | 2 | ||||
-rw-r--r-- | tests/unit/core/test-core-utf8.cpp | 12 |
4 files changed, 27 insertions, 12 deletions
diff --git a/src/core/wee-string.c b/src/core/wee-string.c
index 971882f7c..e41884950 100644
--- a/src/core/wee-string.c
+++ b/src/core/wee-string.c
@@ -1073,10 +1073,9 @@ string_convert_escaped_chars (const char *string)
                             {
                                 value = (value * 16) + HEX2DEC(ptr_string[i + 1]);
                             }
-                            utf8_int_string (value, utf_char);
+                            length = utf8_int_string (value, utf_char);
                             if (utf_char[0])
                             {
-                                length = strlen (utf_char);
                                 memcpy (output + pos_output, utf_char, length);
                                 pos_output += length;
                             }
diff --git a/src/core/wee-utf8.c b/src/core/wee-utf8.c
index c976f7441..0fc4560ac 100644
--- a/src/core/wee-utf8.c
+++ b/src/core/wee-utf8.c
@@ -331,21 +331,32 @@ utf8_char_int (const char *string)
  *
  * In case of error (if unicode value is > 0x1FFFFF), the string is set to an
  * empty string (string[0] == '\0').
+ *
+ * Returns the number of bytes in the UTF-8 char (not counting the final '\0').
  */
 
-void
+int
 utf8_int_string (unsigned int unicode_value, char *string)
 {
+    int num_bytes;
+
+    num_bytes = 0;
+
     if (!string)
-        return;
+        return num_bytes;
 
     string[0] = '\0';
 
-    if (unicode_value <= 0x007F)
+    if (unicode_value == 0)
+    {
+        /* NUL char */
+    }
+    else if (unicode_value <= 0x007F)
     {
         /* UTF-8, 1 byte: 0vvvvvvv */
         string[0] = unicode_value;
         string[1] = '\0';
+        num_bytes = 1;
     }
     else if (unicode_value <= 0x07FF)
     {
@@ -353,6 +364,7 @@ utf8_int_string (unsigned int unicode_value, char *string)
         string[0] = 0xC0 | ((unicode_value >> 6) & 0x1F);
         string[1] = 0x80 | (unicode_value & 0x3F);
         string[2] = '\0';
+        num_bytes = 2;
     }
     else if (unicode_value <= 0xFFFF)
     {
@@ -361,6 +373,7 @@ utf8_int_string (unsigned int unicode_value, char *string)
         string[1] = 0x80 | ((unicode_value >> 6) & 0x3F);
         string[2] = 0x80 | (unicode_value & 0x3F);
         string[3] = '\0';
+        num_bytes = 3;
     }
     else if (unicode_value <= 0x1FFFFF)
     {
@@ -370,7 +383,10 @@ utf8_int_string (unsigned int unicode_value, char *string)
         string[2] = 0x80 | ((unicode_value >> 6) & 0x3F);
         string[3] = 0x80 | (unicode_value & 0x3F);
         string[4] = '\0';
+        num_bytes = 4;
     }
+
+    return num_bytes;
 }
 
 /*
diff --git a/src/core/wee-utf8.h b/src/core/wee-utf8.h
index 85259ef82..6c5771860 100644
--- a/src/core/wee-utf8.h
+++ b/src/core/wee-utf8.h
@@ -36,7 +36,7 @@ extern const char *utf8_prev_char (const char *string_start,
                                    const char *string);
 extern const char *utf8_next_char (const char *string);
 extern int utf8_char_int (const char *string);
-extern void utf8_int_string (unsigned int unicode_value, char *string);
+extern int utf8_int_string (unsigned int unicode_value, char *string);
 extern wint_t utf8_wide_char (const char *string);
 extern int utf8_char_size (const char *string);
 extern int utf8_strlen (const char *string);
diff --git a/tests/unit/core/test-core-utf8.cpp b/tests/unit/core/test-core-utf8.cpp
index 26f42bde2..7051e1478 100644
--- a/tests/unit/core/test-core-utf8.cpp
+++ b/tests/unit/core/test-core-utf8.cpp
@@ -458,16 +458,16 @@ TEST(CoreUtf8, Convert)
     LONGS_EQUAL(0x92d, utf8_char_int (utf8_4bytes_truncated_3));
 
     /* convert unicode char to a string */
-    utf8_int_string (0, NULL);
-    utf8_int_string (0, result);
+    LONGS_EQUAL(0, utf8_int_string (0, NULL));
+    LONGS_EQUAL(0, utf8_int_string (0, result));
     STRCMP_EQUAL("", result);
-    utf8_int_string (235, result);
+    LONGS_EQUAL(2, utf8_int_string (L'ë', result));
     STRCMP_EQUAL("ë", result);
-    utf8_int_string (0x20ac, result);
+    LONGS_EQUAL(3, utf8_int_string (L'€', result));
     STRCMP_EQUAL("€", result);
-    utf8_int_string (0x2ee9, result);
+    LONGS_EQUAL(3, utf8_int_string (0x2ee9, result));
     STRCMP_EQUAL(UNICODE_CJK_YELLOW, result);
-    utf8_int_string (0x24b62, result);
+    LONGS_EQUAL(4, utf8_int_string (0x24b62, result));
     STRCMP_EQUAL(UNICODE_HAN_CHAR, result);
 
     /* get wide char */