summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/core/wee-string.c | 3
-rw-r--r-- src/core/wee-utf8.c | 22
-rw-r--r-- src/core/wee-utf8.h | 2
-rw-r--r-- tests/unit/core/test-core-utf8.cpp | 12
4 files changed, 27 insertions, 12 deletions
diff --git a/src/core/wee-string.c b/src/core/wee-string.c
index 971882f7c..e41884950 100644
--- a/src/core/wee-string.c
+++ b/src/core/wee-string.c
@@ -1073,10 +1073,9 @@ string_convert_escaped_chars (const char *string)
{
value = (value * 16) + HEX2DEC(ptr_string[i + 1]);
}
- utf8_int_string (value, utf_char);
+ length = utf8_int_string (value, utf_char);
if (utf_char[0])
{
- length = strlen (utf_char);
memcpy (output + pos_output, utf_char, length);
pos_output += length;
}
diff --git a/src/core/wee-utf8.c b/src/core/wee-utf8.c
index c976f7441..0fc4560ac 100644
--- a/src/core/wee-utf8.c
+++ b/src/core/wee-utf8.c
@@ -331,21 +331,32 @@ utf8_char_int (const char *string)
*
* In case of error (if unicode value is > 0x1FFFFF), the string is set to an
* empty string (string[0] == '\0').
+ *
+ * Returns the number of bytes in the UTF-8 char (not counting the final '\0').
*/
-void
+int
utf8_int_string (unsigned int unicode_value, char *string)
{
+ int num_bytes;
+
+ num_bytes = 0;
+
if (!string)
- return;
+ return num_bytes;
string[0] = '\0';
- if (unicode_value <= 0x007F)
+ if (unicode_value == 0)
+ {
+ /* NUL char */
+ }
+ else if (unicode_value <= 0x007F)
{
/* UTF-8, 1 byte: 0vvvvvvv */
string[0] = unicode_value;
string[1] = '\0';
+ num_bytes = 1;
}
else if (unicode_value <= 0x07FF)
{
@@ -353,6 +364,7 @@ utf8_int_string (unsigned int unicode_value, char *string)
string[0] = 0xC0 | ((unicode_value >> 6) & 0x1F);
string[1] = 0x80 | (unicode_value & 0x3F);
string[2] = '\0';
+ num_bytes = 2;
}
else if (unicode_value <= 0xFFFF)
{
@@ -361,6 +373,7 @@ utf8_int_string (unsigned int unicode_value, char *string)
string[1] = 0x80 | ((unicode_value >> 6) & 0x3F);
string[2] = 0x80 | (unicode_value & 0x3F);
string[3] = '\0';
+ num_bytes = 3;
}
else if (unicode_value <= 0x1FFFFF)
{
@@ -370,7 +383,10 @@ utf8_int_string (unsigned int unicode_value, char *string)
string[2] = 0x80 | ((unicode_value >> 6) & 0x3F);
string[3] = 0x80 | (unicode_value & 0x3F);
string[4] = '\0';
+ num_bytes = 4;
}
+
+ return num_bytes;
}
/*
diff --git a/src/core/wee-utf8.h b/src/core/wee-utf8.h
index 85259ef82..6c5771860 100644
--- a/src/core/wee-utf8.h
+++ b/src/core/wee-utf8.h
@@ -36,7 +36,7 @@ extern const char *utf8_prev_char (const char *string_start,
const char *string);
extern const char *utf8_next_char (const char *string);
extern int utf8_char_int (const char *string);
-extern void utf8_int_string (unsigned int unicode_value, char *string);
+extern int utf8_int_string (unsigned int unicode_value, char *string);
extern wint_t utf8_wide_char (const char *string);
extern int utf8_char_size (const char *string);
extern int utf8_strlen (const char *string);
diff --git a/tests/unit/core/test-core-utf8.cpp b/tests/unit/core/test-core-utf8.cpp
index 26f42bde2..7051e1478 100644
--- a/tests/unit/core/test-core-utf8.cpp
+++ b/tests/unit/core/test-core-utf8.cpp
@@ -458,16 +458,16 @@ TEST(CoreUtf8, Convert)
LONGS_EQUAL(0x92d, utf8_char_int (utf8_4bytes_truncated_3));
/* convert unicode char to a string */
- utf8_int_string (0, NULL);
- utf8_int_string (0, result);
+ LONGS_EQUAL(0, utf8_int_string (0, NULL));
+ LONGS_EQUAL(0, utf8_int_string (0, result));
STRCMP_EQUAL("", result);
- utf8_int_string (235, result);
+ LONGS_EQUAL(2, utf8_int_string (L'ë', result));
STRCMP_EQUAL("ë", result);
- utf8_int_string (0x20ac, result);
+ LONGS_EQUAL(3, utf8_int_string (L'€', result));
STRCMP_EQUAL("€", result);
- utf8_int_string (0x2ee9, result);
+ LONGS_EQUAL(3, utf8_int_string (0x2ee9, result));
STRCMP_EQUAL(UNICODE_CJK_YELLOW, result);
- utf8_int_string (0x24b62, result);
+ LONGS_EQUAL(4, utf8_int_string (0x24b62, result));
STRCMP_EQUAL(UNICODE_HAN_CHAR, result);
/* get wide char */