summary | refs | log | tree | commit | diff
path: root/src/core
diff options
context:
space:
mode:
author Anders Bergh <anders1@gmail.com> 2014-06-16 22:24:46 +0200
committer Anders Bergh <anders1@gmail.com> 2014-06-16 22:24:46 +0200
commit b705e20a44d3aedefd3863115a4db954b50e4953 (patch)
tree a8b1b2faad02f94179c946336339eb42a58834e7 /src/core
parent d65afdfc7c6135fbf270a8b8725e4a1f2852c803 (diff)
download weechat-b705e20a44d3aedefd3863115a4db954b50e4953.zip
core: overlong UTF-8 encoding and surrogates (U+D800-DFFF) are invalid.
Diffstat (limited to 'src/core')
-rw-r--r-- src/core/wee-utf8.c 45
1 files changed, 22 insertions, 23 deletions
diff --git a/src/core/wee-utf8.c b/src/core/wee-utf8.c
index 8871b78aa..8f20812bc 100644
--- a/src/core/wee-utf8.c
+++ b/src/core/wee-utf8.c
@@ -79,59 +79,58 @@ utf8_has_8bits (const char *string)
int
utf8_is_valid (const char *string, char **error)
{
+ int n;
while (string && string[0])
{
- /* UTF-8, 2 bytes, should be: 110vvvvv 10vvvvvv */
+ /* UTF-8, 2 bytes, should be: 110vvvvv 10vvvvvv and U+0080-07FF */
if (((unsigned char)(string[0]) & 0xE0) == 0xC0)
{
if (!string[1] || (((unsigned char)(string[1]) & 0xC0) != 0x80))
- {
- if (error)
- *error = (char *)string;
- return 0;
- }
+ goto err;
+ n = utf8_char_int(string);
+ if (n < 0x80 || n > 0x7ff)
+ goto err;
string += 2;
}
- /* UTF-8, 3 bytes, should be: 1110vvvv 10vvvvvv 10vvvvvv */
+ /* UTF-8, 3 bytes, should be: 1110vvvv 10vvvvvv 10vvvvvv and U+0800-FFFF */
else if (((unsigned char)(string[0]) & 0xF0) == 0xE0)
{
if (!string[1] || !string[2]
|| (((unsigned char)(string[1]) & 0xC0) != 0x80)
|| (((unsigned char)(string[2]) & 0xC0) != 0x80))
- {
- if (error)
- *error = (char *)string;
- return 0;
- }
+ goto err;
+ n = utf8_char_int(string);
+ if (n < 0x800 || n > 0xffff || (n >= 0xd800 && n <= 0xdfff))
+ goto err;
string += 3;
}
- /* UTF-8, 4 bytes, should be: 11110vvv 10vvvvvv 10vvvvvv 10vvvvvv */
+ /* UTF-8, 4 bytes, should be: 11110vvv 10vvvvvv 10vvvvvv 10vvvvvv and U+10000-1FFFFF */
else if (((unsigned char)(string[0]) & 0xF8) == 0xF0)
{
if (!string[1] || !string[2] || !string[3]
|| (((unsigned char)(string[1]) & 0xC0) != 0x80)
|| (((unsigned char)(string[2]) & 0xC0) != 0x80)
|| (((unsigned char)(string[3]) & 0xC0) != 0x80))
- {
- if (error)
- *error = (char *)string;
- return 0;
- }
+ goto err;
+ n = utf8_char_int(string);
+ if (n < 0x10000 || n > 0x1fffff)
+ goto err;
+
string += 4;
}
/* UTF-8, 1 byte, should be: 0vvvvvvv */
else if ((unsigned char)(string[0]) >= 0x80)
- {
- if (error)
- *error = (char *)string;
- return 0;
- }
+ goto err;
else
string++;
}
if (error)
*error = NULL;
return 1;
+err:
+ if (error)
+ *error = (char *)string;
+ return 0;
}
/*