diff options
author | Anders Bergh <anders1@gmail.com> | 2014-06-16 22:24:46 +0200 |
---|---|---|
committer | Anders Bergh <anders1@gmail.com> | 2014-06-16 22:24:46 +0200 |
commit | b705e20a44d3aedefd3863115a4db954b50e4953 (patch) | |
tree | a8b1b2faad02f94179c946336339eb42a58834e7 /src/core | |
parent | d65afdfc7c6135fbf270a8b8725e4a1f2852c803 (diff) | |
download | weechat-b705e20a44d3aedefd3863115a4db954b50e4953.zip |
core: overlong UTF-8 encoding and surrogates (U+D800-DFFF) are invalid.
Diffstat (limited to 'src/core')
-rw-r--r-- | src/core/wee-utf8.c | 45 |
1 files changed, 22 insertions, 23 deletions
```diff
diff --git a/src/core/wee-utf8.c b/src/core/wee-utf8.c
index 8871b78aa..8f20812bc 100644
--- a/src/core/wee-utf8.c
+++ b/src/core/wee-utf8.c
@@ -79,59 +79,58 @@ utf8_has_8bits (const char *string)
 int
 utf8_is_valid (const char *string, char **error)
 {
+    int n;
     while (string && string[0])
     {
-        /* UTF-8, 2 bytes, should be: 110vvvvv 10vvvvvv */
+        /* UTF-8, 2 bytes, should be: 110vvvvv 10vvvvvv and U+0080-07FF */
         if (((unsigned char)(string[0]) & 0xE0) == 0xC0)
         {
             if (!string[1] || (((unsigned char)(string[1]) & 0xC0) != 0x80))
-            {
-                if (error)
-                    *error = (char *)string;
-                return 0;
-            }
+                goto err;
+            n = utf8_char_int(string);
+            if (n < 0x80 || n > 0x7ff)
+                goto err;
             string += 2;
         }
-        /* UTF-8, 3 bytes, should be: 1110vvvv 10vvvvvv 10vvvvvv */
+        /* UTF-8, 3 bytes, should be: 1110vvvv 10vvvvvv 10vvvvvv and U+0800-FFFF */
         else if (((unsigned char)(string[0]) & 0xF0) == 0xE0)
         {
             if (!string[1] || !string[2]
                 || (((unsigned char)(string[1]) & 0xC0) != 0x80)
                 || (((unsigned char)(string[2]) & 0xC0) != 0x80))
-            {
-                if (error)
-                    *error = (char *)string;
-                return 0;
-            }
+                goto err;
+            n = utf8_char_int(string);
+            if (n < 0x800 || n > 0xffff || (n >= 0xd800 && n <= 0xdfff))
+                goto err;
             string += 3;
         }
-        /* UTF-8, 4 bytes, should be: 11110vvv 10vvvvvv 10vvvvvv 10vvvvvv */
+        /* UTF-8, 4 bytes, should be: 11110vvv 10vvvvvv 10vvvvvv 10vvvvvv and U+10000-1FFFFF */
         else if (((unsigned char)(string[0]) & 0xF8) == 0xF0)
         {
             if (!string[1] || !string[2] || !string[3]
                 || (((unsigned char)(string[1]) & 0xC0) != 0x80)
                 || (((unsigned char)(string[2]) & 0xC0) != 0x80)
                 || (((unsigned char)(string[3]) & 0xC0) != 0x80))
-            {
-                if (error)
-                    *error = (char *)string;
-                return 0;
-            }
+                goto err;
+            n = utf8_char_int(string);
+            if (n < 0x10000 || n > 0x1fffff)
+                goto err;
+
             string += 4;
         }
         /* UTF-8, 1 byte, should be: 0vvvvvvv */
         else if ((unsigned char)(string[0]) >= 0x80)
-        {
-            if (error)
-                *error = (char *)string;
-            return 0;
-        }
+            goto err;
         else
             string++;
     }
     if (error)
         *error = NULL;
     return 1;
+err:
+    if (error)
+        *error = (char *)string;
+    return 0;
 }
 
 /*
```