summaryrefslogtreecommitdiff
path: root/src/fe-common/core/utf8.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/fe-common/core/utf8.c')
-rw-r--r--src/fe-common/core/utf8.c130
1 files changed, 12 insertions, 118 deletions
diff --git a/src/fe-common/core/utf8.c b/src/fe-common/core/utf8.c
index 564eb98e..6cd809f8 100644
--- a/src/fe-common/core/utf8.c
+++ b/src/fe-common/core/utf8.c
@@ -24,74 +24,6 @@
#include "module.h"
-#define UTF8_COMPUTE(Char, Mask, Len) \
- if (Char < 128) \
- { \
- Len = 1; \
- Mask = 0x7f; \
- } \
- else if ((Char & 0xe0) == 0xc0) \
- { \
- Len = 2; \
- Mask = 0x1f; \
- } \
- else if ((Char & 0xf0) == 0xe0) \
- { \
- Len = 3; \
- Mask = 0x0f; \
- } \
- else if ((Char & 0xf8) == 0xf0) \
- { \
- Len = 4; \
- Mask = 0x07; \
- } \
- else if ((Char & 0xfc) == 0xf8) \
- { \
- Len = 5; \
- Mask = 0x03; \
- } \
- else if ((Char & 0xfe) == 0xfc) \
- { \
- Len = 6; \
- Mask = 0x01; \
- } \
- else \
- Len = -1;
-
-#define UTF8_GET(Result, Chars, Count, Mask, Len) \
- (Result) = (Chars)[0] & (Mask); \
- for ((Count) = 1; (Count) < (Len); ++(Count)) \
- { \
- if (((Chars)[(Count)] & 0xc0) != 0x80) \
- { \
- (Result) = -1; \
- break; \
- } \
- (Result) <<= 6; \
- (Result) |= ((Chars)[(Count)] & 0x3f); \
- }
-
-int get_utf8_char(const unsigned char **ptr, int len, unichar *chr_r)
-{
- int i, result, mask, chrlen;
-
- mask = 0;
- UTF8_COMPUTE(**ptr, mask, chrlen);
- if (chrlen == -1)
- return -2;
-
- if (chrlen > len)
- return -1;
-
- UTF8_GET(result, *ptr, i, mask, chrlen);
- if (result == -1)
- return -2;
-
- *chr_r = (unichar) result;
- *ptr += chrlen-1;
- return result;
-}
-
int strlen_utf8(const char *str)
{
const unsigned char *p = (const unsigned char *) str;
@@ -99,65 +31,27 @@ int strlen_utf8(const char *str)
unichar chr_r;
len = 0;
- while (*p != '\0' && get_utf8_char(&p, 6, &chr_r) > 0) {
+ while (*p != '\0') {
+ chr_r = g_utf8_get_char_validated(p, -1);
+ if (chr_r & 0x80000000)
+ break;
len++;
- p++;
+ p = g_utf8_next_char(p);
}
return len;
}
-int utf16_char_to_utf8(unichar c, char *outbuf)
-{
- int len, i, first;
-
- len = 0;
- if (c < 0x80) {
- first = 0;
- len = 1;
- } else if (c < 0x800) {
- first = 0xc0;
- len = 2;
- } else if (c < 0x10000) {
- first = 0xe0;
- len = 3;
- } else if (c < 0x200000) {
- first = 0xf0;
- len = 4;
- } else if (c < 0x4000000) {
- first = 0xf8;
- len = 5;
- } else {
- first = 0xfc;
- len = 6;
- }
-
- if (outbuf) {
- for (i = len - 1; i > 0; --i) {
- outbuf[i] = (c & 0x3f) | 0x80;
- c >>= 6;
- }
- outbuf[0] = c | first;
- }
-
- return len;
-}
-
void utf8_to_utf16(const char *str, unichar *out)
{
const unsigned char *p = (const unsigned char *) str;
- int i, result, mask, len;
+ unichar result;
while (*p != '\0') {
- mask = 0;
- UTF8_COMPUTE(*p, mask, len);
- if (len == -1)
- break;
-
- UTF8_GET(result, p, i, mask, len);
- if (result == -1)
- break;
+ result = g_utf8_get_char_validated(p, -1);
+ if (result & 0x80000000)
+ break;
- p += len;
+ p = g_utf8_next_char(p);
*out++ = result;
}
@@ -169,7 +63,7 @@ void utf16_to_utf8(const unichar *str, char *out)
int len;
while (*str != '\0') {
- len = utf16_char_to_utf8(*str, out);
+ len = g_unichar_to_utf8(*str, out);
out += len;
str++;
@@ -185,7 +79,7 @@ void utf16_to_utf8_with_pos(const unichar *str, int spos, char *out, int *opos)
*opos = 0;
while (*str != '\0') {
- len = utf16_char_to_utf8(*str, out);
+ len = g_unichar_to_utf8(*str, out);
out += len;
str++;