summary | refs | log | tree | commit | diff
path: root/src/core
diff options
context:
space:
mode:
Diffstat (limited to 'src/core')
-rw-r--r-- src/core/wee-utf8.c 55
-rw-r--r-- src/core/wee-utf8.h 1
2 files changed, 56 insertions, 0 deletions
diff --git a/src/core/wee-utf8.c b/src/core/wee-utf8.c
index 7c5b5cf06..bae3693fe 100644
--- a/src/core/wee-utf8.c
+++ b/src/core/wee-utf8.c
@@ -226,6 +226,61 @@ utf8_next_char (const char *string)
}
/*
+ * utf8_char_int: return UTF-8 char as integer
+ *
+ * Decodes the UTF-8 sequence that starts at "string": the payload bits of
+ * the first byte are followed by 6 bits for each following byte.
+ *
+ * Returns 0 if string is NULL. A first byte that matches none of the 2-, 3-
+ * or 4-byte lead patterns is returned unchanged (this covers ASCII, and an
+ * empty string, which gives 0).
+ *
+ * If the NUL terminator is found before the sequence is complete, the value
+ * is built only from the bytes that are present, as if the sequence were
+ * that short; bytes after the NUL are never read.
+ *
+ * Following bytes are masked with 0x3F but are not checked against the
+ * 10vvvvvv pattern: no validation of the input is done here.
+ *
+ * NOTE(review): the cast to (unsigned char *) below drops the const
+ * qualifier of "string"; (const unsigned char *) would avoid that.
+ */
+
+int
+utf8_char_int (const char *string)
+{
+ const unsigned char *ptr_string;
+
+ if (!string)
+ return 0;
+
+ ptr_string = (unsigned char *)string; /* read bytes as unsigned values for the masks and compares below */
+
+ /* UTF-8, 2 bytes: 110vvvvv 10vvvvvv
+ * (if the 2nd byte is the NUL terminator, only the 5 bits of the first
+ * byte are returned) */
+ if ((ptr_string[0] & 0xE0) == 0xC0)
+ {
+ if (!ptr_string[1])
+ return (int)(ptr_string[0] & 0x1F);
+ return ((int)(ptr_string[0] & 0x1F) << 6) +
+ ((int)(ptr_string[1] & 0x3F));
+ }
+ /* UTF-8, 3 bytes: 1110vvvv 10vvvvvv 10vvvvvv
+ * (truncated after 1 or 2 bytes: the bytes present are combined, with the
+ * first byte shifted by 6 bits for each following byte that was read) */
+ else if ((ptr_string[0] & 0xF0) == 0xE0)
+ {
+ if (!ptr_string[1])
+ return (int)(ptr_string[0] & 0x0F);
+ if (!ptr_string[2])
+ return (((int)(ptr_string[0] & 0x0F)) << 6) +
+ ((int)(ptr_string[1] & 0x3F));
+ return (((int)(ptr_string[0] & 0x0F)) << 12) +
+ (((int)(ptr_string[1] & 0x3F)) << 6) +
+ ((int)(ptr_string[2] & 0x3F));
+ }
+ /* UTF-8, 4 bytes: 11110vvv 10vvvvvv 10vvvvvv 10vvvvvv
+ * (truncated after 1, 2 or 3 bytes: same handling as for the 3-byte
+ * form, using the 3 payload bits of the first byte) */
+ else if ((ptr_string[0] & 0xF8) == 0xF0)
+ {
+ if (!ptr_string[1])
+ return (int)ptr_string[0] & 0x07;
+ if (!ptr_string[2])
+ return (((int)(ptr_string[0] & 0x07)) << 6) +
+ ((int)(ptr_string[1] & 0x3F));
+ if (!ptr_string[3])
+ return (((int)(ptr_string[0] & 0x07)) << 12) +
+ (((int)(ptr_string[1] & 0x3F)) << 6) +
+ ((int)(ptr_string[2] & 0x3F));
+ return (((int)(ptr_string[0] & 0x07)) << 18) +
+ (((int)(ptr_string[1] & 0x3F)) << 12) +
+ (((int)(ptr_string[2] & 0x3F)) << 6) +
+ ((int)(ptr_string[3] & 0x3F));
+ }
+ /* UTF-8, 1 byte: 0vvvvvvv
+ * (any other first byte that matched no pattern above, e.g. a lone
+ * 10vvvvvv byte, is also returned as is) */
+ return (int)ptr_string[0];
+}
+
+/*
* utf8_char_size: return UTF-8 char size (in bytes)
*/
diff --git a/src/core/wee-utf8.h b/src/core/wee-utf8.h
index 60511e607..d6fc8f927 100644
--- a/src/core/wee-utf8.h
+++ b/src/core/wee-utf8.h
@@ -38,6 +38,7 @@ extern int utf8_is_valid (const char *string, char **error);
extern void utf8_normalize (const char *string, char replacement);
extern char *utf8_prev_char (const char *string_start, const char *string);
extern char *utf8_next_char (const char *string);
+extern int utf8_char_int (const char *string); /* UTF-8 char at start of string as integer; 0 if string is NULL */
extern int utf8_char_size (const char *string);
extern int utf8_strlen (const char *string);
extern int utf8_strnlen (const char *string, int bytes);