/* Source: root/src/core/utf8.h (blob 5bb5319308e29cdb737764d3693a59f2190628b2) */
#ifndef __UTF8_H
#define __UTF8_H

/* Big5 byte-range predicates.
 * XXX: only the standard Big5 ranges are covered here; the encoding range of
 * Big5+ has not been checked.
 * Each macro may evaluate its argument more than once, so pass only
 * expressions without side effects.
 */
#define is_big5_los(lo) ((lo) >= 0x40 && (lo) <= 0x7E) /* lo in standard range */
#define is_big5_lox(lo) ((lo) >= 0x80 && (lo) <= 0xFE) /* lo in extended range */
#define is_big5_lo(lo) (is_big5_los(lo) || is_big5_lox(lo))
#define is_big5_hi(hi) ((hi) >= 0x81 && (hi) <= 0xFE)
#define is_big5(hi, lo) (is_big5_hi(hi) && is_big5_lo(lo))

#include <glib.h>
/* Character value type used by this header (e.g. by mk_wcwidth()); it is an
 * alias for glib's guint32.
 */
typedef guint32 unichar;

/* Return the width of character c, a value in the range 0-2.
 * NOTE(review): presumably measured in screen columns, like string_width()
 * below -- confirm against the implementation.
 */
int mk_wcwidth(unichar c);

/* Advance the str pointer one character further; return the number of columns
 * occupied by the skipped character.
 * NOTE(review): policy is presumably one of the enum str_policy values; unlike
 * the other string_* functions in this header, -1 is not documented as
 * accepted here -- confirm against the implementation.
 */
int string_advance(char const **str, int policy);

/* How a string is to be processed:
 *  - TREAT_STRING_AS_BYTES: handle it with strncpy, strnlen, etc.
 *  - TREAT_STRING_AS_UTF8:  handle it with the g_utf8_* functions.
 */
enum str_policy {
	TREAT_STRING_AS_BYTES = 0,
	TREAT_STRING_AS_UTF8 = 1
};

/* Decide how the str string ought to be treated.
 * Returns TREAT_STRING_AS_UTF8 if the terminal handles UTF-8 and the string
 * appears to be a valid UTF-8 string; TREAT_STRING_AS_BYTES otherwise.
 * The result is an enum str_policy constant delivered as a plain int.
 */
int string_policy(const char *str);

/* Return the length of the str string according to the given policy (see
 * enum str_policy). If policy is -1, this function will call string_policy()
 * instead of relying on the caller's choice.
 * NOTE(review): the unit of the result (bytes vs. characters) presumably
 * follows the policy -- confirm against the implementation.
 */
int string_length(const char *str, int policy);
/* Return the screen width of the str string according to the given policy
 * (see enum str_policy). If policy is -1, this function will call
 * string_policy() instead of relying on the caller's choice.
 */
int string_width(const char *str, int policy);

/* Return the number of characters from str it takes to reach n columns, or -1
 * if str is NULL. Optionally return the equivalent number of bytes through
 * the bytes pointer.
 * If policy is -1, this function will call string_policy().
 * NOTE(review): "optionally" presumably means bytes may be NULL when the byte
 * count is not wanted -- confirm against the implementation.
 */
int string_chars_for_width(const char *str, int policy, unsigned int n, unsigned int *bytes);

/* Non-zero unless c, once bit 7 is cleared, falls below 0x20: this rejects
 * 0x00-0x1F and 0x80-0x9F. Note that 0x7F passes the test.
 */
#define unichar_isprint(c) (((c) & ~0x80) >= 0x20)
/* Non-zero unless bits 7 and 6 of c are "10", i.e. for any byte that is not a
 * UTF-8 continuation byte (ASCII bytes included).
 */
#define is_utf8_leading(c) (0x80 != ((c) & 0xC0))

#endif