diff options
author | ailin-nemui <ailin-nemui@users.noreply.github.com> | 2016-05-18 09:18:21 +0200 |
---|---|---|
committer | ailin-nemui <ailin-nemui@users.noreply.github.com> | 2016-05-18 09:18:21 +0200 |
commit | 74d38683bf5f20142b14dcd5162fd258d8fb2f37 (patch) | |
tree | 7e4fdf05e76005ff0e469ac168b1da0cd48d9da1 | |
parent | 5564f850418d54f13befe15bec5481d29026b7d3 (diff) | |
parent | 5d69b4c4a7d334ca29e61969712f5f8eedfa5cd9 (diff) | |
download | irssi-74d38683bf5f20142b14dcd5162fd258d8fb2f37.zip |
Merge pull request #480 from xavierog/handle-utf8-nicks-with-mk_wcwidth
Handle utf8 nicks with mk_wcwidth()
-rw-r--r-- | src/core/Makefile.am | 1 | ||||
-rw-r--r-- | src/core/special-vars.c | 21 | ||||
-rw-r--r-- | src/core/special-vars.h | 7 | ||||
-rw-r--r-- | src/core/utf8.c | 135 | ||||
-rw-r--r-- | src/core/utf8.h | 56 | ||||
-rw-r--r-- | src/core/wcwidth.c (renamed from src/fe-common/core/wcwidth.c) | 0 | ||||
-rw-r--r-- | src/fe-common/core/Makefile.am | 5 | ||||
-rw-r--r-- | src/fe-common/core/fe-channels.c | 83 | ||||
-rw-r--r-- | src/fe-common/core/formats.c | 28 | ||||
-rw-r--r-- | src/fe-common/core/module.h | 2 | ||||
-rw-r--r-- | src/fe-common/core/utf8.c | 26 | ||||
-rw-r--r-- | src/fe-common/core/utf8.h | 17 | ||||
-rw-r--r-- | src/fe-text/gui-entry.c | 18 | ||||
-rw-r--r-- | src/fe-text/term.h | 2 |
14 files changed, 261 insertions, 140 deletions
diff --git a/src/core/Makefile.am b/src/core/Makefile.am index fc32e17e..cc200034 100644 --- a/src/core/Makefile.am +++ b/src/core/Makefile.am @@ -44,6 +44,7 @@ libcore_a_SOURCES = \ settings.c \ signals.c \ special-vars.c \ + utf8.c \ write-buffer.c structure_headers = \ diff --git a/src/core/special-vars.c b/src/core/special-vars.c index 4dcc3d2f..fe6bbed2 100644 --- a/src/core/special-vars.c +++ b/src/core/special-vars.c @@ -25,10 +25,7 @@ #include "settings.h" #include "servers.h" #include "misc.h" - -#define ALIGN_RIGHT 0x01 -#define ALIGN_CUT 0x02 -#define ALIGN_PAD 0x04 +#include "utf8.h" #define isvarchar(c) \ (i_isalnum(c) || (c) == '_') @@ -316,22 +313,28 @@ static int get_alignment_args(char **data, int *align, int *flags, char *pad) } /* return the aligned text */ -static char *get_alignment(const char *text, int align, int flags, char pad) +char *get_alignment(const char *text, int align, int flags, char pad) { GString *str; char *ret; + int policy; + unsigned int cut_bytes; g_return_val_if_fail(text != NULL, NULL); + policy = string_policy(text); + str = g_string_new(text); /* cut */ - if ((flags & ALIGN_CUT) && align > 0 && str->len > align) - g_string_truncate(str, align); + if ((flags & ALIGN_CUT) && align > 0 && string_width(text, policy) > align) { + string_chars_for_width(text, policy, align, &cut_bytes); + g_string_truncate(str, cut_bytes); + } /* add pad characters */ if (flags & ALIGN_PAD) { - while (str->len < align) { + while (string_width(str->str, policy) < align) { if (flags & ALIGN_RIGHT) g_string_prepend_c(str, pad); else @@ -340,7 +343,7 @@ static char *get_alignment(const char *text, int align, int flags, char pad) } ret = str->str; - g_string_free(str, FALSE); + g_string_free(str, FALSE); return ret; } diff --git a/src/core/special-vars.h b/src/core/special-vars.h index 11262dad..300dae0e 100644 --- a/src/core/special-vars.h +++ b/src/core/special-vars.h @@ -9,9 +9,16 @@ #define PARSE_FLAG_ESCAPE_THEME 0x08 /* if any arguments/variables contain { or } chars, escape them with % */ #define PARSE_FLAG_ONLY_ARGS 0x10 /* expand only arguments ($0 $1 etc.) but no other $variables */ +#define ALIGN_RIGHT 0x01 +#define ALIGN_CUT 0x02 +#define ALIGN_PAD 0x04 + typedef char* (*SPECIAL_HISTORY_FUNC) (const char *text, void *item, int *free_ret); +/* Cut and/or pad text so it takes exactly "align" characters on the screen */ +char *get_alignment(const char *text, int align, int flags, char pad); + /* Parse and expand text after '$' character. return value has to be g_free()'d if `free_ret' is TRUE. */ char *parse_special(char **cmd, SERVER_REC *server, void *item, diff --git a/src/core/utf8.c b/src/core/utf8.c new file mode 100644 index 00000000..29b277e1 --- /dev/null +++ b/src/core/utf8.c @@ -0,0 +1,135 @@ +/* utf8.c - Operations on UTF-8 strings. + * + * Copyright (C) 2002 Timo Sirainen + * + * Based on GLib code by + * + * Copyright (C) 1999 Tom Tromey + * Copyright (C) 2000 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include "utf8.h" +#include "module.h" +#include "wcwidth.c" +/* Provide is_utf8(): */ +#include "recode.h" + +int string_advance(char const **str, int policy) +{ + if (policy == TREAT_STRING_AS_UTF8) { + gunichar c; + + c = g_utf8_get_char(*str); + *str = g_utf8_next_char(*str); + + return unichar_isprint(c) ? mk_wcwidth(c) : 1; + } else { + /* Assume TREAT_STRING_AS_BYTES: */ + *str += 1; + + return 1; + } +} + +int string_policy(const char *str) +{ + if (is_utf8()) { + if (str == NULL || g_utf8_validate(str, -1, NULL)) { + /* No string provided or valid UTF-8 string: treat as UTF-8: */ + return TREAT_STRING_AS_UTF8; + } + } + return TREAT_STRING_AS_BYTES; +} + +int string_length(const char *str, int policy) +{ + g_return_val_if_fail(str != NULL, 0); + + if (policy == -1) { + policy = string_policy(str); + } + + if (policy == TREAT_STRING_AS_UTF8) { + return g_utf8_strlen(str, -1); + } + else { + /* Assume TREAT_STRING_AS_BYTES: */ + return strlen(str); + } +} + +int string_width(const char *str, int policy) +{ + int len; + + g_return_val_if_fail(str != NULL, 0); + + if (policy == -1) { + policy = string_policy(str); + } + + len = 0; + while (*str != '\0') { + len += string_advance(&str, policy); + } + return len; +} + +int string_chars_for_width(const char *str, int policy, unsigned int n, unsigned int *bytes) +{ + const char *c, *previous_c; + int str_width, char_width, char_count; + + g_return_val_if_fail(str != NULL, -1); + + /* Handle the dummy case where n is 0: */ + if (n == 0) { + if (bytes != NULL) { + *bytes = 0; + } + return 0; + } + + if (policy == -1) { + policy = string_policy(str); + } + + /* Iterate over characters until we reach n: */ + char_count = 0; + str_width = 0; + c = str; + while (*c != '\0') { + previous_c = c; + char_width = string_advance(&c, policy); + if (str_width + char_width > n) { + /* We stepped beyond n, get one step back and stop there: */ + c = previous_c; + break; + } + ++ char_count; + str_width += char_width; + } + /* At this point, we know that char_count characters reach str_width + * columns, which is less than or equal to n. */ + + /* Optionally provide the equivalent amount of bytes: */ + if (bytes != NULL) { + *bytes = c - str; + } + return char_count; +} diff --git a/src/core/utf8.h b/src/core/utf8.h new file mode 100644 index 00000000..5bb53193 --- /dev/null +++ b/src/core/utf8.h @@ -0,0 +1,56 @@ +#ifndef __UTF8_H +#define __UTF8_H + +/* XXX I didn't check the encoding range of big5+. This is standard big5. */ +#define is_big5_los(lo) (0x40 <= (lo) && (lo) <= 0x7E) /* standard */ +#define is_big5_lox(lo) (0x80 <= (lo) && (lo) <= 0xFE) /* extended */ +#define is_big5_lo(lo) ((is_big5_los(lo) || is_big5_lox(lo))) +#define is_big5_hi(hi) (0x81 <= (hi) && (hi) <= 0xFE) +#define is_big5(hi,lo) (is_big5_hi(hi) && is_big5_lo(lo)) + +#include <glib.h> +typedef guint32 unichar; + +/* Returns width for character (0-2). */ +int mk_wcwidth(unichar c); + +/* Advance the str pointer one character further; return the number of columns + * occupied by the skipped character. + */ +int string_advance(char const **str, int policy); + +/* TREAT_STRING_AS_BYTES means strings are to be treated using strncpy, + * strnlen, etc. + * TREAT_STRING_AS_UTF8 means strings are to be treated using g_utf8_* + * functions. + */ +enum str_policy { + TREAT_STRING_AS_BYTES, + TREAT_STRING_AS_UTF8 +}; + +/* Return how the str string ought to be treated: TREAT_STRING_AS_UTF8 if the + * terminal handles UTF-8 and if the string appears to be a valid UTF-8 string; + * TREAT_STRING_AS_BYTES otherwise. + */ +int string_policy(const char *str); + +/* Return the length of the str string according to the given policy; if policy + * is -1, this function will call string_policy(). + */ +int string_length(const char *str, int policy); +/* Return the screen width of the str string according to the given policy; if + * policy is -1, this function will call string_policy(). + */ +int string_width(const char *str, int policy); + +/* Return the amount of characters from str it takes to reach n columns, or -1 if + * str is NULL. Optionally return the equivalent amount of bytes. + * If policy is -1, this function will call string_policy(). + */ +int string_chars_for_width(const char *str, int policy, unsigned int n, unsigned int *bytes); + +#define unichar_isprint(c) (((c) & ~0x80) >= 32) +#define is_utf8_leading(c) (((c) & 0xc0) != 0x80) + +#endif diff --git a/src/fe-common/core/wcwidth.c b/src/core/wcwidth.c index 80d20fa1..80d20fa1 100644 --- a/src/fe-common/core/wcwidth.c +++ b/src/core/wcwidth.c diff --git a/src/fe-common/core/Makefile.am b/src/fe-common/core/Makefile.am index e755b510..63f91fa6 100644 --- a/src/fe-common/core/Makefile.am +++ b/src/fe-common/core/Makefile.am @@ -24,8 +24,6 @@ libfe_common_core_a_SOURCES = \ fe-queries.c \ fe-server.c \ fe-settings.c \ - utf8.c \ - wcwidth.c \ formats.c \ hilight-text.c \ keyboard.c \ @@ -62,6 +60,3 @@ pkginc_fe_common_core_HEADERS = \ window-items.h \ windows-layout.h \ fe-windows.h - -noinst_HEADERS = \ - utf8.h diff --git a/src/fe-common/core/fe-channels.c b/src/fe-common/core/fe-channels.c index 046d641a..d87c4ce5 100644 --- a/src/fe-common/core/fe-channels.c +++ b/src/fe-common/core/fe-channels.c @@ -26,6 +26,8 @@ #include "levels.h" #include "misc.h" #include "settings.h" +#include "special-vars.h" +#include "utf8.h" #include "chat-protocols.h" #include "chatnets.h" @@ -323,40 +325,40 @@ static void cmd_channel_remove(const char *data) static int get_nick_length(void *data) { - return strlen(((NICK_REC *) data)->nick); + return string_width(((NICK_REC *) data)->nick, -1); } static void display_sorted_nicks(CHANNEL_REC *channel, GSList *nicklist) { - WINDOW_REC *window; + WINDOW_REC *window; TEXT_DEST_REC dest; GString *str; GSList *tmp; - char *format, *stripped, *prefix_format; - char *linebuf, nickmode[2] = { 0, 0 }; + char *format, *stripped, *prefix_format; + char *aligned_nick, nickmode[2] = { 0, 0 }; int *columns, cols, rows, last_col_rows, col, row, max_width; - int item_extra, linebuf_size, formatnum; + int item_extra, formatnum; window = window_find_closest(channel->server, channel->visible_name, - MSGLEVEL_CLIENTCRAP); - max_width = window->width; + MSGLEVEL_CLIENTCRAP); + max_width = window->width; - /* get the length of item extra stuff ("[ ] ") */ + /* get the length of item extra stuff ("[ ] ") */ format = format_get_text(MODULE_NAME, NULL, - channel->server, channel->visible_name, - TXT_NAMES_NICK, " ", ""); + channel->server, channel->visible_name, + TXT_NAMES_NICK, " ", ""); stripped = strip_codes(format); item_extra = strlen(stripped); - g_free(stripped); + g_free(stripped); g_free(format); if (settings_get_int("names_max_width") > 0 && settings_get_int("names_max_width") < max_width) max_width = settings_get_int("names_max_width"); - /* remove width of the timestamp from max_width */ + /* remove width of the timestamp from max_width */ format_create_dest(&dest, channel->server, channel->visible_name, - MSGLEVEL_CLIENTCRAP, NULL); + MSGLEVEL_CLIENTCRAP, NULL); format = format_get_line_start(current_theme, &dest, time(NULL)); if (format != NULL) { stripped = strip_codes(format); @@ -365,11 +367,11 @@ static void display_sorted_nicks(CHANNEL_REC *channel, GSList *nicklist) g_free(format); } - /* remove width of the prefix from max_width */ + /* remove width of the prefix from max_width */ prefix_format = format_get_text(MODULE_NAME, NULL, - channel->server, channel->visible_name, - TXT_NAMES_PREFIX, - channel->visible_name); + channel->server, channel->visible_name, + TXT_NAMES_PREFIX, + channel->visible_name); if (prefix_format != NULL) { stripped = strip_codes(prefix_format); max_width -= strlen(stripped); @@ -384,19 +386,18 @@ static void display_sorted_nicks(CHANNEL_REC *channel, GSList *nicklist) /* calculate columns */ cols = get_max_column_count(nicklist, get_nick_length, max_width, - settings_get_int("names_max_columns"), - item_extra, 3, &columns, &rows); + settings_get_int("names_max_columns"), + item_extra, 3, &columns, &rows); nicklist = columns_sort_list(nicklist, rows); - /* rows in last column */ + /* rows in last column */ last_col_rows = rows-(cols*rows-g_slist_length(nicklist)); if (last_col_rows == 0) - last_col_rows = rows; + last_col_rows = rows; str = g_string_new(prefix_format); - linebuf_size = max_width+1; linebuf = g_malloc(linebuf_size); - col = 0; row = 0; + col = 0; row = 0; for (tmp = nicklist; tmp != NULL; tmp = tmp->next) { NICK_REC *rec = tmp->data; @@ -405,48 +406,44 @@ static void display_sorted_nicks(CHANNEL_REC *channel, GSList *nicklist) else nickmode[0] = ' '; - if (linebuf_size < columns[col]-item_extra+1) { - linebuf_size = (columns[col]-item_extra+1)*2; - linebuf = g_realloc(linebuf, linebuf_size); - } - memset(linebuf, ' ', columns[col]-item_extra); - linebuf[columns[col]-item_extra] = '\0'; - memcpy(linebuf, rec->nick, strlen(rec->nick)); - - formatnum = rec->op ? TXT_NAMES_NICK_OP : - rec->halfop ? TXT_NAMES_NICK_HALFOP : - rec->voice ? TXT_NAMES_NICK_VOICE : - TXT_NAMES_NICK; + aligned_nick = get_alignment(rec->nick, + columns[col]-item_extra, + ALIGN_PAD, ' '); + + formatnum = rec->op ? TXT_NAMES_NICK_OP : + rec->halfop ? TXT_NAMES_NICK_HALFOP : + rec->voice ? TXT_NAMES_NICK_VOICE : + TXT_NAMES_NICK; format = format_get_text(MODULE_NAME, NULL, - channel->server, - channel->visible_name, - formatnum, nickmode, linebuf); + channel->server, + channel->visible_name, + formatnum, nickmode, aligned_nick); g_string_append(str, format); + g_free(aligned_nick); g_free(format); if (++col == cols) { printtext(channel->server, channel->visible_name, - MSGLEVEL_CLIENTCRAP, "%s", str->str); + MSGLEVEL_CLIENTCRAP, "%s", str->str); g_string_truncate(str, 0); if (prefix_format != NULL) - g_string_assign(str, prefix_format); + g_string_assign(str, prefix_format); col = 0; row++; if (row == last_col_rows) - cols--; + cols--; } } if (str->len > strlen(prefix_format)) { printtext(channel->server, channel->visible_name, - MSGLEVEL_CLIENTCRAP, "%s", str->str); + MSGLEVEL_CLIENTCRAP, "%s", str->str); } g_slist_free(nicklist); g_string_free(str, TRUE); g_free_not_null(columns); g_free_not_null(prefix_format); - g_free(linebuf); } void fe_channels_nicklist(CHANNEL_REC *channel, int flags) diff --git a/src/fe-common/core/formats.c b/src/fe-common/core/formats.c index ccf48394..3e88426f 100644 --- a/src/fe-common/core/formats.c +++ b/src/fe-common/core/formats.c @@ -420,33 +420,17 @@ void format_create_dest_tag(TEXT_DEST_REC *dest, void *server, window_find_closest(server, target, level); } -static int advance (char const **str, gboolean utf8) -{ - if (utf8) { - gunichar c; - - c = g_utf8_get_char(*str); - *str = g_utf8_next_char(*str); - - return unichar_isprint(c) ? mk_wcwidth(c) : 1; - } else { - *str += 1; - - return 1; - } -} - /* Return length of text part in string (ie. without % codes) */ int format_get_length(const char *str) { GString *tmp; int len; - gboolean utf8; + int utf8; int adv = 0; g_return_val_if_fail(str != NULL, 0); - utf8 = is_utf8() && g_utf8_validate(str, -1, NULL); + utf8 = string_policy(str); tmp = g_string_new(NULL); len = 0; @@ -465,7 +449,7 @@ int format_get_length(const char *str) len++; } - len += advance(&str, utf8); + len += string_advance(&str, utf8); } g_string_free(tmp, TRUE); @@ -480,12 +464,12 @@ int format_real_length(const char *str, int len) GString *tmp; const char *start; const char *oldstr; - gboolean utf8; + int utf8; int adv = 0; g_return_val_if_fail(str != NULL, 0); g_return_val_if_fail(len >= 0, 0); - utf8 = is_utf8() && g_utf8_validate(str, -1, NULL); + utf8 = string_policy(str); start = str; tmp = g_string_new(NULL); @@ -507,7 +491,7 @@ int format_real_length(const char *str, int len) } oldstr = str; - len -= advance(&str, utf8); + len -= string_advance(&str, utf8); if (len < 0) str = oldstr; } diff --git a/src/fe-common/core/module.h b/src/fe-common/core/module.h index 51b61b3e..db712ec7 100644 --- a/src/fe-common/core/module.h +++ b/src/fe-common/core/module.h @@ -2,7 +2,7 @@ #define MODULE_NAME "fe-common/core" -typedef guint32 unichar; +#include "utf8.h" typedef struct { time_t time; char *nick; diff --git a/src/fe-common/core/utf8.c b/src/fe-common/core/utf8.c deleted file mode 100644 index 2d07ea8e..00000000 --- a/src/fe-common/core/utf8.c +++ /dev/null @@ -1,26 +0,0 @@ -/* utf8.c - Operations on UTF-8 strings. - * - * Copyright (C) 2002 Timo Sirainen - * - * Based on GLib code by - * - * Copyright (C) 1999 Tom Tromey - * Copyright (C) 2000 Red Hat, Inc. - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - */ - -#include "module.h" - diff --git a/src/fe-common/core/utf8.h b/src/fe-common/core/utf8.h deleted file mode 100644 index 3c15dc7d..00000000 --- a/src/fe-common/core/utf8.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef __UTF8_H -#define __UTF8_H - -/* XXX I didn't check the encoding range of big5+. This is standard big5. */ -#define is_big5_los(lo) (0x40 <= (lo) && (lo) <= 0x7E) /* standard */ -#define is_big5_lox(lo) (0x80 <= (lo) && (lo) <= 0xFE) /* extended */ -#define is_big5_lo(lo) ((is_big5_los(lo) || is_big5_lox(lo))) -#define is_big5_hi(hi) (0x81 <= (hi) && (hi) <= 0xFE) -#define is_big5(hi,lo) (is_big5_hi(hi) && is_big5_lo(lo)) - -/* Returns width for character (0-2). */ -int mk_wcwidth(unichar c); - -#define unichar_isprint(c) (((c) & ~0x80) >= 32) -#define is_utf8_leading(c) (((c) & 0xc0) != 0x80) - -#endif diff --git a/src/fe-text/gui-entry.c b/src/fe-text/gui-entry.c index 31fe0e1e..82645a8e 100644 --- a/src/fe-text/gui-entry.c +++ b/src/fe-text/gui-entry.c @@ -366,22 +366,8 @@ static int scrlen_str(const char *str) char *stripped; g_return_val_if_fail(str != NULL, 0); - str = stripped = strip_codes(str); - if (is_utf8() && g_utf8_validate(str, -1, NULL)) { - - while (*str != '\0') { - gunichar c; - - c = g_utf8_get_char(str); - str = g_utf8_next_char(str); - - len += unichar_isprint(c) ? mk_wcwidth(c) : 1; - } - - } else { - len = strlen(str); - } - + stripped = strip_codes(str); + len = string_width(stripped, -1); g_free(stripped); return len; } diff --git a/src/fe-text/term.h b/src/fe-text/term.h index 9b726d82..0c7847f6 100644 --- a/src/fe-text/term.h +++ b/src/fe-text/term.h @@ -27,7 +27,7 @@ typedef struct _TERM_WINDOW TERM_WINDOW; #define TERM_TYPE_UTF8 1 #define TERM_TYPE_BIG5 2 -typedef guint32 unichar; +#include "utf8.h" extern TERM_WINDOW *root_window; extern int term_width, term_height; |