diff options
author | Sébastien Helleu <flashcode@flashtux.org> | 2022-12-03 11:40:30 +0100 |
---|---|---|
committer | Sébastien Helleu <flashcode@flashtux.org> | 2022-12-10 16:05:14 +0100 |
commit | e5cbbd781d814e321845598775f594f0f808e18e (patch) | |
tree | 32dde8ec949ddaa0c9d239338e0a0b729d73c7d6 | |
parent | 0e6677fbcbb81e972e5f210e5dd7a2186fb7ba66 (diff) | |
download | weechat-e5cbbd781d814e321845598775f594f0f808e18e.zip |
core: optimize and fix function utf8_strlen_screen with non printable chars
When the string contained non-printable chars, the function returned 1.
For example utf8_strlen_screen("abc\x01") now returns 4 instead of 1.
In addition, the function has been optimized to avoid the `mbstowcs` function,
which is slow; the result is up to 15% faster.
-rw-r--r-- | src/core/wee-utf8.c | 101 | ||||
-rw-r--r-- | tests/unit/core/test-core-utf8.cpp | 16 |
2 files changed, 40 insertions, 77 deletions
diff --git a/src/core/wee-utf8.c b/src/core/wee-utf8.c index 868ed5a8c..78f717817 100644 --- a/src/core/wee-utf8.c +++ b/src/core/wee-utf8.c @@ -481,6 +481,29 @@ utf8_strnlen (const char *string, int bytes) } /* + * Gets number of chars needed on screen to display the UTF-8 char. + * + * Returns the number of chars (>= 0). + */ + +int +utf8_char_size_screen (const char *string) +{ + int width; + + if (!string) + return 0; + + if (string[0] == '\t') + return CONFIG_INTEGER(config_look_tab_width); + + width = wcwidth ((wchar_t)utf8_char_int (string)); + + /* non printable chars are displayed with a space (so size = 1) */ + return (width >= 0) ? width : 1; +} + +/* * Gets number of chars needed on screen to display the UTF-8 string. * * Returns the number of chars (>= 0). @@ -489,61 +512,24 @@ utf8_strnlen (const char *string, int bytes) int utf8_strlen_screen (const char *string) { - int length, num_char, add_for_tab; - wchar_t *alloc_wstring, *ptr_wstring, wstring[4+2]; + int size_screen; const char *ptr_string; - if (!string || !string[0]) + if (!string) return 0; if (!local_utf8) return utf8_strlen (string); - alloc_wstring = NULL; - - if (!string[1] || !string[2] || !string[3] || !string[4]) - { - /* optimization for max 4 chars: no malloc */ - num_char = 4 + 1; - ptr_wstring = wstring; - } - else - { - num_char = mbstowcs (NULL, string, 0) + 1; - alloc_wstring = malloc ((num_char + 1) * sizeof (alloc_wstring[0])); - if (!alloc_wstring) - return utf8_strlen (string); - ptr_wstring = alloc_wstring; - } - - if (mbstowcs (ptr_wstring, string, num_char) != (size_t)(-1)) - { - length = wcswidth (ptr_wstring, num_char); - /* - * if the char is non-printable, wcswidth returns -1 - * (for example the length of the snowman without snow (U+26C4) == -1) - * => in this case, consider the length is 1, to prevent any display bug - */ - if (length < 0) - length = 1; - } - else - length = utf8_strlen (string); - - if (alloc_wstring) - free (alloc_wstring); - - add_for_tab = 
CONFIG_INTEGER(config_look_tab_width) - 1; - if (add_for_tab > 0) + size_screen = 0; + ptr_string = string; + while (ptr_string && ptr_string[0]) { - for (ptr_string = string; ptr_string[0]; ptr_string++) - { - if (ptr_string[0] == '\t') - length += add_for_tab; - } + size_screen += utf8_char_size_screen (ptr_string); + ptr_string = utf8_next_char (ptr_string); } - return length; + return size_screen; } /* @@ -650,31 +636,6 @@ utf8_charcasecmp_range (const char *string1, const char *string2, int range) } /* - * Gets number of chars needed on screen to display the UTF-8 char. - * - * Returns the number of chars (>= 0). - */ - -int -utf8_char_size_screen (const char *string) -{ - int char_size; - char utf_char[16]; - - if (!string) - return 0; - - char_size = utf8_char_size (string); - if (char_size == 0) - return 0; - - memcpy (utf_char, string, char_size); - utf_char[char_size] = '\0'; - - return utf8_strlen_screen (utf_char); -} - -/* * Moves forward N chars in an UTF-8 string. * * Returns pointer to the new position in string. 
diff --git a/tests/unit/core/test-core-utf8.cpp b/tests/unit/core/test-core-utf8.cpp index 14e397402..0bd8e015b 100644 --- a/tests/unit/core/test-core-utf8.cpp +++ b/tests/unit/core/test-core-utf8.cpp @@ -495,9 +495,9 @@ TEST(CoreUtf8, Size) /* ël as iso-8859-15: invalid UTF-8 */ LONGS_EQUAL(1, utf8_char_size_screen ("\xebl")); /* ëlm as iso-8859-15: invalid UTF-8 */ - LONGS_EQUAL(1, utf8_char_size_screen ("\xeblm")); + LONGS_EQUAL(2, utf8_char_size_screen ("\xeblm")); /* ëlmn as iso-8859-15: invalid UTF-8 */ - LONGS_EQUAL(1, utf8_char_size_screen ("\xeblmn")); + LONGS_EQUAL(2, utf8_char_size_screen ("\xeblmn")); /* length of string (in chars) */ LONGS_EQUAL(0, utf8_strlen (NULL)); @@ -530,16 +530,18 @@ TEST(CoreUtf8, Size) LONGS_EQUAL(1, utf8_strlen_screen ("\x7f")); LONGS_EQUAL(1, utf8_strlen_screen ("\x01")); LONGS_EQUAL(4, utf8_strlen_screen (UTF8_NOEL_VALID)); + LONGS_EQUAL(4, utf8_strlen_screen ("abc\x01")); + LONGS_EQUAL(8, utf8_strlen_screen ("a" "\x01" UTF8_NOEL_VALID "\x02" "b")); LONGS_EQUAL(1, utf8_strlen_screen (UNICODE_SOFT_HYPHEN)); - LONGS_EQUAL(3, utf8_strlen_screen ("a" UNICODE_SOFT_HYPHEN "b")); + LONGS_EQUAL(5, utf8_strlen_screen ("a" "\x01" UNICODE_SOFT_HYPHEN "\x02" "b")); LONGS_EQUAL(0, utf8_strlen_screen (UNICODE_ZERO_WIDTH_SPACE)); - LONGS_EQUAL(2, utf8_strlen_screen ("a" UNICODE_ZERO_WIDTH_SPACE "b")); + LONGS_EQUAL(4, utf8_strlen_screen ("a" "\x01" UNICODE_ZERO_WIDTH_SPACE "\x02" "b")); LONGS_EQUAL(2, utf8_strlen_screen (UNICODE_SNOWMAN)); - LONGS_EQUAL(4, utf8_strlen_screen ("a" UNICODE_SNOWMAN "b")); + LONGS_EQUAL(6, utf8_strlen_screen ("a" "\x01" UNICODE_SNOWMAN "\x02" "b")); LONGS_EQUAL(2, utf8_strlen_screen (UNICODE_CJK_YELLOW)); - LONGS_EQUAL(4, utf8_strlen_screen ("a" UNICODE_CJK_YELLOW "b")); + LONGS_EQUAL(6, utf8_strlen_screen ("a" "\x01" UNICODE_CJK_YELLOW "\x02" "b")); LONGS_EQUAL(2, utf8_strlen_screen (UNICODE_HAN_CHAR)); - LONGS_EQUAL(4, utf8_strlen_screen ("a" UNICODE_HAN_CHAR "b")); + LONGS_EQUAL(6, utf8_strlen_screen ("a" 
"\x01" UNICODE_HAN_CHAR "\x02" "b")); /* length of Tabulation */ LONGS_EQUAL(1, utf8_strlen_screen ("\t")); |