summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSébastien Helleu <flashcode@flashtux.org>2022-12-03 11:40:30 +0100
committerSébastien Helleu <flashcode@flashtux.org>2022-12-10 16:05:14 +0100
commite5cbbd781d814e321845598775f594f0f808e18e (patch)
tree32dde8ec949ddaa0c9d239338e0a0b729d73c7d6
parent0e6677fbcbb81e972e5f210e5dd7a2186fb7ba66 (diff)
downloadweechat-e5cbbd781d814e321845598775f594f0f808e18e.zip
core: optimize and fix function utf8_strlen_screen with non printable chars
When there were non-printable chars in the string, the function returned 1. For example, utf8_strlen_screen("abc\x01") now returns 4 instead of 1. In addition, the function has been optimized to not use the `mbstowcs` function, which is slow; the result is up to 15% faster.
-rw-r--r--src/core/wee-utf8.c101
-rw-r--r--tests/unit/core/test-core-utf8.cpp16
2 files changed, 40 insertions, 77 deletions
diff --git a/src/core/wee-utf8.c b/src/core/wee-utf8.c
index 868ed5a8c..78f717817 100644
--- a/src/core/wee-utf8.c
+++ b/src/core/wee-utf8.c
@@ -481,6 +481,29 @@ utf8_strnlen (const char *string, int bytes)
}
/*
+ * Gets number of chars needed on screen to display the UTF-8 char.
+ *
+ * Returns the number of chars (>= 0).
+ */
+
+int
+utf8_char_size_screen (const char *string)
+{
+ int width;
+
+ if (!string)
+ return 0;
+
+ if (string[0] == '\t')
+ return CONFIG_INTEGER(config_look_tab_width);
+
+ width = wcwidth ((wchar_t)utf8_char_int (string));
+
+ /* non printable chars are displayed with a space (so size = 1) */
+ return (width >= 0) ? width : 1;
+}
+
+/*
* Gets number of chars needed on screen to display the UTF-8 string.
*
* Returns the number of chars (>= 0).
@@ -489,61 +512,24 @@ utf8_strnlen (const char *string, int bytes)
int
utf8_strlen_screen (const char *string)
{
- int length, num_char, add_for_tab;
- wchar_t *alloc_wstring, *ptr_wstring, wstring[4+2];
+ int size_screen;
const char *ptr_string;
- if (!string || !string[0])
+ if (!string)
return 0;
if (!local_utf8)
return utf8_strlen (string);
- alloc_wstring = NULL;
-
- if (!string[1] || !string[2] || !string[3] || !string[4])
- {
- /* optimization for max 4 chars: no malloc */
- num_char = 4 + 1;
- ptr_wstring = wstring;
- }
- else
- {
- num_char = mbstowcs (NULL, string, 0) + 1;
- alloc_wstring = malloc ((num_char + 1) * sizeof (alloc_wstring[0]));
- if (!alloc_wstring)
- return utf8_strlen (string);
- ptr_wstring = alloc_wstring;
- }
-
- if (mbstowcs (ptr_wstring, string, num_char) != (size_t)(-1))
- {
- length = wcswidth (ptr_wstring, num_char);
- /*
- * if the char is non-printable, wcswidth returns -1
- * (for example the length of the snowman without snow (U+26C4) == -1)
- * => in this case, consider the length is 1, to prevent any display bug
- */
- if (length < 0)
- length = 1;
- }
- else
- length = utf8_strlen (string);
-
- if (alloc_wstring)
- free (alloc_wstring);
-
- add_for_tab = CONFIG_INTEGER(config_look_tab_width) - 1;
- if (add_for_tab > 0)
+ size_screen = 0;
+ ptr_string = string;
+ while (ptr_string && ptr_string[0])
{
- for (ptr_string = string; ptr_string[0]; ptr_string++)
- {
- if (ptr_string[0] == '\t')
- length += add_for_tab;
- }
+ size_screen += utf8_char_size_screen (ptr_string);
+ ptr_string = utf8_next_char (ptr_string);
}
- return length;
+ return size_screen;
}
/*
@@ -650,31 +636,6 @@ utf8_charcasecmp_range (const char *string1, const char *string2, int range)
}
/*
- * Gets number of chars needed on screen to display the UTF-8 char.
- *
- * Returns the number of chars (>= 0).
- */
-
-int
-utf8_char_size_screen (const char *string)
-{
- int char_size;
- char utf_char[16];
-
- if (!string)
- return 0;
-
- char_size = utf8_char_size (string);
- if (char_size == 0)
- return 0;
-
- memcpy (utf_char, string, char_size);
- utf_char[char_size] = '\0';
-
- return utf8_strlen_screen (utf_char);
-}
-
-/*
* Moves forward N chars in an UTF-8 string.
*
* Returns pointer to the new position in string.
diff --git a/tests/unit/core/test-core-utf8.cpp b/tests/unit/core/test-core-utf8.cpp
index 14e397402..0bd8e015b 100644
--- a/tests/unit/core/test-core-utf8.cpp
+++ b/tests/unit/core/test-core-utf8.cpp
@@ -495,9 +495,9 @@ TEST(CoreUtf8, Size)
/* ël as iso-8859-15: invalid UTF-8 */
LONGS_EQUAL(1, utf8_char_size_screen ("\xebl"));
/* ëlm as iso-8859-15: invalid UTF-8 */
- LONGS_EQUAL(1, utf8_char_size_screen ("\xeblm"));
+ LONGS_EQUAL(2, utf8_char_size_screen ("\xeblm"));
/* ëlmn as iso-8859-15: invalid UTF-8 */
- LONGS_EQUAL(1, utf8_char_size_screen ("\xeblmn"));
+ LONGS_EQUAL(2, utf8_char_size_screen ("\xeblmn"));
/* length of string (in chars) */
LONGS_EQUAL(0, utf8_strlen (NULL));
@@ -530,16 +530,18 @@ TEST(CoreUtf8, Size)
LONGS_EQUAL(1, utf8_strlen_screen ("\x7f"));
LONGS_EQUAL(1, utf8_strlen_screen ("\x01"));
LONGS_EQUAL(4, utf8_strlen_screen (UTF8_NOEL_VALID));
+ LONGS_EQUAL(4, utf8_strlen_screen ("abc\x01"));
+ LONGS_EQUAL(8, utf8_strlen_screen ("a" "\x01" UTF8_NOEL_VALID "\x02" "b"));
LONGS_EQUAL(1, utf8_strlen_screen (UNICODE_SOFT_HYPHEN));
- LONGS_EQUAL(3, utf8_strlen_screen ("a" UNICODE_SOFT_HYPHEN "b"));
+ LONGS_EQUAL(5, utf8_strlen_screen ("a" "\x01" UNICODE_SOFT_HYPHEN "\x02" "b"));
LONGS_EQUAL(0, utf8_strlen_screen (UNICODE_ZERO_WIDTH_SPACE));
- LONGS_EQUAL(2, utf8_strlen_screen ("a" UNICODE_ZERO_WIDTH_SPACE "b"));
+ LONGS_EQUAL(4, utf8_strlen_screen ("a" "\x01" UNICODE_ZERO_WIDTH_SPACE "\x02" "b"));
LONGS_EQUAL(2, utf8_strlen_screen (UNICODE_SNOWMAN));
- LONGS_EQUAL(4, utf8_strlen_screen ("a" UNICODE_SNOWMAN "b"));
+ LONGS_EQUAL(6, utf8_strlen_screen ("a" "\x01" UNICODE_SNOWMAN "\x02" "b"));
LONGS_EQUAL(2, utf8_strlen_screen (UNICODE_CJK_YELLOW));
- LONGS_EQUAL(4, utf8_strlen_screen ("a" UNICODE_CJK_YELLOW "b"));
+ LONGS_EQUAL(6, utf8_strlen_screen ("a" "\x01" UNICODE_CJK_YELLOW "\x02" "b"));
LONGS_EQUAL(2, utf8_strlen_screen (UNICODE_HAN_CHAR));
- LONGS_EQUAL(4, utf8_strlen_screen ("a" UNICODE_HAN_CHAR "b"));
+ LONGS_EQUAL(6, utf8_strlen_screen ("a" "\x01" UNICODE_HAN_CHAR "\x02" "b"));
/* length of Tabulation */
LONGS_EQUAL(1, utf8_strlen_screen ("\t"));