diff options
-rw-r--r-- | src/core/wee-string.c | 57 | ||||
-rw-r--r-- | src/core/wee-string.h | 2 | ||||
-rw-r--r-- | tests/unit/core/test-core-string.cpp | 54 |
3 files changed, 113 insertions, 0 deletions
diff --git a/src/core/wee-string.c b/src/core/wee-string.c index 368cee7d2..abe7ccd17 100644 --- a/src/core/wee-string.c +++ b/src/core/wee-string.c @@ -63,6 +63,7 @@ #define HEX2DEC(c) (((c >= 'a') && (c <= 'f')) ? c - 'a' + 10 : \ ((c >= 'A') && (c <= 'F')) ? c - 'A' + 10 : \ c - '0') +#define MIN3(a, b, c) ((a) < (b) ? ((a) < (c) ? (a) : (c)) : ((b) < (c) ? (b) : (c))) struct t_hashtable *string_hashtable_shared = NULL; @@ -3960,6 +3961,62 @@ string_input_for_buffer (const char *string) } /* + * Returns the distance between two strings using the Levenshtein algorithm. + * See: https://en.wikipedia.org/wiki/Levenshtein_distance + */ + +int +string_levenshtein (const char *string1, const char *string2, + int case_sensitive) +{ + int x, y, length1, length2, last_diag, old_diag; + wint_t char1, char2; + const char *ptr_str1, *ptr_str2; + + length1 = (string1) ? utf8_strlen (string1) : 0; + length2 = (string2) ? utf8_strlen (string2) : 0; + + if (length1 == 0) + return length2; + if (length2 == 0) + return length1; + + int column[length1 + 1]; + + for (y = 1; y <= length1; y++) + { + column[y] = y; + } + + ptr_str2 = string2; + + for (x = 1; x <= length2; x++) + { + char2 = (case_sensitive) ? + (wint_t)utf8_char_int (ptr_str2) : + towlower (utf8_char_int (ptr_str2)); + column[0] = x; + ptr_str1 = string1; + for (y = 1, last_diag = x - 1; y <= length1; y++) + { + char1 = (case_sensitive) ? + (wint_t)utf8_char_int (ptr_str1) : + towlower (utf8_char_int (ptr_str1)); + old_diag = column[y]; + column[y] = MIN3( + column[y] + 1, + column[y - 1] + 1, + last_diag + ((char1 == char2) ? 0 : 1)); + last_diag = old_diag; + ptr_str1 = utf8_next_char (ptr_str1); + } + ptr_str2 = utf8_next_char (ptr_str2); + } + + return column[length1]; +} + +/* * Replaces ${vars} using a callback that returns replacement value (this value * must be newly allocated because it will be freed in this function). * diff --git a/src/core/wee-string.h b/src/core/wee-string.h index 785e41bac..641855f7f 100644 --- a/src/core/wee-string.h +++ b/src/core/wee-string.h @@ -134,6 +134,8 @@ extern char *string_hex_dump (const char *data, int data_size, const char *prefix, const char *suffix); extern int string_is_command_char (const char *string); extern const char *string_input_for_buffer (const char *string); +extern int string_levenshtein (const char *string1, const char *string2, + int case_sensitive); extern char *string_replace_with_callback (const char *string, const char *prefix, const char *suffix, diff --git a/tests/unit/core/test-core-string.cpp b/tests/unit/core/test-core-string.cpp index 7c14345d9..671a1327c 100644 --- a/tests/unit/core/test-core-string.cpp +++ b/tests/unit/core/test-core-string.cpp @@ -2542,6 +2542,60 @@ TEST(CoreString, InputForBuffer) /* * Tests functions: + * string_levenshtein + */ + +TEST(CoreString, Levenshtein) +{ + LONGS_EQUAL(0, string_levenshtein (NULL, NULL, 1)); + LONGS_EQUAL(0, string_levenshtein ("", "", 1)); + LONGS_EQUAL(3, string_levenshtein (NULL, "abc", 1)); + LONGS_EQUAL(3, string_levenshtein ("abc", NULL, 1)); + LONGS_EQUAL(3, string_levenshtein ("", "abc", 1)); + LONGS_EQUAL(3, string_levenshtein ("abc", "", 1)); + + LONGS_EQUAL(0, string_levenshtein ("abc", "abc", 1)); + LONGS_EQUAL(1, string_levenshtein ("abc", "ab", 1)); + LONGS_EQUAL(1, string_levenshtein ("ab", "abc", 1)); + LONGS_EQUAL(2, string_levenshtein ("abc", "a", 1)); + LONGS_EQUAL(2, string_levenshtein ("a", "abc", 1)); + LONGS_EQUAL(3, string_levenshtein ("abc", "", 1)); + LONGS_EQUAL(3, string_levenshtein ("", "abc", 1)); + + LONGS_EQUAL(3, string_levenshtein ("abc", "ABC", 1)); + LONGS_EQUAL(3, string_levenshtein ("abc", "AB", 1)); + LONGS_EQUAL(3, string_levenshtein ("ab", "ABC", 1)); + LONGS_EQUAL(3, string_levenshtein ("abc", "A", 1)); + LONGS_EQUAL(3, string_levenshtein ("a", "ABC", 1)); + LONGS_EQUAL(3, string_levenshtein ("abc", "", 1)); + LONGS_EQUAL(3, string_levenshtein ("", "ABC", 1)); + + LONGS_EQUAL(0, string_levenshtein ("abc", "ABC", 0)); + LONGS_EQUAL(1, string_levenshtein ("abc", "AB", 0)); + LONGS_EQUAL(1, string_levenshtein ("ab", "ABC", 0)); + LONGS_EQUAL(2, string_levenshtein ("abc", "A", 0)); + LONGS_EQUAL(2, string_levenshtein ("a", "ABC", 0)); + LONGS_EQUAL(3, string_levenshtein ("abc", "", 0)); + LONGS_EQUAL(3, string_levenshtein ("", "ABC", 0)); + + LONGS_EQUAL(2, string_levenshtein ("response", "respond", 1)); + LONGS_EQUAL(4, string_levenshtein ("response", "resist", 1)); + + LONGS_EQUAL(2, string_levenshtein ("response", "responsive", 1)); + + /* with UTF-8 chars */ + LONGS_EQUAL(1, string_levenshtein ("é", "É", 1)); + LONGS_EQUAL(0, string_levenshtein ("é", "É", 0)); + LONGS_EQUAL(1, string_levenshtein ("é", "à", 1)); + LONGS_EQUAL(1, string_levenshtein ("é", "à", 0)); + LONGS_EQUAL(1, string_levenshtein ("té", "to", 1)); + LONGS_EQUAL(1, string_levenshtein ("noël", "noel", 1)); + LONGS_EQUAL(2, string_levenshtein ("bôô", "boo", 1)); + LONGS_EQUAL(2, string_levenshtein ("界世", "こん", 1)); +} + +/* + * Tests functions: * string_get_priority_and_name */ |