summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/core/wee-string.c57
-rw-r--r--src/core/wee-string.h2
-rw-r--r--tests/unit/core/test-core-string.cpp54
3 files changed, 113 insertions, 0 deletions
diff --git a/src/core/wee-string.c b/src/core/wee-string.c
index 368cee7d2..abe7ccd17 100644
--- a/src/core/wee-string.c
+++ b/src/core/wee-string.c
@@ -63,6 +63,7 @@
#define HEX2DEC(c) (((c >= 'a') && (c <= 'f')) ? c - 'a' + 10 : \
((c >= 'A') && (c <= 'F')) ? c - 'A' + 10 : \
c - '0')
+#define MIN3(a, b, c) ((a) < (b) ? ((a) < (c) ? (a) : (c)) : ((b) < (c) ? (b) : (c)))
struct t_hashtable *string_hashtable_shared = NULL;
@@ -3960,6 +3961,62 @@ string_input_for_buffer (const char *string)
}
/*
+ * Returns the distance between two strings using the Levenshtein algorithm.
+ * See: https://en.wikipedia.org/wiki/Levenshtein_distance
+ */
+
+int
+string_levenshtein (const char *string1, const char *string2,
+ int case_sensitive)
+{
+ int x, y, length1, length2, last_diag, old_diag;
+ wint_t char1, char2;
+ const char *ptr_str1, *ptr_str2;
+
+ length1 = (string1) ? utf8_strlen (string1) : 0;
+ length2 = (string2) ? utf8_strlen (string2) : 0;
+
+ if (length1 == 0)
+ return length2;
+ if (length2 == 0)
+ return length1;
+
+ int column[length1 + 1];
+
+ for (y = 1; y <= length1; y++)
+ {
+ column[y] = y;
+ }
+
+ ptr_str2 = string2;
+
+ for (x = 1; x <= length2; x++)
+ {
+ char2 = (case_sensitive) ?
+ (wint_t)utf8_char_int (ptr_str2) :
+ towlower (utf8_char_int (ptr_str2));
+ column[0] = x;
+ ptr_str1 = string1;
+ for (y = 1, last_diag = x - 1; y <= length1; y++)
+ {
+ char1 = (case_sensitive) ?
+ (wint_t)utf8_char_int (ptr_str1) :
+ towlower (utf8_char_int (ptr_str1));
+ old_diag = column[y];
+ column[y] = MIN3(
+ column[y] + 1,
+ column[y - 1] + 1,
+ last_diag + ((char1 == char2) ? 0 : 1));
+ last_diag = old_diag;
+ ptr_str1 = utf8_next_char (ptr_str1);
+ }
+ ptr_str2 = utf8_next_char (ptr_str2);
+ }
+
+ return column[length1];
+}
+
+/*
* Replaces ${vars} using a callback that returns replacement value (this value
* must be newly allocated because it will be freed in this function).
*
diff --git a/src/core/wee-string.h b/src/core/wee-string.h
index 785e41bac..641855f7f 100644
--- a/src/core/wee-string.h
+++ b/src/core/wee-string.h
@@ -134,6 +134,8 @@ extern char *string_hex_dump (const char *data, int data_size,
const char *prefix, const char *suffix);
extern int string_is_command_char (const char *string);
extern const char *string_input_for_buffer (const char *string);
+extern int string_levenshtein (const char *string1, const char *string2,
+ int case_sensitive);
extern char *string_replace_with_callback (const char *string,
const char *prefix,
const char *suffix,
diff --git a/tests/unit/core/test-core-string.cpp b/tests/unit/core/test-core-string.cpp
index 7c14345d9..671a1327c 100644
--- a/tests/unit/core/test-core-string.cpp
+++ b/tests/unit/core/test-core-string.cpp
@@ -2542,6 +2542,60 @@ TEST(CoreString, InputForBuffer)
/*
* Tests functions:
+ * string_levenshtein
+ */
+
+TEST(CoreString, Levenshtein)
+{
+ LONGS_EQUAL(0, string_levenshtein (NULL, NULL, 1));
+ LONGS_EQUAL(0, string_levenshtein ("", "", 1));
+ LONGS_EQUAL(3, string_levenshtein (NULL, "abc", 1));
+ LONGS_EQUAL(3, string_levenshtein ("abc", NULL, 1));
+ LONGS_EQUAL(3, string_levenshtein ("", "abc", 1));
+ LONGS_EQUAL(3, string_levenshtein ("abc", "", 1));
+
+ LONGS_EQUAL(0, string_levenshtein ("abc", "abc", 1));
+ LONGS_EQUAL(1, string_levenshtein ("abc", "ab", 1));
+ LONGS_EQUAL(1, string_levenshtein ("ab", "abc", 1));
+ LONGS_EQUAL(2, string_levenshtein ("abc", "a", 1));
+ LONGS_EQUAL(2, string_levenshtein ("a", "abc", 1));
+ LONGS_EQUAL(3, string_levenshtein ("abc", "", 1));
+ LONGS_EQUAL(3, string_levenshtein ("", "abc", 1));
+
+ LONGS_EQUAL(3, string_levenshtein ("abc", "ABC", 1));
+ LONGS_EQUAL(3, string_levenshtein ("abc", "AB", 1));
+ LONGS_EQUAL(3, string_levenshtein ("ab", "ABC", 1));
+ LONGS_EQUAL(3, string_levenshtein ("abc", "A", 1));
+ LONGS_EQUAL(3, string_levenshtein ("a", "ABC", 1));
+ LONGS_EQUAL(3, string_levenshtein ("abc", "", 1));
+ LONGS_EQUAL(3, string_levenshtein ("", "ABC", 1));
+
+ LONGS_EQUAL(0, string_levenshtein ("abc", "ABC", 0));
+ LONGS_EQUAL(1, string_levenshtein ("abc", "AB", 0));
+ LONGS_EQUAL(1, string_levenshtein ("ab", "ABC", 0));
+ LONGS_EQUAL(2, string_levenshtein ("abc", "A", 0));
+ LONGS_EQUAL(2, string_levenshtein ("a", "ABC", 0));
+ LONGS_EQUAL(3, string_levenshtein ("abc", "", 0));
+ LONGS_EQUAL(3, string_levenshtein ("", "ABC", 0));
+
+ LONGS_EQUAL(2, string_levenshtein ("response", "respond", 1));
+ LONGS_EQUAL(4, string_levenshtein ("response", "resist", 1));
+
+ LONGS_EQUAL(2, string_levenshtein ("response", "responsive", 1));
+
+ /* with UTF-8 chars */
+ LONGS_EQUAL(1, string_levenshtein ("é", "É", 1));
+ LONGS_EQUAL(0, string_levenshtein ("é", "É", 0));
+ LONGS_EQUAL(1, string_levenshtein ("é", "à", 1));
+ LONGS_EQUAL(1, string_levenshtein ("é", "à", 0));
+ LONGS_EQUAL(1, string_levenshtein ("té", "to", 1));
+ LONGS_EQUAL(1, string_levenshtein ("noël", "noel", 1));
+ LONGS_EQUAL(2, string_levenshtein ("bôô", "boo", 1));
+ LONGS_EQUAL(2, string_levenshtein ("界世", "こん", 1));
+}
+
+/*
+ * Tests functions:
* string_get_priority_and_name
*/