aboutsummaryrefslogtreecommitdiffstats
path: root/src/libutil
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2015-08-20 14:19:55 +0100
committerVsevolod Stakhov <vsevolod@highsecure.ru>2015-08-20 14:19:55 +0100
commita696a5e1c89118c44ff88ee15c3a662d07015119 (patch)
tree382cdcf87b82a9f4b0da947f20378709bf4d65c7 /src/libutil
parent04764a1d23491bb1029d090fd011f072e5326f24 (diff)
downloadrspamd-a696a5e1c89118c44ff88ee15c3a662d07015119.tar.gz
rspamd-a696a5e1c89118c44ff88ee15c3a662d07015119.zip
Add function to calculate lev distance between strings
Diffstat (limited to 'src/libutil')
-rw-r--r--src/libutil/str_util.c49
-rw-r--r--src/libutil/str_util.h11
2 files changed, 60 insertions, 0 deletions
diff --git a/src/libutil/str_util.c b/src/libutil/str_util.c
index 780339e37..00ef4e4ed 100644
--- a/src/libutil/str_util.c
+++ b/src/libutil/str_util.c
@@ -795,3 +795,52 @@ rspamd_decode_url (gchar *dst, const gchar *src, gsize size)
return (d - dst);
}
+#define MIN3(a, b, c) ((a) < (b) ? ((a) < (c) ? (a) : (c)) : ((b) < (c) ? (b) : (c)))
+
+gint
+rspamd_strings_levenshtein_distance (const gchar *s1, gsize s1len,
+ const gchar *s2, gsize s2len)
+{
+ guint x, y, lastdiag, olddiag;
+ gchar c1, c2;
+ guint *column;
+ gint eq;
+ static const guint max_cmp = 8192;
+
+ g_assert (s1 != NULL);
+ g_assert (s2 != NULL);
+
+ if (s1len == 0) {
+ s1len = strlen (s1);
+ }
+ if (s2len == 0) {
+ s2len = strlen (s2);
+ }
+
+ if (MAX(s1len, s2len) > max_cmp) {
+ /* Cannot compare too many characters */
+ return 0;
+ }
+
+ column = g_alloca ((s1len + 1) * sizeof (guint));
+
+ for (y = 1; y <= s1len; y++) {
+ column[y] = y;
+ }
+
+ for (x = 1; x <= s2len; x++) {
+ column[0] = x;
+
+ for (y = 1, lastdiag = x - 1; y <= s1len; y++) {
+ olddiag = column[y];
+ c1 = s1[y - 1];
+ c2 = s2[x - 1];
+ eq = (c1 == c2) ? 0 : 1;
+ column[y] = MIN3 (column[y] + 1, column[y - 1] + 1,
+ lastdiag + (eq));
+ lastdiag = olddiag;
+ }
+ }
+
+ return column[s1len];
+}
diff --git a/src/libutil/str_util.h b/src/libutil/str_util.h
index 986fc7f03..e07c51701 100644
--- a/src/libutil/str_util.h
+++ b/src/libutil/str_util.h
@@ -141,4 +141,15 @@ gsize rspamd_decode_url (gchar *dst, const gchar *src, gsize size);
# define g_tolower(x) (((x) >= 'A' && (x) <= 'Z') ? (x) - 'A' + 'a' : (x))
#endif
+/**
+ * Return levenstein distance between two strings
+ * @param s1
+ * @param s1len
+ * @param s2
+ * @param s2len
+ * @return
+ */
+gint rspamd_strings_levenshtein_distance (const gchar *s1, gsize s1len,
+ const gchar *s2, gsize s2len);
+
#endif /* SRC_LIBUTIL_STR_UTIL_H_ */