aboutsummaryrefslogtreecommitdiffstats
path: root/src/libutil/str_util.c
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2016-05-04 15:20:24 +0100
committerVsevolod Stakhov <vsevolod@highsecure.ru>2016-05-04 15:20:24 +0100
commit798bd5e86f7941dd87884a4ea15c9a15179ac811 (patch)
tree94823482c4b436b2e5cc4186b495c2545a4ee7ec /src/libutil/str_util.c
parent6bb2daddb07642bbd5acb6ae8e5070d7eba49352 (diff)
downloadrspamd-798bd5e86f7941dd87884a4ea15c9a15179ac811.tar.gz
rspamd-798bd5e86f7941dd87884a4ea15c9a15179ac811.zip
[Feature] Improve levenshtein distance function
- Use g_malloc instead of alloca - Allow to set variable replacement cost - Update lua util.levenshtein_distance
Diffstat (limited to 'src/libutil/str_util.c')
-rw-r--r--src/libutil/str_util.c13
1 files changed, 9 insertions, 4 deletions
diff --git a/src/libutil/str_util.c b/src/libutil/str_util.c
index 457e1fe5b..7d40b15fa 100644
--- a/src/libutil/str_util.c
+++ b/src/libutil/str_util.c
@@ -963,13 +963,15 @@ rspamd_decode_url (gchar *dst, const gchar *src, gsize size)
gint
rspamd_strings_levenshtein_distance (const gchar *s1, gsize s1len,
- const gchar *s2, gsize s2len)
+ const gchar *s2, gsize s2len,
+ guint replace_cost)
{
guint x, y, lastdiag, olddiag;
gchar c1, c2;
guint *column;
gint eq;
static const guint max_cmp = 8192;
+ gint ret;
g_assert (s1 != NULL);
g_assert (s2 != NULL);
@@ -986,7 +988,7 @@ rspamd_strings_levenshtein_distance (const gchar *s1, gsize s1len,
return 0;
}
- column = g_alloca ((s1len + 1) * sizeof (guint));
+ column = g_malloc0 ((s1len + 1) * sizeof (guint));
for (y = 1; y <= s1len; y++) {
column[y] = y;
@@ -999,14 +1001,17 @@ rspamd_strings_levenshtein_distance (const gchar *s1, gsize s1len,
olddiag = column[y];
c1 = s1[y - 1];
c2 = s2[x - 1];
- eq = (c1 == c2) ? 0 : 1;
+ eq = (c1 == c2) ? 0 : replace_cost;
column[y] = MIN3 (column[y] + 1, column[y - 1] + 1,
lastdiag + (eq));
lastdiag = olddiag;
}
}
- return column[s1len];
+ ret = column[s1len];
+ g_free (column);
+
+ return ret;
}
GString *