diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2016-05-04 15:20:24 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2016-05-04 15:20:24 +0100 |
commit | 798bd5e86f7941dd87884a4ea15c9a15179ac811 (patch) | |
tree | 94823482c4b436b2e5cc4186b495c2545a4ee7ec /src/libutil/str_util.c | |
parent | 6bb2daddb07642bbd5acb6ae8e5070d7eba49352 (diff) | |
download | rspamd-798bd5e86f7941dd87884a4ea15c9a15179ac811.tar.gz rspamd-798bd5e86f7941dd87884a4ea15c9a15179ac811.zip |
[Feature] Improve levenshtein distance function
- Use g_malloc instead of alloca
- Allow to set variable replacement cost
- Update lua util.levenshtein_distance
Diffstat (limited to 'src/libutil/str_util.c')
-rw-r--r-- | src/libutil/str_util.c | 13 |
1 files changed, 9 insertions, 4 deletions
diff --git a/src/libutil/str_util.c b/src/libutil/str_util.c index 457e1fe5b..7d40b15fa 100644 --- a/src/libutil/str_util.c +++ b/src/libutil/str_util.c @@ -963,13 +963,15 @@ rspamd_decode_url (gchar *dst, const gchar *src, gsize size) gint rspamd_strings_levenshtein_distance (const gchar *s1, gsize s1len, - const gchar *s2, gsize s2len) + const gchar *s2, gsize s2len, + guint replace_cost) { guint x, y, lastdiag, olddiag; gchar c1, c2; guint *column; gint eq; static const guint max_cmp = 8192; + gint ret; g_assert (s1 != NULL); g_assert (s2 != NULL); @@ -986,7 +988,7 @@ rspamd_strings_levenshtein_distance (const gchar *s1, gsize s1len, return 0; } - column = g_alloca ((s1len + 1) * sizeof (guint)); + column = g_malloc0 ((s1len + 1) * sizeof (guint)); for (y = 1; y <= s1len; y++) { column[y] = y; @@ -999,14 +1001,17 @@ rspamd_strings_levenshtein_distance (const gchar *s1, gsize s1len, olddiag = column[y]; c1 = s1[y - 1]; c2 = s2[x - 1]; - eq = (c1 == c2) ? 0 : 1; + eq = (c1 == c2) ? 0 : replace_cost; column[y] = MIN3 (column[y] + 1, column[y - 1] + 1, lastdiag + (eq)); lastdiag = olddiag; } } - return column[s1len]; + ret = column[s1len]; + g_free (column); + + return ret; } GString * |