diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2016-05-04 15:20:24 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2016-05-04 15:20:24 +0100 |
commit | 798bd5e86f7941dd87884a4ea15c9a15179ac811 (patch) | |
tree | 94823482c4b436b2e5cc4186b495c2545a4ee7ec /src | |
parent | 6bb2daddb07642bbd5acb6ae8e5070d7eba49352 (diff) | |
download | rspamd-798bd5e86f7941dd87884a4ea15c9a15179ac811.tar.gz rspamd-798bd5e86f7941dd87884a4ea15c9a15179ac811.zip |
[Feature] Improve Levenshtein distance function
- Use g_malloc instead of alloca
- Allow setting a variable replacement cost
- Update lua util.levenshtein_distance
Diffstat (limited to 'src')
-rw-r--r-- | src/libmime/filter.c | 1 | ||||
-rw-r--r-- | src/libutil/str_util.c | 13 | ||||
-rw-r--r-- | src/libutil/str_util.h | 2 | ||||
-rw-r--r-- | src/lua/lua_util.c | 8 |
4 files changed, 18 insertions, 6 deletions
diff --git a/src/libmime/filter.c b/src/libmime/filter.c index f810f7508..e1a33f3e2 100644 --- a/src/libmime/filter.c +++ b/src/libmime/filter.c @@ -19,6 +19,7 @@ #include "rspamd.h" #include "message.h" #include "lua/lua_common.h" +#include "xxhash.h" #include <math.h> diff --git a/src/libutil/str_util.c b/src/libutil/str_util.c index 457e1fe5b..7d40b15fa 100644 --- a/src/libutil/str_util.c +++ b/src/libutil/str_util.c @@ -963,13 +963,15 @@ rspamd_decode_url (gchar *dst, const gchar *src, gsize size) gint rspamd_strings_levenshtein_distance (const gchar *s1, gsize s1len, - const gchar *s2, gsize s2len) + const gchar *s2, gsize s2len, + guint replace_cost) { guint x, y, lastdiag, olddiag; gchar c1, c2; guint *column; gint eq; static const guint max_cmp = 8192; + gint ret; g_assert (s1 != NULL); g_assert (s2 != NULL); @@ -986,7 +988,7 @@ rspamd_strings_levenshtein_distance (const gchar *s1, gsize s1len, return 0; } - column = g_alloca ((s1len + 1) * sizeof (guint)); + column = g_malloc0 ((s1len + 1) * sizeof (guint)); for (y = 1; y <= s1len; y++) { column[y] = y; @@ -999,14 +1001,17 @@ rspamd_strings_levenshtein_distance (const gchar *s1, gsize s1len, olddiag = column[y]; c1 = s1[y - 1]; c2 = s2[x - 1]; - eq = (c1 == c2) ? 0 : 1; + eq = (c1 == c2) ? 
0 : replace_cost; column[y] = MIN3 (column[y] + 1, column[y - 1] + 1, lastdiag + (eq)); lastdiag = olddiag; } } - return column[s1len]; + ret = column[s1len]; + g_free (column); + + return ret; } GString * diff --git a/src/libutil/str_util.h b/src/libutil/str_util.h index 68f84f7bc..a63b160dd 100644 --- a/src/libutil/str_util.h +++ b/src/libutil/str_util.h @@ -217,7 +217,7 @@ gsize rspamd_decode_url (gchar *dst, const gchar *src, gsize size); * @return */ gint rspamd_strings_levenshtein_distance (const gchar *s1, gsize s1len, - const gchar *s2, gsize s2len); + const gchar *s2, gsize s2len, guint replace_cost); /** * Fold header using rfc822 rules, return new GString from the previous one diff --git a/src/lua/lua_util.c b/src/lua/lua_util.c index 1506676ea..05a9a4452 100644 --- a/src/lua/lua_util.c +++ b/src/lua/lua_util.c @@ -826,12 +826,18 @@ lua_util_levenshtein_distance (lua_State *L) const gchar *s1, *s2; gsize s1len, s2len; gint dist = 0; + guint replace_cost = 1; s1 = luaL_checklstring (L, 1, &s1len); s2 = luaL_checklstring (L, 2, &s2len); + if (lua_isnumber (L, 3)) { + replace_cost = lua_tonumber (L, 3); + } + if (s1 && s2) { - dist = rspamd_strings_levenshtein_distance (s1, s1len, s2, s2len); + dist = rspamd_strings_levenshtein_distance (s1, s1len, s2, s2len, + replace_cost); } lua_pushnumber (L, dist); |