diff options
author | Miecio Za <miecio@miecio.net> | 2019-03-18 14:06:56 +0100 |
---|---|---|
committer | Miecio Za <miecio@miecio.net> | 2019-03-18 14:06:56 +0100 |
commit | cd08c8845f6ea0bac789ea8a49f7d8537f598b7d (patch) | |
tree | dee0d9dbc67008cd190c502393098c5f2772a764 | |
parent | 7160b4ace6665d808b636b0617f081f51a849617 (diff) | |
download | rspamd-cd08c8845f6ea0bac789ea8a49f7d8537f598b7d.tar.gz rspamd-cd08c8845f6ea0bac789ea8a49f7d8537f598b7d.zip |
[Minor] Fix performance issue with is_utf_outside_range
Fix performance issue, add some error checking, and add a few tests
-rw-r--r-- | src/lua/lua_util.c | 67 | ||||
-rw-r--r-- | test/lua/unit/rspamd_util.lua | 67 |
2 files changed, 116 insertions, 18 deletions
diff --git a/src/lua/lua_util.c b/src/lua/lua_util.c
index 71d61da62..7c98a0989 100644
--- a/src/lua/lua_util.c
+++ b/src/lua/lua_util.c
@@ -22,6 +22,7 @@
 #include "libmime/email_addr.h"
 #include "libmime/content_type.h"
 #include "libmime/mime_headers.h"
+#include "libutil/hash.h"
 #include "linenoise.h"
 #include <math.h>
 #include <glob.h>
@@ -2458,6 +2459,12 @@ lua_util_is_utf_spoofed (lua_State *L)
 			uspoof_setChecks (spc_sgl,
 					USPOOF_INVISIBLE | USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_ANY_CASE,
 					&uc_err);
+			if (uc_err != U_ZERO_ERROR) {
+				msg_err ("Cannot set proper checks for uspoof: %s", u_errorName (uc_err));
+				lua_pushboolean (L, false);
+				uspoof_close(spc);
+				return 1;
+			}
 		}
 
 		ret = uspoof_checkUTF8 (spc_sgl, s1, l1, NULL, &uc_err);
@@ -2533,28 +2540,52 @@ lua_util_is_utf_outside_range(lua_State *L)
 	guint32 range_start = lua_tointeger (L, 2);
 	guint32 range_end = lua_tointeger (L, 3);
 
-	USpoofChecker *spc_sgl;
-	USet * allowed_chars;
-	UErrorCode uc_err = U_ZERO_ERROR;
+	static rspamd_lru_hash_t *validators;
+
+	if (validators == NULL) {
+		validators = rspamd_lru_hash_new(16, g_free, (GDestroyNotify)uspoof_close);
+	}
 
 	if (string_to_check) {
-		spc_sgl = uspoof_open (&uc_err);
-		if (uc_err != U_ZERO_ERROR) {
-			msg_err ("cannot init spoof checker: %s", u_errorName (uc_err));
-			lua_pushboolean (L, false);
-			uspoof_close(spc_sgl);
-			return 1;
-		}
+		guint64 hash_key = ((guint64)range_end << 32) | range_start;
+
+		USpoofChecker *validator = rspamd_lru_hash_lookup(validators, &hash_key, time(NULL));
+
+		UErrorCode uc_err = U_ZERO_ERROR;
+
+		if (validator == NULL) {
+			USet * allowed_chars;
+
+			validator = uspoof_open (&uc_err);
+			if (uc_err != U_ZERO_ERROR) {
+				msg_err ("cannot init spoof checker: %s", u_errorName (uc_err));
+				lua_pushboolean (L, false);
+				uspoof_close(validator);
+				return 1;
+			}
+
+			allowed_chars = uset_openEmpty();
+			uset_addRange(allowed_chars, range_start, range_end);
+			uspoof_setAllowedChars(validator, allowed_chars, &uc_err);
+
+			uspoof_setChecks (validator,
+				USPOOF_CHAR_LIMIT | USPOOF_ANY_CASE, &uc_err);
 
-		allowed_chars = uset_openEmpty();
-		uset_addRange(allowed_chars, range_start, range_end);
-		uspoof_setAllowedChars(spc_sgl, allowed_chars, &uc_err);
+			uset_close(allowed_chars);
+
+			if (uc_err != U_ZERO_ERROR) {
+				msg_err ("Cannot configure uspoof: %s", u_errorName (uc_err));
+				lua_pushboolean (L, false);
+				uspoof_close(validator);
+				return 1;
+			}
+
+			guint64 * creation_hash_key = g_malloc(sizeof(guint64));
+			*creation_hash_key = hash_key;
+			rspamd_lru_hash_insert(validators, creation_hash_key, validator, time(NULL), 0);
+		}
 
-		uspoof_setChecks (spc_sgl,
-				USPOOF_CHAR_LIMIT | USPOOF_ANY_CASE, &uc_err);
-		ret = uspoof_checkUTF8 (spc_sgl, string_to_check, len_of_string, NULL, &uc_err);
-		uset_close(allowed_chars);
-		uspoof_close(spc_sgl);
+		ret = uspoof_checkUTF8 (validator, string_to_check, len_of_string, NULL, &uc_err);
 	}
 	else {
 		return luaL_error (L, "invalid arguments");
diff --git a/test/lua/unit/rspamd_util.lua b/test/lua/unit/rspamd_util.lua
new file mode 100644
index 000000000..802b400d2
--- /dev/null
+++ b/test/lua/unit/rspamd_util.lua
@@ -0,0 +1,67 @@
+context("Rspamd util for lua - check generic functions", function()
+    local util = require 'rspamd_util'
+
+    local cases = {
+        {
+            input = "test1",
+            result = false,
+            range_start = 0x0000,
+            range_end = 0x017f
+        },
+        {
+            input = "test test xxx",
+            result = false,
+            range_start = 0x0000,
+            range_end = 0x017f
+        },
+        {
+            input = "АбЫрвАлг",
+            result = true,
+            range_start = 0x0000,
+            range_end = 0x017f
+        },
+        {
+            input = "АбЫрвАлг example",
+            result = true,
+            range_start = 0x0000,
+            range_end = 0x017f
+        },
+        {
+            input = "example ąłśćżłóę",
+            result = false,
+            range_start = 0x0000,
+            range_end = 0x017f
+        },
+        {
+            input = "ąłśćżłóę АбЫрвАлг",
+            result = true,
+            range_start = 0x0000,
+            range_end = 0x017f
+        },
+    }
+
+    for i,c in ipairs(cases) do
+        test("is_utf_outside_range, test case #" .. i, function()
+            local actual = util.is_utf_outside_range(c.input, c.range_start, c.range_end)
+
+            assert_equal(c.result, actual)
+        end)
+    end
+
+    test("is_utf_outside_range, check cache", function ()
+        local cache_size = 20
+        for i = 1,cache_size do
+            util.is_utf_outside_range("a", 0x0000, 0x0000+i)
+        end
+    end)
+
+    test("is_utf_outside_range, check empty string", function ()
+        assert_error(util.is_utf_outside_range)
+    end)
+
+    test("get_string_stats, test case", function()
+        local res = util.get_string_stats("this is test 99")
+        assert_equal(res["letters"], 10)
+        assert_equal(res["digits"], 2)
+    end)
+end)