diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2017-02-04 13:41:21 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2017-02-04 13:41:38 +0100 |
commit | 6c8b19b9422f6fb4b776a59617750d53a48da5f1 (patch) | |
tree | 1df2c37cb4027e258ef953bda529de7f473041e6 | |
parent | 6670767e3a55bc9313ac5019e36e510c49ef3709 (diff) | |
download | rspamd-6c8b19b9422f6fb4b776a59617750d53a48da5f1.tar.gz rspamd-6c8b19b9422f6fb4b776a59617750d53a48da5f1.zip |
[Feature] Detect URLs with suspicious homographs
-rw-r--r-- | rules/misc.lua | 26 | ||||
-rw-r--r-- | src/lua/lua_util.c | 42 |
2 files changed, 67 insertions, 1 deletions
diff --git a/rules/misc.lua b/rules/misc.lua index 6a1eec4fc..56de79a6b 100644 --- a/rules/misc.lua +++ b/rules/misc.lua @@ -756,4 +756,28 @@ local freemail_reply_neq_from_id = rspamd_config:register_symbol({ score = 3.0 }) rspamd_config:register_dependency(freemail_reply_neq_from_id, 'FREEMAIL_REPLYTO') -rspamd_config:register_dependency(freemail_reply_neq_from_id, 'FREEMAIL_FROM')
\ No newline at end of file +rspamd_config:register_dependency(freemail_reply_neq_from_id, 'FREEMAIL_FROM') + +rspamd_config.OMOGRAPH_URL = { + callback = function(task) + local urls = task:get_urls() + + if urls then + for _,u in ipairs(urls) do + local h = u:get_host() + + if h then + local non_latin,total = util.count_non_ascii(h) + + if non_latin ~= total and non_latin > 0 then + return true, 1.0, h + end + end + end + end + + return false + end, + score = 5.0, + description = 'Url contains both latin and non-latin characters' +} diff --git a/src/lua/lua_util.c b/src/lua/lua_util.c index b1bfdce28..7b858f4ce 100644 --- a/src/lua/lua_util.c +++ b/src/lua/lua_util.c @@ -362,6 +362,13 @@ LUA_FUNCTION_DEF (util, zstd_decompress); * @return {number} normalized number */ LUA_FUNCTION_DEF (util, normalize_prob); +/*** + * @function util.count_non_ascii(str) + * Counts the bytes of a string that have the high bit set (non-ASCII bytes); ASCII bytes + * are counted only when they are letters. The string does not have to be valid UTF-8. + * @return {number,number} number of non-ASCII bytes, and that number plus the number of ASCII letters + */ +LUA_FUNCTION_DEF (util, count_non_ascii); /*** * @function util.pack(fmt, ...) 
@@ -485,6 +492,7 @@ static const struct luaL_reg utillib_f[] = { LUA_INTERFACE_DEF (util, zstd_decompress), LUA_INTERFACE_DEF (util, normalize_prob), LUA_INTERFACE_DEF (util, caseless_hash), + LUA_INTERFACE_DEF (util, count_non_ascii), LUA_INTERFACE_DEF (util, pack), LUA_INTERFACE_DEF (util, unpack), LUA_INTERFACE_DEF (util, packsize), @@ -1825,6 +1833,40 @@ lua_util_caseless_hash (lua_State *L) return 1; } +static gint +lua_util_count_non_ascii (lua_State *L) +{ /* Scans argument 1 byte by byte and pushes two counters: ret and total */ + gsize len; + const gchar *str = lua_tolstring (L, 1, &len); + const gchar *p, *end; + gint ret = 0, total = 0; /* ret: bytes with the high bit set; total: ret plus ASCII letters */ + + if (str != NULL) { + end = str + len; /* explicit length is used, so the scan does not stop at an embedded NUL */ + p = str; + + while (p < end) { + if (*p & 0x80) { /* high bit set: byte is outside 7-bit ASCII; every such byte is counted */ + ret ++; + total ++; + } + else if (g_ascii_isalpha (*p)) { /* ASCII letter: counts toward total only */ + total ++; + } + + p ++; + } + + lua_pushnumber (L, ret); /* first result */ + lua_pushnumber (L, total); /* second result */ + } + else { + return luaL_error (L, "invalid arguments"); /* lua_tolstring returned NULL for argument 1 */ + } + + return 2; /* two values were pushed above */ +} + /* Backport from Lua 5.3 */ /****************************************************************************** |