Browse Source

[Feature] Detect URLs with suspicious homographs

tags/1.5.0
Vsevolod Stakhov 7 years ago
parent
commit
6c8b19b942
2 changed files with 67 additions and 1 deletions
  1. 25
    1
      rules/misc.lua
  2. 42
    0
      src/lua/lua_util.c

+ 25
- 1
rules/misc.lua View File

@@ -756,4 +756,28 @@ local freemail_reply_neq_from_id = rspamd_config:register_symbol({
score = 3.0
})
-- Declare that the freemail_reply_neq_from_id symbol depends on the
-- FREEMAIL_REPLYTO and FREEMAIL_FROM symbols.
-- NOTE(review): the FREEMAIL_FROM registration appears twice below; this looks
-- like the removed/added line pair of a diff view rather than intended code - confirm.
rspamd_config:register_dependency(freemail_reply_neq_from_id, 'FREEMAIL_REPLYTO')
rspamd_config:register_dependency(freemail_reply_neq_from_id, 'FREEMAIL_FROM')
rspamd_config:register_dependency(freemail_reply_neq_from_id, 'FREEMAIL_FROM')

-- Rule: fire when a URL host mixes ASCII letters with non-ASCII bytes,
-- which may indicate a look-alike (homograph) domain.
rspamd_config.OMOGRAPH_URL = {
  description = 'Url contains both latin and non-latin characters',
  score = 5.0,
  -- Returns `true, 1.0, host` for the first offending host, `false` otherwise.
  callback = function(task)
    local url_list = task:get_urls()

    -- No URL list for this task: nothing to check
    if not url_list then
      return false
    end

    for _, url in ipairs(url_list) do
      local host = url:get_host()

      if host then
        -- high_bytes: bytes with the top bit set; letters: those plus ASCII alphas
        local high_bytes, letters = util.count_non_ascii(host)
        local is_mixed = high_bytes ~= letters and high_bytes > 0

        if is_mixed then
          -- Stop at the first mixed host and report it as the symbol option
          return true, 1.0, host
        end
      end
    end

    return false
  end
}

+ 42
- 0
src/lua/lua_util.c View File

@@ -362,6 +362,13 @@ LUA_FUNCTION_DEF (util, zstd_decompress);
* @return {number} normalized number
*/
LUA_FUNCTION_DEF (util, normalize_prob);
/***
* @function util.count_non_ascii(str)
* Scans a string byte by byte and counts its letters: every byte with the high
* bit set is counted as a non-ASCII letter, and every ASCII alphabetic byte is
* counted as a plain letter. Other ASCII bytes (digits, dots, dashes and so on)
* are ignored. The string does not have to be valid UTF-8; since counting is
* per byte, a multi-byte UTF-8 character contributes one count for each of its bytes.
* Raises an error if the argument cannot be converted to a string.
* @param {string} str input string
* @return {number,number} number of non-ASCII bytes and total number of counted bytes (non-ASCII bytes plus ASCII letters)
*/
LUA_FUNCTION_DEF (util, count_non_ascii);

/***
* @function util.pack(fmt, ...)
@@ -485,6 +492,7 @@ static const struct luaL_reg utillib_f[] = {
LUA_INTERFACE_DEF (util, zstd_decompress),
LUA_INTERFACE_DEF (util, normalize_prob),
LUA_INTERFACE_DEF (util, caseless_hash),
LUA_INTERFACE_DEF (util, count_non_ascii),
LUA_INTERFACE_DEF (util, pack),
LUA_INTERFACE_DEF (util, unpack),
LUA_INTERFACE_DEF (util, packsize),
@@ -1825,6 +1833,40 @@ lua_util_caseless_hash (lua_State *L)
return 1;
}

/*
 * Lua binding for util.count_non_ascii(str).
 * Walks the string one byte at a time and pushes two numbers:
 *  1) how many bytes have the high bit set (non-ASCII),
 *  2) that count plus the number of ASCII alphabetic bytes.
 * Raises a Lua error when argument 1 cannot be read as a string.
 */
static gint
lua_util_count_non_ascii (lua_State *L)
{
	gsize slen;
	const gchar *input = lua_tolstring (L, 1, &slen);
	gint non_ascii = 0, letters = 0;
	gsize i;

	if (input == NULL) {
		return luaL_error (L, "invalid arguments");
	}

	for (i = 0; i < slen; i ++) {
		if (input[i] & 0x80) {
			/* High bit set: counted both as non-ASCII and as a letter */
			non_ascii ++;
			letters ++;
		}
		else if (g_ascii_isalpha (input[i])) {
			letters ++;
		}
	}

	lua_pushnumber (L, non_ascii);
	lua_pushnumber (L, letters);

	return 2;
}

/* Backport from Lua 5.3 */

/******************************************************************************

Loading…
Cancel
Save