diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2017-02-13 12:17:58 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2017-02-13 12:19:20 +0000 |
commit | ce8352554afa648b81400e3267587e22705b08cf (patch) | |
tree | 1593c3ca8ddb3a52359f50c4784bf2797a26e13b | |
parent | 3125cf20b87ff211cf2326b9682b39703a996066 (diff) | |
download | rspamd-ce8352554afa648b81400e3267587e22705b08cf.tar.gz rspamd-ce8352554afa648b81400e3267587e22705b08cf.zip |
[Fix] Improve OMOGRAPH_URL rule
- Calculate homographs in each dot-separated component of the hostname separately
- Normalize the homograph score (capped at 1.0)
- Count UTF-8 characters properly
-rw-r--r-- | rules/misc.lua | 17 | ||||
-rw-r--r-- | src/lua/lua_util.c | 7 |
2 files changed, 20 insertions, 4 deletions
diff --git a/rules/misc.lua b/rules/misc.lua index 56de79a6b..75f31f39c 100644 --- a/rules/misc.lua +++ b/rules/misc.lua @@ -767,10 +767,21 @@ rspamd_config.OMOGRAPH_URL = { local h = u:get_host() if h then - local non_latin,total = util.count_non_ascii(h) + local parts = rspamd_str_split(h, '.') - if non_latin ~= total and non_latin > 0 then - return true, 1.0, h + local bad_omographs = 0 + + for _,p in ipairs(parts) do + local cnlat,ctot = util.count_non_ascii(p) + + if cnlat > 0 and cnlat ~= ctot then + bad_omographs = bad_omographs + 1.0 / cnlat + end + end + + if bad_omographs > 0 then + if bad_omographs > 1 then bad_omographs = 1.0 end + return true, bad_omographs, h end end end diff --git a/src/lua/lua_util.c b/src/lua/lua_util.c index 220467664..669ac5bd9 100644 --- a/src/lua/lua_util.c +++ b/src/lua/lua_util.c @@ -1891,7 +1891,7 @@ lua_util_count_non_ascii (lua_State *L) { gsize len; const gchar *str = lua_tolstring (L, 1, &len); - const gchar *p, *end; + const gchar *p, *end, *np; gint ret = 0, total = 0; if (str != NULL) { @@ -1900,8 +1900,13 @@ lua_util_count_non_ascii (lua_State *L) while (p < end) { if (*p & 0x80) { + np = g_utf8_find_next_char (p, end); ret ++; total ++; + + p = (np != p) ? np : p + 1; + + continue; } else if (g_ascii_isalpha (*p)) { total ++; |