diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2017-02-13 12:17:58 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2017-02-13 12:19:20 +0000 |
commit | ce8352554afa648b81400e3267587e22705b08cf (patch) | |
tree | 1593c3ca8ddb3a52359f50c4784bf2797a26e13b /rules | |
parent | 3125cf20b87ff211cf2326b9682b39703a996066 (diff) | |
download | rspamd-ce8352554afa648b81400e3267587e22705b08cf.tar.gz rspamd-ce8352554afa648b81400e3267587e22705b08cf.zip |
[Fix] Improve OMOGRAPH_URL rule
- Calculate homographs in each dot-separated component of the hostname separately
- Normalize the homograph score
- Count utf8 characters properly
Diffstat (limited to 'rules')
-rw-r--r-- | rules/misc.lua | 17 |
1 files changed, 14 insertions, 3 deletions
diff --git a/rules/misc.lua b/rules/misc.lua index 56de79a6b..75f31f39c 100644 --- a/rules/misc.lua +++ b/rules/misc.lua @@ -767,10 +767,21 @@ rspamd_config.OMOGRAPH_URL = { local h = u:get_host() if h then - local non_latin,total = util.count_non_ascii(h) + local parts = rspamd_str_split(h, '.') - if non_latin ~= total and non_latin > 0 then - return true, 1.0, h + local bad_omographs = 0 + + for _,p in ipairs(parts) do + local cnlat,ctot = util.count_non_ascii(p) + + if cnlat > 0 and cnlat ~= ctot then + bad_omographs = bad_omographs + 1.0 / cnlat + end + end + + if bad_omographs > 0 then + if bad_omographs > 1 then bad_omographs = 1.0 end + return true, bad_omographs, h end end end |