aboutsummaryrefslogtreecommitdiffstats
path: root/rules
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2017-02-13 12:17:58 +0000
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2017-02-13 12:19:20 +0000
commit ce8352554afa648b81400e3267587e22705b08cf (patch)
tree 1593c3ca8ddb3a52359f50c4784bf2797a26e13b /rules
parent 3125cf20b87ff211cf2326b9682b39703a996066 (diff)
download rspamd-ce8352554afa648b81400e3267587e22705b08cf.tar.gz
rspamd-ce8352554afa648b81400e3267587e22705b08cf.zip
[Fix] Improve OMOGRAPH_URL rule
- Calculate omographs in each dot component separately
- Normalize omographs
- Count utf8 characters properly
Diffstat (limited to 'rules')
-rw-r--r-- rules/misc.lua 17
1 files changed, 14 insertions, 3 deletions
diff --git a/rules/misc.lua b/rules/misc.lua
index 56de79a6b..75f31f39c 100644
--- a/rules/misc.lua
+++ b/rules/misc.lua
@@ -767,10 +767,21 @@ rspamd_config.OMOGRAPH_URL = {
local h = u:get_host()
if h then
- local non_latin,total = util.count_non_ascii(h)
+ local parts = rspamd_str_split(h, '.')
- if non_latin ~= total and non_latin > 0 then
- return true, 1.0, h
+ local bad_omographs = 0
+
+ for _,p in ipairs(parts) do
+ local cnlat,ctot = util.count_non_ascii(p)
+
+ if cnlat > 0 and cnlat ~= ctot then
+ bad_omographs = bad_omographs + 1.0 / cnlat
+ end
+ end
+
+ if bad_omographs > 0 then
+ if bad_omographs > 1 then bad_omographs = 1.0 end
+ return true, bad_omographs, h
end
end
end