diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2017-02-25 17:48:42 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2017-02-25 17:48:42 +0000 |
commit | 98267373c58ecf8d8f6db94d8fea41a1528b5376 (patch) | |
tree | 305f496e8de19cc72bfd08f0bc8ce66dfd98b9d9 /src/plugins | |
parent | 9969a6acd332e45f3c09b8f8c131f2c56a441d33 (diff) | |
download | rspamd-98267373c58ecf8d8f6db94d8fea41a1528b5376.tar.gz rspamd-98267373c58ecf8d8f6db94d8fea41a1528b5376.zip |
[Minor] Penalise URLs with IDNA <-> nonIDNA representations
Issue: #842
Diffstat (limited to 'src/plugins')
-rw-r--r-- | src/plugins/lua/phishing.lua | 18 |
1 file changed, 15 insertions, 3 deletions
diff --git a/src/plugins/lua/phishing.lua b/src/plugins/lua/phishing.lua index 202943fb1..6c42c96f2 100644 --- a/src/plugins/lua/phishing.lua +++ b/src/plugins/lua/phishing.lua @@ -158,9 +158,21 @@ local function phishing_cb(task) -- Use distance to penalize the total weight weight = util.tanh(3 * (1 - dist + 0.1)) elseif dist > 1 then - -- We have totally different strings in tld, so penalize it significantly - if dist > 2 then dist = 2 end - weight = util.tanh((2 - dist) * 0.5) + -- We also check if two labels are in the same ascii/non-ascii representation + local a1, a2 = false,false + + if string.match(tld, '^[\001-\127]*$') then a1 = true end + if string.match(ptld, '^[\001-\127]*$') then a2 = true end + + if a1 ~= a2 then + weight = 1 + rspamd_logger.debugm(N, task, "confusable: %1 -> %2: different characters", + tld, ptld, why) + else + -- We have totally different strings in tld, so penalize it significantly + if dist > 2 then dist = 2 end + weight = util.tanh((2 - dist) * 0.5) + end end rspamd_logger.debugm(N, task, "distance: %1 -> %2: %3", tld, ptld, dist) |