aboutsummaryrefslogtreecommitdiffstats
path: root/src/plugins
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2017-02-25 17:48:42 +0000
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2017-02-25 17:48:42 +0000
commit 98267373c58ecf8d8f6db94d8fea41a1528b5376 (patch)
tree 305f496e8de19cc72bfd08f0bc8ce66dfd98b9d9 /src/plugins
parent 9969a6acd332e45f3c09b8f8c131f2c56a441d33 (diff)
download rspamd-98267373c58ecf8d8f6db94d8fea41a1528b5376.tar.gz
rspamd-98267373c58ecf8d8f6db94d8fea41a1528b5376.zip
[Minor] Penalise URLs with IDNA <-> nonIDNA representations
Issue: #842
Diffstat (limited to 'src/plugins')
-rw-r--r-- src/plugins/lua/phishing.lua 18
1 files changed, 15 insertions, 3 deletions
diff --git a/src/plugins/lua/phishing.lua b/src/plugins/lua/phishing.lua
index 202943fb1..6c42c96f2 100644
--- a/src/plugins/lua/phishing.lua
+++ b/src/plugins/lua/phishing.lua
@@ -158,9 +158,21 @@ local function phishing_cb(task)
-- Use distance to penalize the total weight
weight = util.tanh(3 * (1 - dist + 0.1))
elseif dist > 1 then
- -- We have totally different strings in tld, so penalize it significantly
- if dist > 2 then dist = 2 end
- weight = util.tanh((2 - dist) * 0.5)
+ -- We also check if two labels are in the same ascii/non-ascii representation
+ local a1, a2 = false,false
+
+ if string.match(tld, '^[\001-\127]*$') then a1 = true end
+ if string.match(ptld, '^[\001-\127]*$') then a2 = true end
+
+ if a1 ~= a2 then
+ weight = 1
+ rspamd_logger.debugm(N, task, "confusable: %1 -> %2: different characters",
+ tld, ptld, why)
+ else
+ -- We have totally different strings in tld, so penalize it significantly
+ if dist > 2 then dist = 2 end
+ weight = util.tanh((2 - dist) * 0.5)
+ end
end
rspamd_logger.debugm(N, task, "distance: %1 -> %2: %3", tld, ptld, dist)