Browse Source

[Minor] Penalise URLs with IDNA <-> nonIDNA representations

Issue: #842
tags/1.5.0
Vsevolod Stakhov 7 years ago
parent
commit
98267373c5
1 changed file with 15 additions and 3 deletions
  1. 15
    3
      src/plugins/lua/phishing.lua

+ 15
- 3
src/plugins/lua/phishing.lua View File

@@ -158,9 +158,21 @@ local function phishing_cb(task)
-- Use distance to penalize the total weight
weight = util.tanh(3 * (1 - dist + 0.1))
elseif dist > 1 then
-- We have totally different strings in tld, so penalize it significantly
if dist > 2 then dist = 2 end
weight = util.tanh((2 - dist) * 0.5)
-- We also check if two labels are in the same ascii/non-ascii representation
local a1, a2 = false,false

if string.match(tld, '^[\001-\127]*$') then a1 = true end
if string.match(ptld, '^[\001-\127]*$') then a2 = true end

if a1 ~= a2 then
weight = 1
rspamd_logger.debugm(N, task, "confusable: %1 -> %2: different characters",
tld, ptld, why)
else
-- We have totally different strings in tld, so penalize it significantly
if dist > 2 then dist = 2 end
weight = util.tanh((2 - dist) * 0.5)
end
end

rspamd_logger.debugm(N, task, "distance: %1 -> %2: %3", tld, ptld, dist)

Loading…
Cancel
Save