From: Vsevolod Stakhov
Date: Sat, 25 Feb 2017 17:48:42 +0000 (+0000)
Subject: [Minor] Penalise URLs with IDNA <-> nonIDNA representations
X-Git-Tag: 1.5.0~30
X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=98267373c58ecf8d8f6db94d8fea41a1528b5376;p=rspamd.git

[Minor] Penalise URLs with IDNA <-> nonIDNA representations

Issue: #842
---

diff --git a/src/plugins/lua/phishing.lua b/src/plugins/lua/phishing.lua
index 202943fb1..6c42c96f2 100644
--- a/src/plugins/lua/phishing.lua
+++ b/src/plugins/lua/phishing.lua
@@ -158,9 +158,21 @@ local function phishing_cb(task)
           -- Use distance to penalize the total weight
           weight = util.tanh(3 * (1 - dist + 0.1))
         elseif dist > 1 then
-          -- We have totally different strings in tld, so penalize it significantly
-          if dist > 2 then dist = 2 end
-          weight = util.tanh((2 - dist) * 0.5)
+          -- We also check if two labels are in the same ascii/non-ascii representation
+          local a1, a2 = false,false
+
+          if string.match(tld, '^[\001-\127]*$') then a1 = true end
+          if string.match(ptld, '^[\001-\127]*$') then a2 = true end
+
+          if a1 ~= a2 then
+            weight = 1
+            rspamd_logger.debugm(N, task, "confusable: %1 -> %2: different characters",
+              tld, ptld, why)
+          else
+            -- We have totally different strings in tld, so penalize it significantly
+            if dist > 2 then dist = 2 end
+            weight = util.tanh((2 - dist) * 0.5)
+          end
         end
 
         rspamd_logger.debugm(N, task, "distance: %1 -> %2: %3",
           tld, ptld, dist)