source.dussan.org Git - rspamd.git/commitdiff
[Minor] Penalise URLs with IDNA <-> nonIDNA representations
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Sat, 25 Feb 2017 17:48:42 +0000 (17:48 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Sat, 25 Feb 2017 17:48:42 +0000 (17:48 +0000)
Issue: #842

src/plugins/lua/phishing.lua

index 202943fb11717228695ddd4e06372e162988c9c4..6c42c96f2a1ce9119f169670297bb47d2358a9f8 100644 (file)
@@ -158,9 +158,21 @@ local function phishing_cb(task)
             -- Use distance to penalize the total weight
             weight = util.tanh(3 * (1 - dist + 0.1))
           elseif dist > 1 then
-            -- We have totally different strings in tld, so penalize it significantly
-            if dist > 2 then dist = 2 end
-            weight = util.tanh((2 - dist) * 0.5)
+            -- We also check if two labels are in the same ascii/non-ascii representation
+            local a1, a2 = false,false
+
+            if string.match(tld, '^[\001-\127]*$') then a1 = true end
+            if string.match(ptld, '^[\001-\127]*$') then a2 = true end
+
+            if a1 ~= a2 then
+              weight = 1
+              rspamd_logger.debugm(N, task, "confusable: %1 -> %2: different characters",
+                tld, ptld, why)
+            else
+              -- We have totally different strings in tld, so penalize it significantly
+              if dist > 2 then dist = 2 end
+              weight = util.tanh((2 - dist) * 0.5)
+            end
           end
 
           rspamd_logger.debugm(N, task, "distance: %1 -> %2: %3", tld, ptld, dist)