From: Vsevolod Stakhov
Date: Mon, 13 Feb 2017 15:43:18 +0000 (+0000)
Subject: [Fix] More heuristic fixes for phishing detection
X-Git-Tag: 1.5.0~110
X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=ce060358955a96d65196a9ee431be8d9db87d46e;p=rspamd.git

[Fix] More heuristic fixes for phishing detection
---

diff --git a/src/plugins/lua/phishing.lua b/src/plugins/lua/phishing.lua
index 6a16e3225..8a56903d7 100644
--- a/src/plugins/lua/phishing.lua
+++ b/src/plugins/lua/phishing.lua
@@ -130,6 +130,21 @@ local function phishing_cb(task)
           return
         end
 
+        -- Now we can safely remove the last dot component if it is the same
+        local b,e = string.find(tld, '%.[^%.]+$')
+        local b1,e1 = string.find(ptld, '%.[^%.]+$')
+
+        if b1 and b then
+          if string.sub(tld, b) == string.sub(ptld, b1) then
+            ptld = string.gsub(ptld, '%.[^%.]+$', '')
+            tld = string.gsub(tld, '%.[^%.]+$', '')
+          end
+
+          if #ptld == 0 or #tld == 0 then
+            return false
+          end
+        end
+
         local weight = 1.0
         local dist = util.levenshtein_distance(tld, ptld, 2)
         dist = 2 * dist / (#tld + #ptld)
@@ -137,6 +152,10 @@ local function phishing_cb(task)
         if dist > 0.3 and dist <= 1.0 then
           -- Use distance to penalize the total weight
           weight = util.tanh(3 * (1 - dist + 0.1))
+        elseif dist > 1 then
+          -- We have totally different strings in tld, so penalize it significantly
+          if dist > 2 then dist = 2 end
+          weight = util.tanh((2 - dist) * 0.5)
         end
 
         rspamd_logger.debugm(N, task, "distance: %1 -> %2: %3", tld, ptld, dist)