aboutsummaryrefslogtreecommitdiffstats
path: root/src/plugins/lua/phishing.lua
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2017-02-13 15:43:18 +0000
committerVsevolod Stakhov <vsevolod@highsecure.ru>2017-02-13 15:43:18 +0000
commitce060358955a96d65196a9ee431be8d9db87d46e (patch)
treee14e5a321aa46f92177db033665eda7d57dd7415 /src/plugins/lua/phishing.lua
parent62c73abd1c2c75e93204875a0cb7be58c66273b4 (diff)
downloadrspamd-ce060358955a96d65196a9ee431be8d9db87d46e.tar.gz
rspamd-ce060358955a96d65196a9ee431be8d9db87d46e.zip
[Fix] More heuristic fixes for phishing detection
Diffstat (limited to 'src/plugins/lua/phishing.lua')
-rw-r--r--src/plugins/lua/phishing.lua19
1 files changed, 19 insertions, 0 deletions
diff --git a/src/plugins/lua/phishing.lua b/src/plugins/lua/phishing.lua
index 6a16e3225..8a56903d7 100644
--- a/src/plugins/lua/phishing.lua
+++ b/src/plugins/lua/phishing.lua
@@ -130,6 +130,21 @@ local function phishing_cb(task)
return
end
+ -- Now we can safely remove the last dot-separated component if it is the same in both tld and ptld
+ local b,e = string.find(tld, '%.[^%.]+$')
+ local b1,e1 = string.find(ptld, '%.[^%.]+$')
+
+ if b1 and b then
+ if string.sub(tld, b) == string.sub(ptld, b1) then
+ ptld = string.gsub(ptld, '%.[^%.]+$', '')
+ tld = string.gsub(tld, '%.[^%.]+$', '')
+ end
+
+ if #ptld == 0 or #tld == 0 then
+ return false
+ end
+ end
+
local weight = 1.0
local dist = util.levenshtein_distance(tld, ptld, 2)
dist = 2 * dist / (#tld + #ptld)
@@ -137,6 +152,10 @@ local function phishing_cb(task)
if dist > 0.3 and dist <= 1.0 then
-- Use distance to penalize the total weight
weight = util.tanh(3 * (1 - dist + 0.1))
+ elseif dist > 1 then
+ -- The two tld strings are totally different, so penalize the weight significantly
+ if dist > 2 then dist = 2 end
+ weight = util.tanh((2 - dist) * 0.5)
end
rspamd_logger.debugm(N, task, "distance: %1 -> %2: %3", tld, ptld, dist)