Browse Source

[Fix] More heuristic fixes for phishing detection

tags/1.5.0
Vsevolod Stakhov 7 years ago
parent
commit
ce06035895
1 changed file with 19 additions and 0 deletions
  1. 19
    0
      src/plugins/lua/phishing.lua

+ 19
- 0
src/plugins/lua/phishing.lua View File

@@ -130,6 +130,21 @@ local function phishing_cb(task)
return
end

-- Drop the last dot-separated component from both names when it is identical
-- in each (the pattern matches a final '.' followed by one or more non-dot
-- characters), presumably so that a shared suffix does not mask differences
-- in the remaining parts when the distance is computed below.
-- b / b1 hold the start index of that trailing component, or nil when the
-- string has no such component; e / e1 are not used in the visible code.
local b,e = string.find(tld, '%.[^%.]+$')
local b1,e1 = string.find(ptld, '%.[^%.]+$')

-- Only proceed when both names have a trailing dot component
if b1 and b then
-- Compare the components from the dot up to the end of each string
if string.sub(tld, b) == string.sub(ptld, b1) then
-- Only the first result of gsub (the new string) is kept by the assignment
ptld = string.gsub(ptld, '%.[^%.]+$', '')
tld = string.gsub(tld, '%.[^%.]+$', '')
end

-- Nothing is left to compare if either name is empty after stripping
if #ptld == 0 or #tld == 0 then
return false
end
end

-- Default weight before any distance-based adjustment
local weight = 1.0
-- NOTE(review): the meaning of the third argument (2) is defined by
-- util.levenshtein_distance, which is not visible here -- confirm
local dist = util.levenshtein_distance(tld, ptld, 2)
-- Normalize by the mean length of both strings: dist / ((#tld + #ptld) / 2)
dist = 2 * dist / (#tld + #ptld)
@@ -137,6 +152,10 @@ local function phishing_cb(task)
-- Adjust the weight from the normalized distance; when dist <= 0.3 neither
-- branch runs and weight keeps the value it had before this statement.
if dist > 0.3 and dist <= 1.0 then
-- Use distance to penalize the total weight
-- The tanh argument falls from just under 2.4 (dist slightly above 0.3)
-- to 0.3 (dist == 1.0)
weight = util.tanh(3 * (1 - dist + 0.1))
elseif dist > 1 then
-- We have totally different strings in tld, so penalize it significantly
-- Clamp dist to 2 so that (2 - dist) never becomes negative; the tanh
-- argument is then 0 at dist == 2. The clamped value is also the one
-- written to the debug log below.
if dist > 2 then dist = 2 end
weight = util.tanh((2 - dist) * 0.5)
end
-- Debug trace of both compared strings and the resulting distance
-- NOTE(review): %1/%2/%3 look like positional placeholders for the three
-- trailing arguments -- confirm against rspamd_logger
rspamd_logger.debugm(N, task, "distance: %1 -> %2: %3", tld, ptld, dist)


Loading…
Cancel
Save