123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569 |
- --[[
- Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com>
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- ]]--
-
-- Stop loading this file as soon as the `confighelp` global is set
-- (presumably the configuration-help mode; confirm against the plugin loader).
if confighelp then return end
-
- local rspamd_logger = require "rspamd_logger"
- local util = require "rspamd_util"
- local lua_util = require "lua_util"
- local lua_maps = require "lua_maps"
-
- -- Phishing detection interface for selecting phished urls and inserting corresponding symbol
- --
- --
-- Module name: used to fetch the configuration section and to tag debug logs
local N = 'phishing'

-- Symbols inserted by this module (the first two may be overridden from the configuration)
local symbol = 'PHISHED_URL'
local generic_service_symbol, generic_service_name = 'PHISHED_GENERIC_SERVICE', 'generic service'
local openphish_symbol, phishtank_symbol = 'PHISHED_OPENPHISH', 'PHISHED_PHISHTANK'

-- Domain maps: optional `domains` map plus per-symbol rule lists
local domains
local strict_domains, exceptions_maps = {}, {}

-- Generic service feed: map source, map handle and parsed data (host -> entries)
local generic_service_map, generic_service_hash
local generic_service_data = {}

-- OpenPhish feed; the premium variant is not enabled by default as their feed is quite large
local openphish_map = 'https://www.openphish.com/feed.txt'
local openphish_premium = false
local openphish_hash
local openphish_data = {}

-- Phishtank data is published via DNS under this suffix
local phishtank_suffix = 'phishtank.rspamd.com'
local phishtank_enabled = false

-- Nothing to do without a configuration table for this module
local opts = rspamd_config:get_all_opt(N)
if type(opts) ~= 'table' then
  rspamd_logger.infox(rspamd_config, 'Module is unconfigured')
  return
end
-
-- Symbol callback: inspects every URL of the task for phishing evidence.
--
-- For each URL the enabled feeds are consulted (generic service map, openphish
-- map, phishtank DNS list). Then, if the URL itself is flagged as phished, the
-- real and the displayed domains are compared and the resulting weight is used
-- to insert either a strict-domain/exception rule symbol or the main symbol.
--
-- task: the task being processed
local function phishing_cb(task)
  -- Looks `url` up in a parsed feed table (host -> list of {path, query, data})
  -- and inserts `phish_symbol` with a weight reflecting the match quality:
  -- 1.0 for path + query, 0.3 for host + path, 0.1 for a host-only match of a
  -- phished url.
  local function check_phishing_map(map, url, phish_symbol)
    local host = url:get_host()

    if host then
      local elt = map[host]
      local found_path = false
      local found_query = false
      local data = nil

      if elt then
        local path = url:get_path()
        local query = url:get_query()

        if path then
          for _, d in ipairs(elt) do
            if d['path'] == path then
              found_path = true
              data = d['data']

              -- An entry without a query matches any query
              if query and d['query'] and query == d['query'] then
                found_query = true
              elseif not d['query'] then
                found_query = true
              end
            end
          end
        else
          -- Url has no path: only path-less feed entries can match the path
          for _, d in ipairs(elt) do
            if not d['path'] then
              found_path = true
            end

            if query and d['query'] and query == d['query'] then
              found_query = true
            elseif not d['query'] then
              found_query = true
            end
          end
        end

        if found_path then
          local args

          -- Symbol options: feed metadata if present, the host otherwise
          if type(data) == 'table' then
            args = {
              data['tld'],
              data['sector'],
              data['brand'],
            }
          elseif type(data) == 'string' then
            args = data
          else
            args = host
          end

          if found_query then
            -- Query + path match
            task:insert_result(phish_symbol, 1.0, args)
          else
            -- Host + path match
            if path then
              task:insert_result(phish_symbol, 0.3, args)
            end
            -- No path, no symbol
          end
        else
          if url:is_phished() then
            -- Only host matches
            task:insert_result(phish_symbol, 0.1, host)
          end
        end
      end
    end
  end

  -- Checks `url` against a DNS-published list: hashes of {host, path} and,
  -- when a query is present, {host, path, query} are resolved as TXT records
  -- under `dns_suffix`; a successful answer inserts `phish_symbol`.
  local function check_phishing_dns(dns_suffix, url, phish_symbol)
    -- Builds the DNS name: first 32 base32 characters of the hash of all
    -- elements, followed by the list suffix
    local function compose_dns_query(elts)
      local cr = require "rspamd_cryptobox_hash"
      local h = cr.create()
      for _, elt in ipairs(elts) do
        h:update(elt)
      end
      return string.format("%s.%s", h:base32():sub(1, 32), dns_suffix)
    end
    local r = task:get_resolver()
    local host = url:get_host()
    local path = url:get_path()
    local query = url:get_query()

    if host and path then
      local function host_host_path_cb(_, _, results, err)
        if not err and results then
          if not query then
            -- The url has no query, so host + path is a full match
            task:insert_result(phish_symbol, 1.0, results)
          else
            task:insert_result(phish_symbol, 0.3, results)
          end
        end
      end

      local to_resolve_hp = compose_dns_query({ host, path })
      rspamd_logger.debugm(N, task, 'try to resolve {%s, %s} -> %s',
          host, path, to_resolve_hp)
      r:resolve_txt({
        task = task,
        name = to_resolve_hp,
        callback = host_host_path_cb })

      if query then
        local function host_host_path_query_cb(_, _, results, err)
          if not err and results then
            task:insert_result(phish_symbol, 1.0, results)
          end
        end

        local to_resolve_hpq = compose_dns_query({ host, path, query })
        rspamd_logger.debugm(N, task, 'try to resolve {%s, %s, %s} -> %s',
            host, path, query, to_resolve_hpq)
        r:resolve_txt({
          task = task,
          name = to_resolve_hpq,
          callback = host_host_path_query_cb })
      end
    end
  end

  -- Process all urls
  local dmarc_dom
  local dsym = task:get_symbol('DMARC_POLICY_ALLOW')
  if dsym then
    dsym = dsym[1] -- legacy stuff, need to take the first element
    -- Guard against an empty result table before reading the options
    if dsym and dsym.options then
      dmarc_dom = dsym.options[1]
    end
  end

  local urls = task:get_urls() or {}
  for _, url_iter in ipairs(urls) do
    local function do_loop_iter() -- to emulate continue
      local url = url_iter
      if generic_service_hash then
        check_phishing_map(generic_service_data, url, generic_service_symbol)
      end

      if openphish_hash then
        check_phishing_map(openphish_data, url, openphish_symbol)
      end

      if phishtank_enabled then
        check_phishing_dns(phishtank_suffix, url, phishtank_symbol)
      end

      if url:is_phished() then
        local purl

        if url:is_redirected() then
          local rspamd_url = require "rspamd_url"
          -- Examine the real redirect target instead of the url
          local redirected_url = url:get_redirected()
          if not redirected_url then
            return
          end

          purl = rspamd_url.create(task:get_mempool(), url:get_visible())
          url = redirected_url
        else
          purl = url:get_phished()
        end

        if not purl then
          return
        end

        local tld = url:get_tld()
        local ptld = purl:get_tld()

        if not ptld or not tld then
          return
        end

        if dmarc_dom and tld == dmarc_dom then
          -- Pass the task like the other debug calls here, so the line is
          -- logged in the task context
          lua_util.debugm(N, task, 'exclude phishing from %s -> %s by dmarc domain', tld,
              ptld)
          return
        end

        -- Now we can safely remove the last dot component if it is the same
        local b, _ = string.find(tld, '%.[^%.]+$')
        local b1, _ = string.find(ptld, '%.[^%.]+$')

        local stripped_tld, stripped_ptld = tld, ptld
        if b1 and b then
          if string.sub(tld, b) == string.sub(ptld, b1) then
            stripped_ptld = string.gsub(ptld, '%.[^%.]+$', '')
            stripped_tld = string.gsub(tld, '%.[^%.]+$', '')
          end

          -- NOTE(review): this tests the unstripped names, which cannot be
          -- empty here because both patterns above matched; presumably the
          -- stripped labels were meant - confirm before changing.
          if #ptld == 0 or #tld == 0 then
            return false
          end
        end

        local weight = 1.0
        local spoofed, why = util.is_utf_spoofed(tld, ptld)
        if spoofed then
          lua_util.debugm(N, task, "confusable: %1 -> %2: %3", tld, ptld, why)
          weight = 1.0
        else
          -- Edit distance normalised by the mean length of both labels
          local dist = util.levenshtein_distance(stripped_tld, stripped_ptld, 2)
          dist = 2 * dist / (#stripped_tld + #stripped_ptld)

          if dist > 0.3 and dist <= 1.0 then
            -- Use distance to penalize the total weight
            weight = util.tanh(3 * (1 - dist + 0.1))
          elseif dist > 1 then
            -- We also check if two labels are in the same ascii/non-ascii representation
            local a1, a2 = false, false

            if string.match(tld, '^[\001-\127]*$') then
              a1 = true
            end
            if string.match(ptld, '^[\001-\127]*$') then
              a2 = true
            end

            if a1 ~= a2 then
              weight = 1
              lua_util.debugm(N, task, "confusable: %1 -> %2: different characters",
                  tld, ptld, why)
            else
              -- We have totally different strings in tld, so penalize it somehow
              weight = 0.5
            end
          end

          lua_util.debugm(N, task, "distance: %1 -> %2: %3", tld, ptld, dist)
        end

        -- Returns true and the matched key if the tld or the host of `furl`
        -- is present in `map`, false otherwise
        local function is_url_in_map(map, furl)
          for _, dn in ipairs({ furl:get_tld(), furl:get_host() }) do
            if map:get_key(dn) then
              return true, dn
            end
          end

          return false
        end
        -- Tries every rule of a rule list ({symbol, map} entries); on the
        -- first hit inserts the rule symbol and returns true.
        -- `furl` defaults to the real url and `sweight` to the computed weight.
        local function found_in_map(map, furl, sweight)
          if not furl then
            furl = url
          end
          if not sweight then
            sweight = weight
          end
          if #map > 0 then
            for _, rule in ipairs(map) do
              local found, dn = is_url_in_map(rule.map, furl)
              if found then
                task:insert_result(rule.symbol, sweight, ptld .. '->' .. dn)
                return true
              end
            end
          end
        end

        -- Exceptions win over strict domains, which win over the generic symbol
        if not found_in_map(exceptions_maps) then
          if not found_in_map(strict_domains, purl, 1.0) then
            if domains then
              if is_url_in_map(domains, purl) then
                task:insert_result(symbol, weight, ptld .. '->' .. tld)
              end
            else
              task:insert_result(symbol, weight, ptld .. '->' .. tld)
            end
          end
        end
      end
    end

    do_loop_iter()
  end
end
-
-- Loads the per-symbol maps configured under `opts[mapname]`.
--
-- The option is expected to be a table whose keys are symbol names and whose
-- values are map definitions. For every map that loads, a virtual symbol is
-- registered under the parent `id` and a {symbol, map} rule is appended to
-- `phishmap`.
--
-- mapname:  option name inside the module configuration
-- phishmap: list receiving the resulting rules (modified in place)
-- id:       parent symbol id passed to register_virtual_symbol
local function phishing_map(mapname, phishmap, id)
  local xd = opts[mapname]
  if not xd then
    return
  end

  if type(xd) ~= 'table' then
    -- Stop here: iterating a non-table value with pairs() would raise
    rspamd_logger.errx(rspamd_config, 'invalid exception table')
    return
  end

  for sym, map_data in pairs(xd) do
    local rmap = lua_maps.map_add_from_ucl(map_data, 'set',
        'Phishing ' .. mapname .. ' map')
    if rmap then
      rspamd_config:register_virtual_symbol(sym, 1, id)
      local rule = { symbol = sym, map = rmap }
      table.insert(phishmap, rule)
    else
      rspamd_logger.infox(rspamd_config, 'cannot add map for symbol: %s', sym)
    end
  end
end
-
-- Splits `s` on the separator `sep` and hands every piece to `func`.
-- A piece is a (possibly empty) run of characters that does not contain the
-- separator. Returns whatever the LPeg match of the whole pattern returns.
local function rspamd_str_split_fun(s, sep, func)
  local lpeg = require "lpeg"
  local delimiter = lpeg.P(sep)
  -- One field: anything up to the next delimiter, passed through `func`
  local field = lpeg.P((1 - delimiter) ^ 0 / func)
  -- Whole input: a field followed by any number of delimiter + field pairs
  local splitter = lpeg.P(field * (delimiter * field) ^ 0)
  return splitter:match(s)
end
-
-- Parses `str` as an url and records it in `tbl`, keyed by the url host.
-- Every host maps to a list of entries {data, path, query}.
--
-- pool: memory pool handed to the url parser
-- tbl:  destination table (modified in place)
-- str:  textual url
-- data: opaque payload stored alongside the entry (may be nil)
--
-- Returns true when an entry was stored, false when the string could not be
-- parsed or the parsed url has no host.
local function insert_url_from_string(pool, tbl, str, data)
  local rspamd_url = require "rspamd_url"

  local parsed = rspamd_url.create(pool, str)
  local host = parsed and parsed:get_host()
  if not host then
    return false
  end

  local entry = {
    data = data,
    path = parsed:get_path(),
    query = parsed:get_query(),
  }

  local bucket = tbl[host]
  if bucket then
    table.insert(bucket, entry)
  else
    tbl[host] = { entry }
  end

  return true
end
-
-- Map callback for the generic service feed: a plain text list with one url
-- per line. On every call the parsed table replaces `generic_service_data`.
--
-- s: raw map content
local function generic_service_plain_cb(s)
  local nelts = 0
  local new_data = {}
  local rspamd_mempool = require "rspamd_mempool"
  -- Temporary pool used only while the urls are being parsed
  local pool = rspamd_mempool.create()

  local function generic_service_elt_parser(cap)
    if insert_url_from_string(pool, new_data, cap, nil) then
      nelts = nelts + 1
    end
  end

  rspamd_str_split_fun(s, '\n', generic_service_elt_parser)

  generic_service_data = new_data
  rspamd_logger.infox(generic_service_hash, "parsed %s elements from %s feed",
      nelts, generic_service_name)
  pool:destroy()
end
-
-- Map callback for the premium openphish feed: one JSON object per line, each
-- carrying an `url` field. The parsed table replaces `openphish_data` only if
-- every line was parsed successfully; otherwise the previous data is kept.
--
-- s: raw map content
local function openphish_json_cb(s)
  local ucl = require "ucl"
  local rspamd_mempool = require "rspamd_mempool"
  local nelts = 0
  local new_json_map = {}
  local valid = true

  -- Temporary pool used only while the urls are being parsed
  local pool = rspamd_mempool.create()

  local function openphish_elt_parser(cap)
    -- After the first parse failure the remaining lines are skipped
    if valid then
      local parser = ucl.parser()
      local res, err = parser:parse_string(cap)
      if not res then
        valid = false
        -- Format the error instead of concatenating it: a nil error must not raise
        rspamd_logger.warnx(openphish_hash, 'cannot parse openphish map: %s', err)
      else
        local obj = parser:get_object()

        -- Index the object only if it really is a table
        if type(obj) == 'table' and obj['url'] then
          if insert_url_from_string(pool, new_json_map, obj['url'], obj) then
            nelts = nelts + 1
          end
        end
      end
    end
  end

  rspamd_str_split_fun(s, '\n', openphish_elt_parser)

  if valid then
    openphish_data = new_json_map
    rspamd_logger.infox(openphish_hash, "parsed %s elements from openphish feed",
        nelts)
  end

  pool:destroy()
end
-
-- Map callback for the free openphish feed: a plain text list with one url per
-- line. On every call the parsed table replaces `openphish_data`.
--
-- s: raw map content
local function openphish_plain_cb(s)
  local rspamd_mempool = require "rspamd_mempool"
  -- Temporary pool used only while the urls are being parsed
  local mempool = rspamd_mempool.create()
  local parsed = {}
  local count = 0

  rspamd_str_split_fun(s, '\n', function(line)
    if insert_url_from_string(mempool, parsed, line, nil) then
      count = count + 1
    end
  end)

  openphish_data = parsed
  rspamd_logger.infox(openphish_hash, "parsed %s elements from openphish feed",
      count)
  mempool:destroy()
end
-
-- Module setup: apply the configuration, register the symbols and the maps
if opts then
  local id

  if opts.symbol then
    symbol = opts.symbol
    -- Main symbol; phishing_cb does all the work
    id = rspamd_config:register_symbol({
      callback = phishing_cb,
      name = symbol,
    })

    -- phishing_cb reads DMARC_POLICY_ALLOW to exclude DMARC verified domains
    rspamd_config:register_dependency(symbol, 'DMARC_CHECK')

    -- Generic service feed settings (`generic_service_url` wins over `generic_service_map`)
    generic_service_symbol = opts.generic_service_symbol or generic_service_symbol
    generic_service_map = opts.generic_service_url or opts.generic_service_map
        or generic_service_map
    generic_service_name = opts.generic_service_name or generic_service_name

    if opts.generic_service_enabled then
      generic_service_hash = rspamd_config:add_map({
        description = 'Generic feed',
        callback = generic_service_plain_cb,
        url = generic_service_map,
        type = 'callback',
      })
    end

    -- Openphish feed settings (`openphish_url` wins over `openphish_map`)
    openphish_map = opts.openphish_url or opts.openphish_map or openphish_map

    if opts.openphish_premium then
      openphish_premium = true
    end

    if opts.openphish_enabled then
      -- Premium and free feeds differ only in the parser and the description
      local feed_cb, feed_descr
      if openphish_premium then
        feed_cb = openphish_json_cb
        feed_descr = 'Open phishing premium feed map (see https://www.openphish.com for details)'
      else
        feed_cb = openphish_plain_cb
        feed_descr = 'Open phishing feed map (see https://www.openphish.com for details)'
      end

      openphish_hash = rspamd_config:add_map({
        type = 'callback',
        url = openphish_map,
        callback = feed_cb,
        description = feed_descr,
        opaque_data = true,
      })
    end

    if opts.phishtank_enabled then
      phishtank_enabled = true
      phishtank_suffix = opts.phishtank_suffix or phishtank_suffix
    end

    -- Feed symbols are virtual children of the main symbol
    for _, feed_symbol in ipairs({ generic_service_symbol, openphish_symbol, phishtank_symbol }) do
      rspamd_config:register_symbol({
        type = 'virtual',
        parent = id,
        name = feed_symbol,
      })
    end
  end

  if type(opts.domains) == 'string' then
    domains = lua_maps.map_add_from_ucl(opts.domains, 'set',
        'Phishing domains')
  end

  phishing_map('strict_domains', strict_domains, id)
  phishing_map('exceptions', exceptions_maps, id)
end
|