You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

phishing.lua 16KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569
  1. --[[
  2. Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com>
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ]]--
  -- Stop loading the module as soon as the `confighelp` global is set
  -- (presumably a help-only load of the configuration; confirm with the loader).
  if confighelp then return end
  16. local rspamd_logger = require "rspamd_logger"
  17. local util = require "rspamd_util"
  18. local lua_util = require "lua_util"
  19. local lua_maps = require "lua_maps"
  20. -- Phishing detection interface for selecting phished urls and inserting corresponding symbol
  21. --
  22. --
  -- Module name: used as the configuration section and as the debug module tag
  local N = 'phishing'

  -- Symbols inserted by this module (the first two can be overridden from the options)
  local symbol = 'PHISHED_URL'
  local generic_service_symbol = 'PHISHED_GENERIC_SERVICE'
  local openphish_symbol = 'PHISHED_OPENPHISH'
  local phishtank_symbol = 'PHISHED_PHISHTANK'

  -- Maps consulted by the main callback
  local domains
  local strict_domains = {}
  local exceptions_maps = {}

  -- Generic service feed: name used in log messages, map source, map handle
  -- and the parsed host -> entries table
  local generic_service_name = 'generic service'
  local generic_service_map
  local generic_service_hash
  local generic_service_data = {}

  -- Openphish feed: map source, map handle and the parsed host -> entries table
  local openphish_map = 'https://www.openphish.com/feed.txt'
  -- Not enabled by default as their feed is quite large
  local openphish_premium = false
  local openphish_hash
  local openphish_data = {}

  -- Phishtank data is published via DNS under this suffix
  local phishtank_suffix = 'phishtank.rspamd.com'
  local phishtank_enabled = false

  -- Without a table of options there is nothing to register
  local opts = rspamd_config:get_all_opt(N)
  if type(opts) ~= 'table' then
    rspamd_logger.infox(rspamd_config, 'Module is unconfigured')
    return
  end
  --- Symbol callback: inspects every URL of a task for phishing evidence.
  -- For each URL it consults the generic service feed, the openphish feed and
  -- the phishtank DNS list (each only when enabled), and then, for URLs flagged
  -- by `url:is_phished()`, compares the real and the displayed domains and
  -- inserts the configured symbols with a weight derived from their similarity.
  -- @param task the task being scanned
  local function phishing_cb(task)

    -- Looks `url` up in a feed table keyed by host. Each value is a list of
    -- entries with optional `path`, `query` and `data` fields.
    -- Weights: 1.0 when path and query match, 0.3 when only the path matches,
    -- 0.1 when only the host is known and the URL is flagged as phished.
    local function check_phishing_map(map, url, phish_symbol)
      local host = url:get_host()

      if host then
        local elt = map[host]
        local found_path = false
        local found_query = false
        local data = nil

        if elt then
          local path = url:get_path()
          local query = url:get_query()

          if path then
            for _, d in ipairs(elt) do
              if d['path'] == path then
                found_path = true
                data = d['data']

                if query and d['query'] and query == d['query'] then
                  found_query = true
                elseif not d['query'] then
                  -- An entry without a query matches any query
                  found_query = true
                end
              end
            end
          else
            for _, d in ipairs(elt) do
              if not d['path'] then
                found_path = true
              end

              if query and d['query'] and query == d['query'] then
                found_query = true
              elseif not d['query'] then
                found_query = true
              end
            end
          end

          if found_path then
            -- Symbol options: feed metadata when available, the host otherwise
            local args

            if type(data) == 'table' then
              args = {
                data['tld'],
                data['sector'],
                data['brand'],
              }
            elseif type(data) == 'string' then
              args = data
            else
              args = host
            end

            if found_query then
              -- Query + path match
              task:insert_result(phish_symbol, 1.0, args)
            else
              -- Host + path match
              if path then
                task:insert_result(phish_symbol, 0.3, args)
              end
              -- No path, no symbol
            end
          else
            if url:is_phished() then
              -- Only host matches
              task:insert_result(phish_symbol, 0.1, host)
            end
          end
        end
      end
    end

    -- Checks `url` against a DNS published list: the hash of {host, path} and,
    -- when a query is present, of {host, path, query} is resolved as a TXT
    -- record under `dns_suffix`; any answer inserts `phish_symbol`.
    local function check_phishing_dns(dns_suffix, url, phish_symbol)
      -- Builds the name to resolve: first 32 characters of the base32 digest
      -- of all elements, followed by the suffix
      local function compose_dns_query(elts)
        local cr = require "rspamd_cryptobox_hash"
        local h = cr.create()
        for _, elt in ipairs(elts) do h:update(elt) end
        return string.format("%s.%s", h:base32():sub(1, 32), dns_suffix)
      end

      local r = task:get_resolver()
      local host = url:get_host()
      local path = url:get_path()
      local query = url:get_query()

      if host and path then
        local function host_host_path_cb(_, _, results, err)
          if not err and results then
            if not query then
              task:insert_result(phish_symbol, 1.0, results)
            else
              -- The URL has a query that is not part of this lookup
              task:insert_result(phish_symbol, 0.3, results)
            end
          end
        end

        local to_resolve_hp = compose_dns_query({host, path})
        rspamd_logger.debugm(N, task, 'try to resolve {%s, %s} -> %s',
            host, path, to_resolve_hp)
        r:resolve_txt({
          task = task,
          name = to_resolve_hp,
          callback = host_host_path_cb})

        if query then
          local function host_host_path_query_cb(_, _, results, err)
            if not err and results then
              task:insert_result(phish_symbol, 1.0, results)
            end
          end

          local to_resolve_hpq = compose_dns_query({host, path, query})
          rspamd_logger.debugm(N, task, 'try to resolve {%s, %s, %s} -> %s',
              host, path, query, to_resolve_hpq)
          r:resolve_txt({
            task = task,
            name = to_resolve_hpq,
            callback = host_host_path_query_cb})
        end
      end
    end

    -- Domain taken from the first option of DMARC_POLICY_ALLOW (if that symbol
    -- is present); URLs on that domain are excluded below
    local dmarc_dom
    local dsym = task:get_symbol('DMARC_POLICY_ALLOW')
    if dsym then
      dsym = dsym[1] -- legacy stuff, need to take the first element
      if dsym.options then
        dmarc_dom = dsym.options[1]
      end
    end

    -- Process all urls
    local urls = task:get_urls() or {}

    for _, url_iter in ipairs(urls) do
      local function do_loop_iter() -- to emulate continue
        local url = url_iter

        if generic_service_hash then
          check_phishing_map(generic_service_data, url, generic_service_symbol)
        end

        if openphish_hash then
          check_phishing_map(openphish_data, url, openphish_symbol)
        end

        if phishtank_enabled then
          check_phishing_dns(phishtank_suffix, url, phishtank_symbol)
        end

        if url:is_phished() then
          -- `url` is the real target, `purl` the url it is compared against
          local purl

          if url:is_redirected() then
            local rspamd_url = require "rspamd_url"
            -- Examine the real redirect target instead of the url
            local redirected_url = url:get_redirected()
            if not redirected_url then
              return
            end

            purl = rspamd_url.create(task:get_mempool(), url:get_visible())
            url = redirected_url
          else
            purl = url:get_phished()
          end

          if not purl then
            return
          end

          local tld = url:get_tld()
          local ptld = purl:get_tld()

          if not ptld or not tld then
            return
          end

          if dmarc_dom and tld == dmarc_dom then
            -- `task` is passed as the log object, like in every other debugm call
            lua_util.debugm(N, task, 'exclude phishing from %s -> %s by dmarc domain', tld,
                ptld)
            return
          end

          -- Now we can safely remove the last dot component if it is the same
          local b, _ = string.find(tld, '%.[^%.]+$')
          local b1, _ = string.find(ptld, '%.[^%.]+$')
          local stripped_tld, stripped_ptld = tld, ptld

          if b1 and b then
            if string.sub(tld, b) == string.sub(ptld, b1) then
              stripped_ptld = string.gsub(ptld, '%.[^%.]+$', '')
              stripped_tld = string.gsub(tld, '%.[^%.]+$', '')
            end

            -- Check the stripped labels: they are the ones that may become
            -- empty and they are used as the divisor of the distance below
            if #stripped_ptld == 0 or #stripped_tld == 0 then
              return false
            end
          end

          local weight = 1.0
          local spoofed, why = util.is_utf_spoofed(tld, ptld)

          if spoofed then
            lua_util.debugm(N, task, "confusable: %1 -> %2: %3", tld, ptld, why)
            weight = 1.0
          else
            local dist = util.levenshtein_distance(stripped_tld, stripped_ptld, 2)
            -- Normalise the distance by the average length of both labels
            dist = 2 * dist / (#stripped_tld + #stripped_ptld)

            if dist > 0.3 and dist <= 1.0 then
              -- Use distance to penalize the total weight
              weight = util.tanh(3 * (1 - dist + 0.1))
            elseif dist > 1 then
              -- We also check if two labels are in the same ascii/non-ascii representation
              local a1, a2 = false, false

              if string.match(tld, '^[\001-\127]*$') then a1 = true end
              if string.match(ptld, '^[\001-\127]*$') then a2 = true end

              if a1 ~= a2 then
                weight = 1
                lua_util.debugm(N, task, "confusable: %1 -> %2: different characters",
                    tld, ptld, why)
              else
                -- We have totally different strings in tld, so penalize it somehow
                weight = 0.5
              end
            end

            lua_util.debugm(N, task, "distance: %1 -> %2: %3", tld, ptld, dist)
          end

          -- Returns true and the matched key when the tld or the host of
          -- `furl` is present in `map`, false otherwise
          local function is_url_in_map(map, furl)
            for _, dn in ipairs({furl:get_tld(), furl:get_host()}) do
              if map:get_key(dn) then
                return true, dn
              end
            end

            return false
          end

          -- Walks a list of {symbol, map} rules and inserts the symbol of the
          -- first rule whose map contains `furl` (defaults: the real url and
          -- the computed weight). Returns true on a hit, nothing otherwise.
          local function found_in_map(map, furl, sweight)
            if not furl then furl = url end
            if not sweight then sweight = weight end

            if #map > 0 then
              for _, rule in ipairs(map) do
                local found, dn = is_url_in_map(rule.map, furl)
                if found then
                  task:insert_result(rule.symbol, sweight, ptld .. '->' .. dn)
                  return true
                end
              end
            end
          end

          -- Exceptions first, then strict domains (always with the full
          -- weight), then the generic symbol, optionally limited to `domains`
          if not found_in_map(exceptions_maps) then
            if not found_in_map(strict_domains, purl, 1.0) then
              if domains then
                if is_url_in_map(domains, purl) then
                  task:insert_result(symbol, weight, ptld .. '->' .. tld)
                end
              else
                task:insert_result(symbol, weight, ptld .. '->' .. tld)
              end
            end
          end
        end
      end

      do_loop_iter()
    end
  end
  --- Registers the maps listed under one option as rules with virtual symbols.
  -- `opts[mapname]` is expected to be a table of `symbol -> map definition`.
  -- For every map that can be created a virtual symbol is registered under
  -- `id` and a `{symbol = ..., map = ...}` rule is appended to `phishmap`.
  -- @param mapname name of the option to read (also used in the map description)
  -- @param phishmap list that receives the created rules
  -- @param id parent symbol id for the virtual symbols
  local function phishing_map(mapname, phishmap, id)
    local xd = opts[mapname]

    if not xd then
      return
    end

    if type(xd) ~= 'table' then
      -- Stop here: iterating over a non-table value would raise an error
      rspamd_logger.errx(rspamd_config, 'invalid exception table')
      return
    end

    for sym, map_data in pairs(xd) do
      local rmap = lua_maps.map_add_from_ucl(map_data, 'set',
          'Phishing ' .. mapname .. ' map')

      if rmap then
        rspamd_config:register_virtual_symbol(sym, 1, id)
        table.insert(phishmap, { symbol = sym, map = rmap })
      else
        rspamd_logger.infox(rspamd_config, 'cannot add map for symbol: %s', sym)
      end
    end
  end
  --- Splits `s` on `sep` and feeds every piece to `func`.
  -- The split is expressed as an lpeg grammar: a field is any run of input not
  -- containing the separator, and each matched field is passed to `func`.
  -- @param s string to split
  -- @param sep separator (anything accepted by `lpeg.P`)
  -- @param func function applied to each field
  -- @return whatever the lpeg match returns
  local function rspamd_str_split_fun(s, sep, func)
    local lpeg = require "lpeg"
    local delimiter = lpeg.P(sep)
    local field = lpeg.P((1 - delimiter)^0 / func)
    local grammar = lpeg.P(field * (delimiter * field)^0)

    return grammar:match(s)
  end
  --- Parses `str` as an url and files it into `tbl` under its host.
  -- Every host maps to a list of entries holding the url path, the url query
  -- and the caller supplied `data`.
  -- @param pool memory pool handed to `rspamd_url.create`
  -- @param tbl table keyed by host that receives the entry
  -- @param str url text
  -- @param data arbitrary payload stored with the entry
  -- @return true when an entry was stored, false when the string yields no host
  local function insert_url_from_string(pool, tbl, str, data)
    local rspamd_url = require "rspamd_url"
    local parsed = rspamd_url.create(pool, str)
    local host = parsed and parsed:get_host()

    if not host then
      return false
    end

    local entry = {
      data = data,
      path = parsed:get_path(),
      query = parsed:get_query()
    }

    local bucket = tbl[host]
    if not bucket then
      bucket = {}
      tbl[host] = bucket
    end
    table.insert(bucket, entry)

    return true
  end
  --- Map callback for the generic service feed (one url per line).
  -- Parses the whole feed into a fresh table and then replaces
  -- `generic_service_data` with it.
  -- @param s raw feed content (named `s` so that the standard `string`
  -- library is not shadowed, matching `openphish_plain_cb`)
  local function generic_service_plain_cb(s)
    local rspamd_mempool = require "rspamd_mempool"
    local nelts = 0
    local new_data = {}
    -- Temporary pool used while the urls are parsed
    local pool = rspamd_mempool.create()

    local function generic_service_elt_parser(cap)
      if insert_url_from_string(pool, new_data, cap, nil) then
        nelts = nelts + 1
      end
    end

    rspamd_str_split_fun(s, '\n', generic_service_elt_parser)

    generic_service_data = new_data
    rspamd_logger.infox(generic_service_hash, "parsed %s elements from %s feed",
        nelts, generic_service_name)
    pool:destroy()
  end
  --- Map callback for the openphish premium feed (one JSON object per line).
  -- Every line is parsed with ucl; objects carrying an `url` field are stored
  -- together with the whole object as their data. `openphish_data` is replaced
  -- only when no line failed to parse.
  -- @param s raw feed content (named `s` so that the standard `string`
  -- library is not shadowed)
  local function openphish_json_cb(s)
    local ucl = require "ucl"
    local rspamd_mempool = require "rspamd_mempool"
    local nelts = 0
    local new_json_map = {}
    local valid = true
    -- Temporary pool used while the urls are parsed
    local pool = rspamd_mempool.create()

    local function openphish_elt_parser(cap)
      -- After the first broken line the rest of the feed is ignored
      if not valid then
        return
      end

      local parser = ucl.parser()
      local res, err = parser:parse_string(cap)

      if not res then
        valid = false
        -- Format through the logger: concatenating a nil `err` would raise
        rspamd_logger.warnx(openphish_hash, 'cannot parse openphish map: %s', err)
        return
      end

      local obj = parser:get_object()
      -- Lines that do not yield a table (e.g. an empty one) have no url
      if type(obj) == 'table' and obj['url'] then
        if insert_url_from_string(pool, new_json_map, obj['url'], obj) then
          nelts = nelts + 1
        end
      end
    end

    rspamd_str_split_fun(s, '\n', openphish_elt_parser)

    if valid then
      openphish_data = new_json_map
      rspamd_logger.infox(openphish_hash, "parsed %s elements from openphish feed",
          nelts)
    end

    pool:destroy()
  end
  --- Map callback for the plain openphish feed (one url per line).
  -- Builds a new host table from the feed and installs it as `openphish_data`.
  -- @param s raw feed content
  local function openphish_plain_cb(s)
    local rspamd_mempool = require "rspamd_mempool"
    local parsed_feed = {}
    local count = 0
    -- Temporary pool used while the urls are parsed
    local scratch_pool = rspamd_mempool.create()

    rspamd_str_split_fun(s, '\n', function(line)
      if insert_url_from_string(scratch_pool, parsed_feed, line, nil) then
        count = count + 1
      end
    end)

    openphish_data = parsed_feed
    rspamd_logger.infox(openphish_hash, "parsed %s elements from openphish feed",
        count)
    scratch_pool:destroy()
  end
  -- Module setup: register the callback symbol, the feeds and the maps
  -- according to the options
  if opts then
    local id

    if opts['symbol'] then
      symbol = opts['symbol']

      -- Register symbol's callback
      id = rspamd_config:register_symbol({
        name = symbol,
        callback = phishing_cb
      })

      -- To exclude from domains for dmarc verified messages
      rspamd_config:register_dependency(symbol, 'DMARC_CHECK')

      -- Generic service feed settings; `generic_service_url` wins over
      -- `generic_service_map` when both are given
      generic_service_symbol = opts['generic_service_symbol'] or generic_service_symbol
      generic_service_map = opts['generic_service_url'] or opts['generic_service_map']
          or generic_service_map
      generic_service_name = opts['generic_service_name'] or generic_service_name

      if opts['generic_service_enabled'] then
        generic_service_hash = rspamd_config:add_map({
          type = 'callback',
          url = generic_service_map,
          callback = generic_service_plain_cb,
          description = 'Generic feed'
        })
      end

      -- Openphish feed settings; `openphish_url` wins over `openphish_map`
      openphish_map = opts['openphish_url'] or opts['openphish_map'] or openphish_map

      if opts['openphish_premium'] then
        openphish_premium = true
      end

      if opts['openphish_enabled'] then
        -- The premium feed is JSON, the free one is a plain list
        local feed_callback, feed_description

        if openphish_premium then
          feed_callback = openphish_json_cb
          feed_description = 'Open phishing premium feed map (see https://www.openphish.com for details)'
        else
          feed_callback = openphish_plain_cb
          feed_description = 'Open phishing feed map (see https://www.openphish.com for details)'
        end

        openphish_hash = rspamd_config:add_map({
          type = 'callback',
          url = openphish_map,
          callback = feed_callback,
          description = feed_description,
          opaque_data = true,
        })
      end

      if opts['phishtank_enabled'] then
        phishtank_enabled = true
        phishtank_suffix = opts['phishtank_suffix'] or phishtank_suffix
      end

      -- Feed symbols are virtual children of the main callback symbol
      local feed_symbols = { generic_service_symbol, openphish_symbol, phishtank_symbol }
      for _, feed_symbol in ipairs(feed_symbols) do
        rspamd_config:register_symbol({
          type = 'virtual',
          parent = id,
          name = feed_symbol,
        })
      end
    end

    if type(opts['domains']) == 'string' then
      domains = lua_maps.map_add_from_ucl(opts['domains'], 'set',
          'Phishing domains')
    end

    phishing_map('strict_domains', strict_domains, id)
    phishing_map('exceptions', exceptions_maps, id)
  end