You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

misc.lua 18KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622
  1. --[[
  2. Copyright (c) 2011-2017, Vsevolod Stakhov <vsevolod@highsecure.ru>
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ]]--
  13. -- This is main lua config file for rspamd
  14. local E = {}
  15. local fun = require "fun"
  16. local util = require "rspamd_util"
  17. local rspamd_regexp = require "rspamd_regexp"
  18. local rspamd_lua_utils = require "lua_util"
  -- Different text parts
  -- R_PARTS_DIFFER reads the 'parts_distance' (double) and 'total_words' (int)
  -- variables from the task memory pool and, when the distance is at least 0.5,
  -- inserts the symbol itself with a computed weight and a percentage option.
  -- The callback always returns false because the result is inserted manually.
  rspamd_config.R_PARTS_DIFFER = {
    callback = function(task)
      local pool = task:get_mempool()
      local distance = pool:get_variable('parts_distance', 'double')
      -- tonumber() yields nil for a value it cannot convert; treat that the
      -- same way as a missing variable instead of failing on the comparison.
      local nd = distance and tonumber(distance)

      -- ND is relation of different words to total words
      if nd and nd >= 0.5 then
        local tw = pool:get_variable('total_words', 'int')
        if tw then
          -- Base weight is the excess over the 0.5 threshold
          local score = nd - 0.5
          if tw > 30 then
            -- We are confident about difference: double the weight
            score = score * 2.0
          end
          -- Pass the number straight to %f; no string round-trip is needed
          task:insert_result('R_PARTS_DIFFER', score,
            string.format('%.1f%%', 100.0 * nd))
        end
      end

      return false
    end,
    score = 1.0,
    description = 'Text and HTML parts differ',
    group = 'body'
  }
  -- Date issues
  -- MISSING_DATE matches when the raw Date header is absent or an empty string.
  rspamd_config.MISSING_DATE = {
    callback = function(task)
      local raw_date = task:get_header_raw('Date')
      return raw_date == nil or raw_date == ''
    end,
    score = 1.0,
    description = 'Message date is missing',
    group = 'date'
  }
  -- DATE_IN_FUTURE matches when the message date is positive and is more than
  -- two hours ahead of the connection date (both requested with gmt = true).
  rspamd_config.DATE_IN_FUTURE = {
    callback = function(task)
      local message_time = task:get_date{format = 'message', gmt = true}
      local connect_time = task:get_date{format = 'connect', gmt = true}
      -- 2 hours
      local max_ahead = 7200

      return message_time > 0 and (message_time - connect_time) > max_ahead
    end,
    score = 4.0,
    description = 'Message date is in future',
    group = 'date'
  }
  -- DATE_IN_PAST matches when the message date is positive and is more than
  -- one day behind the connection date (both requested with gmt = true).
  rspamd_config.DATE_IN_PAST = {
    callback = function(task)
      local message_time = task:get_date{format = 'message', gmt = true}
      local connect_time = task:get_date{format = 'connect', gmt = true}
      -- A day
      local max_behind = 86400

      return message_time > 0 and (connect_time - message_time) > max_behind
    end,
    score = 1.0,
    description = 'Message date is in past',
    group = 'date'
  }
  -- R_SUSPICIOUS_URL inserts itself once per URL whose is_obscured() check
  -- succeeds, using the URL host as the option. The callback returns false
  -- because results are inserted manually.
  rspamd_config.R_SUSPICIOUS_URL = {
    callback = function(task)
      local urls = task:get_urls()
      if not urls then
        return false
      end

      for _, url in ipairs(urls) do
        if url:is_obscured() then
          task:insert_result('R_SUSPICIOUS_URL', 1.0, url:get_host())
        end
      end

      return false
    end,
    score = 5.0,
    one_shot = true,
    description = 'Obfusicated or suspicious URL has been found in a message',
    group = 'url'
  }
  -- ENVFROM_PRVS: detect PRVS/BATV envelope senders to avoid FORGED_SENDER.
  -- https://en.wikipedia.org/wiki/Bounce_Address_Tag_Validation
  -- Signature syntax handled by the regexp below:
  --   prvs=TAG=USER@example.com     BATV draft
  --                                 (https://tools.ietf.org/html/draft-levine-smtp-batv-01)
  --   prvs=USER=TAG@example.com
  --   btv1==TAG==USER@example.com   Barracuda appliance
  --   msprvs1=TAG=USER@example.com  Sparkpost email delivery service
  rspamd_config.ENVFROM_PRVS = {
    callback = function (task)
      -- Both the envelope (1) and header (2) senders are required
      local has_both = task:has_from(1) and task:has_from(2)
      if not has_both then
        return false
      end

      local smtp_from = task:get_from(1)
      local prvs_re = rspamd_regexp.create_cached(
        '^(?:(prvs|msprvs1)=([^=]+)=|btv1==[^=]+==)(.+@(.+))$')
      local matches = prvs_re:search(smtp_from[1].addr:lower(), false, true)
      if not matches then
        return false
      end

      -- Element 1 of the first match is not used here; elements 2..5 are read
      -- as scheme, first tag field, trailing address and its domain part.
      local groups = matches[1]
      local mime_addr = task:get_from(2)[1].addr:lower()

      -- See if the trailing address matches the From header
      if groups[4] == mime_addr then
        return true
      end

      -- Check for prvs=USER=TAG@example.com: rebuild USER@domain and compare
      if groups[2] == 'prvs' and (groups[3] .. '@' .. groups[5]) == mime_addr then
        return true
      end

      return false
    end,
    score = 0.0,
    description = "Envelope From is a PRVS address that matches the From address",
    group = 'prvs'
  }
  -- ENVFROM_VERP matches when the user part of the envelope sender contains the
  -- single envelope recipient with '@' replaced by '=', unless the sender looks
  -- like a Google forwarding ('+caf_=') or SRS ('srs0=' / 'srs1=') address.
  rspamd_config.ENVFROM_VERP = {
    callback = function (task)
      if not task:has_from(1) or not task:has_recipients(1) then
        return false
      end

      local smtp_from = task:get_from(1)
      local smtp_rcpts = task:get_recipients(1)

      -- VERP only works for single recipient messages
      if #smtp_rcpts > 1 then
        return false
      end

      -- Recipient with '@' swapped for '='; only gsub's first result is kept
      local verp = smtp_rcpts[1].addr:lower():gsub('@', '=')
      -- User portion of the envelope sender
      local sender_user = smtp_from[1].user:lower()

      -- Plain (non-pattern) search for the VERP form of the recipient
      if not sender_user:find(verp, 1, true) then
        return false
      end
      -- Google Forwarding
      if sender_user:find('+caf_=' .. verp, 1, true) then
        return false
      end
      -- SRS
      if sender_user:find('^srs[01]=') then
        return false
      end

      return true
    end,
    score = 0.0,
    description = "Envelope From is a VERP address",
    group = "mailing_list"
  }
  -- CHECK_RCVD inspects the parsed Received headers and inserts the virtual
  -- symbols RCVD_TLS_ALL / RCVD_TLS_LAST / RCVD_NO_TLS_LAST (exactly one of
  -- them) and, independently, RCVD_VIA_SMTP_AUTH.
  local check_rcvd = rspamd_config:register_symbol{
    name = 'CHECK_RCVD',
    callback = function (task)
      local received = task:get_received_headers()
      if not received then
        return false
      end

      local function has_flag(hop, flag)
        return hop.flags and hop.flags[flag]
      end
      local function is_encrypted(hop)
        return has_flag(hop, 'ssl')
      end
      -- Hops without by_hostname, or received by 'localhost', are not counted
      local function is_counted(hop)
        return hop.by_hostname and hop.by_hostname ~= 'localhost'
      end

      if fun.all(is_encrypted, fun.filter(is_counted, received)) then
        task:insert_result('RCVD_TLS_ALL', 1.0)
      else
        -- See if only the last hop was encrypted; the first list entry is
        -- the one examined as the "last hop".
        local last_hop = received[1]
        if last_hop.by_hostname and last_hop.by_hostname == 'localhost' then
          -- Ignore artificial header from Rmilter
          last_hop = received[2] or {}
        end

        local symbol = is_encrypted(last_hop) and 'RCVD_TLS_LAST' or 'RCVD_NO_TLS_LAST'
        task:insert_result(symbol, 1.0)
      end

      local seen_auth = fun.any(function(hop)
        return has_flag(hop, 'authenticated')
      end, received)
      if seen_auth then
        task:insert_result('RCVD_VIA_SMTP_AUTH', 1.0)
      end
    end
  }
  -- Virtual symbols attached to the check_rcvd parent. They are registered in
  -- the listed order, each with type 'virtual' and a score of 0.0.
  for _, sym in ipairs({
    {
      name = 'RCVD_TLS_ALL',
      description = 'All hops used encrypted transports',
      group = 'encryption'
    },
    {
      name = 'RCVD_TLS_LAST',
      description = 'Last hop used encrypted transports',
      group = 'encryption'
    },
    {
      name = 'RCVD_NO_TLS_LAST',
      description = 'Last hop did not use encrypted transports',
      group = 'encryption'
    },
    {
      name = 'RCVD_VIA_SMTP_AUTH',
      -- NB This does not mean sender was authenticated; see task:get_user()
      description = 'Authenticated hand-off was seen in Received headers',
      group = 'authentication'
    },
  }) do
    rspamd_config:register_symbol{
      type = 'virtual',
      parent = check_rcvd,
      name = sym.name,
      description = sym.description,
      score = 0.0,
      group = sym.group
    }
  end
  -- RCVD_HELO_USER matches when the HELO argument is exactly "user"
  -- (case-insensitive), or when any Received header's decoded text starts with
  -- "from user " or contains "helo user" / "helo=user" followed by whitespace
  -- or ')'. Returns false when nothing matches.
  rspamd_config.RCVD_HELO_USER = {
    callback = function (task)
      -- Check HELO argument from MTA
      local helo = task:get_helo()
      if helo and helo:lower() == 'user' then
        return true
      end

      -- Check Received headers
      local rcvds = task:get_header_full('Received')
      if not rcvds then
        return false
      end

      for _, rcvd in ipairs(rcvds) do
        -- Skip entries that carry no decoded text instead of raising on nil
        local decoded = rcvd['decoded']
        if decoded then
          local r = decoded:lower()
          if r:find("^%s*from%suser%s") or r:find("helo[%s=]user[%s%)]") then
            return true
          end
        end
      end

      -- Explicit negative result rather than falling off the end
      return false
    end,
    description = 'HELO User spam pattern',
    score = 3.0
  }
  -- URI_COUNT_ODD applies only when the Content-Type header starts with
  -- multipart/alternative. It sums get_count() over URLs for which
  -- is_html_displayed() is false and matches when that sum is odd, reporting
  -- the sum as the option. Returns nothing otherwise.
  rspamd_config.URI_COUNT_ODD = {
    callback = function (task)
      local content_type = task:get_header('Content-Type')
      if not content_type
          or not content_type:lower():find('^multipart/alternative') then
        return
      end

      local function not_displayed(url)
        return not url:is_html_displayed()
      end
      local function add_count(total, url)
        return total + url:get_count()
      end

      local urls = task:get_urls() or {}
      local nurls = fun.foldl(add_count, 0, fun.filter(not_displayed, urls))

      if nurls % 2 == 1 then
        return true, 1.0, tostring(nurls)
      end
    end,
    description = 'Odd number of URIs in multipart/alternative message',
    score = 1.0
  }
  -- HAS_ATTACHMENT matches when the message has more than one part and at least
  -- one part's Content-Disposition header starts with "attachment"
  -- (case-insensitive). Returns nothing when no such part exists.
  rspamd_config.HAS_ATTACHMENT = {
    callback = function (task)
      local parts = task:get_parts()
      if not parts or #parts <= 1 then
        return
      end

      for _, part in ipairs(parts) do
        local disposition = part:get_header('Content-Disposition')
        if disposition and disposition:lower():match('^attachment') then
          return true
        end
      end
    end,
    description = 'Message contains attachments'
  }
  -- Requires freemail maps loaded in multimap
  -- Returns true when both FREEMAIL_REPLYTO and FREEMAIL_FROM are present on
  -- the task, both carry options, and their first options differ; false in
  -- every other case.
  local function freemail_reply_neq_from(task)
    local reply_to_sym = task:get_symbol('FREEMAIL_REPLYTO')
    local from_sym = task:get_symbol('FREEMAIL_FROM')
    if not (reply_to_sym and from_sym) then
      return false
    end

    local reply_to_opts = reply_to_sym['options']
    local from_opts = from_sym['options']
    if not (reply_to_opts and from_opts) then
      return false
    end

    return reply_to_opts[1] ~= from_opts[1]
  end
  -- Register the rule, then declare its dependencies on the two freemail
  -- symbols whose results the callback reads.
  local freemail_reply_neq_from_id = rspamd_config:register_symbol({
    name = 'FREEMAIL_REPLYTO_NEQ_FROM_DOM',
    callback = freemail_reply_neq_from,
    description = 'Freemail From and Reply-To, but to different Freemail services',
    score = 3.0
  })
  for _, dependency in ipairs({'FREEMAIL_REPLYTO', 'FREEMAIL_FROM'}) do
    rspamd_config:register_dependency(freemail_reply_neq_from_id, dependency)
  end
  -- OMOGRAPH_URL runs util.is_utf_spoofed over every URL in two ways:
  --   * phished URLs: the URL host against the host of get_phished();
  --   * URLs for which is_html_displayed() is false: the TLD alone.
  -- A hit of the first kind yields weight 1.0; otherwise a hit of the second
  -- kind yields weight 0.5. The collected strings are returned as options.
  rspamd_config.OMOGRAPH_URL = {
    callback = function(task)
      local urls = task:get_urls()
      if not urls then
        return false
      end

      local pair_hits, single_hits = 0, 0
      local findings = {}

      for _, url in ipairs(urls) do
        if url:is_phished() then
          local host = url:get_host()
          local phished_host = url:get_phished():get_host()
          if host and phished_host and util.is_utf_spoofed(host, phished_host) then
            findings[#findings + 1] = string.format('%s->%s', host, phished_host)
            pair_hits = pair_hits + 1
          end
        end

        if not url:is_html_displayed() then
          local tld = url:get_tld()
          if tld and util.is_utf_spoofed(tld) then
            findings[#findings + 1] = string.format('%s', tld)
            single_hits = single_hits + 1
          end
        end
      end

      if pair_hits > 0 then
        return true, 1.0, findings
      end
      if single_hits > 0 then
        return true, 0.5, findings
      end

      return false
    end,
    score = 5.0,
    description = 'Url contains both latin and non-latin characters'
  }
  -- URL_IN_SUBJECT matches on the first URL for which is_subject() is true.
  -- The weight is 1.0 when the URL's string form equals the whole subject and
  -- 0.25 otherwise; the URL host is returned as the option.
  rspamd_config.URL_IN_SUBJECT = {
    callback = function(task)
      local urls = task:get_urls()
      if not urls then
        return false
      end

      for _, url in ipairs(urls) do
        if url:is_subject() then
          local subject = task:get_subject()
          local weight = 0.25
          if subject and tostring(url) == subject then
            weight = 1.0
          end
          return true, weight, url:get_host()
        end
      end

      return false
    end,
    score = 4.0,
    description = 'Url found in Subject'
  }
  -- EMAIL_PLUS_ALIASES (prefilter): passes the SMTP and MIME senders and
  -- recipients through lua_util.remove_email_aliases. When that call reports a
  -- change, the addresses are written back to the task and TAGGED_FROM /
  -- TAGGED_RCPT is inserted with the non-empty tags as options.
  local aliases_id = rspamd_config:register_symbol{
    type = 'prefilter',
    name = 'EMAIL_PLUS_ALIASES',
    callback = function(task)
      -- Keeps only tags that are present and non-empty
      local function non_empty(tag)
        return tag and #tag > 0
      end

      local function check_from(kind)
        if not task:has_from(kind) then
          return
        end

        local addr = task:get_from(kind)[1]
        local stripped, tags = rspamd_lua_utils.remove_email_aliases(addr)
        if stripped then
          -- The same addr table is written back; presumably
          -- remove_email_aliases updates it in place -- verify in lua_util.
          task:set_from(kind, addr)
          task:insert_result('TAGGED_FROM', 1.0,
            fun.totable(fun.filter(non_empty, tags)))
        end
      end

      local function check_rcpt(kind)
        if not task:has_recipients(kind) then
          return
        end

        local addrs = task:get_recipients(kind)
        local changed = false
        local collected = {}

        for _, addr in ipairs(addrs) do
          local stripped, tags = rspamd_lua_utils.remove_email_aliases(addr)
          if stripped then
            changed = true
            fun.each(function(tag)
              collected[#collected + 1] = tag
            end, fun.filter(non_empty, tags))
          end
        end

        if changed then
          task:set_recipients(kind, addrs)
          task:insert_result('TAGGED_RCPT', 1.0, collected)
        end
      end

      check_from('smtp')
      check_from('mime')
      check_rcpt('smtp')
      check_rcpt('mime')
    end,
    priority = 150,
    description = 'Removes plus aliases from the email',
  }
  -- Virtual symbols inserted by the EMAIL_PLUS_ALIASES prefilter (aliases_id).
  -- Inserted for recipients when any recipient address had tags removed
  rspamd_config:register_symbol{
    name = 'TAGGED_RCPT',
    type = 'virtual',
    parent = aliases_id,
    score = 0,
    description = 'SMTP recipients have plus tags',
  }
  -- Inserted for the sender when its address had tags removed
  rspamd_config:register_symbol{
    name = 'TAGGED_FROM',
    type = 'virtual',
    parent = aliases_id,
    score = 0,
    description = 'SMTP from has plus tags',
  }
  -- Callback symbol: tries to parse an email address out of the From display
  -- name. When that address has a dotted domain that differs from the real
  -- From domain, it inserts SPOOF_DISPLAY_NAME if any MIME or SMTP recipient
  -- shares the embedded domain, and FROM_NEQ_DISPLAY_NAME otherwise.
  -- Always returns false because results are inserted manually.
  local check_from_display_name = rspamd_config:register_symbol{
    type = 'callback',
    callback = function (task)
      local from = task:get_from(2)
      local sender = from and from[1]
      if not (sender and sender.name) then
        return false
      end

      -- See if we can parse an email address from the name
      local parsed = util.parse_mail_address(sender.name)
      local embedded = parsed and parsed[1]
      if not (embedded and embedded['addr']) then
        return false
      end

      -- Make sure we did not mistake e.g. <something>@<name> for an email address
      local embedded_domain = embedded['domain']
      if not embedded_domain or not embedded_domain:find('%.') then
        return false
      end

      -- Nothing to report when the two domains are the same
      if util.strequal_caseless(sender['domain'], embedded_domain) then
        return false
      end

      -- True when some recipient in the list has the embedded domain
      local function has_embedded_domain(rcpts)
        if not rcpts then
          return false
        end
        for _, to in ipairs(rcpts) do
          if to['domain'] ~= ''
              and util.strequal_caseless(to['domain'], embedded_domain) then
            return true
          end
        end
        return false
      end

      -- See if the destination domain is the same as the spoof
      local mime_rcpts = task:get_recipients(2)
      local smtp_rcpts = task:get_recipients(1)
      local symbol
      if has_embedded_domain(mime_rcpts) or has_embedded_domain(smtp_rcpts) then
        symbol = 'SPOOF_DISPLAY_NAME'
      else
        symbol = 'FROM_NEQ_DISPLAY_NAME'
      end
      task:insert_result(symbol, 1.0, sender['domain'], embedded_domain)

      return false
    end,
  }
  -- Virtual symbols inserted by the check_from_display_name callback.
  -- Embedded address domain matches a recipient domain
  rspamd_config:register_symbol{
    name = 'SPOOF_DISPLAY_NAME',
    type = 'virtual',
    parent = check_from_display_name,
    score = 8,
    description = 'Display name is being used to spoof and trick the recipient',
  }
  -- Embedded address domain differs from From but matches no recipient
  rspamd_config:register_symbol{
    name = 'FROM_NEQ_DISPLAY_NAME',
    type = 'virtual',
    parent = check_from_display_name,
    score = 4,
    description = 'Display name contains an email address different to the From address',
  }
  -- SPOOF_REPLYTO matches when all of the following hold:
  --   * a Reply-To header exists and the MIME From has an address;
  --   * the MIME From differs from the first MIME recipient (when one exists);
  --   * SMTP recipients are not just the single From address;
  --   * at least one SMTP recipient is in the From domain;
  --   * the Reply-To domain parses and differs from the From domain.
  -- On a match the From domain and Reply-To domain are returned after `true`.
  rspamd_config.SPOOF_REPLYTO = {
    callback = function (task)
      -- First check for a Reply-To header
      local reply_to_hdrs = task:get_header_full('Reply-To')
      if not reply_to_hdrs or not reply_to_hdrs[1] then
        return false
      end
      local reply_to_value = reply_to_hdrs[1]['value']

      -- Get From and To headers
      local from = task:get_from(2)
      local mime_rcpts = task:get_recipients(2)
      if not (from and from[1] and from[1].addr) then
        return false
      end
      local sender = from[1]

      -- Handle common case for Web Contact forms of From = To
      local first_rcpt = mime_rcpts and mime_rcpts[1]
      if first_rcpt and first_rcpt.addr
          and util.strequal_caseless(sender.addr, first_rcpt.addr) then
        return false
      end

      -- SMTP recipients must contain From domain
      local smtp_rcpts = task:get_recipients(1)
      if not smtp_rcpts then
        return false
      end

      -- Try mitigate some possible FPs on mailing list posts
      if #smtp_rcpts == 1
          and util.strequal_caseless(smtp_rcpts[1].addr, sender.addr) then
        return false
      end

      local in_from_domain = false
      for _, rcpt in ipairs(smtp_rcpts) do
        if util.strequal_caseless(rcpt.domain, sender.domain) then
          in_from_domain = true
          break
        end
      end
      if not in_from_domain then
        return false
      end

      -- Parse Reply-To header and take the domain of its first address
      local parsed = util.parse_mail_address(reply_to_value)
      local reply_to_domain = parsed and parsed[1] and parsed[1].domain
      if not reply_to_domain then
        return false
      end

      -- Reply-To domain must be different to From domain
      if util.strequal_caseless(reply_to_domain, sender.domain) then
        return false
      end

      return true, sender.domain, reply_to_domain
    end,
    description = 'Reply-To is being used to spoof and trick the recipient to send an off-domain reply',
    score = 6.0
  }
  -- INFO_TO_INFO_LU matches when a List-Unsubscribe header is present, the
  -- MIME From user is "info" (case-insensitive) and at least one SMTP recipient
  -- user is "info" as well.
  rspamd_config.INFO_TO_INFO_LU = {
    callback = function(task)
      if not task:get_header('List-Unsubscribe') then
        return false
      end

      local from = task:get_from('mime')
      local sender = from and from[1]
      if not (sender and util.strequal_caseless(sender.user, 'info')) then
        return false
      end

      local rcpts = task:get_recipients('smtp')
      if not rcpts then
        return false
      end

      -- Every recipient is examined; count the ones addressed to info@
      local info_rcpts = 0
      for _, rcpt in ipairs(rcpts) do
        if util.strequal_caseless(rcpt['user'], 'info') then
          info_rcpts = info_rcpts + 1
        end
      end

      return info_rcpts > 0
    end,
    description = 'info@ From/To address with List-Unsubscribe headers',
    score = 2.0
  }
  -- Detects bad content-transfer-encoding for text parts
  -- Matches on the first text part whose declared CTE is not '8bit' while
  -- has_8bit_raw() reports true. The declared CTE is returned as an option;
  -- parts with a utf-8 charset attribute get weight 0.3 and an extra "utf8"
  -- option, all others get weight 1.0.
  rspamd_config.R_BAD_CTE_7BIT = {
    callback = function(task)
      for _, part in ipairs(task:get_text_parts() or {}) do
        local mime_part = part:get_mimepart()
        local cte = mime_part:get_cte() or ''

        if cte ~= '8bit' and part:has_8bit_raw() then
          -- Only the third value (the attributes) of get_type_full is used
          local _, _, attrs = mime_part:get_type_full()
          local charset = attrs and attrs.charset

          if charset and charset:lower() == "utf-8" then
            -- Reduced weight: senders often overlook that UTF-8 text is an
            -- eight bit encoding
            return true, 0.3, {cte, "utf8"}
          end

          return true, 1.0, {cte}
        end
      end

      return false
    end,
    score = 3.5,
    description = 'Detects bad content-transfer-encoding for text parts',
    group = 'header'
  }