You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

misc.lua 18KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641
  1. --[[
  2. Copyright (c) 2011-2017, Vsevolod Stakhov <vsevolod@highsecure.ru>
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ]]--
  13. -- This is main lua config file for rspamd
  14. local E = {}
  15. local fun = require "fun"
  16. local util = require "rspamd_util"
  17. local rspamd_regexp = require "rspamd_regexp"
  18. local rspamd_lua_utils = require "lua_util"
  -- Different text parts
  rspamd_config.R_PARTS_DIFFER = {
    -- Reads 'parts_distance' (double) and 'total_words' (int) from the task
    -- mempool and, when the distance is at least 0.5, inserts R_PARTS_DIFFER
    -- itself with a computed weight. Always returns false because the result
    -- is inserted explicitly rather than through the return value.
    callback = function(task)
      local distance = task:get_mempool():get_variable('parts_distance', 'double')
      -- ND is relation of different words to total words
      local nd = distance and tonumber(distance)
      -- Stop when the variable is absent, not numeric, or below the threshold
      if not nd or not (nd >= 0.5) then
        return false
      end

      local tw = task:get_mempool():get_variable('total_words', 'int')
      if tw then
        local score
        if tw > 30 then
          -- We are confident about difference
          score = (nd - 0.5) * 2.0
        else
          -- We are not so confident about difference
          score = (nd - 0.5)
        end
        -- Pass the number straight to %f; no string round-trip is needed
        task:insert_result('R_PARTS_DIFFER', score,
          string.format('%.1f%%', 100.0 * nd))
      end

      return false
    end,
    score = 1.0,
    description = 'Text and HTML parts differ',
    group = 'body'
  }
  -- Date issues
  rspamd_config.MISSING_DATE = {
    -- Fires when the raw Date header is absent or an empty string.
    callback = function(task)
      local raw_date = task:get_header_raw('Date')
      return raw_date == nil or raw_date == ''
    end,
    score = 1.0,
    description = 'Message date is missing',
    group = 'headers'
  }
  rspamd_config.DATE_IN_FUTURE = {
    -- Fires when the message date is more than two hours ahead of the
    -- connection date (both requested with gmt = true).
    callback = function(task)
      local message_time = task:get_date({format = 'message', gmt = true})
      local connect_time = task:get_date({format = 'connect', gmt = true})
      -- 2 hours
      local ahead_by = message_time - connect_time
      return message_time > 0 and ahead_by > 7200
    end,
    score = 4.0,
    description = 'Message date is in future',
    group = 'headers'
  }
  rspamd_config.DATE_IN_PAST = {
    -- Fires when the message date is more than one day behind the
    -- connection date (both requested with gmt = true).
    callback = function(task)
      local message_time = task:get_date({format = 'message', gmt = true})
      local connect_time = task:get_date({format = 'connect', gmt = true})
      -- A day
      local behind_by = connect_time - message_time
      return message_time > 0 and behind_by > 86400
    end,
    score = 1.0,
    description = 'Message date is in past',
    group = 'headers'
  }
  rspamd_config.R_SUSPICIOUS_URL = {
    -- Inserts R_SUSPICIOUS_URL (with the URL host as option) for every URL
    -- that reports itself as obscured. The symbol is inserted explicitly, so
    -- the callback itself always returns false.
    callback = function(task)
      for _, url in ipairs(task:get_urls() or {}) do
        if url:is_obscured() then
          task:insert_result('R_SUSPICIOUS_URL', 1.0, url:get_host())
        end
      end
      return false
    end,
    score = 5.0,
    one_shot = true,
    description = 'Obfusicated or suspicious URL has been found in a message',
    group = 'url'
  }
  rspamd_config.ENVFROM_PRVS = {
    callback = function (task)
      --[[
      Recognise PRVS/BATV envelope senders so they do not trigger FORGED_SENDER
      https://en.wikipedia.org/wiki/Bounce_Address_Tag_Validation
      Supported signature layouts:
        prvs=TAG=USER@example.com     BATV draft (https://tools.ietf.org/html/draft-levine-smtp-batv-01)
        prvs=USER=TAG@example.com
        btv1==TAG==USER@example.com   Barracuda appliance
        msprvs1=TAG=USER@example.com  Sparkpost email delivery service
      ]]--
      if not task:has_from(1) or not task:has_from(2) then
        return false
      end

      local smtp_from = task:get_from(1)
      local prvs_re = rspamd_regexp.create_cached(
        '^(?:(prvs|msprvs1)=([^=]+)=|btv1==[^=]+==)(.+@(.+))$')
      local matches = prvs_re:search(smtp_from[1].addr:lower(), false, true)
      if not matches then return false end

      -- groups[1] is the whole match; [2] scheme, [3] first field,
      -- [4] trailing address, [5] domain of that address
      local groups = matches[1]
      local mime_addr = task:get_from(2)[1].addr:lower()

      -- The trailing address equals the From header address
      if groups[4] == mime_addr then
        return true
      end

      -- Alternative layout: prvs=USER=TAG@example.com
      if groups[2] == 'prvs' then
        local rebuilt = groups[3] .. '@' .. groups[5]
        if rebuilt == mime_addr then
          return true
        end
      end

      return false
    end,
    score = 0.0,
    description = "Envelope From is a PRVS address that matches the From address",
    group = 'headers'
  }
  rspamd_config.ENVFROM_VERP = {
    -- Fires when the envelope sender's user part embeds the single envelope
    -- recipient in VERP form ('@' replaced by '='), excluding Google
    -- forwarding and SRS rewrites.
    callback = function (task)
      if not task:has_from(1) or not task:has_recipients(1) then
        return false
      end

      local smtp_from = task:get_from(1)
      local smtp_rcpts = task:get_recipients(1)

      -- VERP only works for single recipient messages
      if #smtp_rcpts > 1 then return false end

      -- Build the VERP form of the recipient address
      local rcpt_addr = smtp_rcpts[1].addr:lower()
      local verp = (rcpt_addr:gsub('@', '='))

      -- User part of the envelope sender
      local sender_user = smtp_from[1].user:lower()

      -- The VERP form must appear literally in the sender user part
      if not sender_user:find(verp, 1, true) then return false end
      -- Google Forwarding
      if sender_user:find('+caf_=' .. verp, 1, true) then return false end
      -- SRS
      if sender_user:find('^srs[01]=') then return false end

      return true
    end,
    score = 0.0,
    description = "Envelope From is a VERP address",
    group = "headers"
  }
  local check_rcvd = rspamd_config:register_symbol{
    name = 'CHECK_RCVD',
    -- Inspects the parsed Received headers and inserts the virtual symbols
    -- RCVD_TLS_ALL, RCVD_TLS_LAST or RCVD_NO_TLS_LAST, plus RCVD_VIA_SMTP_AUTH
    -- when any header carries the 'authenticated' flag.
    callback = function (task)
      local rcvds = task:get_received_headers()
      -- Without any Received header there is nothing to classify; fun.all over
      -- an empty sequence would otherwise be vacuously true and report
      -- RCVD_TLS_ALL for a message with no hops at all.
      if not rcvds or #rcvds == 0 then return false end

      -- Headers whose by_hostname is missing or 'localhost' are skipped here
      local all_tls = fun.all(function(rc)
        return rc.flags and rc.flags['ssl']
      end, fun.filter(function(rc)
        return rc.by_hostname and rc.by_hostname ~= 'localhost'
      end, rcvds))

      if all_tls then
        task:insert_result('RCVD_TLS_ALL', 1.0)
      else
        -- See if only the last hop was encrypted
        local rcvd = rcvds[1]
        if rcvd.by_hostname and rcvd.by_hostname == 'localhost' then
          -- Ignore artificial header from Rmilter
          rcvd = rcvds[2] or {}
        end
        if rcvd.flags and rcvd.flags['ssl'] then
          task:insert_result('RCVD_TLS_LAST', 1.0)
        else
          task:insert_result('RCVD_NO_TLS_LAST', 1.0)
        end
      end

      local auth = fun.any(function(rc)
        return rc.flags and rc.flags['authenticated']
      end, rcvds)

      if auth then
        task:insert_result('RCVD_VIA_SMTP_AUTH', 1.0)
      end
    end
  }
  -- Virtual symbols inserted by the CHECK_RCVD callback; all share the same
  -- parent, group and zero score, so they are registered from one table.
  for _, sym in ipairs({
    {'RCVD_TLS_ALL', 'All hops used encrypted transports'},
    {'RCVD_TLS_LAST', 'Last hop used encrypted transports'},
    {'RCVD_NO_TLS_LAST', 'Last hop did not use encrypted transports'},
    -- NB This does not mean sender was authenticated; see task:get_user()
    {'RCVD_VIA_SMTP_AUTH', 'Authenticated hand-off was seen in Received headers'},
  }) do
    rspamd_config:register_symbol{
      type = 'virtual',
      parent = check_rcvd,
      name = sym[1],
      description = sym[2],
      score = 0.0,
      group = 'headers'
    }
  end
  rspamd_config.RCVD_HELO_USER = {
    -- Fires when the HELO argument is exactly "user" (case-insensitive), or
    -- when a Received header matches one of the "from user" / "helo user"
    -- patterns below. Returns false otherwise.
    callback = function (task)
      -- Check HELO argument from MTA
      local helo = task:get_helo()
      if (helo and helo:lower():find('^user$')) then
        return true
      end
      -- Check Received headers
      local rcvds = task:get_header_full('Received')
      if not rcvds then return false end
      for _, rcvd in ipairs(rcvds) do
        local decoded = rcvd['decoded']
        -- Skip headers without a decoded value instead of raising on nil
        if decoded then
          local r = decoded:lower()
          if (r:find("^%s*from%suser%s")) then return true end
          if (r:find("helo[%s=]user[%s%)]")) then return true end
        end
      end
      -- Explicit negative result rather than an implicit nil
      return false
    end,
    description = 'HELO User spam pattern',
    group = 'headers',
    score = 3.0
  }
  rspamd_config.URI_COUNT_ODD = {
    -- For multipart/alternative messages, sums get_count() over the URLs that
    -- are not HTML-displayed and fires (with the total as option) when that
    -- sum is odd.
    callback = function (task)
      local ct = task:get_header('Content-Type')
      if not ct or not ct:lower():find('^multipart/alternative') then
        return
      end

      local urls = task:get_urls() or {}
      local not_displayed = fun.filter(function(url)
        return not url:is_html_displayed()
      end, urls)
      local nurls = fun.foldl(function(total, url)
        return total + url:get_count()
      end, 0, not_displayed)

      if nurls % 2 == 1 then
        return true, 1.0, tostring(nurls)
      end
    end,
    description = 'Odd number of URIs in multipart/alternative message',
    score = 1.0,
    group = 'url',
  }
  rspamd_config.HAS_ATTACHMENT = {
    -- Fires when the message has more than one part and at least one part's
    -- Content-Disposition starts with "attachment" (case-insensitive).
    callback = function (task)
      local parts = task:get_parts()
      if not parts or #parts <= 1 then
        return
      end

      for _, part in ipairs(parts) do
        local disposition = part:get_header('Content-Disposition')
        if disposition and disposition:lower():match('^attachment') then
          return true
        end
      end
    end,
    description = 'Message contains attachments',
    group = 'body',
  }
  -- Requires freemail maps loaded in multimap
  -- Returns true when both FREEMAIL_REPLYTO and FREEMAIL_FROM results exist,
  -- both carry options, and their first options differ; false otherwise.
  local function freemail_reply_neq_from(task)
    local replyto_sym = task:get_symbol('FREEMAIL_REPLYTO')
    local from_sym = task:get_symbol('FREEMAIL_FROM')

    if not (replyto_sym and from_sym) then
      return false
    end

    local replyto_opts = replyto_sym['options']
    local from_opts = from_sym['options']
    if not (replyto_opts and from_opts) then
      return false
    end

    return replyto_opts[1] ~= from_opts[1]
  end
  -- Register the rule, then make it run after both freemail symbols it reads
  rspamd_config:register_symbol{
    name = 'FREEMAIL_REPLYTO_NEQ_FROM_DOM',
    callback = freemail_reply_neq_from,
    description = 'Freemail From and Reply-To, but to different Freemail services',
    score = 3.0,
    group = 'headers',
  }
  for _, dependency in ipairs({'FREEMAIL_REPLYTO', 'FREEMAIL_FROM'}) do
    rspamd_config:register_dependency('FREEMAIL_REPLYTO_NEQ_FROM_DOM', dependency)
  end
  rspamd_config.OMOGRAPH_URL = {
    -- Runs util.is_utf_spoofed on (host, phished host) pairs of phished URLs
    -- and on the TLD of URLs that are not HTML-displayed. Pair hits yield
    -- weight 1.0, TLD-only hits yield 0.5; the offending strings are the
    -- options.
    callback = function(task)
      local urls = task:get_urls()
      if not urls then
        return false
      end

      local pair_hits, tld_hits = 0, 0
      local offenders = {}

      local function inspect(url)
        if url:is_phished() then
          local host = url:get_host()
          local phished_host = url:get_phished():get_host()
          if host and phished_host and util.is_utf_spoofed(host, phished_host) then
            offenders[#offenders + 1] = string.format('%s->%s', host, phished_host)
            pair_hits = pair_hits + 1
          end
        end

        if not url:is_html_displayed() then
          local tld = url:get_tld()
          if tld and util.is_utf_spoofed(tld) then
            offenders[#offenders + 1] = string.format('%s', tld)
            tld_hits = tld_hits + 1
          end
        end
      end

      fun.each(inspect, urls)

      if pair_hits > 0 then
        return true, 1.0, offenders
      end
      if tld_hits > 0 then
        return true, 0.5, offenders
      end

      return false
    end,
    score = 5.0,
    group = 'url',
    description = 'Url contains both latin and non-latin characters'
  }
  rspamd_config.URL_IN_SUBJECT = {
    -- Fires on the first URL flagged as 'subject'. Weight is 0.1 for a
    -- schemaless URL, 1.0 when the URL string equals the whole subject, and
    -- 0.25 otherwise; the URL host is the option.
    callback = function(task)
      for _, url in ipairs(task:get_urls() or {}) do
        local flags = url:get_flags()
        if flags.subject then
          if flags.schemaless then
            return true, 0.1, url:get_host()
          end

          local subject = task:get_subject()
          if subject and tostring(url) == subject then
            return true, 1.0, url:get_host()
          end

          return true, 0.25, url:get_host()
        end
      end

      return false
    end,
    score = 4.0,
    group = 'subject',
    description = 'Url found in Subject'
  }
  local aliases_id = rspamd_config:register_symbol{
    type = 'prefilter',
    name = 'EMAIL_PLUS_ALIASES',
    -- Passes SMTP and MIME senders/recipients through
    -- rspamd_lua_utils.remove_email_aliases, writes the addresses back to the
    -- task when that call reports a change, and records the non-empty tags
    -- as TAGGED_FROM / TAGGED_RCPT options.
    callback = function(task)
      -- Keeps only tags that are present and non-empty
      local function non_empty(tag)
        return tag and #tag > 0
      end

      local function strip_from(addr_type)
        if not task:has_from(addr_type) then
          return
        end
        local addr = task:get_from(addr_type)[1]
        local stripped, tags = rspamd_lua_utils.remove_email_aliases(addr)
        if stripped then
          task:set_from(addr_type, addr)
          task:insert_result('TAGGED_FROM', 1.0,
            fun.totable(fun.filter(non_empty, tags)))
        end
      end

      local function strip_rcpts(addr_type)
        if not task:has_recipients(addr_type) then
          return
        end
        local changed = false
        local collected = {}
        local addrs = task:get_recipients(addr_type)
        for _, addr in ipairs(addrs) do
          local stripped, tags = rspamd_lua_utils.remove_email_aliases(addr)
          if stripped then
            changed = true
            fun.each(function(tag) table.insert(collected, tag) end,
              fun.filter(non_empty, tags))
          end
        end
        if changed then
          task:set_recipients(addr_type, addrs)
          task:insert_result('TAGGED_RCPT', 1.0, collected)
        end
      end

      strip_from('smtp')
      strip_from('mime')
      strip_rcpts('smtp')
      strip_rcpts('mime')
    end,
    priority = 150,
    description = 'Removes plus aliases from the email',
    group = 'headers',
  }
  -- Virtual symbols inserted by the EMAIL_PLUS_ALIASES prefilter
  for _, sym in ipairs({
    {'TAGGED_RCPT', 'SMTP recipients have plus tags'},
    {'TAGGED_FROM', 'SMTP from has plus tags'},
  }) do
    rspamd_config:register_symbol{
      type = 'virtual',
      parent = aliases_id,
      name = sym[1],
      description = sym[2],
      group = 'headers',
      score = 0,
    }
  end
  local check_from_display_name = rspamd_config:register_symbol{
    type = 'callback',
    name = 'FROM_DISPLAY_CALLBACK',
    -- Tries to parse an email address out of the MIME From display name. When
    -- its domain differs from the real From domain, inserts
    -- SPOOF_DISPLAY_NAME if any MIME or SMTP recipient shares that domain,
    -- otherwise FROM_NEQ_DISPLAY_NAME. Always returns false because results
    -- are inserted explicitly.
    callback = function (task)
      local from = task:get_from(2)
      if not (from and from[1] and from[1].name) then return false end

      -- See if we can parse an email address from the name
      local parsed = util.parse_mail_address(from[1].name, task:get_mempool())
      if not parsed then return false end
      if not (parsed[1] and parsed[1]['addr']) then return false end

      -- Make sure we did not mistake e.g. <something>@<name> for an email address
      if not parsed[1]['domain'] or not parsed[1]['domain']:find('%.') then return false end

      local real_domain = from[1]['domain']
      local shown_domain = parsed[1]['domain']

      -- Same domain in both places: nothing to report
      if util.strequal_caseless(real_domain, shown_domain) then
        return false
      end

      -- True when any recipient in the list has the displayed domain
      local function any_rcpt_in_domain(rcpts)
        if not rcpts then return false end
        for _, rcpt in ipairs(rcpts) do
          if rcpt['domain'] ~= '' and util.strequal_caseless(rcpt['domain'], shown_domain) then
            return true
          end
        end
        return false
      end

      -- See if the destination domain is the same as the spoof
      local mime_rcpts = task:get_recipients(2)
      local smtp_rcpts = task:get_recipients(1)
      if any_rcpt_in_domain(mime_rcpts) or any_rcpt_in_domain(smtp_rcpts) then
        task:insert_result('SPOOF_DISPLAY_NAME', 1.0, real_domain, shown_domain)
      else
        task:insert_result('FROM_NEQ_DISPLAY_NAME', 1.0, real_domain, shown_domain)
      end

      return false
    end,
  }
  -- Virtual symbols inserted by FROM_DISPLAY_CALLBACK: {name, description, score}
  for _, sym in ipairs({
    {'SPOOF_DISPLAY_NAME',
      'Display name is being used to spoof and trick the recipient', 8},
    {'FROM_NEQ_DISPLAY_NAME',
      'Display name contains an email address different to the From address', 4},
  }) do
    rspamd_config:register_symbol{
      type = 'virtual',
      parent = check_from_display_name,
      name = sym[1],
      description = sym[2],
      group = 'headers',
      score = sym[3],
    }
  end
  rspamd_config.SPOOF_REPLYTO = {
    -- Fires when a Reply-To header exists, some SMTP recipient shares the
    -- From domain, and the Reply-To domain differs from the From domain.
    -- The From and Reply-To domains are returned as options.
    callback = function (task)
      -- A Reply-To header is required
      local reply_hdrs = task:get_header_full('Reply-To')
      if not reply_hdrs or not reply_hdrs[1] then return false end
      local reply_to = reply_hdrs[1]['value']

      -- MIME From and To
      local from = task:get_from(2)
      local mime_to = task:get_recipients(2)
      if not (from and from[1] and from[1].addr) then return false end
      local sender = from[1]

      -- Handle common case for Web Contact forms of From = To
      if mime_to and mime_to[1] and mime_to[1].addr
          and util.strequal_caseless(sender.addr, mime_to[1].addr) then
        return false
      end

      -- SMTP recipients must contain From domain
      local smtp_to = task:get_recipients(1)
      if not smtp_to then return false end

      -- Try mitigate some possible FPs on mailing list posts
      if #smtp_to == 1 and util.strequal_caseless(smtp_to[1].addr, sender.addr) then
        return false
      end

      local from_domain_seen = false
      for _, rcpt in ipairs(smtp_to) do
        if util.strequal_caseless(rcpt.domain, sender.domain) then
          from_domain_seen = true
          break
        end
      end
      if not from_domain_seen then return false end

      -- Parse Reply-To header; E stands in for missing intermediate tables
      local reply_addrs = util.parse_mail_address(reply_to, task:get_mempool()) or E
      local reply_domain = (reply_addrs[1] or E).domain
      if not reply_domain then return false end

      -- Reply-To domain must be different to From domain
      if util.strequal_caseless(reply_domain, sender.domain) then
        return false
      end
      return true, sender.domain, reply_domain
    end,
    group = 'headers',
    description = 'Reply-To is being used to spoof and trick the recipient to send an off-domain reply',
    score = 6.0
  }
  rspamd_config.INFO_TO_INFO_LU = {
    -- Fires when a List-Unsubscribe header is present, the MIME From user is
    -- "info" and at least one SMTP recipient user is "info" (both compared
    -- case-insensitively).
    callback = function(task)
      if not task:get_header('List-Unsubscribe') then return false end

      local from = task:get_from('mime')
      local from_is_info = from and from[1]
        and util.strequal_caseless(from[1].user, 'info')
      if not from_is_info then
        return false
      end

      local rcpts = task:get_recipients('smtp')
      if not rcpts then return false end

      for _, rcpt in ipairs(rcpts) do
        if util.strequal_caseless(rcpt['user'], 'info') then
          return true
        end
      end

      return false
    end,
    description = 'info@ From/To address with List-Unsubscribe headers',
    group = 'headers',
    score = 2.0
  }
  -- Detects bad content-transfer-encoding for text parts
  rspamd_config.R_BAD_CTE_7BIT = {
    -- Fires on the first text part whose declared CTE is not '8bit' while the
    -- part reports raw 8-bit content. The CTE is returned as an option; parts
    -- declared as utf-8 get a reduced weight and an extra "utf8" option.
    callback = function(task)
      for _, part in ipairs(task:get_text_parts() or {}) do
        local mime_part = part:get_mimepart()
        local cte = mime_part:get_cte() or ''

        if cte ~= '8bit' and part:has_8bit_raw() then
          local _, _, attrs = mime_part:get_type_full()
          local charset = attrs and attrs.charset

          if charset and charset:lower() == "utf-8" then
            -- Reduced weight: senders often overlook that utf8 is an
            -- eight bit encoding
            return true, 0.3, {cte, "utf8"}
          end

          return true, 1.0, {cte}
        end
      end

      return false
    end,
    score = 3.5,
    description = 'Detects bad content-transfer-encoding for text parts',
    group = 'headers'
  }