You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

hfilter.lua 16KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514
  1. --[[
  2. Copyright (c) 2011-2016, Vsevolod Stakhov <vsevolod@highsecure.ru>
  3. Copyright (c) 2013-2015, Alexey Savelyev <info@homeweb.ru>
  4. Licensed under the Apache License, Version 2.0 (the "License");
  5. you may not use this file except in compliance with the License.
  6. You may obtain a copy of the License at
  7. http://www.apache.org/licenses/LICENSE-2.0
  8. Unless required by applicable law or agreed to in writing, software
  9. distributed under the License is distributed on an "AS IS" BASIS,
  10. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11. See the License for the specific language governing permissions and
  12. limitations under the License.
  13. ]]--
  14. -- Weight for checks_hellohost and checks_hello: 5 - very hard, 4 - hard, 3 - medium, 2 - low, 1 - very low.
  15. -- Of the HFILTER_HELO_* and HFILTER_HOSTNAME_* symbols, only the one with the maximum weight is selected when several would trigger.
  16. --local dumper = require 'pl.pretty'.dump
  17. local rspamd_regexp = require "rspamd_regexp"
  18. local rspamd_logger = require "rspamd_logger"
  -- HELO value sent by the local rspamc client; it is exempt from HELO checks.
  local rspamc_local_helo = "rspamc.local"

  -- Regexp => weight table applied to both the HELO string and the reverse
  -- host name. Every keyword comes in three flavours: delimited by '.'/'-' on
  -- both sides, followed by a digit, or preceded by a digit.
  -- The DSL patterns use the class [ax] (optional "a" or "x" prefix); the
  -- former spelling [a|x] additionally accepted a literal '|' character.
  local checks_hellohost = {
    ['[.-]gprs[.-]'] = 5, ['gprs[.-][0-9]'] = 5, ['[0-9][.-]?gprs'] = 5,
    ['[.-]cdma[.-]'] = 5, ['cdma[.-][0-9]'] = 5, ['[0-9][.-]?cdma'] = 5,
    ['[.-]homeuser[.-]'] = 5, ['homeuser[.-][0-9]'] = 5, ['[0-9][.-]?homeuser'] = 5,
    ['[.-]dhcp[.-]'] = 5, ['dhcp[.-][0-9]'] = 5, ['[0-9][.-]?dhcp'] = 5,
    ['[.-]catv[.-]'] = 5, ['catv[.-][0-9]'] = 5, ['[0-9][.-]?catv'] = 5,
    ['[.-]wifi[.-]'] = 5, ['wifi[.-][0-9]'] = 5, ['[0-9][.-]?wifi'] = 5,
    ['[.-]dial-?up[.-]'] = 5, ['dial-?up[.-][0-9]'] = 5, ['[0-9][.-]?dial-?up'] = 5,
    ['[.-]dynamic[.-]'] = 5, ['dynamic[.-][0-9]'] = 5, ['[0-9][.-]?dynamic'] = 5,
    ['[.-]dyn[.-]'] = 5, ['dyn[.-][0-9]'] = 5, ['[0-9][.-]?dyn'] = 5,
    ['[.-]clients?[.-]'] = 1, ['clients?[.-][0-9]{2,}'] = 5, ['[0-9]{3,}[.-]?clients?'] = 5,
    ['[.-]dynip[.-]'] = 5, ['dynip[.-][0-9]'] = 5, ['[0-9][.-]?dynip'] = 5,
    ['[.-]broadband[.-]'] = 5, ['broadband[.-][0-9]'] = 5, ['[0-9][.-]?broadband'] = 5,
    ['[.-]broad[.-]'] = 5, ['broad[.-][0-9]'] = 5, ['[0-9][.-]?broad'] = 5,
    ['[.-]bredband[.-]'] = 5, ['bredband[.-][0-9]'] = 5, ['[0-9][.-]?bredband'] = 5,
    ['[.-]nat[.-]'] = 5, ['nat[.-][0-9]'] = 5, ['[0-9][.-]?nat'] = 5,
    ['[.-]pptp[.-]'] = 5, ['pptp[.-][0-9]'] = 5, ['[0-9][.-]?pptp'] = 5,
    ['[.-]pppoe[.-]'] = 5, ['pppoe[.-][0-9]'] = 5, ['[0-9][.-]?pppoe'] = 5,
    ['[.-]ppp[.-]'] = 5, ['ppp[.-][0-9]'] = 5, ['[0-9][.-]?ppp'] = 5,
    ['[.-]modem[.-]'] = 5, ['modem[.-][0-9]'] = 5, ['[0-9][.-]?modem'] = 5,
    ['[.-]cablemodem[.-]'] = 5, ['cablemodem[.-][0-9]'] = 5, ['[0-9][.-]?cablemodem'] = 5,
    ['[.-]comcast[.-]'] = 5, ['comcast[.-][0-9]'] = 5, ['[0-9][.-]?comcast'] = 5,
    ['[.-][ax]?dsl-dynamic[.-]'] = 5, ['[ax]?dsl-dynamic[.-]?[0-9]'] = 5, ['[0-9][.-]?[ax]?dsl-dynamic'] = 5,
    ['[.-][ax]?dsl[.-]'] = 4, ['[ax]?dsl[.-]?[0-9]'] = 4, ['[0-9][.-]?[ax]?dsl'] = 4,
    ['[.-][ax]?dsl-line[.-]'] = 4, ['[ax]?dsl-line[.-]?[0-9]'] = 4, ['[0-9][.-]?[ax]?dsl-line'] = 4,
    ['[.-]in-?addr[.-]'] = 4, ['in-?addr[.-][0-9]'] = 4, ['[0-9][.-]?in-?addr'] = 4,
    ['[.-]pool[.-]'] = 4, ['pool[.-][0-9]'] = 4, ['[0-9][.-]?pool'] = 4,
    ['[.-]fibertel[.-]'] = 4, ['fibertel[.-][0-9]'] = 4, ['[0-9][.-]?fibertel'] = 4,
    ['[.-]fbx[.-]'] = 4, ['fbx[.-][0-9]'] = 4, ['[0-9][.-]?fbx'] = 4,
    ['[.-]unused-addr[.-]'] = 3, ['unused-addr[.-][0-9]'] = 3, ['[0-9][.-]?unused-addr'] = 3,
    ['[.-]cable[.-]'] = 3, ['cable[.-][0-9]'] = 3, ['[0-9][.-]?cable'] = 3,
    ['[.-]kabel[.-]'] = 3, ['kabel[.-][0-9]'] = 3, ['[0-9][.-]?kabel'] = 3,
    ['[.-]host[.-]'] = 2, ['host[.-][0-9]'] = 2, ['[0-9][.-]?host'] = 2,
    ['[.-]customers?[.-]'] = 1, ['customers?[.-][0-9]'] = 1, ['[0-9][.-]?customers?'] = 1,
    ['[.-]user[.-]'] = 1, ['user[.-][0-9]'] = 1, ['[0-9][.-]?user'] = 1,
    ['[.-]peer[.-]'] = 1, ['peer[.-][0-9]'] = 1, ['[0-9][.-]?peer'] = 1
  }
  -- Regexp => weight table applied to the HELO string only.
  local checks_hello = {
    ['^[^\\.]+$'] = 5, -- for helo=COMPUTER, ANNA, etc... Without dot in helo
    ['^(dsl)?(device|speedtouch)\\.lan$'] = 5,
    -- Non-public / pseudo top-level suffixes. The dot inside "in-addr.arpa"
    -- is escaped so that it only matches a literal dot.
    ['\\.(lan|local|home|localdomain|intra|in-addr\\.arpa|priv|online|user|veloxzon)$'] = 5
  }
  -- Regexps for HELO values that are an IP literal from a range which must
  -- never show up as a public sender. The hfilter callback inserts
  -- HFILTER_HELO_BADIP on the first match; the weights are not used there.
  local checks_hello_badip = {
    ['^0\\.'] = 5,
    ['^::1$'] = 5, -- loopback ipv6
    ['^127\\.'] = 5, -- loopback ipv4
    ['^10\\.'] = 5,
    ['^192\\.168\\.'] = 5, -- local ipv4
    ['^172\\.1[6-9]\\.'] = 5,
    ['^172\\.2[0-9]\\.'] = 5,
    ['^172\\.3[01]\\.'] = 5, -- local ipv4
    ['^169\\.254\\.'] = 5, -- link-local ipv4
    ['^192\\.0\\.0\\.'] = 5, -- IETF Protocol
    ['^192\\.88\\.99\\.'] = 5, -- RFC3068
    -- RFC6598 shared address space 100.64.0.0/10 (100.64.x.x - 100.127.x.x).
    -- The dot after "100" is escaped and the last pattern no longer demands
    -- a four-digit octet, which made 100.120 - 100.127 unmatchable.
    ['^100\\.6[4-9]\\.'] = 5,
    ['^100\\.[7-9]\\d\\.'] = 5,
    ['^100\\.1[01]\\d\\.'] = 5,
    ['^100\\.12[0-7]\\.'] = 5,
    -- Any ipv4 address ending in .255; octets may have one to three digits
    ['^\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.255$'] = 5,
    ['^192\\.0\\.2\\.'] = 5,
    ['^198\\.51\\.100\\.'] = 5,
    ['^203\\.0\\.113\\.'] = 5, -- sample (documentation) networks
    ['^fe[89ab][0-9a-f]::'] = 5,
    ['^fe[cdf][0-9a-f]:'] = 5, -- local ipv6 (fe80:: - febf::, fec0:: - feff::)
    ['^2001:db8::'] = 5, -- reserved RFC 3849 for ipv6
    ['^fc00::'] = 5,
    -- ipv6 multicast ffXX::, XX being two hex digits (was the literal "ffxx")
    ['^ff[0-9a-f][0-9a-f]::'] = 5
  }
  -- Regexps recognising a HELO that is nothing but an IP address.
  -- Long-bracket strings are used so the backslashes need no doubling.
  local checks_hello_bareip = {
    [[^\d+[x.-]\d+[x.-]\d+[x.-]\d+$]], -- four numbers separated by '.', '-' or 'x'
    [[^[0-9a-f]+:]], -- leading hex group followed by ':' (ipv6)
  }
  -- Cache of compiled regexp objects, keyed by the pattern text.
  local compiled_regexp = {}

  -- Per-group switches; every check group is off until the 'hfilter'
  -- options section turns it on.
  local config = {
    helo_enabled = false,
    hostname_enabled = false,
    from_enabled = false,
    rcpt_enabled = false,
    mid_enabled = false,
    url_enabled = false,
  }
  -- Match `str` against the pattern `regexp_text`, compiled with the 'i' flag.
  -- Compiled objects are memoised in `compiled_regexp`. Returns whatever the
  -- regexp object's match() returns, or false when no regexp object could be
  -- created for the pattern.
  local function check_regexp(str, regexp_text)
    local re = compiled_regexp[regexp_text]
    if not re then
      re = rspamd_regexp.create(regexp_text, 'i')
      compiled_regexp[regexp_text] = re
    end
    if not re then
      return false
    end
    return re:match(str)
  end
  -- Tell whether `domain` matches the FQDN pattern below (4 to 253 characters,
  -- dot-separated labels that neither start nor end with '-').
  -- Always returns a real boolean.
  local function check_fqdn(domain)
    local fqdn_pattern = '(?=^.{4,253}$)(^((?!-)[a-zA-Z0-9-]{1,63}(?<!-)\\.)+[a-zA-Z0-9-]{2,63}\\.?$)'
    local matched = check_regexp(domain, fqdn_pattern)
    return matched and true or false
  end
  -- Validate a host name and schedule DNS lookups for it, inserting
  -- 'HFILTER_<symbol_suffix>_*' symbols into the task as problems are found.
  --
  -- task: task object the symbols are inserted into and whose resolver is used
  -- host: host for check (nil makes the function return false immediately)
  -- symbol_suffix: suffix for symbol, e.g. 'HELO', 'FROMHOST' or 'MID'
  -- eq_ip: ip for comparing or empty string
  -- eq_host: host for comparing or empty string
  --
  -- Returns false when `host` is nil and true otherwise. Symbols produced by
  -- the DNS callbacks are inserted asynchronously, after this function returned.
  local function check_host(task, host, symbol_suffix, eq_ip, eq_host)
    local failed_address = 0 -- number of failed A/AAAA lookups for `host`
    local resolved_address = {} -- textual addresses `host` resolved to

    -- MX lookup callback: every named MX must resolve via A or AAAA.
    local function check_host_cb_mx(_, _, results)
      task:inc_dns_req()
      if not results then
        task:insert_result('HFILTER_' .. symbol_suffix .. '_NORES_A_OR_MX', 1.0)
        return
      end

      for _, mx in pairs(results) do
        if mx['name'] then
          -- Counted per MX entry and shared by its A and AAAA callbacks
          local failed_mx_address = 0
          local function check_host_cb_mx_a(_, _, mx_results)
            task:inc_dns_req()
            if not mx_results then
              failed_mx_address = failed_mx_address + 1
            end
            -- Both the A and the AAAA lookup failed for this MX
            if failed_mx_address >= 2 then
              task:insert_result('HFILTER_' .. symbol_suffix .. '_NORESOLVE_MX', 1.0)
            end
          end

          task:get_resolver():resolve('a', {
            task = task,
            name = mx['name'],
            callback = check_host_cb_mx_a
          })
          task:get_resolver():resolve('aaaa', {
            task = task,
            name = mx['name'],
            callback = check_host_cb_mx_a
          })
        end
      end
    end

    -- Shared callback of the A and AAAA lookups for `host`.
    local function check_host_cb_a(_, _, results)
      task:inc_dns_req()
      if not results then
        failed_address = failed_address + 1
      else
        for _, result in pairs(results) do
          table.insert(resolved_address, result:to_string())
        end
      end

      if failed_address >= 2 then
        -- No A or AAAA records
        -- NOTE(review): both lookups failed here, so resolved_address is
        -- empty and the comparison below can never succeed; the _IP_A symbol
        -- is therefore inserted whenever eq_ip is given. Kept as is - confirm
        -- whether the comparison was meant to run after a successful lookup.
        if eq_ip and eq_ip ~= '' then
          for _, result in pairs(resolved_address) do
            if result == eq_ip then
              return true
            end
          end
          task:insert_result('HFILTER_' .. symbol_suffix .. '_IP_A', 1.0)
        end
        -- Fall back to MX records
        task:get_resolver():resolve_mx({
          task = task,
          name = host,
          callback = check_host_cb_mx
        })
      end
    end

    if not host then
      return false
    end
    host = string.lower(host)

    if eq_host then
      eq_host = string.lower(eq_host)
    else
      eq_host = ''
    end

    if check_fqdn(host) then
      -- Resolve unless `host` equals the reference host name. The previous
      -- test used "eq_host ~= 'unknown'", which made the whole condition
      -- true for every input.
      if eq_host == '' or eq_host == 'unknown' or eq_host ~= host then
        task:get_resolver():resolve('a', {
          task = task,
          name = host,
          callback = check_host_cb_a
        })
        -- Check ipv6 as well
        task:get_resolver():resolve('aaaa', {
          task = task,
          name = host,
          callback = check_host_cb_a
        })
      end
    else
      task:insert_result('HFILTER_' .. symbol_suffix .. '_NOT_FQDN', 1.0)
    end

    return true
  end
  212. --
  -- Return the largest weight among the patterns of `checks` (a
  -- regexp => weight table) that match `str`, or 0 when none matches.
  -- Independent of the iteration order of pairs().
  local function max_matching_weight(str, checks)
    local best = 0
    for regexp, weight in pairs(checks) do
      -- Patterns that cannot raise the result are not evaluated at all
      if weight > best and check_regexp(str, regexp) then
        best = weight
      end
    end
    return best
  end

  -- Insert HFILTER_URL_ONLY (and HFILTER_URL_ONELINE for one-line parts) when
  -- more than 80% of the length of `part` is taken by `url_len`.
  -- The score grows linearly with the ratio and is capped at 1.0.
  local function check_url_share(task, part, url_len)
    local plen = part:get_length()
    if url_len > 0 and plen > 0 then
      local rel = url_len / plen
      if rel > 0.8 then
        task:insert_result('HFILTER_URL_ONLY', math.min(1.0, (rel - 0.8) * 5.0))
        local lines = part:get_lines_count()
        if lines > 0 and lines < 2 then
          task:insert_result('HFILTER_URL_ONELINE', 1.00)
        end
      end
    end
  end

  -- Symbol callback: runs every enabled check group (url, helo, hostname,
  -- from, rcpt, mid) against `task` and inserts the matching HFILTER_*
  -- symbols. For authenticated users only the url checks are run.
  -- Always returns false.
  local function hfilter(task)
    -- Links checks
    if config['url_enabled'] then
      local parts = task:get_text_parts()
      if parts then
        -- Remember the last html part and the last non-html part
        local plain_text_part, html_text_part
        for _, p in ipairs(parts) do
          if p:is_html() then
            html_text_part = p
          else
            plain_text_part = p
          end
        end

        if html_text_part then
          local hc = html_text_part:get_html()
          if hc then
            -- Sum the lengths reported for every <a> tag
            local url_len = 0
            hc:foreach_tag('a', function(_, len)
              url_len = url_len + len
              return false
            end)
            check_url_share(task, html_text_part, url_len)
          end
        elseif plain_text_part then
          check_url_share(task, plain_text_part, plain_text_part:get_urls_length())
        end
      end
    end

    --No more checks for auth user
    if task:get_user() ~= nil then
      return false
    end

    local ip = false
    local rip = task:get_from_ip()
    if rip and rip:is_valid() then
      ip = rip:to_string()
    end

    -- Fetched at function scope: the MAIL FROM checks below need it as well.
    -- It used to be local to the HELO branch, so the later reference read an
    -- undefined global and HFILTER_FROM_BOUNCE could never be inserted.
    local helo = task:get_helo()

    -- Check's HELO
    local weight_helo = 0
    if config['helo_enabled'] and helo and helo ~= rspamc_local_helo then
      -- Strip the square brackets of address literals such as [1.2.3.4]
      local helo_name = string.gsub(helo, '[%[%]]', '')

      -- Regexp check HELO (checks_hello_badip)
      local find_badip = false
      for regexp in pairs(checks_hello_badip) do
        if check_regexp(helo_name, regexp) then
          task:insert_result('HFILTER_HELO_BADIP', 1.0)
          find_badip = true
          break
        end
      end

      -- Regexp check HELO (checks_hello_bareip)
      local find_bareip = false
      if not find_badip then
        for _, regexp in ipairs(checks_hello_bareip) do
          if check_regexp(helo_name, regexp) then
            task:insert_result('HFILTER_HELO_BAREIP', 1.0)
            find_bareip = true
            break
          end
        end
      end

      if not find_badip and not find_bareip then
        -- Regexp check HELO (checks_hello and checks_hellohost): keep the
        -- maximum weight instead of the first match in pairs() order
        weight_helo = math.max(
          max_matching_weight(helo_name, checks_hello),
          max_matching_weight(helo_name, checks_hellohost))

        --FQDN check HELO
        if ip and weight_helo == 0 then
          check_host(task, helo_name, 'HELO', ip)
        end
      end
    end

    -- Check's HOSTNAME
    local weight_hostname = 0
    if config['hostname_enabled'] then
      local hostname = task:get_hostname()
      if hostname then
        if hostname == 'unknown' then
          task:insert_result('HFILTER_HOSTNAME_UNKNOWN', 1.00)
        else
          weight_hostname = max_matching_weight(hostname, checks_hellohost)
        end
      end
    end

    --Insert weight's for HELO or HOSTNAME (only the heavier one; HELO on a tie)
    if weight_helo > 0 and weight_helo >= weight_hostname then
      task:insert_result('HFILTER_HELO_' .. weight_helo, 1.0)
    elseif weight_hostname > 0 and weight_hostname > weight_helo then
      task:insert_result('HFILTER_HOSTNAME_' .. weight_hostname, 1.0)
    end

    -- MAILFROM checks --
    local frombounce = false
    if config['from_enabled'] then
      local from = task:get_from(1)
      if from then
        --FROM host check
        for _, fr in ipairs(from) do
          local fr_split = rspamd_str_split(fr['addr'], '@')
          if fr_split and #fr_split == 2 then
            check_host(task, fr_split[2], 'FROMHOST', '', '')
            if fr_split[1] == 'postmaster' then
              frombounce = true
            end
          end
        end
      else
        -- No sender at all: treated as a bounce unless it comes from rspamc
        if helo and helo ~= rspamc_local_helo then
          task:insert_result('HFILTER_FROM_BOUNCE', 1.00)
          frombounce = true
        end
      end
    end

    -- Recipients checks --
    if config['rcpt_enabled'] then
      local rcpt = task:get_recipients()
      -- A bounce addressed to more than one recipient
      if rcpt and frombounce and #rcpt > 1 then
        task:insert_result('HFILTER_RCPT_BOUNCEMOREONE', 1.00)
      end
    end

    --Message ID host check
    if config['mid_enabled'] then
      local message_id = task:get_message_id()
      if message_id then
        local mid_split = rspamd_str_split(message_id, '@')
        -- Right-hand sides containing "local" are not checked
        if mid_split and #mid_split == 2 and not string.find(mid_split[2], 'local') then
          check_host(task, mid_split[2], 'MID')
        end
      end
    end

    return false
  end
  -- Filled further down with the symbols of every enabled check group.
  local symbols_enabled = {}

  -- Symbols of the HELO checks
  local symbols_helo = {
    "HFILTER_HELO_BAREIP", "HFILTER_HELO_BADIP",
    "HFILTER_HELO_1", "HFILTER_HELO_2", "HFILTER_HELO_3",
    "HFILTER_HELO_4", "HFILTER_HELO_5",
    "HFILTER_HELO_NORESOLVE_MX", "HFILTER_HELO_NORES_A_OR_MX",
    "HFILTER_HELO_IP_A", "HFILTER_HELO_NOT_FQDN",
  }

  -- Symbols of the reverse host name checks
  local symbols_hostname = {
    "HFILTER_HOSTNAME_1", "HFILTER_HOSTNAME_2", "HFILTER_HOSTNAME_3",
    "HFILTER_HOSTNAME_4", "HFILTER_HOSTNAME_5",
    "HFILTER_HOSTNAME_UNKNOWN",
  }

  -- Symbols of the recipient checks
  local symbols_rcpt = { "HFILTER_RCPT_BOUNCEMOREONE" }

  -- Symbols of the Message-ID host checks
  local symbols_mid = {
    "HFILTER_MID_NORESOLVE_MX", "HFILTER_MID_NORES_A_OR_MX",
    "HFILTER_MID_NOT_FQDN",
  }

  -- Symbols of the link checks
  local symbols_url = { "HFILTER_URL_ONLY", "HFILTER_URL_ONELINE" }

  -- Symbols of the MAIL FROM checks
  local symbols_from = {
    "HFILTER_FROMHOST_NORESOLVE_MX", "HFILTER_FROMHOST_NORES_A_OR_MX",
    "HFILTER_FROMHOST_NOT_FQDN",
    "HFILTER_FROM_BOUNCE",
  }
  -- Overlay the defaults in `config` with whatever the 'hfilter' options
  -- section provides; a missing section leaves the defaults untouched.
  local opts = rspamd_config:get_all_opt('hfilter')
  for option, value in pairs(opts or {}) do
    config[option] = value
  end
  -- Append the elements of array `a` to the end of array `t`, in order.
  local function append_t(t, a)
    for _, item in ipairs(a) do
      t[#t + 1] = item
    end
  end
  -- Config switch => symbols it enables. Kept as an array so the symbols are
  -- collected in a fixed order.
  local symbol_groups = {
    { 'helo_enabled', symbols_helo },
    { 'hostname_enabled', symbols_hostname },
    { 'from_enabled', symbols_from },
    { 'rcpt_enabled', symbols_rcpt },
    { 'mid_enabled', symbols_mid },
    { 'url_enabled', symbols_url },
  }
  for _, group in ipairs(symbol_groups) do
    if config[group[1]] then
      append_t(symbols_enabled, group[2])
    end
  end

  --dumper(symbols_enabled)
  -- Register the callback only when at least one check group is enabled
  if #symbols_enabled > 0 then
    rspamd_config:register_symbols(hfilter, 1.0, "HFILTER", symbols_enabled)
  end