You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

common.lua 17KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539
  1. --[[
  2. Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com>
  3. Copyright (c) 2019, Carsten Rosenberg <c.rosenberg@heinlein-support.de>
  4. Licensed under the Apache License, Version 2.0 (the "License");
  5. you may not use this file except in compliance with the License.
  6. You may obtain a copy of the License at
  7. http://www.apache.org/licenses/LICENSE-2.0
  8. Unless required by applicable law or agreed to in writing, software
  9. distributed under the License is distributed on an "AS IS" BASIS,
  10. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11. See the License for the specific language governing permissions and
  12. limitations under the License.
  13. ]]--
  14. --[[[
  15. -- @module lua_scanners_common
  16. -- This module contains common external scanners functions
  17. --]]
  18. local rspamd_logger = require "rspamd_logger"
  19. local rspamd_regexp = require "rspamd_regexp"
  20. local lua_util = require "lua_util"
  21. local lua_redis = require "lua_redis"
  22. local lua_magic_types = require "lua_magic/types"
  23. local fun = require "fun"
  24. local exports = {}
  --- Report that a scan found nothing.
  -- Logs at info level when `rule.log_clean` is truthy; otherwise the text is
  -- only emitted as a debug message under the `rule.name` debug module.
  -- @param task task object handed to the logging functions
  -- @param rule scanner rule; reads `log_clean`, `log_prefix` and `name`
  -- @param msg optional text, defaults to 'message or mime_part is clean'
  local function log_clean(task, rule, msg)
    local text = msg or 'message or mime_part is clean'

    if not rule.log_clean then
      lua_util.debugm(rule.name, task, '%s: %s', rule.log_prefix, text)
      return
    end

    rspamd_logger.infox(task, '%s: %s', rule.log_prefix, text)
  end
  --- Choose the symbol and weight to report for a detected threat name.
  -- `patterns` is either a map `symbol -> pattern` or an array of such maps,
  -- where a pattern is any object exposing `match(found)`. The first pattern
  -- that matches decides the symbol and the weight becomes the string '1'.
  -- @param default_sym symbol returned when nothing matches
  -- @param found threat name to test against the patterns
  -- @param patterns pattern map, array of pattern maps, or a non-table
  -- @param dyn_weight weight returned together with `default_sym`
  -- @return symbol, weight
  local function match_patterns(default_sym, found, patterns, dyn_weight)
    if type(patterns) == 'table' then
      -- A flat map is treated as a list holding that single map
      local groups = patterns[1] and patterns or { patterns }

      for _, group in ipairs(groups) do
        for candidate, matcher in pairs(group) do
          if matcher:match(found) then
            return candidate, '1'
          end
        end
      end
    end

    return default_sym, dyn_weight
  end
  --- Insert scanner findings into the task and optionally set a pre-result.
  -- Every threat name that is not whitelisted is inserted as a symbol chosen by
  -- `match_patterns`. When `rule.action` is set, the outcome is not a scanner
  -- failure and at least one threat was not whitelisted, a pre-result with the
  -- templated `rule.message` is set as well.
  -- @param task task object receiving the results
  -- @param rule scanner rule (symbols, patterns, whitelist, action, message, ...)
  -- @param vname threat name (string) or list of threat names (table)
  -- @param dyn_weight optional weight; only honoured for regular detections
  -- @param is_fail falsy for a detection, or 'fail' / 'encrypted' / 'macro'
  -- @param maybe_part optional mime part used to append the attachment name
  local function yield_result(task, rule, vname, dyn_weight, is_fail, maybe_part)
    local threats
    if type(vname) == 'string' then
      threats = { vname }
    elseif type(vname) == 'table' then
      threats = vname
    else
      -- Without a threat list there is nothing to report; iterating a nil
      -- list would raise inside the scanner callback.
      rspamd_logger.errx(task, '%s: invalid threat name of type %s, nothing to report',
          rule.log_prefix, type(vname))
      return
    end

    local patterns, symbol, threat_info

    -- This should be more generic
    if not is_fail then
      patterns = rule.patterns
      symbol = rule.symbol
      -- Keep the category and the word "found" separated in the log line
      threat_info = (rule.detection_category or 'threat') .. ' found'
      if not dyn_weight then
        dyn_weight = 1.0
      end
    elseif is_fail == 'fail' then
      patterns = rule.patterns_fail
      symbol = rule.symbol_fail
      threat_info = "FAILED with error"
      dyn_weight = 0.0
    elseif is_fail == 'encrypted' then
      patterns = rule.patterns
      symbol = rule.symbol_encrypted
      threat_info = "Scan has returned that input was encrypted"
      dyn_weight = 1.0
    elseif is_fail == 'macro' then
      patterns = rule.patterns
      symbol = rule.symbol_macro
      threat_info = "Scan has returned that input contains macros"
      dyn_weight = 1.0
    end

    local all_whitelisted = true

    for _, tm in ipairs(threats) do
      local symname, symscore = match_patterns(symbol, tm, patterns, dyn_weight)

      if rule.whitelist and rule.whitelist:get_key(tm) then
        rspamd_logger.infox(task, '%s: "%s" is in whitelist', rule.log_prefix, tm)
      else
        all_whitelisted = false
        rspamd_logger.infox(task, '%s: result - %s: "%s - score: %s"',
            rule.log_prefix, threat_info, tm, symscore)

        -- Query the file name once and reuse it for the symbol option
        local fname
        if maybe_part and rule.show_attachments then
          fname = maybe_part:get_filename()
        end

        if fname then
          task:insert_result(symname, symscore, string.format("%s|%s", tm, fname))
        else
          task:insert_result(symname, symscore, tm)
        end
      end
    end

    if rule.action and is_fail ~= 'fail' and not all_whitelisted then
      local flags
      -- Any action other than a hard reject is applied with the 'least' flag
      if rule.action ~= 'reject' then
        flags = 'least'
      end

      task:set_pre_result(rule.action,
          lua_util.template(rule.message or 'Rejected', {
            SCANNER = rule.name,
            VIRUS = table.concat(threats, '; '),
          }), rule.name, nil, nil, flags)
    end
  end
  --- Check the content length against the rule's upper size limit.
  -- @param task task object used for logging
  -- @param content value whose length (`#content`) is compared
  -- @param rule scanner rule; reads `max_size` and `log_prefix`
  -- @return false when `rule.max_size` is numeric and the content is longer,
  -- true otherwise (including when no usable limit is configured)
  local function message_not_too_large(task, content, rule)
    local limit = tonumber(rule.max_size)

    if limit and #content > limit then
      rspamd_logger.infox(task, "skip %s check as it is too large: %s (%s is allowed)",
          rule.log_prefix, #content, limit)
      return false
    end

    return true
  end
  --- Check the content length against the rule's lower size limit.
  -- @param task task object used for logging
  -- @param content value whose length (`#content`) is compared
  -- @param rule scanner rule; reads `min_size` and `log_prefix`
  -- @return false when `rule.min_size` is numeric and the content is shorter,
  -- true otherwise (including when no usable limit is configured)
  local function message_not_too_small(task, content, rule)
    local limit = tonumber(rule.min_size)

    if limit and #content < limit then
      rspamd_logger.infox(task, "skip %s check as it is too small: %s (%s is allowed)",
          rule.log_prefix, #content, limit)
      return false
    end

    return true
  end
  --- Check that at least one text part reaches the configured word count.
  -- The check is skipped (returns true) when `rule.text_part_min_words` is
  -- unset, not numeric, or not greater than zero.
  -- @param task task object; its text parts are inspected via `get_text_parts`
  -- @param rule scanner rule; reads `text_part_min_words` and `log_prefix`
  -- @return true when the check is skipped or some text part has at least
  -- the required number of words, false otherwise
  local function message_min_words(task, rule)
    -- Parse the limit once; a non-numeric value disables the check instead of
    -- raising on a nil comparison
    local min_words = tonumber(rule.text_part_min_words)
    if not min_words or min_words <= 0 then
      return true
    end

    -- Stop at the first text part that is long enough
    for _, part in ipairs(task:get_text_parts() or {}) do
      if part:get_words_count() >= min_words then
        return true
      end
    end

    rspamd_logger.infox(task, '%s: #words in all text parts is below text_part_min_words limit: %s',
        rule.log_prefix, rule.text_part_min_words)
    return false
  end
  --- Decide whether a scan is still worthwhile given the current task verdict.
  -- Only active when `rule.dynamic_scan` is set and `rule.action` is not
  -- 'reject'. The scan is aborted when the score exceeds twice the second value
  -- returned by `task:get_metric_score()`, or when a pre-result exists and the
  -- metric action is 'reject'.
  -- @param task task object queried for score, action and pre-result
  -- @param rule scanner rule; reads `dynamic_scan`, `action`, `log_prefix`
  -- @return true (optionally with an explanatory string) to scan, false to skip
  local function dynamic_scan(task, rule)
    if not rule.dynamic_scan then
      return true
    end

    if rule.action == 'reject' then
      return true, 'dynamic_scan is not possible with config `action=reject;`'
    end

    local score = task:get_metric_score()
    local action = task:get_metric_action()
    local pre_result = task:has_pre_result()

    -- ToDo: needed?
    -- An additional "postfilter and action is already reject" abort used to
    -- live here; it is disabled because it sometimes leads to FPs.
    local reason
    if score[1] > score[2] * 2 then
      reason = 'score > 2 * reject_level: ' .. score[1]
    elseif pre_result and action == 'reject' then
      reason = 'pre_result reject is set'
    end

    if reason then
      rspamd_logger.infox(task, '%s: aborting: %s', rule.log_prefix, reason)
      return false
    end

    return true, 'undecided'
  end
  --- Look up a cached verdict in Redis and continue with a real scan if needed.
  -- When the lookup is scheduled, the callback replays a cached verdict through
  -- `yield_result`; without a cached verdict it calls `fn` provided the size,
  -- word-count and dynamic-scan preconditions all hold.
  -- @param task task object
  -- @param content content whose size is checked against the rule limits
  -- @param rule scanner rule (redis_params, no_cache, prefix, limits, ...)
  -- @param digest cache key suffix identifying the content
  -- @param fn function performing the uncached scan
  -- @param maybe_part optional mime part forwarded to `yield_result`
  -- @return true when the Redis request was scheduled, false otherwise (in
  -- that case neither the callback nor `fn` is invoked by this function)
  local function need_check(task, content, rule, digest, fn, maybe_part)
    -- Captured by the callback; receives the prefix before the request is sent
    local key = digest

    local function redis_av_cb(err, data)
      local cache_hit = type(data) == 'string'

      if cache_hit then
        -- Cached value layout: threats joined by '\v', then '\t'-separated extras
        local fields = lua_util.str_split(data, '\t')
        local threats = lua_util.str_split(fields[1], '\v')
        local score = fields[2] or rule.default_score
        local verdict = threats[1]

        if verdict == 'OK' then
          lua_util.debugm(rule.name, task, '%s: got cached negative result for %s: %s',
              rule.log_prefix, key, verdict)
        elseif verdict == 'MACRO' then
          yield_result(task, rule, 'File contains macros',
              0.0, 'macro', maybe_part)
        elseif verdict == 'ENCRYPTED' then
          yield_result(task, rule, 'File is encrypted',
              0.0, 'encrypted', maybe_part)
        else
          lua_util.debugm(rule.name, task, '%s: got cached threat result for %s: %s - score: %s',
              rule.log_prefix, key, verdict, score)
          yield_result(task, rule, threats, score, false, maybe_part)
        end
      elseif err then
        rspamd_logger.errx(task, 'got error checking cache: %s', err)
      end

      -- All preconditions are evaluated (and may log) even on a cache hit
      local size_below_max = message_not_too_large(task, content, rule)
      local size_above_min = message_not_too_small(task, content, rule)
      local enough_words = message_min_words(task, rule)
      local scan_allowed = dynamic_scan(task, rule)

      if not cache_hit
          and size_below_max
          and size_above_min
          and enough_words
          and scan_allowed then
        fn()
      end
    end

    if not rule.redis_params or rule.no_cache then
      return false
    end

    key = rule.prefix .. key

    local scheduled = lua_redis.redis_make_request(task,
        rule.redis_params, -- connect params
        key, -- hash key
        false, -- is write
        redis_av_cb, -- callback
        'GET', -- command
        { key } -- arguments
    )

    if scheduled then
      return true
    end

    return false
  end
  --- Store a scan verdict in Redis.
  -- The stored value is the threat list joined by '\v', then the weight and,
  -- when attachments are shown and the part has a file name, that name, all
  -- separated by '\t'. Nothing is sent unless both `rule.redis_params` and
  -- `rule.prefix` are set.
  -- @param task task object
  -- @param digest cache key suffix identifying the content
  -- @param rule scanner rule (redis_params, prefix, cache_expire, ...)
  -- @param to_save verdict string or list of threat names
  -- @param dyn_weight optional weight, defaults to 1.0
  -- @param maybe_part optional mime part providing the attachment name
  -- @return always false
  local function save_cache(task, digest, rule, to_save, dyn_weight, maybe_part)
    local weight = dyn_weight or 1.0

    local verdict = to_save
    if type(verdict) == 'table' then
      verdict = table.concat(verdict, '\v')
    end

    local fields = { verdict, weight }
    local fname = maybe_part and rule.show_attachments and maybe_part:get_filename()
    if fname then
      fields[#fields + 1] = fname
    end
    local value = table.concat(fields, '\t')

    if rule.redis_params and rule.prefix then
      local key = rule.prefix .. digest

      -- Only reports the outcome of the write
      local function redis_set_cb(err)
        if err then
          rspamd_logger.errx(task, 'failed to save %s cache for %s -> "%s": %s',
              rule.detection_category, verdict, key, err)
        else
          lua_util.debugm(rule.name, task, '%s: saved cached result for %s: %s - score %s - ttl %s',
              rule.log_prefix, key, verdict, weight, rule.cache_expire)
        end
      end

      lua_redis.redis_make_request(task,
          rule.redis_params, -- connect params
          key, -- hash key
          true, -- is write
          redis_set_cb, -- callback
          'SETEX', -- command
          { key, rule.cache_expire or 0, value }
      )
    end

    return false
  end
  --- Compile configured pattern strings with `rspamd_regexp.create_cached`.
  -- Accepts either a map `key -> pattern` or an array of such maps and returns
  -- a table of the same shape holding the compiled values. Array entries that
  -- are not tables become empty tables.
  -- @param patterns map of patterns or array of pattern maps
  -- @return table mirroring `patterns` with compiled values
  local function create_regex_table(patterns)
    local function compile_set(set)
      local compiled = {}
      for name, expr in pairs(set) do
        compiled[name] = rspamd_regexp.create_cached(expr)
      end
      return compiled
    end

    if not patterns[1] then
      return compile_set(patterns)
    end

    local result = {}
    for idx, entry in ipairs(patterns) do
      if type(entry) == 'table' then
        result[idx] = compile_set(entry)
      else
        result[idx] = {}
      end
    end
    return result
  end
  --- Test a value against a set of filter patterns.
  -- `patterns` is either a map of patterns or an array of pattern arrays. With
  -- `pat_type == 'ext'` a pattern matches when its string form equals the
  -- string form of `found`; with `pat_type == 'regex'` the pattern's `match`
  -- method is consulted. Any other `pat_type` never matches.
  -- @param task unused
  -- @param rule unused
  -- @param found value to test; a falsy value never matches
  -- @param patterns pattern map or array of pattern arrays
  -- @param pat_type 'ext' or 'regex'
  -- @return true on the first match, false otherwise
  local function match_filter(task, rule, found, patterns, pat_type)
    if not found or type(patterns) ~= 'table' then
      return false
    end

    local function hits(pat)
      if pat_type == 'ext' then
        return tostring(pat) == tostring(found)
      end
      if pat_type == 'regex' then
        return pat:match(found)
      end
      return false
    end

    if patterns[1] then
      -- Array of pattern arrays
      for _, group in ipairs(patterns) do
        for _, pat in ipairs(group) do
          if hits(pat) then
            return true
          end
        end
      end
    else
      -- Plain map of patterns
      for _, pat in pairs(patterns) do
        if hits(pat) then
          return true
        end
      end
    end

    return false
  end
  -- Borrowed from mime_types.lua.
  --- Extract the trailing extensions of a file name.
  -- The name is split on '.'; an extension is only reported when at least one
  -- more component precedes it.
  -- @param fname file name to split
  -- @return last extension lowercased (or nil), the one before it lowercased
  -- (or nil), and the table of all name components
  local function gen_extension(fname)
    local parts = lua_util.str_split(fname, '.')
    local count = #parts
    local last, before_last

    if count > 1 then
      last = string.lower(parts[count])
    end
    if count > 2 then
      before_last = string.lower(parts[count - 1])
    end

    return last, before_last, parts
  end
  --- Select the mime parts of a task that should be passed to a scanner.
  -- With `rule.scan_all_mime_parts == false` a part is selected when its file
  -- name, declared content type, detected type or (for archives) a contained
  -- file name matches `rule.mime_parts_filter_ext` / `mime_parts_filter_regex`.
  -- Text and image parts can additionally be selected via `scan_text_mime` /
  -- `scan_image_mime`. In every other case attachments and parts whose detected
  -- type is not marked `av_check = false` are selected.
  -- @param task task object whose parts are filtered
  -- @param rule scanner rule holding the filter settings
  -- @return result of `fun.filter` over `task:get_parts()`
  local function check_parts_match(task, rule)
    -- Classify a file name against the extension and filename-regex filters.
    -- Returns 'ext', 'regex' or nil, followed by the two extracted extensions.
    local function classify_name(name)
      local ext, ext2 = gen_extension(name)
      if match_filter(task, rule, ext, rule.mime_parts_filter_ext, 'ext')
          or match_filter(task, rule, ext2, rule.mime_parts_filter_ext, 'ext') then
        return 'ext', ext, ext2
      end
      if match_filter(task, rule, name, rule.mime_parts_filter_regex, 'regex') then
        return 'regex', ext, ext2
      end
      return nil, ext, ext2
    end

    local function part_selected(part)
      local mtype, msubtype = part:get_type()
      local detected_ext = part:get_detected_ext()
      local fname = part:get_filename()
      local filters_only = (rule.scan_all_mime_parts == false)

      if filters_only then
        -- check file extension and filename regex matching
        if fname ~= nil then
          local how, ext, ext2 = classify_name(fname)
          if how == 'ext' then
            lua_util.debugm(rule.name, task, '%s: extension matched: |%s|%s|', rule.log_prefix, ext, ext2)
            return true
          elseif how == 'regex' then
            lua_util.debugm(rule.name, task, '%s: filename regex matched', rule.log_prefix)
            return true
          end
        end

        -- check content type string regex matching
        if mtype ~= nil and msubtype ~= nil then
          local ct = string.format('%s/%s', mtype, msubtype):lower()
          if match_filter(task, rule, ct, rule.mime_parts_filter_regex, 'regex') then
            lua_util.debugm(rule.name, task, '%s: regex content-type: %s', rule.log_prefix, ct)
            return true
          end
        end

        -- check detected content type (libmagic) regex matching
        if detected_ext then
          local magic = lua_magic_types[detected_ext] or {}
          if match_filter(task, rule, detected_ext, rule.mime_parts_filter_ext, 'ext') then
            lua_util.debugm(rule.name, task, '%s: detected extension matched: |%s|', rule.log_prefix, detected_ext)
            return true
          elseif magic.ct and match_filter(task, rule, magic.ct, rule.mime_parts_filter_regex, 'regex') then
            lua_util.debugm(rule.name, task, '%s: regex detected libmagic content-type: %s',
                rule.log_prefix, magic.ct)
            return true
          end
        end

        -- check filenames in archives
        if part:is_archive() then
          local archive = part:get_archive()
          local entries = archive:get_files_full(1000)
          for _, entry in ipairs(entries) do
            local how, ext, ext2 = classify_name(entry.name)
            if how == 'ext' then
              lua_util.debugm(rule.name, task, '%s: extension matched in archive: |%s|%s|', rule.log_prefix, ext, ext2)
              return true
            elseif how == 'regex' then
              lua_util.debugm(rule.name, task, '%s: filename regex matched in archive', rule.log_prefix)
              return true
            end
          end
        end
      end

      -- check text_part has more words than text_part_min_words_check
      if rule.scan_text_mime and rule.text_part_min_words and part:is_text() and
          part:get_words_count() >= tonumber(rule.text_part_min_words) then
        return true
      end

      if rule.scan_image_mime and part:is_image() then
        return true
      end

      if not filters_only then
        local checkable = (part:is_attachment() and (not part:is_image() or rule.scan_image_mime))
        local wanted
        if detected_ext then
          -- We know what to scan!
          local magic = lua_magic_types[detected_ext] or {}
          wanted = magic.av_check ~= false or checkable
        else
          -- Just rely on attachment property
          wanted = checkable
        end
        if wanted then
          return true
        end
      end

      return false
    end

    return fun.filter(part_selected, task:get_parts())
  end
  --- Tell whether the task verdict is already decisive.
  -- Never decisive when `rule.action` is 'reject'. Otherwise decisive when a
  -- postfilter rule sees a 'reject' action, when the score exceeds twice the
  -- second value returned by `task:get_metric_score()`, or when a pre-result is
  -- set and the action is 'reject'.
  -- @param task task object queried for score, action and pre-result
  -- @param rule scanner rule; reads `action` and `symbol_type`
  -- @return boolean verdict and a string explaining it
  local function check_metric_results(task, rule)
    if rule.action == 'reject' then
      return false, 'dynamic_scan is not possible with config `action=reject;`'
    end

    local score = task:get_metric_score()
    local action = task:get_metric_action()
    local pre_result = task:has_pre_result()
    local rejecting = (action == 'reject')

    if rule.symbol_type == 'postfilter' and rejecting then
      return true, 'result is already reject'
    end
    if score[1] > score[2] * 2 then
      return true, 'score > 2 * reject_level: ' .. score[1]
    end
    if pre_result and rejecting then
      return true, 'pre_result reject is set'
    end

    return false, 'undecided'
  end
  -- Public interface of the module
  for name, fn in pairs({
    log_clean = log_clean,
    yield_result = yield_result,
    match_patterns = match_patterns,
    condition_check_and_continue = need_check,
    save_cache = save_cache,
    create_regex_table = create_regex_table,
    check_parts_match = check_parts_match,
    check_metric_results = check_metric_results,
  }) do
    exports[name] = fn
  end

  -- Calling the module table copies every exported function into the global
  -- scope. A global that already exists is kept unless `override` is truthy;
  -- either way the clash is reported via print().
  setmetatable(exports, {
    __call = function(t, override)
      for name, value in pairs(t) do
        if _G[name] == nil then
          _G[name] = value
        else
          local msg = 'function ' .. name .. ' already exists in global scope.'
          if override then
            _G[name] = value
            print('WARNING: ' .. msg .. ' Overwritten.')
          else
            print('NOTICE: ' .. msg .. ' Skipped.')
          end
        end
      end
    end,
  })
  471. return exports