You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

common.lua 16KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508
  1. --[[
  2. Copyright (c) 2018, Vsevolod Stakhov <vsevolod@highsecure.ru>
  3. Copyright (c) 2019, Carsten Rosenberg <c.rosenberg@heinlein-support.de>
  4. Licensed under the Apache License, Version 2.0 (the "License");
  5. you may not use this file except in compliance with the License.
  6. You may obtain a copy of the License at
  7. http://www.apache.org/licenses/LICENSE-2.0
  8. Unless required by applicable law or agreed to in writing, software
  9. distributed under the License is distributed on an "AS IS" BASIS,
  10. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11. See the License for the specific language governing permissions and
  12. limitations under the License.
  13. ]]--
  14. --[[[
  15. -- @module lua_scanners_common
  16. -- This module contains common external scanners functions
  17. --]]
  18. local rspamd_logger = require "rspamd_logger"
  19. local rspamd_regexp = require "rspamd_regexp"
  20. local lua_util = require "lua_util"
  21. local lua_redis = require "lua_redis"
  22. local lua_magic_types = require "lua_magic/types"
  23. local fun = require "fun"
  24. local exports = {}
  --- Report a clean scan result for a rule.
  -- The message goes to `rspamd_logger.infox` when `rule.log_clean` is set and
  -- to `lua_util.debugm` (under `rule.name`) otherwise.
  -- @param task object handed through to the logging call
  -- @param rule table providing `log_clean`, `log_prefix` and `name`
  -- @param msg optional text; a default sentence is used when it is omitted
  local function log_clean(task, rule, msg)
    local text = msg or 'message or mime_part is clean'

    if not rule.log_clean then
      lua_util.debugm(rule.name, task, '%s: %s', rule.log_prefix, text)
      return
    end

    rspamd_logger.infox(task, '%s: %s', rule.log_prefix, text)
  end
  --- Choose the symbol to report for a detected name.
  -- `patterns` is either a map `symbol -> pattern object` or a list of such
  -- maps (detected by the presence of index 1). The first pattern whose
  -- `match` method accepts `found` decides the symbol, and the weight returned
  -- in that case is the string '1'.
  -- @param default_sym symbol returned when nothing matches
  -- @param found value passed to each pattern's `match` method
  -- @param patterns map, list of maps, or a non-table value (then ignored)
  -- @param dyn_weight weight returned together with `default_sym`
  -- @return symbol, weight
  local function match_patterns(default_sym, found, patterns, dyn_weight)
    if type(patterns) == 'table' then
      -- Treat a single map as a one-element list so one loop serves both shapes
      local groups = patterns
      if not patterns[1] then
        groups = { patterns }
      end

      for _, group in ipairs(groups) do
        for sym, pat in pairs(group) do
          if pat:match(found) then
            return sym, '1'
          end
        end
      end
    end

    return default_sym, dyn_weight
  end
  --- Insert scanner findings into the task and, if configured, set a pre-result.
  -- The kind of finding is selected by `is_fail`:
  --   * nil/false   - a regular detection (`rule.symbol`, weight defaults to 1.0)
  --   * 'fail'      - scanner failure (`rule.symbol_fail`, weight forced to 0.0)
  --   * 'encrypted' - encrypted input (`rule.symbol_encrypted`, weight 1.0)
  --   * 'macro'     - input with macros (`rule.symbol_macro`, weight 1.0)
  -- Any other value is rejected with an error log, because no symbol would be
  -- known for it.
  -- @param task task object; `insert_result` and `set_pre_result` are called on it
  -- @param rule scanner rule table (symbols, patterns, whitelist, action, message, ...)
  -- @param vname a single threat name (string) or a list of threat names
  -- @param dyn_weight weight for regular detections; ignored for the other kinds
  -- @param is_fail result kind selector, see above
  local function yield_result(task, rule, vname, dyn_weight, is_fail)
    local all_whitelisted = true
    local patterns
    local symbol
    local threat_table = {}
    local threat_info
    local flags

    -- This should be more generic
    if not is_fail then
      patterns = rule.patterns
      symbol = rule.symbol
      -- Keep a space between the category and 'found' so the log stays readable
      threat_info = rule.detection_category .. ' found'
      if not dyn_weight then dyn_weight = 1.0 end
    elseif is_fail == 'fail' then
      patterns = rule.patterns_fail
      symbol = rule.symbol_fail
      threat_info = "FAILED with error"
      dyn_weight = 0.0
    elseif is_fail == 'encrypted' then
      patterns = rule.patterns
      symbol = rule.symbol_encrypted
      threat_info = "Scan has returned that input was encrypted"
      dyn_weight = 1.0
    elseif is_fail == 'macro' then
      patterns = rule.patterns
      symbol = rule.symbol_macro
      threat_info = "Scan has returned that input contains macros"
      dyn_weight = 1.0
    else
      -- Without a known kind there is no symbol to insert
      rspamd_logger.errx(task, '%s: unknown result type: %s', rule.log_prefix, is_fail)
      return
    end

    if type(vname) == 'string' then
      threat_table = { vname }
    elseif type(vname) == 'table' then
      threat_table = vname
    end

    for _, tm in ipairs(threat_table) do
      local symname, symscore = match_patterns(symbol, tm, patterns, dyn_weight)
      if rule.whitelist and rule.whitelist:get_key(tm) then
        rspamd_logger.infox(task, '%s: "%s" is in whitelist', rule.log_prefix, tm)
      else
        all_whitelisted = false
        rspamd_logger.infox(task, '%s: result - %s: "%s - score: %s"',
            rule.log_prefix, threat_info, tm, symscore)
        task:insert_result(symname, symscore, tm)
      end
    end

    -- A forced action is applied only for non-failure results that had at
    -- least one threat outside the whitelist
    if rule.action and is_fail ~= 'fail' and not all_whitelisted then
      local threat_names = table.concat(threat_table, '; ')
      if rule.action ~= 'reject' then
        flags = 'least'
      end
      task:set_pre_result(rule.action,
          lua_util.template(rule.message or 'Rejected', {
            SCANNER = rule.name,
            VIRUS = threat_names,
          }), rule.name, nil, nil, flags)
    end
  end
  --- Check the content length against the rule's upper size limit.
  -- @param task object passed to the logger when the check is skipped
  -- @param content value whose length (`#content`) is compared
  -- @param rule table; `max_size` is converted with `tonumber`, no limit applies
  --   when the conversion yields nil
  -- @return false (after an info log) when the content exceeds the limit,
  --   true otherwise
  local function message_not_too_large(task, content, rule)
    local limit = tonumber(rule.max_size)

    if limit and #content > limit then
      rspamd_logger.infox(task, "skip %s check as it is too large: %s (%s is allowed)",
          rule.log_prefix, #content, limit)
      return false
    end

    return true
  end
  --- Check the content length against the rule's lower size limit.
  -- @param task object passed to the logger when the check is skipped
  -- @param content value whose length (`#content`) is compared
  -- @param rule table; `min_size` is converted with `tonumber`, no limit applies
  --   when the conversion yields nil
  -- @return false (after an info log) when the content is shorter than the
  --   limit, true otherwise
  local function message_not_too_small(task, content, rule)
    local limit = tonumber(rule.min_size)

    if limit and #content < limit then
      rspamd_logger.infox(task, "skip %s check as it is too small: %s (%s is allowed)",
          rule.log_prefix, #content, limit)
      return false
    end

    return true
  end
  --- Decide whether the message has enough words to be worth scanning.
  -- The caller only proceeds with the scan when this returns true, so the
  -- function answers "is at least one text part at or above the limit?".
  -- Without `rule.text_part_min_words` (or when it is not numeric) the check
  -- is disabled and true is returned.
  -- @param task task object; `get_text_parts` is called on it
  -- @param rule table providing `text_part_min_words` and `log_prefix`
  -- @return true when scanning may continue, false when every text part is
  --   below the limit (an info message is logged in that case)
  local function message_min_words(task, rule)
    local min_words = tonumber(rule.text_part_min_words)
    if not min_words then
      return true
    end

    -- Same comparison as the per-part text check in check_parts_match
    for _, part in ipairs(task:get_text_parts() or {}) do
      if part:get_words_count() >= min_words then
        return true
      end
    end

    rspamd_logger.infox(task,
        '%s: #words in all text parts is below text_part_min_words limit: %s',
        rule.log_prefix, rule.text_part_min_words)
    return false
  end
  --- Decide whether a scan should still run given the task's current verdict.
  -- Only active when `rule.dynamic_scan` is set and the rule's action is not
  -- 'reject'. The scan is aborted (with an info log) when the first value of
  -- the 'default' metric score exceeds twice the second one, or when a
  -- pre-result exists and the metric action is 'reject'.
  -- A further postfilter short-circuit on an existing reject action is
  -- intentionally left out: the original author noted it sometimes leads to
  -- false positives.
  -- @param task task object queried for metric score, action and pre-result
  -- @param rule table providing `dynamic_scan`, `action` and `log_prefix`
  -- @return true (optionally followed by a reason string) to continue,
  --   false to abort
  local function dynamic_scan(task, rule)
    if not rule.dynamic_scan then
      return true
    end

    if rule.action == 'reject' then
      return true, 'dynamic_scan is not possible with config `action=reject;`'
    end

    local metric_result = task:get_metric_score('default')
    local metric_action = task:get_metric_action('default')
    local has_pre_result = task:has_pre_result()

    local abort_reason
    if metric_result[1] > metric_result[2]*2 then
      abort_reason = 'score > 2 * reject_level: ' .. metric_result[1]
    elseif has_pre_result and metric_action == 'reject' then
      abort_reason = 'pre_result reject is set'
    end

    if abort_reason then
      rspamd_logger.infox(task, '%s: aborting: %s', rule.log_prefix, abort_reason)
      return false
    end

    return true, 'undecided'
  end
  --- Look up a cached verdict in Redis and run `fn` only when a scan is needed.
  -- When `rule.redis_params` is set and `rule.no_cache` is not, a GET for
  -- `rule.prefix .. digest` is issued. Inside the Redis callback a cached
  -- verdict is replayed through `yield_result`; otherwise `fn` is called,
  -- provided the size, word-count and dynamic-scan checks all pass.
  -- Cached values have the form `<threats joined by \v>\t<score>`; the special
  -- first entries 'OK', 'MACRO' and 'ENCRYPTED' mark clean, macro and
  -- encrypted results.
  -- @param task task object
  -- @param content data to be scanned (only its length is inspected here)
  -- @param rule scanner rule table
  -- @param digest cache key without the rule prefix
  -- @param fn function performing the uncached scan
  -- @return true when the Redis request was issued (the callback takes over),
  --   false otherwise - in that case `fn` has NOT been called by this function
  local function need_check(task, content, rule, digest, fn)
    local cache_key = digest
    local cache_miss = true

    local function redis_av_cb(err, data)
      if type(data) == 'string' then
        -- Cached
        local fields = lua_util.str_split(data, '\t')
        local threat_string = lua_util.str_split(fields[1], '\v')
        local score = fields[2] or rule.default_score
        local verdict = threat_string[1]

        if verdict == 'OK' then
          lua_util.debugm(rule.name, task, '%s: got cached negative result for %s: %s',
              rule.log_prefix, cache_key, verdict)
        elseif verdict == 'MACRO' then
          yield_result(task, rule, 'File contains macros', 0.0, 'macro')
        elseif verdict == 'ENCRYPTED' then
          yield_result(task, rule, 'File is encrypted', 0.0, 'encrypted')
        else
          lua_util.debugm(rule.name, task, '%s: got cached threat result for %s: %s - score: %s',
              rule.log_prefix, cache_key, verdict, score)
          yield_result(task, rule, threat_string, score)
        end

        cache_miss = false
      elseif err then
        rspamd_logger.errx(task, 'got error checking cache: %s', err)
      end

      -- All checks are evaluated unconditionally, in this order, because
      -- each of them may log
      local size_below_max = message_not_too_large(task, content, rule)
      local size_above_min = message_not_too_small(task, content, rule)
      local words_ok = message_min_words(task, rule)
      local dynamic_ok = dynamic_scan(task, rule)

      if cache_miss and size_below_max and size_above_min and words_ok and dynamic_ok then
        fn()
      end
    end

    if rule.redis_params and not rule.no_cache then
      cache_key = rule.prefix .. cache_key

      local scheduled = lua_redis.redis_make_request(task,
          rule.redis_params, -- connect params
          cache_key, -- hash key
          false, -- is write
          redis_av_cb, --callback
          'GET', -- command
          { cache_key } -- arguments
      )
      if scheduled then
        return true
      end
    end

    return false
  end
  --- Store a scan verdict in Redis.
  -- The value written is `<to_save>\t<dyn_weight>`; a table `to_save` is first
  -- joined with '\v'. The write (SETEX with `rule.cache_expire or 0`) happens
  -- only when both `rule.redis_params` and `rule.prefix` are set.
  -- @param task task object
  -- @param digest cache key without the rule prefix
  -- @param rule scanner rule table
  -- @param to_save verdict string or list of threat names
  -- @param dyn_weight score stored with the verdict, 1.0 when omitted
  -- @return always false
  local function save_cache(task, digest, rule, to_save, dyn_weight)
    local cache_key = digest
    dyn_weight = dyn_weight or 1.0

    local serialized = to_save
    if type(serialized) == 'table' then
      serialized = table.concat(serialized, '\v')
    end

    local function redis_set_cb(err)
      -- Nothing to do besides logging the outcome
      if not err then
        lua_util.debugm(rule.name, task, '%s: saved cached result for %s: %s - score %s - ttl %s',
            rule.log_prefix, cache_key, serialized, dyn_weight, rule.cache_expire)
      else
        rspamd_logger.errx(task, 'failed to save %s cache for %s -> "%s": %s',
            rule.detection_category, serialized, cache_key, err)
      end
    end

    local value = table.concat({serialized, dyn_weight}, '\t')

    if rule.redis_params and rule.prefix then
      cache_key = rule.prefix .. cache_key

      lua_redis.redis_make_request(task,
          rule.redis_params, -- connect params
          cache_key, -- hash key
          true, -- is write
          redis_set_cb, --callback
          'SETEX', -- command
          { cache_key, rule.cache_expire or 0, value }
      )
    end

    return false
  end
  --- Compile a pattern configuration into regexp objects.
  -- `patterns` is either a map `name -> expression` or a list of such maps
  -- (detected by the presence of index 1). Every expression is passed to
  -- `rspamd_regexp.create_cached`; list entries that are not tables are
  -- replaced by empty tables.
  -- @param patterns map or list of maps
  -- @return table with the same shape holding the compiled values
  local function create_regex_table(patterns)
    local function compile_map(map)
      local compiled = {}
      for name, expr in pairs(map) do
        compiled[name] = rspamd_regexp.create_cached(expr)
      end
      return compiled
    end

    if not patterns[1] then
      return compile_map(patterns)
    end

    local regex_table = {}
    for idx, entry in ipairs(patterns) do
      if type(entry) == 'table' then
        regex_table[idx] = compile_map(entry)
      else
        regex_table[idx] = {}
      end
    end
    return regex_table
  end
  --- Test a value against a MIME-part filter configuration.
  -- `patterns` is either a map `name -> pattern` or a list (detected by the
  -- presence of index 1). List entries that are tables are treated as maps
  -- and walked with `pairs`, matching how `create_regex_table` builds them.
  -- Non-table list entries are compared directly in 'ext' mode, so a plain
  -- list of extensions works; in 'regex' mode they are skipped.
  -- @param task unused, kept for interface compatibility
  -- @param rule unused, kept for interface compatibility
  -- @param found value to test; nil/false never matches
  -- @param patterns filter configuration; non-table values never match
  -- @param pat_type 'ext' for string equality (via `tostring`), 'regex' to
  --   call the pattern's `match` method
  -- @return true on the first match, false otherwise
  local function match_filter(task, rule, found, patterns, pat_type)
    if type(patterns) ~= 'table' or not found then
      return false
    end

    -- Single pattern against `found`, according to pat_type
    local function hit(pat)
      if pat_type == 'ext' then
        return tostring(pat) == tostring(found)
      elseif pat_type == 'regex' then
        return pat:match(found) and true or false
      end
      return false
    end

    -- Any value of a map against `found`
    local function any_hit(set)
      for _, pat in pairs(set) do
        if hit(pat) then
          return true
        end
      end
      return false
    end

    if not patterns[1] then
      return any_hit(patterns)
    end

    for _, entry in ipairs(patterns) do
      if type(entry) == 'table' then
        if any_hit(entry) then
          return true
        end
      elseif pat_type == 'ext' and hit(entry) then
        return true
      end
    end

    return false
  end
  --- Extract the last two extensions of a file name (borrowed from
  -- mime_types.lua).
  -- The name is split on '.' with `lua_util.str_split`. The last extension is
  -- reported only when there are at least two pieces, the one before it only
  -- when there are at least three; both are lowercased.
  -- @param fname file name
  -- @return last extension or nil, second-to-last extension or nil, and the
  --   table of pieces
  local function gen_extension(fname)
    local filename_parts = lua_util.str_split(fname, '.')
    local count = #filename_parts
    local last, before_last

    if count > 1 then
      last = string.lower(filename_parts[count])
    end
    if count > 2 then
      before_last = string.lower(filename_parts[count - 1])
    end

    return last, before_last, filename_parts
  end
  --- Select the MIME parts of a task that should be handed to the scanner.
  -- With `rule.scan_all_mime_parts == false` a part is selected when its file
  -- name, declared content type, detected extension/content type or (for
  -- archives) one of the contained file names matches
  -- `rule.mime_parts_filter_ext` / `rule.mime_parts_filter_regex`.
  -- Independently of that, text parts with at least `rule.text_part_min_words`
  -- words are selected when `rule.scan_text_mime` is set, and image parts when
  -- `rule.scan_image_mime` is set. In every other configuration attachments
  -- and parts whose detected type is not flagged `av_check = false` in
  -- `lua_magic_types` are selected.
  -- @param task task object; `get_parts` is called on it
  -- @param rule scanner rule table
  -- @return the result of `fun.filter` over the task's parts
  local function check_parts_match(task, rule)

    -- Extension and regex check of one file name; the log formats differ
    -- between attachments and archive members, so they are passed in
    local function name_matches(name, ext_fmt, regex_fmt)
      local ext, ext2 = gen_extension(name)
      if match_filter(task, rule, ext, rule.mime_parts_filter_ext, 'ext')
          or match_filter(task, rule, ext2, rule.mime_parts_filter_ext, 'ext') then
        lua_util.debugm(rule.name, task, ext_fmt, rule.log_prefix, ext, ext2)
        return true
      end
      if match_filter(task, rule, name, rule.mime_parts_filter_regex, 'regex') then
        lua_util.debugm(rule.name, task, regex_fmt, rule.log_prefix)
        return true
      end
      return false
    end

    -- Checks that apply only when scan_all_mime_parts is explicitly false
    local function filter_matches(part, mtype, msubtype, detected_ext, fname)
      -- check file extension and filename regex matching
      if fname ~= nil and name_matches(fname,
          '%s: extension matched: |%s|%s|',
          '%s: filname regex matched') then
        return true
      end

      -- check content type string regex matching
      if mtype ~= nil and msubtype ~= nil then
        local ct = string.format('%s/%s', mtype, msubtype):lower()
        if match_filter(task, rule, ct, rule.mime_parts_filter_regex, 'regex') then
          lua_util.debugm(rule.name, task, '%s: regex content-type: %s', rule.log_prefix, ct)
          return true
        end
      end

      -- check detected content type (libmagic) regex matching
      if detected_ext then
        local magic = lua_magic_types[detected_ext] or {}
        if match_filter(task, rule, detected_ext, rule.mime_parts_filter_ext, 'ext') then
          lua_util.debugm(rule.name, task, '%s: detected extension matched: |%s|',
              rule.log_prefix, detected_ext)
          return true
        elseif magic.ct and match_filter(task, rule, magic.ct, rule.mime_parts_filter_regex, 'regex') then
          lua_util.debugm(rule.name, task, '%s: regex detected libmagic content-type: %s',
              rule.log_prefix, magic.ct)
          return true
        end
      end

      -- check filenames in archives
      if part:is_archive() then
        local arch = part:get_archive()
        local filelist = arch:get_files_full(1000)
        for _, f in ipairs(filelist) do
          if name_matches(f.name,
              '%s: extension matched in archive: |%s|%s|',
              '%s: filename regex matched in archive') then
            return true
          end
        end
      end

      return false
    end

    local function wanted(part)
      local mtype, msubtype = part:get_type()
      local detected_ext = part:get_detected_ext()
      local fname = part:get_filename()

      if rule.scan_all_mime_parts == false
          and filter_matches(part, mtype, msubtype, detected_ext, fname) then
        return true
      end

      -- check text_part has more words than text_part_min_words_check
      if rule.scan_text_mime and rule.text_part_min_words and part:is_text() and
          part:get_words_count() >= tonumber(rule.text_part_min_words) then
        return true
      end

      if rule.scan_image_mime and part:is_image() then
        return true
      end

      if rule.scan_all_mime_parts ~= false then
        if detected_ext then
          -- We know what to scan!
          local magic = lua_magic_types[detected_ext] or {}
          if part:is_attachment() or magic.av_check ~= false then
            return true
          end
        elseif part:is_attachment() then
          -- Just rely on attachment property
          return true
        end
      end

      return false
    end

    return fun.filter(wanted, task:get_parts())
  end
  --- Report whether the task's current verdict already makes a scan pointless.
  -- Nothing is decided when the rule's action is 'reject'. Otherwise the
  -- 'default' metric is consulted: an existing reject action for postfilter
  -- rules, a score above twice the second value of the metric score, or a
  -- pre-result combined with a reject action all count as decided.
  -- @param task task object queried for metric score, action and pre-result
  -- @param rule table providing `action` and `symbol_type`
  -- @return boolean (true when already decided) and a reason string
  local function check_metric_results(task, rule)
    if rule.action == 'reject' then
      return false, 'dynamic_scan is not possible with config `action=reject;`'
    end

    local metric_result = task:get_metric_score('default')
    local metric_action = task:get_metric_action('default')
    local has_pre_result = task:has_pre_result()

    if rule.symbol_type == 'postfilter' and metric_action == 'reject' then
      return true, 'result is already reject'
    end

    if metric_result[1] > metric_result[2]*2 then
      return true, 'score > 2 * reject_level: ' .. metric_result[1]
    end

    if has_pre_result and metric_action == 'reject' then
      return true, 'pre_result reject is set'
    end

    return false, 'undecided'
  end
  -- Public interface of the module
  exports.check_metric_results = check_metric_results
  exports.check_parts_match = check_parts_match
  exports.create_regex_table = create_regex_table
  exports.save_cache = save_cache
  exports.condition_check_and_continue = need_check
  exports.match_patterns = match_patterns
  exports.yield_result = yield_result
  exports.log_clean = log_clean

  -- Calling the module table copies every exported entry into the global
  -- scope. Names that already exist there are skipped (with a notice) unless
  -- `override` is truthy, in which case they are replaced (with a warning).
  setmetatable(exports, {
    __call = function(t, override)
      for name, value in pairs(t) do
        if _G[name] == nil then
          _G[name] = value
        else
          local msg = 'function ' .. name .. ' already exists in global scope.'
          if not override then
            print('NOTICE: ' .. msg .. ' Skipped.')
          else
            _G[name] = value
            print('WARNING: ' .. msg .. ' Overwritten.')
          end
        end
      end
    end,
  })

  return exports