You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

common.lua 14KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466
  1. --[[
  2. Copyright (c) 2018, Vsevolod Stakhov <vsevolod@highsecure.ru>
  3. Copyright (c) 2019, Carsten Rosenberg <c.rosenberg@heinlein-support.de>
  4. Licensed under the Apache License, Version 2.0 (the "License");
  5. you may not use this file except in compliance with the License.
  6. You may obtain a copy of the License at
  7. http://www.apache.org/licenses/LICENSE-2.0
  8. Unless required by applicable law or agreed to in writing, software
  9. distributed under the License is distributed on an "AS IS" BASIS,
  10. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11. See the License for the specific language governing permissions and
  12. limitations under the License.
  13. ]]--
  14. --[[[
  15. -- @module lua_scanners_common
  16. -- This module contains common external scanners functions
  17. --]]
  18. local rspamd_logger = require "rspamd_logger"
  19. local rspamd_regexp = require "rspamd_regexp"
  20. local lua_util = require "lua_util"
  21. local lua_redis = require "lua_redis"
  22. local fun = require "fun"
  23. local exports = {}
  --- Report that a scanned message or MIME part produced no findings.
  -- Writes an info-level line when `rule.log_clean` is set; otherwise the same
  -- text goes to the debug log registered under `rule.name`.
  -- @param task task object passed through to the logger
  -- @param rule scanner rule (reads `log_clean`, `log_prefix` and `name`)
  -- @param msg optional text, defaults to 'message or mime_part is clean'
  local function log_clean(task, rule, msg)
    local text = msg or 'message or mime_part is clean'

    if not rule.log_clean then
      lua_util.debugm(rule.name, task, '%s: %s', rule.log_prefix, text)
      return
    end

    rspamd_logger.infox(task, '%s: %s', rule.log_prefix, text)
  end
  --- Choose the symbol (and weight) to report for a detected threat name.
  -- `patterns` may be a map of symbol -> matcher, or a list of such maps that is
  -- walked in order. A matcher is any object exposing `:match(found)`.
  -- @param default_sym symbol returned when nothing matches
  -- @param found threat name reported by the scanner
  -- @param patterns matcher map, list of matcher maps, or a non-table (ignored)
  -- @param dyn_weight weight returned together with `default_sym`
  -- @return the matching symbol and the string '1', or `default_sym, dyn_weight`
  local function match_patterns(default_sym, found, patterns, dyn_weight)
    if type(patterns) ~= 'table' then
      return default_sym, dyn_weight
    end

    -- A flat map is handled as a list holding that single map.
    local pattern_sets = patterns
    if not patterns[1] then
      pattern_sets = { patterns }
    end

    for _, set in ipairs(pattern_sets) do
      for candidate, matcher in pairs(set) do
        if matcher:match(found) then
          return candidate, '1'
        end
      end
    end

    return default_sym, dyn_weight
  end
  --- Insert scanner findings into the task and optionally set a pre-result.
  -- Every threat name that is not whitelisted is mapped to a symbol through
  -- `match_patterns` and inserted with `task:insert_result`. If the rule has an
  -- `action`, the call is not a failure report and at least one threat was not
  -- whitelisted, `task:set_pre_result` is called with the rendered rule message.
  -- @param task task object
  -- @param rule scanner rule (reads `patterns`, `patterns_fail`, `symbol`,
  --   `symbol_fail`, `symbol_encrypted`, `detection_category`, `whitelist`,
  --   `action`, `message`, `name`, `log_prefix`)
  -- @param vname threat name (string) or list of threat names (table)
  -- @param dyn_weight optional weight, defaults to 1.0 for regular findings
  -- @param is_fail nil/false for a finding, 'fail' for a scanner failure,
  --   'encrypted' when the scanner reported encrypted input
  local function yield_result(task, rule, vname, dyn_weight, is_fail)
    local all_whitelisted = true
    local patterns
    local symbol
    local threat_table = {}
    local threat_info

    -- This should be more generic
    if not is_fail then
      patterns = rule.patterns
      symbol = rule.symbol
      -- Leading space keeps the log readable ("virus found", not "virusfound").
      threat_info = rule.detection_category .. ' found'
      if not dyn_weight then
        dyn_weight = 1.0
      end
    elseif is_fail == 'fail' then
      patterns = rule.patterns_fail
      symbol = rule.symbol_fail
      threat_info = "FAILED with error"
      dyn_weight = 0.0
    elseif is_fail == 'encrypted' then
      patterns = rule.patterns
      symbol = rule.symbol_encrypted
      threat_info = "Scan has returned that input was encrypted"
      dyn_weight = 1.0
    end
    -- NOTE(review): any other `is_fail` value leaves `symbol` and `threat_info`
    -- unset; confirm that callers only pass the values handled above.

    if type(vname) == 'string' then
      table.insert(threat_table, vname)
    elseif type(vname) == 'table' then
      threat_table = vname
    end

    for _, tm in ipairs(threat_table) do
      local symname, symscore = match_patterns(symbol, tm, patterns, dyn_weight)
      if rule.whitelist and rule.whitelist:get_key(tm) then
        rspamd_logger.infox(task, '%s: "%s" is in whitelist', rule.log_prefix, tm)
      else
        all_whitelisted = false
        task:insert_result(symname, symscore, tm)
        -- The closing quote belongs right after the threat name.
        rspamd_logger.infox(task, '%s: result - %s: "%s" - score: %s',
            rule.log_prefix, threat_info, tm, symscore)
      end
    end

    -- A failure report never forces an action; neither does a fully
    -- whitelisted (or empty) threat list.
    if rule.action and is_fail ~= 'fail' and not all_whitelisted then
      local threats = table.concat(threat_table, '; ')
      task:set_pre_result(rule.action,
          lua_util.template(rule.message or 'Rejected', {
            SCANNER = rule.name,
            VIRUS = threats,
          }), rule.name)
    end
  end
  --- Tell whether `content` fits within the rule's `max_size`.
  -- A missing or non-numeric `rule.max_size` disables the limit.
  -- @param task task object, used for logging only
  -- @param content value whose length (`#content`) is checked
  -- @param rule scanner rule (reads `max_size` and `log_prefix`)
  -- @return false (after logging) when the content is larger than the limit,
  --   true otherwise
  local function message_not_too_large(task, content, rule)
    local limit = tonumber(rule.max_size)

    if limit then
      local size = #content
      if size > limit then
        rspamd_logger.infox(task, "skip %s check as it is too large: %s (%s is allowed)",
            rule.log_prefix, size, limit)
        return false
      end
    end

    return true
  end
  --- Tell whether `content` reaches the rule's `min_size`.
  -- A missing or non-numeric `rule.min_size` disables the limit.
  -- @param task task object, used for logging only
  -- @param content value whose length (`#content`) is checked
  -- @param rule scanner rule (reads `min_size` and `log_prefix`)
  -- @return false (after logging) when the content is shorter than the limit,
  --   true otherwise
  local function message_not_too_small(task, content, rule)
    local limit = tonumber(rule.min_size)

    if limit then
      local size = #content
      if size < limit then
        rspamd_logger.infox(task, "skip %s check as it is too small: %s (%s is allowed)",
            rule.log_prefix, size, limit)
        return false
      end
    end

    return true
  end
  --- Check whether the message carries enough text to be worth scanning.
  -- With `rule.text_part_min_words` unset (or not numeric) the check always
  -- passes. Otherwise it passes only when at least one text part of the task
  -- has that many words or more.
  -- @param task task object (uses `task:get_text_parts()`)
  -- @param rule scanner rule (reads `text_part_min_words` and `log_prefix`)
  -- @return true when scanning may proceed, false (after logging) otherwise
  local function message_min_words(task, rule)
    if not rule.text_part_min_words then
      return true
    end

    local min_words = tonumber(rule.text_part_min_words)
    if not min_words then
      return true
    end

    for _, part in ipairs(task:get_text_parts() or {}) do
      if part:get_words_count() >= min_words then
        return true
      end
    end

    rspamd_logger.infox(task, '%s: #words is less then text_part_min_words: %s',
        rule.log_prefix, rule.text_part_min_words)
    return false
  end

  --- Decide whether a scan is still worthwhile given the current task result.
  -- Only active when `rule.dynamic_scan` is set and `rule.action` is not
  -- 'reject'. The scan is aborted when the first value of the 'default' metric
  -- score exceeds twice the second one, or when a pre-result is set and the
  -- metric action is already 'reject'.
  -- @param task task object
  -- @param rule scanner rule (reads `dynamic_scan`, `action`, `log_prefix`)
  -- @return false when the scan should be skipped; otherwise true, optionally
  --   followed by an explanatory string
  local function dynamic_scan(task, rule)
    if not rule.dynamic_scan then
      return true
    end

    if rule.action == 'reject' then
      return true, 'dynamic_scan is not possible with config `action=reject;`'
    end

    local metric_result = task:get_metric_score('default')
    local metric_action = task:get_metric_action('default')
    local has_pre_result = task:has_pre_result()

    -- ToDo: needed?
    -- Sometimes leads to FPs
    --if rule.symbol_type == 'postfilter' and metric_action == 'reject' then
    --  rspamd_logger.infox(task, '%s: aborting: %s', rule.log_prefix, "result is already reject")
    --  return false
    --elseif metric_result[1] > metric_result[2]*2 then
    if metric_result[1] > metric_result[2] * 2 then
      rspamd_logger.infox(task, '%s: aborting: %s', rule.log_prefix,
          'score > 2 * reject_level: ' .. metric_result[1])
      return false
    elseif has_pre_result and metric_action == 'reject' then
      rspamd_logger.infox(task, '%s: aborting: %s', rule.log_prefix,
          'pre_result reject is set')
      return false
    end

    return true, 'undecided'
  end

  --- Look up a cached verdict in Redis and run the scan only when necessary.
  -- On a cache hit with a threat the cached result is replayed through
  -- `yield_result`; on a cached 'OK' nothing happens. On a miss `fn` is called,
  -- but only if the size limits, the minimum-words check and `dynamic_scan`
  -- all allow it.
  -- @param task task object
  -- @param content data that would be scanned (only its length is used here)
  -- @param rule scanner rule (reads `redis_params`, `prefix`, `default_score`,
  --   `name`, `log_prefix` plus whatever the individual checks read)
  -- @param digest cache key without the rule prefix
  -- @param fn scan function, called without arguments
  -- @return true when the Redis request was scheduled (the callback decides
  --   about `fn`), false when no cache lookup was made
  local function need_check(task, content, rule, digest, fn)
    local uncached = true
    local key = digest

    local function redis_av_cb(err, data)
      if data and type(data) == 'string' then
        -- Cached value layout: threats joined by '\v', then '\t', then score.
        local fields = lua_util.str_split(data, '\t')
        -- An empty value cannot be interpreted; handle it like a cache miss.
        if fields[1] then
          local threat_string = lua_util.str_split(fields[1], '\v')
          local score = fields[2] or rule.default_score

          if threat_string[1] ~= 'OK' then
            lua_util.debugm(rule.name, task, '%s: got cached threat result for %s: %s - score: %s',
                rule.log_prefix, key, threat_string[1], score)
            yield_result(task, rule, threat_string, score)
          else
            lua_util.debugm(rule.name, task, '%s: got cached negative result for %s: %s',
                rule.log_prefix, key, threat_string[1])
          end
          uncached = false
        end
      elseif err then
        rspamd_logger.errx(task, 'got error checking cache: %s', err)
      end

      -- A cached verdict settles the matter: skip the checks (and their
      -- "skip ..." log lines) entirely.
      if not uncached then
        return
      end

      -- Each check must really be able to veto the scan; the previous
      -- `check(...) or true` form made every one of them a no-op.
      if message_not_too_large(task, content, rule)
          and message_not_too_small(task, content, rule)
          and message_min_words(task, rule)
          and dynamic_scan(task, rule) then
        fn()
      end
    end

    -- Same precondition as save_cache: without a prefix no key can be built.
    if rule.redis_params and rule.prefix then
      key = rule.prefix .. key

      if lua_redis.redis_make_request(task,
          rule.redis_params, -- connect params
          key, -- hash key
          false, -- is write
          redis_av_cb, -- callback
          'GET', -- command
          { key } -- arguments
      ) then
        return true
      end
    end

    return false
  end
  --- Store a scan verdict in Redis with `SETEX`.
  -- The stored value is the threat name(s), joined by '\v' when a table is
  -- given, followed by '\t' and the weight. Nothing is written unless both
  -- `rule.redis_params` and `rule.prefix` are set.
  -- @param task task object
  -- @param digest cache key without the rule prefix
  -- @param rule scanner rule (reads `redis_params`, `prefix`, `cache_expire`,
  --   `detection_category`, `name`, `log_prefix`)
  -- @param to_save threat name or list of threat names
  -- @param dyn_weight optional weight, defaults to 1.0
  -- @return always false
  local function save_cache(task, digest, rule, to_save, dyn_weight)
    local weight = dyn_weight or 1.0

    local payload = to_save
    if type(payload) == 'table' then
      payload = table.concat(payload, '\v')
    end
    local value = table.concat({ payload, weight }, '\t')

    if not (rule.redis_params and rule.prefix) then
      return false
    end

    local cache_key = rule.prefix .. digest

    -- Only reports the outcome of the write.
    local function on_saved(err)
      if not err then
        lua_util.debugm(rule.name, task, '%s: saved cached result for %s: %s - score %s - ttl %s',
            rule.log_prefix, cache_key, payload, weight, rule.cache_expire)
        return
      end

      rspamd_logger.errx(task, 'failed to save %s cache for %s -> "%s": %s',
          rule.detection_category, payload, cache_key, err)
    end

    lua_redis.redis_make_request(task,
        rule.redis_params, -- connect params
        cache_key, -- hash key
        true, -- is write
        on_saved, -- callback
        'SETEX', -- command
        { cache_key, rule.cache_expire or 0, value }
    )

    return false
  end
  --- Compile configured pattern strings into regexp objects.
  -- Accepts a map of name -> expression, or a list of such maps. The result has
  -- the same shape with every expression replaced by the value returned from
  -- `rspamd_regexp.create_cached`. List entries that are not tables become
  -- empty tables.
  -- @param patterns map or list of maps holding regexp source strings
  -- @return table of the same shape holding compiled regexps
  local function create_regex_table(patterns)
    local function compile_set(set)
      local compiled = {}
      for name, expr in pairs(set) do
        compiled[name] = rspamd_regexp.create_cached(expr)
      end
      return compiled
    end

    -- No first array element: a flat map.
    if not patterns[1] then
      return compile_set(patterns)
    end

    local compiled_list = {}
    for idx, entry in ipairs(patterns) do
      if type(entry) == 'table' then
        compiled_list[idx] = compile_set(entry)
      else
        compiled_list[idx] = {}
      end
    end

    return compiled_list
  end
  --- Test a value against a set of compiled filter patterns.
  -- `patterns` is either a map of name -> matcher or a list of such maps, which
  -- is the shape produced by `create_regex_table`. A matcher is any object
  -- exposing `:match(found)`.
  -- @param task task object (currently unused, kept for the call signature)
  -- @param found value to test; nil never matches
  -- @param patterns matcher map, list of matcher maps, or a non-table (ignored)
  -- @return true when any matcher accepts `found`, false otherwise
  local function match_filter(task, found, patterns)
    -- Callers pass extensions that may be nil (a file name without one);
    -- there is nothing to match in that case.
    if found == nil or type(patterns) ~= 'table' then
      return false
    end

    -- A flat map is handled as a list holding that single map.
    local pattern_sets = patterns
    if not patterns[1] then
      pattern_sets = { patterns }
    end

    for _, set in ipairs(pattern_sets) do
      if type(set) == 'table' then
        -- Inner sets are keyed by arbitrary names, so they have to be walked
        -- with pairs(); ipairs() would skip every name-keyed matcher.
        for _, pat in pairs(set) do
          if pat:match(found) then
            return true
          end
        end
      end
    end

    return false
  end
  --- Split a file name on '.' and pick its trailing extensions.
  -- Adapted from mime_types.lua.
  -- @param fname file name
  -- @return the last extension lowercased (nil unless there are at least two
  --   name components), the one before it lowercased (nil unless there are at
  --   least three), and the list of components from `lua_util.str_split`
  local function gen_extension(fname)
    local pieces = lua_util.str_split(fname, '.')
    local count = #pieces

    local last_ext, prev_ext
    if count > 1 then
      last_ext = string.lower(pieces[count])
    end
    if count > 2 then
      prev_ext = string.lower(pieces[count - 1])
    end

    return last_ext, prev_ext, pieces
  end
  --- Select the MIME parts of a task that the rule wants scanned.
  -- A part is kept when any of the following holds:
  --   * `rule.scan_image_mime` is set and the part is an image;
  --   * `rule.scan_text_mime` is set and the minimum-words check passes;
  --   * the part is an attachment and `rule.scan_all_mime_parts` is not false;
  --   * with `rule.scan_all_mime_parts == false`: the file name, its
  --     extensions, the declared or detected content type, or a file inside an
  --     archive matches `mime_parts_filter_ext` / `mime_parts_filter_regex`.
  -- @param task task object (uses `task:get_parts()`)
  -- @param rule scanner rule
  -- @return the result of `fun.filter` over the task's parts
  local function check_parts_match(task, rule)

    local filter_func = function(p)
      local mtype, msubtype = p:get_type()
      local dmtype, dmsubtype = p:get_detected_type()
      local fname = p:get_filename()
      local ext, ext2
      local extension_check = false
      local content_type_check = false
      local text_part_min_words_check = true

      if rule.scan_all_mime_parts == false then
        -- check file extension and filename regex matching
        if fname ~= nil then
          ext, ext2 = gen_extension(fname)
          if match_filter(task, ext, rule.mime_parts_filter_ext)
              or match_filter(task, ext2, rule.mime_parts_filter_ext) then
            lua_util.debugm(rule.name, task, '%s: extension matched: %s', rule.log_prefix, ext)
            extension_check = true
          end
          if match_filter(task, fname, rule.mime_parts_filter_regex) then
            content_type_check = true
          end
        end

        -- check content type string regex matching
        if mtype ~= nil and msubtype ~= nil then
          local ct = string.format('%s/%s', mtype, msubtype):lower()
          if match_filter(task, ct, rule.mime_parts_filter_regex) then
            lua_util.debugm(rule.name, task, '%s: regex content-type: %s', rule.log_prefix, ct)
            content_type_check = true
          end
        end

        -- check detected content type (libmagic) regex matching; this has to
        -- use the detected type, not the declared one checked above
        if dmtype ~= nil and dmsubtype ~= nil then
          local ct = string.format('%s/%s', dmtype, dmsubtype):lower()
          if match_filter(task, ct, rule.mime_parts_filter_regex) then
            lua_util.debugm(rule.name, task, '%s: regex detected libmagic content-type: %s',
                rule.log_prefix, ct)
            content_type_check = true
          end
        end

        -- check filenames in archives
        if p:is_archive() then
          local arch = p:get_archive()
          local filelist = arch and arch:get_files_full() or {}
          for _, f in ipairs(filelist) do
            -- entries without a name cannot be matched
            if f.name ~= nil then
              ext, ext2 = gen_extension(f.name)
              if match_filter(task, ext, rule.mime_parts_filter_ext)
                  or match_filter(task, ext2, rule.mime_parts_filter_ext) then
                lua_util.debugm(rule.name, task, '%s: extension matched in archive: %s',
                    rule.log_prefix, ext)
                extension_check = true
              end
              if match_filter(task, f.name, rule.mime_parts_filter_regex) then
                content_type_check = true
              end
            end
          end
        end
      end

      -- check text_part has more words than text_part_min_words_check
      if rule.text_part_min_words and p:is_text() then
        text_part_min_words_check = p:get_words_count() >= tonumber(rule.text_part_min_words)
      end

      -- NOTE(review): with scan_text_mime set this also accepts non-text parts,
      -- because the words check stays true for them; confirm that is intended.
      return (rule.scan_image_mime and p:is_image())
          or (rule.scan_text_mime and text_part_min_words_check)
          or (p:is_attachment() and rule.scan_all_mime_parts ~= false)
          or extension_check
          or content_type_check
    end

    return fun.filter(filter_func, task:get_parts())
  end
  --- Report whether the task result is already decisive.
  -- With `rule.action == 'reject'` the answer is always "not decided".
  -- Otherwise the 'default' metric score and action plus the pre-result flag
  -- are inspected.
  -- @param task task object
  -- @param rule scanner rule (reads `action` and `symbol_type`)
  -- @return true and a reason when the result is already decisive, otherwise
  --   false and an explanatory string
  local function check_metric_results(task, rule)
    if rule.action == 'reject' then
      return false, 'dynamic_scan is not possible with config `action=reject;`'
    end

    local score = task:get_metric_score('default')
    local action = task:get_metric_action('default')
    local pre_result_set = task:has_pre_result()

    if rule.symbol_type == 'postfilter' and action == 'reject' then
      return true, 'result is already reject'
    end

    if score[1] > score[2] * 2 then
      return true, 'score > 2 * reject_level: ' .. score[1]
    end

    if pre_result_set and action == 'reject' then
      return true, 'pre_result reject is set'
    end

    return false, 'undecided'
  end
  -- Public interface of the module.
  for name, member in pairs({
    log_clean = log_clean,
    yield_result = yield_result,
    match_patterns = match_patterns,
    need_check = need_check,
    save_cache = save_cache,
    create_regex_table = create_regex_table,
    check_parts_match = check_parts_match,
    check_metric_results = check_metric_results,
  }) do
    exports[name] = member
  end

  -- Calling the module table copies every exported member into the global
  -- scope. A global that already exists is only replaced when `override` is
  -- truthy; either way a message is printed for it.
  setmetatable(exports, {
    __call = function(t, override)
      for name, member in pairs(t) do
        local taken = _G[name] ~= nil

        if not taken or override then
          _G[name] = member
        end

        if taken then
          local msg = 'function ' .. name .. ' already exists in global scope.'
          if override then
            print('WARNING: ' .. msg .. ' Overwritten.')
          else
            print('NOTICE: ' .. msg .. ' Skipped.')
          end
        end
      end
    end,
  })
  407. return exports