You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

lua_fuzzy.lua 8.9KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334
  1. --[[
  2. Copyright (c) 2018, Vsevolod Stakhov <vsevolod@highsecure.ru>
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ]]--
  13. --[[[
  14. -- @module lua_fuzzy
  15. -- This module contains helper functions for supporting fuzzy check module
  16. --]]
  -- Name passed as the first argument to lua_util.debugm throughout this module
  local N = "lua_fuzzy"

  local lua_util = require("lua_util")
  local rspamd_regexp = require("rspamd_regexp")
  local fun = require("fun")
  local rspamd_logger = require("rspamd_logger")
  local ts = require("tableshape").types

  -- Processed rules; filled by C code and addressed by their numeric index
  local rules = {}
  -- Built-in "recommended" set of rule options
  local recommended_policy = {
    -- Size limits
    min_bytes = 1024,
    min_length = 64,
    min_width = 500,
    min_height = 500,
    text_multiplier = 4.0, -- divide min_bytes by 4 for texts
    -- Which parts are eligible
    mime_types = {"application/*"},
    scan_archives = true,
    skip_images = false,
    -- Text hashing modes
    text_shingles = true,
    short_text_direct_hash = true,
  }

  -- Pre-defined rules options, keyed by policy name
  local policies = {
    recommended = recommended_policy,
  }

  -- Policy applied to rules that do not name one
  local default_policy = policies.recommended
  -- Accepts a number as is, or a string that is converted with tonumber
  local numeric_option = ts.number + ts.string / tonumber

  -- Shape used to validate (and transform) policies passed to register_policy
  local policy_schema = ts.shape{
    min_bytes = numeric_option,
    min_height = numeric_option,
    min_width = numeric_option,
    min_length = numeric_option,
    text_multiplier = ts.number,
    mime_types = ts.array_of(ts.string),
    scan_archives = ts.boolean,
    short_text_direct_hash = ts.boolean,
    text_shingles = ts.boolean,
    skip_images = ts.boolean,
  }

  -- Public interface of the module
  local exports = {}
  --[[[
  -- @function lua_fuzzy.register_policy(name, policy)
  -- Adds a new policy with name `name`. Must be valid, checked using policy_schema.
  -- An already registered policy with the same name is overridden (a warning is logged).
  -- On validation failure an error is logged and nothing is registered.
  -- @param {string} name name under which the policy is stored
  -- @param {table} policy policy options to validate and store
  --]]
  exports.register_policy = function(name, policy)
    if policies[name] then
      rspamd_logger.warnx(rspamd_config, "overriding policy %s", name)
    end

    local parsed_policy, err = policy_schema:transform(policy)

    if not parsed_policy then
      rspamd_logger.errx(rspamd_config, 'invalid fuzzy rule policy %s: %s',
          name, err)
      return
    end

    -- Index by the *value* of `name`: `policies.name` would store the policy
    -- under the literal key "name", so it could never be looked up later
    policies[name] = parsed_policy
  end
  --[[[
  -- @function lua_fuzzy.process_rule(rule)
  -- Processes fuzzy rule (applying policies or defaults if needed).
  -- The rule is shallow-copied, merged with the selected policy (the rule's own
  -- values are passed to lua_util.override_defaults as overrides), its
  -- `mime_types` globs are compiled to regexps and the result is appended to
  -- the internal rules table.
  -- @param {table} rule rule options; `rule.policy` may name a registered policy
  -- @return {number} index of the processed rule in the internal rules table
  --]]
  exports.process_rule = function(rule)
    local processed_rule = lua_util.shallowcopy(rule)
    local policy = default_policy

    if processed_rule.policy then
      policy = policies[processed_rule.policy]

      if not policy then
        rspamd_logger.warnx(rspamd_config, "unknown policy %s", processed_rule.policy)
        -- Fall back to the default policy: without any defaults the rule lacks
        -- min_bytes/min_width/min_height and the size checks would compare nil
        -- with a number
        policy = default_policy
      end
    end

    if policy then
      processed_rule = lua_util.override_defaults(policy, processed_rule)
    end

    if processed_rule.mime_types then
      -- Compile each glob into a case-insensitive regexp
      processed_rule.mime_types = fun.totable(fun.map(function(gl)
        return rspamd_regexp.import_glob(gl, 'i')
      end, processed_rule.mime_types))
    end

    table.insert(rules, processed_rule)

    return #rules
  end
  -- Checks whether a part is long enough to be hashed according to `rule`.
  -- An empty part is never allowed. When `rule.min_bytes` is positive, the part
  -- length (for text parts: the text length multiplied by `rule.text_multiplier`
  -- when that is set) must reach `rule.min_bytes`.
  -- @param task task object, used for debug logging
  -- @param part mime part to check
  -- @param {table} rule processed rule
  -- @return {boolean} true if the part passes the length check
  local function check_length(task, part, rule)
    local bytes = part:get_length()
    local id = part:get_id()
    -- A missing limit means "no limit" instead of raising on a nil comparison
    local min_bytes = rule.min_bytes or 0

    lua_util.debugm(N, task, 'check size of part %s', id)

    if bytes <= 0 then
      -- Previously this case was logged as "allow part ..., no length limits"
      -- although the part was rejected
      lua_util.debugm(N, task, 'skip part %s, it is empty', id)
      return false
    end

    if min_bytes <= 0 then
      lua_util.debugm(N, task, 'allow part %s, no length limits', id)
      return true
    end

    local adjusted_bytes = bytes

    if part:is_text() then
      -- For text parts the length of the text object is what gets reported
      bytes = part:get_text():get_length()
      if rule.text_multiplier then
        adjusted_bytes = bytes * rule.text_multiplier
      end
    end

    if min_bytes > adjusted_bytes then
      lua_util.debugm(N, task, 'skip part of length %s (%s adjusted) ' ..
          'as it has less than %s bytes',
          bytes, adjusted_bytes, min_bytes)
      return false
    end

    lua_util.debugm(N, task, 'allow part of length %s (%s adjusted)',
        bytes, adjusted_bytes)

    return true
  end
  -- Decides which hash kinds are allowed for a text part.
  -- @param task task object, used for debug logging
  -- @param part mime part that holds the text
  -- @param {table} rule processed rule
  -- @param text text object of the part (must provide get_words_count)
  -- @return {boolean},{boolean} allow_direct, allow_shingles
  local function check_text_part(task, part, rule, text)
    local id = part:get_id()
    lua_util.debugm(N, task, 'check text part %s', id)
    local wcnt = text:get_words_count()

    if not rule.text_shingles then
      -- Shingles are turned off: only the direct hash is possible
      lua_util.debugm(N, task,
          'disable shingles in text %s', id)
      return check_length(task, part, rule), false
    end

    -- Shingles need at least 32 words whatever the rule says
    local min_words = math.max(rule.min_length or 0, 32)

    if wcnt >= min_words then
      lua_util.debugm(N, task, 'allow shingles in text %s, %s words',
          id, wcnt)
      return wcnt > 0, true
    end

    -- Report the threshold that was actually applied (min_words), not the raw
    -- rule.min_length which may be nil or lower than 32
    lua_util.debugm(N, task, 'text has less than %s words: %s; disable shingles',
        min_words, wcnt)

    if not rule.short_text_direct_hash then
      return false, false
    end

    lua_util.debugm(N, task,
        'allow direct hash for short text %s, %s words',
        id, wcnt)

    return check_length(task, part, rule), false
  end
  -- Tells whether the task has at least one text part with more than 32 words.
  -- @param task task object
  -- @return {boolean} true if such a text part exists
  local function has_sane_text_parts(task)
    local function is_large_enough(text_part)
      return text_part:get_words_count() > 32
    end

    local candidates = task:get_text_parts() or {}

    return fun.any(is_large_enough, candidates)
  end
  -- Decides whether an image part should be hashed.
  -- Images are skipped when `rule.skip_images` is set. An image smaller than
  -- the configured minimum dimensions is still allowed when the task has no
  -- text part with more than 32 words; otherwise it is skipped. In all other
  -- cases the decision is delegated to check_length.
  -- @param task task object
  -- @param part mime part that holds the image
  -- @param {table} rule processed rule
  -- @param image image object (must provide get_height/get_width)
  -- @return {boolean},{boolean} allow_direct, allow_shingles (shingles are never allowed here)
  local function check_image_part(task, part, rule, image)
    if rule.skip_images then
      lua_util.debugm(N, task, 'skip image part as images are disabled')
      return false,false
    end

    local id = part:get_id()
    lua_util.debugm(N, task, 'check image part %s', id)

    -- Resolve the limits before comparing them: a missing dimension falls back
    -- to the other one (or to 0 if both are missing) instead of raising on a
    -- nil comparison
    local min_width = rule.min_width or rule.min_height or 0
    local min_height = rule.min_height or rule.min_width or 0

    if min_width > 0 or min_height > 0 then
      -- Check dimensions
      local height = image:get_height()
      local width = image:get_width()

      if height and width then
        if height < min_height or width < min_width then
          if not has_sane_text_parts(task) then
            lua_util.debugm(N, task, 'allow image part %s (%sx%s): no large enough text part found',
                id, width, height)
            return true, false
          else
            lua_util.debugm(N, task, 'skip image part %s as it does not meet minimum sizes: %sx%s < %sx%s',
                id, width, height, min_width, min_height)
            return false, false
          end
        else
          lua_util.debugm(N, task, 'allow image part %s: %sx%s',
              id, width, height)
        end
      end
    end

    return check_length(task, part, rule),false
  end
  -- Decides whether a part should be hashed based on its mime type.
  -- A part flagged with '-' in the options of the MIME_TRACE symbol is checked
  -- regardless of its type; otherwise its declared or detected content type
  -- must match one of the regexps in `rule.mime_types`.
  -- @param task task object
  -- @param part mime part to check
  -- @param {table} rule processed rule
  -- @return {boolean},{boolean} allow_direct, allow_shingles (shingles are never allowed here)
  local function mime_types_check(task, part, rule)
    local t,st = part:get_type()

    if not t then return false, false end

    local ct = string.format('%s/%s', t, st)

    -- The detected type may be absent: format it only when present, otherwise
    -- reuse the declared type instead of producing "nil/nil" (or an error)
    local detected_ct = ct
    local dt,dst = part:get_detected_type()
    if dt then
      detected_ct = string.format('%s/%s', dt, dst)
    end

    local id = part:get_id()
    lua_util.debugm(N, task, 'check binary part %s: %s', id, ct)

    -- For bad mime mime parts we implicitly enable fuzzy check
    local mime_trace = (task:get_symbol('MIME_TRACE') or {})[1]
    local opts = {}

    if mime_trace then
      opts = mime_trace.options or opts
    end

    -- Turn "key:value" options into a key -> value map
    opts = fun.tomap(fun.map(function(opt)
      local elts = lua_util.str_split(opt, ':')
      return elts[1],elts[2]
    end, opts))

    if opts[id] == '-' then
      lua_util.debugm(N, task, 'explicitly check binary part %s: bad mime type %s', id, ct)
      return check_length(task, part, rule),false
    end

    if not rule.mime_types then
      return false,false
    end

    local function matches_type(gl_re)
      if gl_re:match(ct) or gl_re:match(detected_ct) then
        return true
      end

      return false
    end

    if fun.any(matches_type, rule.mime_types) then
      lua_util.debugm(N, task, 'found mime type match for part %s: %s (%s detected)',
          id, ct, detected_ct)
      return check_length(task, part, rule),false
    end

    return false,false
  end
  -- Entry point: decides whether `part` should be hashed for the rule stored
  -- under `rule_id`, dispatching on the kind of the part.
  -- @param task task object
  -- @param part mime part to check
  -- @param {number} rule_id index of the rule as returned by process_rule
  -- @return {boolean},{boolean} allow_direct, allow_shingles
  exports.check_mime_part = function(task, part, rule_id)
    local fuzzy_rule = rules[rule_id]

    if not fuzzy_rule then
      rspamd_logger.errx(task, 'cannot find rule with id %s', rule_id)
      return false,false
    end

    -- Text and image parts have dedicated checks
    if part:is_text() then
      local text = part:get_text()
      return check_text_part(task, part, fuzzy_rule, text)
    end

    if part:is_image() then
      local image = part:get_image()
      return check_image_part(task, part, fuzzy_rule, image)
    end

    if part:is_archive() then
      if fuzzy_rule.scan_archives then
        -- Always send archives
        lua_util.debugm(N, task, 'check archive part %s', part:get_id())
        return true,false
      end
    end

    if part:is_specific() then
      local specific = part:get_specific()
      local has_hashes = type(specific) == 'table' and specific.fuzzy_hashes

      if has_hashes then
        lua_util.debugm(N, task, 'check specific part %s', part:get_id())
        return true,false
      end
    end

    -- Anything else is considered only when it is an attachment
    if not part:is_attachment() then
      return false,false
    end

    return mime_types_check(task, part, fuzzy_rule)
  end
  -- Drops all processed rules by replacing the rules table with a fresh one
  function exports.cleanup_rules()
    rules = {}
  end

  return exports