123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321 |
- --[[
- Copyright (c) 2018, Vsevolod Stakhov <vsevolod@highsecure.ru>
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- ]]--
-
- --[[[
- -- @module lua_fuzzy
- -- This module contains helper functions for supporting fuzzy check module
- --]]
-
-
- local N = "lua_fuzzy"
- local lua_util = require "lua_util"
- local rspamd_regexp = require "rspamd_regexp"
- local fun = require "fun"
- local rspamd_logger = require "rspamd_logger"
- local ts = require("tableshape").types
-
- -- Filled by C code, indexed by number in this table
- local rules = {}
-
-- Built-in policies keyed by name. Each policy is a full set of default
-- options that is merged into a fuzzy rule selecting it.
local policies = {
  recommended = {
    -- Size thresholds
    min_bytes = 1024,  -- compared with the (adjusted) part length
    min_length = 32,   -- compared with the words count of a text part
    min_width = 500,   -- compared with the image width
    min_height = 500,  -- compared with the image height
    text_multiplier = 4.0, -- divide min_bytes by 4 for texts

    -- Content selection
    mime_types = {"application/*"},
    scan_archives = true,
    skip_images = false,

    -- Text hashing modes
    text_shingles = true,
    short_text_direct_hash = true,
  }
}

-- Policy used by rules that do not name one explicitly
local default_policy = policies.recommended
-
-- Validation schema applied to user supplied policies in `register_policy`.
-- Numeric thresholds also accept strings, which are converted with `tonumber`.
-- The keys mirror the options of the built-in `recommended` policy; the image
-- switch is spelled `skip_images`, which is the name read by the image check.
local policy_schema = ts.shape{
  min_bytes = ts.number + ts.string / tonumber,
  min_height = ts.number + ts.string / tonumber,
  min_width = ts.number + ts.string / tonumber,
  min_length = ts.number + ts.string / tonumber,
  text_multiplier = ts.number,
  mime_types = ts.array_of(ts.string),
  scan_archives = ts.boolean,
  short_text_direct_hash = ts.boolean,
  text_shingles = ts.boolean,
  skip_images = ts.boolean,
}
-
-
- local exports = {}
-
-
--[[[
-- @function lua_fuzzy.register_policy(name, policy)
-- Adds a new policy with name `name`. The policy is validated (and its values
-- are transformed) using `policy_schema`; an invalid policy is logged and
-- ignored. Registering an existing name overrides it with a warning.
-- @param {string} name policy name used later in `rule.policy`
-- @param {table} policy policy options
--]]
exports.register_policy = function(name, policy)
  if policies[name] then
    rspamd_logger.warnx(rspamd_config, "overriding policy %s", name)
  end

  local parsed_policy, err = policy_schema:transform(policy)

  if not parsed_policy then
    rspamd_logger.errx(rspamd_config, 'invalid fuzzy rule policy %s: %s',
        name, err)

    return
  end

  -- Index by the *value* of `name`: `policies.name` would store every policy
  -- under the literal key "name", making it unreachable by its real name
  policies[name] = parsed_policy
end
-
--[[[
-- @function lua_fuzzy.process_rule(rule)
-- Processes fuzzy rule (applying policies or defaults if needed).
-- The rule itself is not modified: a shallow copy is merged with the selected
-- policy, its `mime_types` globs are compiled to regexps and the result is
-- appended to the internal rules table.
-- @param {table} rule fuzzy rule options, may contain a `policy` name
-- @return {number} index of the processed rule in the rules table (this is
-- the `rule_id` expected by `check_mime_part`)
--]]
exports.process_rule = function(rule)
  local processed_rule = lua_util.shallowcopy(rule)
  local policy = default_policy

  if processed_rule.policy then
    policy = policies[processed_rule.policy]

    if not policy then
      rspamd_logger.warnx(rspamd_config, "unknown policy %s", processed_rule.policy)
      -- Fall back to the default policy: a rule without any policy applied
      -- may lack the numeric thresholds that the part checks compare against
      policy = default_policy
    end
  end

  -- NOTE(review): `override_defaults` is presumed to return the policy values
  -- overridden by whatever the rule sets explicitly - confirm in lua_util
  processed_rule = lua_util.override_defaults(policy, processed_rule)

  if processed_rule.mime_types then
    -- Compile globs once here so that they are not re-imported per part
    processed_rule.mime_types = fun.totable(fun.map(function(gl)
      return rspamd_regexp.import_glob(gl, 'i')
    end, processed_rule.mime_types))
  end

  table.insert(rules, processed_rule)
  return #rules
end
-
-- Checks whether `part` is long enough to be processed by `rule`.
-- Empty parts are always rejected. When the rule has a positive `min_bytes`,
-- the part length must reach it; for text parts the length of the text is
-- used and it is multiplied by `rule.text_multiplier` (if set) before the
-- comparison.
-- @param task task object (used for debug logging)
-- @param part mime part to check
-- @param rule processed fuzzy rule
-- @return {boolean} true if the part passes the length check
local function check_length(task, part, rule)
  local bytes = part:get_length()
  local length_ok = bytes > 0
  -- A rule without `min_bytes` means no limit instead of a nil comparison error
  local min_bytes = rule.min_bytes or 0

  local id = part:get_id()
  lua_util.debugm(N, task, 'check size of part %s', id)

  if length_ok and min_bytes > 0 then

    local adjusted_bytes = bytes

    if part:is_text() then
      bytes = part:get_text():get_length()
      -- Keep the adjusted value in sync with the text length even when no
      -- multiplier is configured, otherwise it would still hold the raw
      -- part length
      adjusted_bytes = bytes
      if rule.text_multiplier then
        adjusted_bytes = bytes * rule.text_multiplier
      end
    end

    if min_bytes > adjusted_bytes then
      lua_util.debugm(N, task, 'skip part of length %s (%s adjusted) ' ..
          'as it has less than %s bytes',
          bytes, adjusted_bytes, min_bytes)
      length_ok = false
    else
      lua_util.debugm(N, task, 'allow part of length %s (%s adjusted)',
          bytes, adjusted_bytes)
    end
  else
    lua_util.debugm(N, task, 'allow part %s, no length limits', id)
  end

  return length_ok
end
-
-- Decides how a text part should be hashed under `rule`.
-- @param task task object (used for debug logging)
-- @param part mime part being checked
-- @param rule processed fuzzy rule
-- @param text text part object of `part`
-- @return {boolean,boolean} allow direct hash, allow shingles
local function check_text_part(task, part, rule, text)
  local part_id = part:get_id()
  lua_util.debugm(N, task, 'check text part %s', part_id)
  local nwords = text:get_words_count()

  -- Shingles are disabled by the rule: only the length check matters
  if not rule.text_shingles then
    lua_util.debugm(N, task,
        'disable shingles in text %s', part_id)
    local direct = check_length(task, part, rule)
    return direct, false
  end

  -- Check number of words
  local use_shingles
  if rule.min_length > 0 and nwords < rule.min_length then
    lua_util.debugm(N, task, 'text has less than %s words: %s; disable shingles',
        rule.min_length, nwords)
    use_shingles = false
  else
    lua_util.debugm(N, task, 'allow shingles in text %s, %s words',
        part_id, nwords)
    use_shingles = nwords > 0
  end

  if use_shingles then
    return nwords > 0, true
  end

  -- No shingles: a direct hash is possible only if the rule permits it
  if not rule.short_text_direct_hash then
    return false, false
  end

  lua_util.debugm(N, task,
      'allow direct hash for short text %s, %s words',
      part_id, nwords)
  local direct = check_length(task, part, rule)
  return direct, false
end
-
-- Tells whether the task has at least one text part with more than 10 words.
-- @param task task object
-- @return {boolean} true if such a text part exists
local function has_sane_text_parts(task)
  local parts = task:get_text_parts()

  if not parts then
    return false
  end

  for _, tp in ipairs(parts) do
    if tp:get_words_count() > 10 then
      return true
    end
  end

  return false
end
-
-- Decides whether an image part should be hashed under `rule`.
-- Images smaller than the configured dimensions are accepted only when the
-- message has no sufficiently large text part; everything else is subject to
-- the generic length check.
-- @param task task object
-- @param part mime part being checked
-- @param rule processed fuzzy rule
-- @param image image object of `part`
-- @return {boolean,boolean} allow direct hash, allow shingles (always false)
local function check_image_part(task, part, rule, image)
  if rule.skip_images then
    lua_util.debugm(N, task, 'skip image part as images are disabled')
    return false,false
  end

  local part_id = part:get_id()
  lua_util.debugm(N, task, 'check image part %s', part_id)

  local has_limits = rule.min_width > 0 or rule.min_height > 0

  if has_limits then
    -- Check dimensions
    local want_width = rule.min_width or rule.min_height
    local want_height = rule.min_height or rule.min_width
    local img_height = image:get_height()
    local img_width = image:get_width()

    if img_height and img_width then
      local large_enough =
          not (img_height < want_height or img_width < want_width)

      if large_enough then
        lua_util.debugm(N, task, 'allow image part %s: %sx%s',
            part_id, img_width, img_height)
      elseif has_sane_text_parts(task) then
        lua_util.debugm(N, task, 'skip image part %s as it does not meet minimum sizes: %sx%s < %sx%s',
            part_id, img_width, img_height, want_width, want_height)
        return false, false
      else
        lua_util.debugm(N, task, 'allow image part %s (%sx%s): no large enough text part found',
            part_id, img_width, img_height)
        return true, false
      end
    end
  end

  local direct = check_length(task, part, rule)
  return direct, false
end
-
-- Decides whether an attachment part should be hashed under `rule`.
-- A part is subject to the length check when either the `MIME_TRACE` symbol
-- marks it with '-' or its declared/detected content type matches one of the
-- compiled globs in `rule.mime_types`; otherwise it is skipped.
-- @param task task object
-- @param part mime part being checked
-- @param rule processed fuzzy rule
-- @return {boolean,boolean} allow direct hash, allow shingles (always false)
local function mime_types_check(task, part, rule)
  local t,st = part:get_type()

  if not t then return false, false end

  local ct = string.format('%s/%s', t, st)

  -- The detected type may be absent: fall back to the declared type instead
  -- of formatting nils into a bogus "nil/nil" content type
  local detected_ct = ct
  local dt,dst = part:get_detected_type()
  if dt then
    detected_ct = string.format('%s/%s', dt, dst)
  end

  local id = part:get_id()
  lua_util.debugm(N, task, 'check binary part %s: %s', id, ct)

  -- For bad mime parts we implicitly enable fuzzy check
  local mime_trace = (task:get_symbol('MIME_TRACE') or {})[1]
  local opts = {}

  if mime_trace then
    opts = mime_trace.options or opts
  end
  -- Options are split on ':' into a map of first element -> second element
  opts = fun.tomap(fun.map(function(opt)
    local elts = lua_util.str_split(opt, ':')
    return elts[1],elts[2]
  end, opts))

  -- Keys produced by the split are strings, so look the id up as a string
  -- NOTE(review): assumes the first element of an option is the part id -
  -- confirm against the MIME_TRACE producer
  if opts[tostring(id)] == '-' then
    lua_util.debugm(N, task, 'explicitly check binary part %s: bad mime type %s', id, ct)
    return check_length(task, part, rule),false
  end

  if rule.mime_types then

    if fun.any(function(gl_re)
      if gl_re:match(ct) or (detected_ct and gl_re:match(detected_ct)) then
        return true
      else
        return false
      end
    end, rule.mime_types) then
      lua_util.debugm(N, task, 'found mime type match for part %s: %s (%s detected)',
          id, ct, detected_ct)
      return check_length(task, part, rule),false
    end

    return false, false
  end

  return false,false
end
-
--[[[
-- @function lua_fuzzy.check_mime_part(task, part, rule_id)
-- Dispatches a mime part to the check matching its kind (text, image,
-- archive or attachment) for the rule stored under `rule_id`.
-- @param task task object
-- @param part mime part to check
-- @param {number} rule_id index returned by `process_rule`
-- @return {boolean,boolean} allow direct hash, allow shingles
--]]
function exports.check_mime_part(task, part, rule_id)
  local selected = rules[rule_id]

  if selected == nil then
    rspamd_logger.errx(task, 'cannot find rule with id %s', rule_id)

    return false,false
  end

  if part:is_text() then
    return check_text_part(task, part, selected, part:get_text())
  elseif part:is_image() then
    return check_image_part(task, part, selected, part:get_image())
  elseif part:is_archive() and selected.scan_archives then
    -- Always send archives
    lua_util.debugm(N, task, 'check archive part %s', part:get_id())

    return true,false
  elseif part:is_attachment() then
    return mime_types_check(task, part, selected)
  end

  -- Any other kind of part is not checked
  return false,false
end
-
--[[[
-- @function lua_fuzzy.cleanup_rules()
-- Drops all processed rules by replacing the rules table with an empty one
--]]
function exports.cleanup_rules()
  rules = {}
end
-
- return exports
|