2018-11-01 18:20:23 +01:00
|
|
|
--[[
|
|
|
|
Copyright (c) 2018, Vsevolod Stakhov <vsevolod@highsecure.ru>
|
|
|
|
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
you may not use this file except in compliance with the License.
|
|
|
|
You may obtain a copy of the License at
|
|
|
|
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
See the License for the specific language governing permissions and
|
|
|
|
limitations under the License.
|
|
|
|
]]--
|
|
|
|
|
|
|
|
--[[[
|
|
|
|
-- @module lua_fuzzy
|
|
|
|
-- This module contains helper functions for supporting fuzzy check module
|
|
|
|
--]]
|
|
|
|
|
|
|
|
|
|
|
|
local N = "lua_fuzzy"
|
|
|
|
local lua_util = require "lua_util"
|
2018-11-05 15:30:51 +01:00
|
|
|
local rspamd_regexp = require "rspamd_regexp"
|
|
|
|
local fun = require "fun"
|
2018-11-01 18:20:23 +01:00
|
|
|
local rspamd_logger = require "rspamd_logger"
|
|
|
|
local ts = require("tableshape").types
|
|
|
|
|
|
|
|
-- Filled on behalf of C code (through process_rule); each rule is addressed by its numeric index in this table
|
|
|
|
local rules = {}
|
|
|
|
|
|
|
|
-- Pre-defined rules options, keyed by policy name
local policies = {
  recommended = {
    -- Size limits
    min_bytes = 1024, -- parts smaller than this are skipped (0 disables the check)
    text_multiplier = 4.0, -- divide min_bytes by 4 for texts
    min_length = 32, -- minimum number of words required to use shingles

    -- Image handling
    skip_images = false,
    min_width = 500,
    min_height = 500,

    -- What is checked and how
    text_shingles = true,
    short_text_direct_hash = true, -- short texts may still be hashed directly
    scan_archives = true,
    mime_types = {"application/*"}, -- globs matched against attachment types
  }
}

-- Policy applied to rules that do not name one
local default_policy = policies.recommended
|
|
|
|
|
|
|
|
-- Schema used to validate (and normalise) policies passed to register_policy.
-- Numeric limits are also accepted as strings and converted with `tonumber`.
local policy_schema = ts.shape{
  min_bytes = ts.number + ts.string / tonumber,
  min_height = ts.number + ts.string / tonumber,
  min_width = ts.number + ts.string / tonumber,
  min_length = ts.number + ts.string / tonumber,
  text_multiplier = ts.number,
  mime_types = ts.array_of(ts.string),
  scan_archives = ts.boolean,
  short_text_direct_hash = ts.boolean,
  text_shingles = ts.boolean,
  -- Must match the option name read by the image check (`rule.skip_images`)
  skip_images = ts.boolean,
}
|
|
|
|
|
|
|
|
|
|
|
|
local exports = {}
|
|
|
|
|
|
|
|
|
|
|
|
--[[[
-- @function lua_fuzzy.register_policy(name, policy)
-- Adds a new policy with name `name`. Must be valid, checked using policy_schema
-- An existing policy with the same name is overridden (a warning is logged).
-- An invalid policy is logged as an error and not registered.
-- @param {string} name name of the policy
-- @param {table} policy policy options
--]]
exports.register_policy = function(name, policy)
  if policies[name] then
    rspamd_logger.warnx(rspamd_config, "overriding policy %s", name)
  end

  local parsed_policy,err = policy_schema:transform(policy)

  if not parsed_policy then
    rspamd_logger.errx(rspamd_config, 'invalid fuzzy rule policy %s: %s',
        name, err)

    return
  end

  -- Index by the value of `name`: `policies.name` would store every policy
  -- under the literal key "name", so it could never be found by process_rule
  policies[name] = parsed_policy
end
|
|
|
|
|
|
|
|
--[[[
-- @function lua_fuzzy.process_rule(rule)
-- Processes fuzzy rule (applying policies or defaults if needed).
-- The rule's own options take precedence over the policy ones (assuming
-- `lua_util.override_defaults` behaves as its name suggests); an unknown
-- policy name is reported and the default policy is used instead.
-- @param {table} rule rule options, optionally including a `policy` name
-- @return {number} id of the processed rule (its index in the internal rules
-- table), to be passed to `check_mime_part`
--]]
exports.process_rule = function(rule)
  local processed_rule = lua_util.shallowcopy(rule)
  local policy = default_policy

  if processed_rule.policy then
    policy = policies[processed_rule.policy]

    if not policy then
      rspamd_logger.warnx(rspamd_config, "unknown policy %s", processed_rule.policy)
      -- Without any policy the rule would lack the limits that the part
      -- checks compare against, so fall back to the defaults
      policy = default_policy
    end
  end

  processed_rule = lua_util.override_defaults(policy, processed_rule)

  if processed_rule.mime_types then
    -- Compile the globs once, into a fresh table (the policy's own list of
    -- glob strings is left untouched)
    processed_rule.mime_types = fun.totable(fun.map(function(gl)
      return rspamd_regexp.import_glob(gl, 'i')
    end, processed_rule.mime_types))
  end

  table.insert(rules, processed_rule)
  return #rules
end
|
|
|
|
|
2018-11-02 10:53:52 +01:00
|
|
|
-- Checks whether `part` is large enough to be hashed according to `rule`.
-- Empty parts are always rejected. When `rule.min_bytes` is missing or zero
-- no further limit is applied. For text parts the length of the extracted
-- text is used, multiplied by `rule.text_multiplier` when it is set.
-- @param task current task (used for logging only)
-- @param part mime part to check
-- @param rule processed rule (see process_rule)
-- @return true if the part passes the length check, false otherwise
local function check_length(task, part, rule)
  local bytes = part:get_length()
  local length_ok = bytes > 0

  local id = part:get_id()
  lua_util.debugm(N, task, 'check size of part %s', id)

  -- A rule without min_bytes is treated as a rule without length limits
  local min_bytes = rule.min_bytes or 0

  if length_ok and min_bytes > 0 then
    local adjusted_bytes = bytes

    if part:is_text() then
      bytes = part:get_text():get_length()
      -- Keep the compared size consistent with the reported text length even
      -- when no multiplier is configured
      adjusted_bytes = bytes * (rule.text_multiplier or 1)
    end

    if min_bytes > adjusted_bytes then
      lua_util.debugm(N, task, 'skip part of length %s (%s adjusted) ' ..
          'as it has less than %s bytes',
          bytes, adjusted_bytes, min_bytes)
      length_ok = false
    else
      lua_util.debugm(N, task, 'allow part of length %s (%s adjusted)',
          bytes, adjusted_bytes)
    end
  else
    lua_util.debugm(N, task, 'allow part %s, no length limits', id)
  end

  return length_ok
end
|
|
|
|
|
|
|
|
-- Decides how a text part may be checked.
-- @param task current task (used for logging only)
-- @param part mime part being checked
-- @param rule processed rule (see process_rule)
-- @param text text object of the part
-- @return two booleans: whether a direct hash is allowed and whether
-- shingles are allowed
local function check_text_part(task, part, rule, text)
  local part_id = part:get_id()
  lua_util.debugm(N, task, 'check text part %s', part_id)
  local words = text:get_words_count()

  if not rule.text_shingles then
    -- Shingles are switched off: only the length check matters
    lua_util.debugm(N, task,
        'disable shingles in text %s', part_id)
    return check_length(task, part, rule), false
  end

  -- Check number of words
  local use_shingles
  if rule.min_length > 0 and words < rule.min_length then
    lua_util.debugm(N, task, 'text has less than %s words: %s; disable shingles',
        rule.min_length, words)
    use_shingles = false
  else
    lua_util.debugm(N, task, 'allow shingles in text %s, %s words',
        part_id, words)
    use_shingles = words > 0
  end

  if use_shingles then
    return words > 0, true
  end

  if not rule.short_text_direct_hash then
    -- Too short for shingles and direct hashing of short texts is disabled
    return false, false
  end

  lua_util.debugm(N, task,
      'allow direct hash for short text %s, %s words',
      part_id, words)
  return check_length(task, part, rule), false
end
|
|
|
|
|
2018-11-06 18:48:01 +01:00
|
|
|
-- Tells whether the task has at least one text part with more than 10 words.
-- @param task task to inspect
-- @return true if such a text part exists, false otherwise
local function has_sane_text_parts(task)
  local function is_long_enough(tp)
    return tp:get_words_count() > 10
  end

  local text_parts = task:get_text_parts()
  if not text_parts then
    text_parts = {}
  end

  return fun.any(is_long_enough, text_parts)
end
|
|
|
|
|
2018-11-02 16:00:30 +01:00
|
|
|
-- Decides whether an image part may be checked.
-- Images are rejected outright when `rule.skip_images` is set. When size
-- limits are configured, an image smaller than the limits is still allowed
-- if the message has no text part that is large enough, and rejected
-- otherwise. In all remaining cases the length check decides.
-- @param task current task
-- @param part mime part being checked
-- @param rule processed rule (see process_rule)
-- @param image image object of the part
-- @return two booleans: whether a direct hash is allowed and whether
-- shingles are allowed (always false for images)
local function check_image_part(task, part, rule, image)
  if rule.skip_images then
    lua_util.debugm(N, task, 'skip image part as images are disabled')
    return false,false
  end

  local id = part:get_id()
  lua_util.debugm(N, task, 'check image part %s', id)

  -- Each limit falls back to the other one; a rule with neither has no
  -- dimension limits (comparing a missing limit with 0 would raise)
  local min_width = rule.min_width or rule.min_height or 0
  local min_height = rule.min_height or rule.min_width or 0

  if min_width > 0 or min_height > 0 then
    -- Check dimensions
    local height = image:get_height()
    local width = image:get_width()

    if height and width then
      if height < min_height or width < min_width then
        if not has_sane_text_parts(task) then
          lua_util.debugm(N, task, 'allow image part %s (%sx%s): no large enough text part found',
              id, width, height)
          return true, false
        else
          lua_util.debugm(N, task, 'skip image part %s as it does not meet minimum sizes: %sx%s < %sx%s',
              id, width, height, min_width, min_height)
          return false, false
        end
      else
        lua_util.debugm(N, task, 'allow image part %s: %sx%s',
            id, width, height)
      end
    end
  end

  return check_length(task, part, rule),false
end
|
|
|
|
|
|
|
|
-- Decides whether a binary part may be checked, based on its mime type.
-- A part is passed to the length check when either the MIME_TRACE symbol
-- marks it with '-' (bad mime type) or its declared/detected content type
-- matches one of `rule.mime_types`.
-- @param task current task
-- @param part mime part being checked
-- @param rule processed rule (see process_rule); `mime_types` holds compiled globs
-- @return two booleans: whether a direct hash is allowed and whether
-- shingles are allowed (always false here)
local function mime_types_check(task, part, rule)
  local t,st = part:get_type()

  if not t then return false, false end

  local ct = string.format('%s/%s', t, st)

  -- The detected type may be unavailable: formatting nil values would either
  -- raise or produce a bogus "nil/nil" type, so use the declared type then
  local detected_ct = ct
  local dt,dst = part:get_detected_type()
  if dt then
    detected_ct = string.format('%s/%s', dt, dst)
  end

  local id = part:get_id()
  lua_util.debugm(N, task, 'check binary part %s: %s', id, ct)

  -- For bad mime parts we implicitly enable fuzzy check
  local mime_trace = (task:get_symbol('MIME_TRACE') or {})[1]
  local opts = {}

  if mime_trace then
    opts = mime_trace.options or opts
  end

  -- Options have the form `<part id>:<flag>`; turn them into a lookup table
  opts = fun.tomap(fun.map(function(opt)
    local elts = lua_util.str_split(opt, ':')
    return elts[1],elts[2]
  end, opts))

  if opts[id] == '-' then
    lua_util.debugm(N, task, 'explicitly check binary part %s: bad mime type %s', id, ct)
    return check_length(task, part, rule),false
  end

  if rule.mime_types then
    local matched = fun.any(function(gl_re)
      if gl_re:match(ct) or (detected_ct ~= ct and gl_re:match(detected_ct)) then
        return true
      end

      return false
    end, rule.mime_types)

    if matched then
      lua_util.debugm(N, task, 'found mime type match for part %s: %s (%s detected)',
          id, ct, detected_ct)
      return check_length(task, part, rule),false
    end
  end

  return false,false
end
|
|
|
|
|
2018-11-02 10:53:52 +01:00
|
|
|
--[[[
-- @function lua_fuzzy.check_mime_part(task, part, rule_id)
-- Checks a mime part against the rule registered under `rule_id`.
-- Text, image, archive and attachment parts are examined in this order;
-- any other part is rejected.
-- @return two booleans: whether a direct hash is allowed and whether
-- shingles are allowed
--]]
exports.check_mime_part = function(task, part, rule_id)
  local rule = rules[rule_id]

  if not rule then
    rspamd_logger.errx(task, 'cannot find rule with id %s', rule_id)

    return false,false
  end

  if part:is_text() then
    return check_text_part(task, part, rule, part:get_text())
  elseif part:is_image() then
    return check_image_part(task, part, rule, part:get_image())
  elseif part:is_archive() and rule.scan_archives then
    -- Always send archives
    lua_util.debugm(N, task, 'check archive part %s', part:get_id())

    return true,false
  elseif part:is_attachment() then
    return mime_types_check(task, part, rule)
  end

  return false,false
end
|
|
|
|
|
2018-11-06 15:28:51 +01:00
|
|
|
--[[[
-- @function lua_fuzzy.cleanup_rules()
-- Forgets all processed rules by replacing the rules table with an empty one
--]]
function exports.cleanup_rules()
  rules = {}
end
|
|
|
|
|
2018-11-01 18:20:23 +01:00
|
|
|
return exports
|