From 2a0b5e9c411c8a63da348de9aaef706275189260 Mon Sep 17 00:00:00 2001
From: Vsevolod Stakhov
Date: Fri, 2 Nov 2018 09:53:52 +0000
Subject: [PATCH] [Project] Add logic for checking of the text parts

---
Review notes (everything between the `---` line and the diff is ignored
when the patch is applied):
 - NOTE(review): leading whitespace of all diff lines was reconstructed
   assuming two-space indentation; confirm against lualib/lua_fuzzy.lua.
 - The debug message in check_length got the missing space between
   "adjusted)" and "as it has".
 - check_mime_part now returns false,false explicitly for non-text parts
   instead of returning nothing.
 - The blob ids on the `index` line are those of the original patch.

 lualib/lua_fuzzy.lua | 90 ++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 87 insertions(+), 2 deletions(-)

diff --git a/lualib/lua_fuzzy.lua b/lualib/lua_fuzzy.lua
index 5003c821c..663560021 100644
--- a/lualib/lua_fuzzy.lua
+++ b/lualib/lua_fuzzy.lua
@@ -34,10 +34,12 @@ local policies = {
     min_bytes = 1024,
     min_height = 500,
     min_width = 500,
-    min_length = 100, -- short words multiplier
+    min_length = 32,
     text_multiplier = 4.0, -- divide min_bytes by 4 for texts
-    mime_types = {"*"},
+    mime_types = {"application/*"},
+    scan_archives = true,
     short_text_direct_hash = true,
+    text_shingles = true,
   }
 }
 
@@ -50,7 +52,9 @@ local policy_schema = ts.shape{
   min_length = ts.number + ts.string / tonumber,
   text_multiplier = ts.number,
   mime_types = ts.array_of(ts.string),
+  scan_archives = ts.bool,
   short_text_direct_hash = ts.bool,
+  text_shingles = ts.bool,
 }
 
 
@@ -100,4 +104,85 @@ exports.process_rule = function(rule)
   return #rules
 end
 
+-- Checks whether a part is long enough to be hashed according to a rule.
+-- When the rule defines min_bytes, the part length (multiplied by
+-- text_multiplier for text parts, if that is set) must not be below it.
+-- Returns true if the part passes the check or no limit is set.
+local function check_length(task, part, rule)
+  local length_ok = true
+
+  if rule.min_bytes then
+    local bytes = part:get_length()
+    local adjusted_bytes = bytes
+
+    if part:is_text() then
+      if rule.text_multiplier then
+        adjusted_bytes = bytes * rule.text_multiplier
+      end
+    end
+
+    if rule.min_bytes > adjusted_bytes then
+      lua_util.debugm(N, task, 'skip part of length %s (%s adjusted) ' ..
+        'as it has less than %s bytes',
+        bytes, adjusted_bytes, rule.min_bytes)
+      length_ok = false
+    end
+  end
+
+  return length_ok
+end
+
+-- Decides how a text part may be hashed.
+-- Returns two booleans: allow_direct (the result of check_length unless
+-- direct hashing is ruled out below) and allow_shingles (true only when
+-- text_shingles is set and the word count is not below min_length).
+local function check_text_part(task, part, rule, text)
+  local allow_direct,allow_shingles = false,false
+
+  if rule.text_shingles then
+    -- Check number of words
+    local wcnt = text:get_words_count()
+    if rule.min_length and wcnt < rule.min_length then
+      lua_util.debugm(N, task, 'text has less than %s words: %s',
+        rule.min_length, wcnt)
+      allow_shingles = false
+    else
+      allow_shingles = true
+    end
+
+    -- A text that is too short for shingles is considered for direct
+    -- hashing only when short_text_direct_hash is set
+    if not rule.short_text_direct_hash and not allow_shingles then
+      allow_direct = false
+    else
+      allow_direct = check_length(task, part, rule)
+    end
+
+  else
+    allow_direct = check_length(task, part, rule)
+  end
+
+  return allow_direct,allow_shingles
+end
+
+-- Checks a MIME part against the rule stored under rule_id.
+-- Returns allow_direct,allow_shingles; both are false when the rule is
+-- unknown or the part is not a text part.
+exports.check_mime_part = function(task, part, rule_id)
+  local rule = rules[rule_id]
+
+  if not rule then
+    rspamd_logger.errx(task, 'cannot find rule with id %s', rule_id)
+
+    return false,false
+  end
+
+  if part:is_text() then
+    return check_text_part(task, part, rule, part:get_text())
+  end
+
+  -- Only text parts are handled here: report other parts explicitly
+  return false,false
+end
+
 return exports
\ No newline at end of file
-- 
2.39.5