summaryrefslogtreecommitdiffstats
path: root/lualib/lua_fuzzy.lua
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2018-11-02 09:53:52 +0000
committerVsevolod Stakhov <vsevolod@highsecure.ru>2018-11-02 09:53:52 +0000
commit2a0b5e9c411c8a63da348de9aaef706275189260 (patch)
treef3b23197051aad4094a640ec65fe01273c7374f7 /lualib/lua_fuzzy.lua
parentf81dd0be8c5a13b8f75871e7011982d9de66c988 (diff)
downloadrspamd-2a0b5e9c411c8a63da348de9aaef706275189260.tar.gz
rspamd-2a0b5e9c411c8a63da348de9aaef706275189260.zip
[Project] Add logic for checking of the text parts
Diffstat (limited to 'lualib/lua_fuzzy.lua')
-rw-r--r--lualib/lua_fuzzy.lua73
1 files changed, 71 insertions, 2 deletions
diff --git a/lualib/lua_fuzzy.lua b/lualib/lua_fuzzy.lua
index 5003c821c..663560021 100644
--- a/lualib/lua_fuzzy.lua
+++ b/lualib/lua_fuzzy.lua
@@ -34,10 +34,12 @@ local policies = {
min_bytes = 1024,
min_height = 500,
min_width = 500,
- min_length = 100, -- short words multiplier
+ min_length = 32,
text_multiplier = 4.0, -- divide min_bytes by 4 for texts
- mime_types = {"*"},
+ mime_types = {"application/*"},
+ scan_archives = true,
short_text_direct_hash = true,
+ text_shingles = true,
}
}
@@ -50,7 +52,9 @@ local policy_schema = ts.shape{
min_length = ts.number + ts.string / tonumber,
text_multiplier = ts.number,
mime_types = ts.array_of(ts.string),
+ scan_archives = ts.bool,
short_text_direct_hash = ts.bool,
+ text_shingles = ts.bool,
}
@@ -100,4 +104,69 @@ exports.process_rule = function(rule)
return #rules
end
+local function check_length(task, part, rule)
+ local length_ok = true
+
+ if rule.min_bytes then
+ local bytes = part:get_length()
+ local adjusted_bytes = bytes
+
+ if part:is_text() then
+ if rule.text_multiplier then
+ adjusted_bytes = bytes * rule.text_multiplier
+ end
+ end
+
+ if rule.min_bytes > adjusted_bytes then
+ lua_util.debugm(N, task, 'skip part of length %s (%s adjusted)' ..
+ 'as it has less than %s bytes',
+ bytes, adjusted_bytes, rule.min_bytes)
+ length_ok = false
+ end
+ end
+
+ return length_ok
+end
+
+local function check_text_part(task, part, rule, text)
+ local allow_direct,allow_shingles = false,false
+
+ if rule.text_shingles then
+ -- Check number of words
+ local wcnt = text:get_words_count()
+ if rule.min_length and wcnt < rule.min_length then
+ lua_util.debugm(N, task, 'text has less than %s words: %s',
+ rule.min_length, wcnt)
+ allow_shingles = false
+ else
+ allow_shingles = true
+ end
+
+ if not rule.short_text_direct_hash and not allow_shingles then
+ allow_direct = false
+ else
+ allow_direct = check_length(task, part, rule)
+ end
+
+ else
+ allow_direct = check_length(task, part, rule)
+ end
+
+ return allow_direct,allow_shingles
+end
+
+exports.check_mime_part = function(task, part, rule_id)
+ local rule = rules[rule_id]
+
+ if not rule then
+ rspamd_logger.errx(task, 'cannot find rule with id %s', rule_id)
+
+ return false,false
+ end
+
+ if part:is_text() then
+ return check_text_part(task, part, rule, part:get_text())
+ end
+end
+
return exports \ No newline at end of file