summaryrefslogtreecommitdiffstats
path: root/lualib
diff options
context:
space:
mode:
Diffstat (limited to 'lualib')
-rw-r--r--lualib/meta_functions.lua121
1 files changed, 117 insertions, 4 deletions
diff --git a/lualib/meta_functions.lua b/lualib/meta_functions.lua
index f8e951066..1394204c1 100644
--- a/lualib/meta_functions.lua
+++ b/lualib/meta_functions.lua
@@ -16,6 +16,8 @@ limitations under the License.
local exports = {}
+local N = "metatokens"
+
-- Metafunctions
local function meta_size_function(task)
local sizes = {
@@ -115,7 +117,7 @@ local function meta_encoding_function(task)
local nother = 0
local tp = task:get_text_parts()
- if tp then
+ if tp and #tp > 0 then
for _,p in ipairs(tp) do
if p:is_utf() then
nutf = nutf + 1
@@ -123,9 +125,11 @@ local function meta_encoding_function(task)
nother = nother + 1
end
end
+
+ return {nutf / #tp, nother / #tp}
end
- return {nutf, nother}
+ return {0, 0}
end
local function meta_recipients_function(task)
@@ -196,10 +200,75 @@ local function meta_urls_function(task)
return {0}
end
+--[[
+-- Words and characters metafunction.
+--
+-- Reads the `avg_words_len` and `short_words_cnt` doubles from the task
+-- memory pool (each defaults to 0 when unset) and combines them with the
+-- per-part character statistics of every text part.
+--
+-- Returns a list of exactly 9 numbers, in this order:
+--   1 - short words counter (as stored in the memory pool)
+--   2 - average word length, bucketed into (0, 1]
+--   3 - spaces rate
+--   4 - double spaces rate
+--   5 - non spaces rate
+--   6 - ascii characters rate
+--   7 - non-ascii characters rate
+--   8 - capital characters rate
+--   9 - numeric characters rate
+-- Values 3..9 are per-part rates (counter / part length) averaged over all
+-- text parts; they are all 0 when the task has no text parts.
+--]]
+local function meta_words_function(task)
+  local pool = task:get_mempool()
+  local avg_len = pool:get_variable("avg_words_len", "double") or 0.0
+  local short_words = pool:get_variable("short_words_cnt", "double") or 0.0
+  local ret_len = 0
+
+  -- Upper bounds of the average word length buckets
+  local lens = {
+    2,
+    3,
+    4,
+    5,
+    6,
+    7,
+    8,
+    9,
+    10,
+    15,
+    20,
+  }
+
+  -- Map the average length to (index of the first bucket that fits) / #buckets.
+  -- NOTE(review): an average above the last bucket (20) leaves ret_len at 0,
+  -- i.e. lower than the value produced for an average of 0 - confirm that
+  -- this is intended before changing it, as it alters the token values.
+  for i = 1,#lens do
+    if lens[i] >= avg_len then
+      ret_len = (1.0 * i) / #lens
+      break
+    end
+  end
+
+  -- Keys of the per-part stats table, in the order they are returned
+  local stat_keys = {
+    'spaces',
+    'double_spaces',
+    'non_spaces',
+    'ascii_characters',
+    'non_ascii_characters',
+    'capital_letters',
+    'numeric_characters',
+  }
+  local wres = {}
+  for i = 1,#stat_keys do
+    wres[i] = 0
+  end
+
+  -- Treat a missing list of text parts as an empty one: ipairs(nil) and #nil
+  -- would both raise an error
+  local tp = task:get_text_parts() or {}
+  for _,p in ipairs(tp) do
+    local stats = p:get_stats()
+    local len = p:get_length()
+
+    if stats and len and len > 0 then
+      for i,key in ipairs(stat_keys) do
+        -- A counter absent from the stats table contributes nothing
+        wres[i] = wres[i] + (stats[key] or 0) / len
+      end
+    end
+  end
+
+  local ret = {
+    short_words,
+    ret_len,
+  }
+  local nparts = #tp
+  for _,wr in ipairs(wres) do
+    -- Without text parts the average is defined as 0 rather than 0/0 (NaN)
+    if nparts > 0 then
+      table.insert(ret, wr / nparts)
+    else
+      table.insert(ret, 0)
+    end
+  end
+
+  return ret
+end
+
+
local metafunctions = {
{
cb = meta_size_function,
ninputs = 1,
+ desc = {
+ "size"
+ }
},
{
cb = meta_images_function,
@@ -209,36 +278,80 @@ local metafunctions = {
-- 3 - number of jpeg images
-- 4 - number of large images (> 128 x 128)
-- 5 - number of small images (< 128 x 128)
+ desc = {
+ 'nimages',
+ 'npng_images',
+ 'njpeg_images',
+ 'nlarge_images',
+ 'nsmall_images'
+ }
},
{
cb = meta_nparts_function,
ninputs = 2,
-- 1 - number of text parts
-- 2 - number of attachments
+ desc = {
+ 'ntext_parts',
+ 'nattachments'
+ }
},
{
cb = meta_encoding_function,
ninputs = 2,
-- 1 - number of utf parts
-- 2 - number of non-utf parts
+ desc = {
+ 'nutf_parts',
+ 'nascii_parts'
+ }
},
{
cb = meta_recipients_function,
ninputs = 2,
-- 1 - number of mime rcpt
-- 2 - number of smtp rcpt
+ desc = {
+ 'nmime_rcpt',
+ 'nsmtp_rcpt'
+ }
},
{
cb = meta_received_function,
ninputs = 4,
+ desc = {
+ 'nreceived',
+ 'nreceived_invalid',
+ 'nreceived_bad_time',
+ 'nreceived_secure'
+ }
},
{
cb = meta_urls_function,
ninputs = 1,
+ desc = {
+ 'nurls'
+ }
+ },
+ {
+   cb = meta_words_function,
+   ninputs = 9,
+   -- Labels follow the order of the values returned by the callback:
+   -- the short words counter comes first, the bucketed average word
+   -- length second, then the seven character rates
+   desc = {
+     'nshort_words',
+     'avg_words_len',
+     'spaces_rate',
+     'double_spaces_rate',
+     'non_spaces_rate',
+     'ascii_characters_rate',
+     'non_ascii_characters_rate',
+     'capital_characters_rate',
+     'numeric_cahracters'
+   }
},
}
local function rspamd_gen_metatokens(task)
+ local rspamd_logger = require "rspamd_logger"
local ipairs = ipairs
local metatokens = {}
local cached = task:cache_get('metatokens')
@@ -248,8 +361,8 @@ local function rspamd_gen_metatokens(task)
else
for _,mt in ipairs(metafunctions) do
local ct = mt.cb(task)
-
- for _,tok in ipairs(ct) do
+ for i,tok in ipairs(ct) do
+ rspamd_logger.debugm(N, task, "metatoken: %s = %s", mt.desc[i], tok)
table.insert(metatokens, tok)
end
end