diff options
Diffstat (limited to 'lualib')
-rw-r--r-- | lualib/meta_functions.lua | 121 |
1 files changed, 117 insertions, 4 deletions
diff --git a/lualib/meta_functions.lua b/lualib/meta_functions.lua
index f8e951066..1394204c1 100644
--- a/lualib/meta_functions.lua
+++ b/lualib/meta_functions.lua
@@ -16,6 +16,8 @@ limitations under the License.
 
 local exports = {}
 
+local N = "metatokens"
+
 -- Metafunctions
 local function meta_size_function(task)
   local sizes = {
@@ -115,7 +117,7 @@ local function meta_encoding_function(task)
   local nother = 0
 
   local tp = task:get_text_parts()
-  if tp then
+  if tp and #tp > 0 then
     for _,p in ipairs(tp) do
       if p:is_utf() then
         nutf = nutf + 1
@@ -123,9 +125,11 @@ local function meta_encoding_function(task)
         nother = nother + 1
       end
     end
+
+    return {nutf / #tp, nother / #tp}
   end
 
-  return {nutf, nother}
+  return {0, 0}
 end
 
 local function meta_recipients_function(task)
@@ -196,10 +200,75 @@ local function meta_urls_function(task)
   return {0}
 end
 
+local function meta_words_function(task)
+  local avg_len = task:get_mempool():get_variable("avg_words_len", "double") or 0.0
+  local short_words = task:get_mempool():get_variable("short_words_cnt", "double") or 0.0
+  local ret_len = 0
+
+  local lens = {
+    2,
+    3,
+    4,
+    5,
+    6,
+    7,
+    8,
+    9,
+    10,
+    15,
+    20,
+  }
+
+  for i = 1,#lens do
+    if lens[i] >= avg_len then
+      ret_len = (1.0 * i) / #lens
+      break
+    end
+  end
+
+  local tp = task:get_text_parts()
+  local wres = {
+    0, -- spaces rate
+    0, -- double spaces rate
+    0, -- non spaces rate
+    0, -- ascii characters rate
+    0, -- non-ascii characters rate
+    0, -- capital characters rate
+    0, -- numeric cahracters
+  }
+  for _,p in ipairs(tp) do
+    local stats = p:get_stats()
+    local len = p:get_length()
+
+    if len > 0 then
+      wres[1] = wres[1] + stats['spaces'] / len
+      wres[2] = wres[2] + stats['double_spaces'] / len
+      wres[3] = wres[3] + stats['non_spaces'] / len
+      wres[4] = wres[4] + stats['ascii_characters'] / len
+      wres[5] = wres[5] + stats['non_ascii_characters'] / len
+      wres[6] = wres[6] + stats['capital_letters'] / len
+      wres[7] = wres[7] + stats['numeric_characters'] / len
+    end
+  end
+
+  local ret = {
+    short_words,
+    ret_len,
+  }
+  for _,wr in ipairs(wres) do
+    table.insert(ret, wr / #tp)
+  end
+
+  return ret
+end
+
 local metafunctions = {
   {
     cb = meta_size_function,
     ninputs = 1,
+    desc = {
+      "size"
+    }
   },
   {
     cb = meta_images_function,
@@ -209,36 +278,80 @@ local metafunctions = {
     -- 3 - number of jpeg images
     -- 4 - number of large images (> 128 x 128)
     -- 5 - number of small images (< 128 x 128)
+    desc = {
+      'nimages',
+      'npng_images',
+      'njpeg_images',
+      'nlarge_images',
+      'nsmall_images'
+    }
   },
   {
     cb = meta_nparts_function,
     ninputs = 2,
     -- 1 - number of text parts
     -- 2 - number of attachments
+    desc = {
+      'ntext_parts',
+      'nattachments'
+    }
   },
   {
     cb = meta_encoding_function,
     ninputs = 2,
     -- 1 - number of utf parts
     -- 2 - number of non-utf parts
+    desc = {
+      'nutf_parts',
+      'nascii_parts'
+    }
   },
   {
     cb = meta_recipients_function,
     ninputs = 2,
     -- 1 - number of mime rcpt
     -- 2 - number of smtp rcpt
+    desc = {
+      'nmime_rcpt',
+      'nsmtp_rcpt'
+    }
   },
   {
     cb = meta_received_function,
     ninputs = 4,
+    desc = {
+      'nreceived',
+      'nreceived_invalid',
+      'nreceived_bad_time',
+      'nreceived_secure'
+    }
   },
   {
     cb = meta_urls_function,
     ninputs = 1,
+    desc = {
+      'nurls'
+    }
+  },
+  {
+    cb = meta_words_function,
+    ninputs = 9,
+    desc = {
+      'avg_words_len',
+      'nshort_words',
+      'spaces_rate',
+      'double_spaces_rate',
+      'non_spaces_rate',
+      'ascii_characters_rate',
+      'non_ascii_characters_rate',
+      'capital_characters_rate',
+      'numeric_cahracters'
+    }
   },
 }
 
 local function rspamd_gen_metatokens(task)
+  local rspamd_logger = require "rspamd_logger"
   local ipairs = ipairs
   local metatokens = {}
   local cached = task:cache_get('metatokens')
@@ -248,8 +361,8 @@ local function rspamd_gen_metatokens(task)
   else
     for _,mt in ipairs(metafunctions) do
       local ct = mt.cb(task)
-
-      for _,tok in ipairs(ct) do
+      for i,tok in ipairs(ct) do
+        rspamd_logger.debugm(N, task, "metatoken: %s = %s", mt.desc[i], tok)
         table.insert(metatokens, tok)
       end
     end