diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2016-11-05 12:03:20 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2016-11-05 12:03:20 +0000 |
commit | c38238123244947b0effaa8b3637d80e90106f89 (patch) | |
tree | 0f444eb6a2c8108b9f9c03d879eabbfba9412db9 /src/lua | |
parent | 6fe3489d5bdad985adcfd4e478d91cac2d437448 (diff) | |
download | rspamd-c38238123244947b0effaa8b3637d80e90106f89.tar.gz rspamd-c38238123244947b0effaa8b3637d80e90106f89.zip |
[Rework] Restore old fann_scores, move common parts
Diffstat (limited to 'src/lua')
-rw-r--r-- | src/lua/global_functions.lua | 217 |
1 files changed, 217 insertions, 0 deletions
-- diff --git a/src/lua/global_functions.lua b/src/lua/global_functions.lua
-- index 0eb461496..b8b840043 100644
-- @@ -164,3 +164,220 @@ function rspamd_str_split(s, sep)
-- Hunk context (tail of rspamd_str_split; the beginning of that function is
-- outside this excerpt, so it is reproduced here as a comment only):
--   local p = lpeg.Ct(elem * (sep * elem)^0) -- make a table capture
--   return lpeg.match(p, s)
-- end

-- Metafunctions
--
-- Each metafunction takes a task object and returns an array of numbers.
-- The number of values it is expected to return is declared as `ninputs`
-- in the `metafunctions` registry below.

-- Ascending size thresholds compared against task:get_size().
-- Kept as an upvalue so the table is not rebuilt on every call.
local meta_size_buckets = {
  100,
  200,
  500,
  1000,
  2000,
  4000,
  10000,
  20000,
  30000,
  100000,
  200000,
  400000,
  800000,
  1000000,
  2000000,
  8000000,
}

--- Maps the message size onto a bucket index scaled into (0, 1].
-- @param task object providing `get_size()`
-- @return a one-element array: `i / #buckets` for the first bucket whose
--   threshold is >= the size, or `0` when the size exceeds every threshold
local function meta_size_function(task)
  local size = task:get_size()
  local nbuckets = #meta_size_buckets

  for i = 1, nbuckets do
    if meta_size_buckets[i] >= size then
      return {i / nbuckets}
    end
  end

  -- Larger than the biggest threshold: original behaviour is to report 0
  return {0}
end

--- Collects image statistics for a task.
-- @param task object providing `get_images()`; each image must provide
--   `get_type()`, `get_width()` and `get_height()`
-- @return a five-element array, in this order:
--   1 - total number of images (absolute count)
--   2 - share of 'jpeg' images
--   3 - share of 'png' images
--   4 - share of large images (width + height > 256)
--   5 - share of small images (positive dimensions, width + height <= 256)
--   Values 2..5 are divided by the total when at least one image exists.
local function meta_images_function(task)
  local images = task:get_images()
  local ntotal = 0
  local njpg = 0
  local npng = 0
  local nlarge = 0
  local nsmall = 0

  if images then
    for _, img in ipairs(images) do
      -- Query the type once instead of once per comparison
      local img_type = img:get_type()
      if img_type == 'png' then
        npng = npng + 1
      elseif img_type == 'jpeg' then
        njpg = njpg + 1
      end

      local w = img:get_width()
      local h = img:get_height()

      -- Images with a non-positive dimension count neither as large nor small
      if w > 0 and h > 0 then
        if w + h > 256 then
          nlarge = nlarge + 1
        else
          nsmall = nsmall + 1
        end
      end

      ntotal = ntotal + 1
    end
  end

  if ntotal > 0 then
    njpg = njpg / ntotal
    npng = npng / ntotal
    nlarge = nlarge / ntotal
    nsmall = nsmall / ntotal
  end

  -- NOTE: jpeg precedes png here; the order is part of the token layout
  return {ntotal, njpg, npng, nlarge, nsmall}
end

--- Computes the share of text parts and of attachments among message parts.
-- @param task object providing `get_text_parts()` and `get_parts()`; each
--   part must provide `get_filename()`
-- @return a two-element array: {text parts / total, attachments / total}.
--   The divisor starts at 1 and grows by one per part, so it is never zero.
local function meta_nparts_function(task)
  local nattachments = 0
  local ntextparts = 0
  local totalparts = 1

  local text_parts = task:get_text_parts()
  if text_parts then
    ntextparts = #text_parts
  end

  local parts = task:get_parts()
  if parts then
    for _, part in ipairs(parts) do
      -- A part that reports a filename is counted as an attachment
      if part:get_filename() then
        nattachments = nattachments + 1
      end
      totalparts = totalparts + 1
    end
  end

  return {ntextparts / totalparts, nattachments / totalparts}
end

--- Counts text parts by encoding.
-- @param task object providing `get_text_parts()`; each part must provide
--   `is_utf()`
-- @return a two-element array: {number of utf parts, number of other parts}
local function meta_encoding_function(task)
  local nutf = 0
  local nother = 0

  local text_parts = task:get_text_parts()
  if text_parts then
    for _, part in ipairs(text_parts) do
      if part:is_utf() then
        nutf = nutf + 1
      else
        nother = nother + 1
      end
    end
  end

  return {nutf, nother}
end

--- Computes the reciprocal of the MIME and SMTP recipient counts.
-- @param task object providing `has_recipients(kind)` and
--   `get_recipients(kind)` for kinds 'mime' and 'smtp'
-- @return a two-element array: {1 / n_mime, 1 / n_smtp}, with 0 in place of
--   a reciprocal when the corresponding recipient list is absent or empty
local function meta_recipients_function(task)
  local nmime = 0
  local nsmtp = 0

  if task:has_recipients('mime') then
    nmime = #(task:get_recipients('mime'))
  end
  if task:has_recipients('smtp') then
    nsmtp = #(task:get_recipients('smtp'))
  end

  if nmime > 0 then
    nmime = 1.0 / nmime
  end
  if nsmtp > 0 then
    nsmtp = 1.0 / nsmtp
  end

  return {nmime, nsmtp}
end

--- Computes the reciprocal of the number of received headers.
-- @param task object providing `get_received_headers()`
-- @return a one-element array: {1 / count}, or {0} when there are none
local function meta_received_function(task)
  local headers = task:get_received_headers()

  if headers and #headers > 0 then
    return {1 / #headers}
  end

  return {0}
end

--- Computes the reciprocal of the number of urls.
-- @param task object providing `has_urls()` and `get_urls()`
-- @return a one-element array: {1 / count}, or {0} when there are no urls
local function meta_urls_function(task)
  if task:has_urls() then
    local urls = task:get_urls()
    -- Guard against an empty/missing list so the token never becomes
    -- infinite (1 / 0) and a nil result does not raise an error
    if urls and #urls > 0 then
      return {1.0 / #urls}
    end
  end

  return {0}
end

-- Placeholder: has no body and is not registered in `metafunctions` below.
local function meta_attachments_function(task)
end

-- Registry of metafunctions. `cb` is the callback, `ninputs` the number of
-- values that callback contributes to the metatokens vector.
local metafunctions = {
  {
    cb = meta_size_function,
    ninputs = 1,
  },
  {
    cb = meta_images_function,
    ninputs = 5,
    -- 1 - number of images,
    -- 2 - share of jpeg images,
    -- 3 - share of png images
    -- 4 - share of large images (width + height > 256)
    -- 5 - share of small images (width + height <= 256)
  },
  {
    cb = meta_nparts_function,
    ninputs = 2,
    -- 1 - share of text parts
    -- 2 - share of attachments
  },
  {
    cb = meta_encoding_function,
    ninputs = 2,
    -- 1 - number of utf parts
    -- 2 - number of non-utf parts
  },
  {
    cb = meta_recipients_function,
    ninputs = 2,
    -- 1 - reciprocal of the number of mime rcpt
    -- 2 - reciprocal of the number of smtp rcpt
  },
  {
    cb = meta_received_function,
    ninputs = 1,
  },
  {
    cb = meta_urls_function,
    ninputs = 1,
  },
}

--- Builds the flat metatokens vector for a task.
-- Runs every registered metafunction in registry order and concatenates the
-- results. Each callback contributes exactly `ninputs` values: missing
-- values are filled with 0 and surplus values are dropped, so the length of
-- the result always equals rspamd_count_metatokens().
-- @param task task object passed to every metafunction
-- @return array of numbers
function rspamd_gen_metatokens(task)
  local ipairs = ipairs
  local metatokens = {}

  for _, mt in ipairs(metafunctions) do
    -- Tolerate a callback that returns nothing
    local ct = mt.cb(task) or {}

    for i = 1, mt.ninputs do
      metatokens[#metatokens + 1] = ct[i] or 0
    end
  end

  return metatokens
end

--- Returns the total number of metatokens produced by rspamd_gen_metatokens.
-- @return sum of `ninputs` over all registered metafunctions
function rspamd_count_metatokens()
  local ipairs = ipairs
  local total = 0

  for _, mt in ipairs(metafunctions) do
    total = total + mt.ninputs
  end

  return total
end
\ No newline at end of file |