2017-09-09 16:40:15 +02:00
|
|
|
--[[
|
|
|
|
Copyright (c) 2017, Vsevolod Stakhov <vsevolod@highsecure.ru>
|
|
|
|
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
you may not use this file except in compliance with the License.
|
|
|
|
You may obtain a copy of the License at
|
|
|
|
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
See the License for the specific language governing permissions and
|
|
|
|
limitations under the License.
|
|
|
|
]]--
|
|
|
|
|
2017-05-23 12:37:43 +02:00
|
|
|
local exports = {}
|
|
|
|
|
2017-09-23 14:44:40 +02:00
|
|
|
local N = "metatokens"
|
|
|
|
|
2017-05-23 12:37:43 +02:00
|
|
|
-- Metafunctions
|
|
|
|
--- Maps the message size onto a bucket position normalised to (0, 1].
-- @param task object exposing `get_size()`
-- @return single-element table: index of the first bucket whose upper bound
--   is >= the size, divided by the number of buckets; `{0}` when the size is
--   larger than the last bucket
local function meta_size_function(task)
  -- Upper bounds of the size buckets, in ascending order
  local buckets = {
    100, 200, 500, 1000,
    2000, 4000, 10000, 20000,
    30000, 100000, 200000, 400000,
    800000, 1000000, 2000000, 8000000,
  }
  local nbuckets = #buckets
  local msg_size = task:get_size()

  for idx, upper in ipairs(buckets) do
    if upper >= msg_size then
      return { idx / nbuckets }
    end
  end

  -- Bigger than every bucket
  return { 0 }
end
|
|
|
|
|
|
|
|
--- Collects image statistics for a message.
-- @param task object exposing `get_images()`; each image must provide
--   `get_type()`, `get_width()` and `get_height()`
-- @return table of five numbers, in this order:
--   1. total number of images (raw count)
--   2. share of images whose type is 'jpeg'
--   3. share of images whose type is 'png'
--   4. share of images with positive dimensions and width + height > 256
--   5. share of images with positive dimensions and width + height <= 256
--   All shares are 0 when there are no images.
local function meta_images_function(task)
  local total, jpeg, png, large, small = 0, 0, 0, 0, 0

  for _, image in ipairs(task:get_images() or {}) do
    local kind = image:get_type()
    if kind == 'png' then
      png = png + 1
    elseif kind == 'jpeg' then
      jpeg = jpeg + 1
    end

    local width = image:get_width()
    local height = image:get_height()

    -- Images reporting a non-positive dimension are counted in the total
    -- but are neither large nor small
    if width > 0 and height > 0 then
      if width + height > 256 then
        large = large + 1
      else
        small = small + 1
      end
    end

    total = total + 1
  end

  if total == 0 then
    return { total, jpeg, png, large, small }
  end

  return { total, jpeg / total, png / total, large / total, small / total }
end
|
|
|
|
|
|
|
|
--- Computes the share of text parts and of attachments in a message.
-- @param task object exposing `get_text_parts()` and `get_parts()`; each
--   part must provide `get_filename()`
-- @return two-element table: {text parts / (parts + 1),
--   parts having a filename / (parts + 1)}
local function meta_nparts_function(task)
  local text_parts = task:get_text_parts()
  local text_count = 0
  if text_parts then
    text_count = #text_parts
  end

  local attachment_count = 0
  -- The denominator starts at 1, so it is never zero even without parts
  local denominator = 1

  for _, part in ipairs(task:get_parts() or {}) do
    if part:get_filename() then
      attachment_count = attachment_count + 1
    end
    denominator = denominator + 1
  end

  return { text_count / denominator, attachment_count / denominator }
end
|
|
|
|
|
|
|
|
--- Computes the share of UTF and non-UTF text parts.
-- @param task object exposing `get_text_parts()`; each part must provide
--   `is_utf()`
-- @return two-element table {utf share, non-utf share}; `{0, 0}` when there
--   are no text parts
local function meta_encoding_function(task)
  local text_parts = task:get_text_parts()

  -- Nothing to classify
  if not text_parts or #text_parts == 0 then
    return { 0, 0 }
  end

  local utf_count, other_count = 0, 0
  for _, part in ipairs(text_parts) do
    if part:is_utf() then
      utf_count = utf_count + 1
    else
      other_count = other_count + 1
    end
  end

  local total = #text_parts
  return { utf_count / total, other_count / total }
end
|
|
|
|
|
|
|
|
--- Computes inverse recipient counts for MIME and SMTP recipients.
-- @param task object exposing `has_recipients(kind)` and
--   `get_recipients(kind)`
-- @return two-element table {1 / mime recipients, 1 / smtp recipients};
--   an element is 0 when that kind of recipient is absent
local function meta_recipients_function(task)
  -- Inverse number of recipients of the given kind, 0 when there are none
  local function inverse_count(kind)
    if not task:has_recipients(kind) then
      return 0
    end

    local count = #(task:get_recipients(kind))
    if count > 0 then
      return 1.0 / count
    end

    return count
  end

  local mime_factor = inverse_count('mime')
  local smtp_factor = inverse_count('smtp')

  return { mime_factor, smtp_factor }
end
|
|
|
|
|
|
|
|
--- Computes metatokens describing the Received headers of a message.
-- Headers whose `flags.artificial` is set are ignored.
-- @param task object exposing `get_received_headers()`; each header is a
--   table that may carry `by_hostname`, `timestamp` and `flags`
-- @return four-element table:
--   1. 1 / number of non-artificial headers
--   2. share of headers without `by_hostname`
--   3. 1 / sum of absolute timestamp differences between consecutive headers
--      carrying a timestamp (0 when that sum is 0)
--   4. share of headers flagged `ssl` or `authenticated`
--   All elements are 0 when there are no non-artificial headers.
local function meta_received_function(task)
  local count_factor = 0
  local invalid_factor = 0
  local time_factor = 0
  local secure_factor = 0

  local rh = task:get_received_headers()

  if rh and #rh > 0 then
    local ntotal = 0
    local prev_time = 0

    for _, rc in ipairs(rh) do
      local flags = rc.flags

      if not (flags and flags['artificial']) then
        ntotal = ntotal + 1

        if not rc.by_hostname then
          invalid_factor = invalid_factor + 1.0
        end

        if rc.timestamp then
          -- The first timestamp seen only seeds the chain; later ones add
          -- their distance from the previous one
          if prev_time ~= 0 then
            time_factor = time_factor + math.abs(prev_time - rc.timestamp)
          end
          prev_time = rc.timestamp
        end

        if flags and (flags['ssl'] or flags['authenticated']) then
          secure_factor = secure_factor + 1.0
        end
      end
    end

    -- Every header may have been filtered out as artificial; dividing by
    -- zero here would yield NaN/inf metatokens
    if ntotal > 0 then
      invalid_factor = invalid_factor / ntotal
      secure_factor = secure_factor / ntotal
      count_factor = 1.0 / ntotal
    end

    if time_factor ~= 0 then
      time_factor = 1.0 / time_factor
    end
  end

  return { count_factor, invalid_factor, time_factor, secure_factor }
end
|
|
|
|
|
|
|
|
--- Computes the inverse number of URLs found in a message.
-- @param task object exposing `has_urls()` and `get_urls()`
-- @return single-element table {1 / number of urls}, or `{0}` when the
--   message has no URLs
local function meta_urls_function(task)
  if task:has_urls() then
    local urls = task:get_urls()
    -- has_urls() and get_urls() are separate calls: guard against a nil or
    -- empty list so that the result is never an error or 1/0 = inf
    local nurls = urls and #urls or 0

    if nurls > 0 then
      return { 1.0 / nurls }
    end
  end

  return { 0 }
end
|
|
|
|
|
2017-09-23 14:44:40 +02:00
|
|
|
--- Computes word and character statistics metatokens.
-- @param task object exposing `get_mempool()` (whose result provides
--   `get_variable(name, type)`) and `get_text_parts()`; each text part must
--   provide `get_stats()` and `get_length()`
-- @return nine-element table, in this order:
--   1. value of the "short_words_cnt" mempool variable (0.0 when unset)
--   2. bucketed value of the "avg_words_len" mempool variable: index of the
--      first bucket >= the average length divided by the number of buckets,
--      or 0 when the average exceeds the last bucket
--   3-9. per-part rates (stat / part length) averaged over the number of
--      text parts: spaces, double spaces, non spaces, ascii characters,
--      non-ascii characters, capital letters, numeric characters
local function meta_words_function(task)
  local mempool = task:get_mempool()
  local avg_len = mempool:get_variable("avg_words_len", "double") or 0.0
  local short_words = mempool:get_variable("short_words_cnt", "double") or 0.0
  local ret_len = 0

  local lens = { 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20 }

  for i = 1, #lens do
    if lens[i] >= avg_len then
      ret_len = (1.0 * i) / #lens
      break
    end
  end

  -- Keys of the per-part stats table, in the order they are emitted
  local stat_keys = {
    'spaces', -- spaces rate
    'double_spaces', -- double spaces rate
    'non_spaces', -- non spaces rate
    'ascii_characters', -- ascii characters rate
    'non_ascii_characters', -- non-ascii characters rate
    'capital_letters', -- capital characters rate
    'numeric_characters', -- numeric characters rate
  }

  local wres = {}
  for i = 1, #stat_keys do
    wres[i] = 0
  end

  -- A message may have no text parts at all: treat nil as an empty list
  local tp = task:get_text_parts() or {}

  for _, p in ipairs(tp) do
    local stats = p:get_stats()
    local len = p:get_length()

    -- Empty parts contribute nothing (and would divide by zero)
    if len > 0 then
      for i, key in ipairs(stat_keys) do
        wres[i] = wres[i] + stats[key] / len
      end
    end
  end

  local ret = {
    short_words,
    ret_len,
  }

  -- Average over all text parts; keep 1 as divisor when there are none
  local divisor = 1.0
  if #tp > 0 then
    divisor = #tp
  end

  for _, wr in ipairs(wres) do
    ret[#ret + 1] = wr / divisor
  end

  return ret
end
|
|
|
|
|
2017-05-23 12:37:43 +02:00
|
|
|
-- Registry of metatoken generators.
-- Each entry has:
--   cb      - function(task) returning an array of numbers
--   ninputs - number of values `cb` returns
--   desc    - names of those values; desc[i] labels the i-th value returned
--             by `cb`, so the order here must follow the order of the
--             callback's result
local metafunctions = {
  {
    cb = meta_size_function,
    ninputs = 1,
    -- 1 - normalised size bucket of the message
    desc = {
      "size"
    }
  },
  {
    cb = meta_images_function,
    ninputs = 5,
    -- 1 - number of images,
    -- 2 - share of jpeg images
    -- 3 - share of png images
    -- 4 - share of large images (width + height > 256)
    -- 5 - share of small images (width + height <= 256)
    -- NB: the callback returns the jpeg share before the png share, so the
    -- labels are listed in that order
    desc = {
      'nimages',
      'njpeg_images',
      'npng_images',
      'nlarge_images',
      'nsmall_images'
    }
  },
  {
    cb = meta_nparts_function,
    ninputs = 2,
    -- 1 - share of text parts
    -- 2 - share of attachments
    desc = {
      'ntext_parts',
      'nattachments'
    }
  },
  {
    cb = meta_encoding_function,
    ninputs = 2,
    -- 1 - share of utf parts
    -- 2 - share of non-utf parts
    desc = {
      'nutf_parts',
      'nascii_parts'
    }
  },
  {
    cb = meta_recipients_function,
    ninputs = 2,
    -- 1 - inverse number of mime rcpt
    -- 2 - inverse number of smtp rcpt
    desc = {
      'nmime_rcpt',
      'nsmtp_rcpt'
    }
  },
  {
    cb = meta_received_function,
    ninputs = 4,
    -- 1 - inverse number of received headers
    -- 2 - share of received headers without a `by` hostname
    -- 3 - inverse of the accumulated time difference between headers
    -- 4 - share of received headers flagged ssl or authenticated
    desc = {
      'nreceived',
      'nreceived_invalid',
      'nreceived_bad_time',
      'nreceived_secure'
    }
  },
  {
    cb = meta_urls_function,
    ninputs = 1,
    -- 1 - inverse number of urls
    desc = {
      'nurls'
    }
  },
  {
    cb = meta_words_function,
    ninputs = 9,
    -- NB: the callback returns the short words counter first and the
    -- bucketed average word length second, so the labels follow that order.
    -- The misspelled 'numeric_cahracters' key is kept as is: it is emitted
    -- at runtime and external consumers may depend on it.
    desc = {
      'nshort_words',
      'avg_words_len',
      'spaces_rate',
      'double_spaces_rate',
      'non_spaces_rate',
      'ascii_characters_rate',
      'non_ascii_characters_rate',
      'capital_characters_rate',
      'numeric_cahracters'
    }
  },
}
|
|
|
|
|
|
|
|
--- Builds the flat array of metatokens for a task.
-- The result is stored in the task cache under the 'metatokens' key and is
-- returned from there on subsequent calls.
-- @param task object exposing `cache_get(key)` and `cache_set(key, value)`
--   plus whatever the registered metafunctions call on it
-- @return array with the values of every metafunction, concatenated in
--   registration order
local function rspamd_gen_metatokens(task)
  local rspamd_logger = require "rspamd_logger"

  local cached = task:cache_get('metatokens')
  if cached then
    return cached
  end

  local tokens = {}

  for _, metafunc in ipairs(metafunctions) do
    local values = metafunc.cb(task)

    for idx, value in ipairs(values) do
      rspamd_logger.debugm(N, task, "metatoken: %s = %s", metafunc.desc[idx], value)
      tokens[#tokens + 1] = value
    end
  end

  task:cache_set('metatokens', tokens)

  return tokens
end
|
|
|
|
|
|
|
|
exports.rspamd_gen_metatokens = rspamd_gen_metatokens
|
2018-06-16 15:18:49 +02:00
|
|
|
exports.gen_metatokens = rspamd_gen_metatokens
|
|
|
|
|
|
|
|
--- Builds a name -> value map of metatokens for a task.
-- Nothing is read from or written to the task cache here.
-- @param task object passed to every registered metafunction
-- @return table keyed by the `desc` names of each metafunction
local function rspamd_gen_metatokens_table(task)
  local by_name = {}

  for _, metafunc in ipairs(metafunctions) do
    local names = metafunc.desc
    local values = metafunc.cb(task)

    for idx, value in ipairs(values) do
      by_name[names[idx]] = value
    end
  end

  return by_name
end
|
|
|
|
|
|
|
|
exports.rspamd_gen_metatokens_table = rspamd_gen_metatokens_table
|
|
|
|
exports.gen_metatokens_table = rspamd_gen_metatokens_table
|
2017-05-23 12:37:43 +02:00
|
|
|
|
|
|
|
--- Returns the total number of metatokens produced for a task.
-- @return sum of `ninputs` over all registered metafunctions
local function rspamd_count_metatokens()
  local sum = 0

  for idx = 1, #metafunctions do
    sum = sum + metafunctions[idx].ninputs
  end

  return sum
end
|
|
|
|
|
|
|
|
exports.rspamd_count_metatokens = rspamd_count_metatokens
|
2018-06-16 15:18:49 +02:00
|
|
|
exports.count_metatokens = rspamd_count_metatokens
|
2017-05-23 12:37:43 +02:00
|
|
|
|
|
|
|
return exports
|