diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2018-11-17 09:54:22 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2018-11-17 09:54:22 +0000 |
commit | c4c5baea41f8136dd30f69160ea37dec5cd8dd42 (patch) | |
tree | cb142e4c2170eed5ff313628ace9b5f4f510e32c /lualib | |
parent | 2bc29360a3fb538f09d6ca0a8408f26f8bd4c310 (diff) | |
download | rspamd-c4c5baea41f8136dd30f69160ea37dec5cd8dd42.tar.gz rspamd-c4c5baea41f8136dd30f69160ea37dec5cd8dd42.zip |
[Minor] Reintroduce metatokens for bayes
Diffstat (limited to 'lualib')
-rw-r--r-- | lualib/lua_stat.lua | 28 |
1 file changed, 25 insertions, 3 deletions
diff --git a/lualib/lua_stat.lua b/lualib/lua_stat.lua
index 2b43defb6..5f985e035 100644
--- a/lualib/lua_stat.lua
+++ b/lualib/lua_stat.lua
@@ -541,6 +541,7 @@ local function process_stat_config(cfg)
     classify_urls = true,
     classify_meta = true,
     classify_max_tlds = 10,
+    classify_meta = true,
   }

   res_config = lua_util.override_defaults(res_config, opts_section)
@@ -553,9 +554,15 @@ local function process_stat_config(cfg)
       local hname

       if s1 and s2 then
-        hname = string.format('#h:%s-%s', s1, s2)
+        hname = string.format('%s-%s', s1, s2)
       else
-        hname = string.format('#h:%s', v:sub(1, 2):lower())
+        s1 = v:match("^X%-([A-Z].*)$")
+
+        if s1 then
+          hname = string.format('x%s', s1:sub(1, 3):lower())
+        else
+          hname = string.format('%s', v:sub(1, 3):lower())
+        end
       end

       if classify_headers_parsed[hname] then
@@ -706,6 +713,17 @@ local function get_headers_stat_tokens(task, cf, res, i)
   return i
 end

+local function get_meta_stat_tokens(task, res, i)
+  local day_and_hour = os.date('%u:%H',
+      task:get_date{format = 'message', gmt = true})
+  rawset(res, i, string.format("#dt:%s", day_and_hour))
+  lua_util.debugm("bayes", task, "added day_of_week name token: %s",
+      res[i])
+  i = i + 1
+
+  return i
+end
+
 local function get_stat_tokens(task, cf)
   local res = {}
   local E = {}
@@ -745,7 +763,7 @@ local function get_stat_tokens(task, cf)
   end

   if cf.classify_urls then
-    local urls = lua_util.extract_specific_urls{task = task, limit = 5}
+    local urls = lua_util.extract_specific_urls{task = task, limit = 5, esld_limit = 1}

     if urls then
       for _,u in ipairs(urls) do
@@ -757,6 +775,10 @@ local function get_stat_tokens(task, cf)
     end
   end

+  if cf.classify_meta then
+    i = get_meta_stat_tokens(task, res, i)
+  end
+
   return res
 end

|