about summary refs log tree commit diff stats
path: root/lualib/lua_bayes_learn.lua
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2019-07-24 15:03:29 +0100
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2019-07-24 15:03:29 +0100
commit e1fadcc80b5f6a3d566224b0ed1a74d7a9dbc9ed (patch)
tree 718bee26b22d6582e63a7bffc8d50104b866c131 /lualib/lua_bayes_learn.lua
parent 701a711049ee01373bc3862cc441fc3065c8dbc2 (diff)
download rspamd-e1fadcc80b5f6a3d566224b0ed1a74d7a9dbc9ed.tar.gz
rspamd-e1fadcc80b5f6a3d566224b0ed1a74d7a9dbc9ed.zip
[Feature] Improve autolearning
Diffstat (limited to 'lualib/lua_bayes_learn.lua')
-rw-r--r-- lualib/lua_bayes_learn.lua 67
1 files changed, 67 insertions, 0 deletions
diff --git a/lualib/lua_bayes_learn.lua b/lualib/lua_bayes_learn.lua
index 7df52a2ef..5a46265e7 100644
--- a/lualib/lua_bayes_learn.lua
+++ b/lualib/lua_bayes_learn.lua
@@ -16,6 +16,10 @@ limitations under the License.
-- This file contains functions to simplify bayes classifier auto-learning
+local lua_util = require "lua_util"
+
+local N = "lua_bayes"
+
local exports = {}
exports.can_learn = function(task, is_spam, is_unlearn)
@@ -46,4 +50,67 @@ exports.can_learn = function(task, is_spam, is_unlearn)
return true
end
+--- Decide whether a scanned task should be auto-learned by the classifier.
+-- The decision is based on the task verdict and score as reported by
+-- `lua_util.get_task_verdict`, the configured score thresholds and,
+-- optionally, the balance between the spam and ham learn counters.
+-- @param task the task being processed
+-- @param conf autolearn settings table; the fields read here are:
+--   `spam_threshold` - minimum score for a 'spam' verdict to be learned
+--   `ham_threshold`  - maximum score for a 'ham' verdict to be learned
+--   `check_balance`  - when truthy, skip learning of an over-represented class
+--   `min_balance`    - balance factor (defaults to 0.9); a class is skipped
+--                      when its learns ratio exceeds `1 / min_balance`
+-- @return 'spam' or 'ham' when the task should be learned, nothing otherwise
+exports.autolearn = function(task, conf)
+  -- We have autolearn config so let's figure out what is requested
+  local verdict, score = lua_util.get_task_verdict(task)
+
+  if verdict == 'passthrough' then
+    -- No need to autolearn
+    lua_util.debugm(N, task, 'no need to autolearn - verdict: %s',
+        verdict)
+    return
+  end
+
+  local learn_spam, learn_ham = false, false
+
+  -- Thresholds are applied only when both of them are configured.
+  -- The score guard avoids a nil comparison error should a verdict ever
+  -- arrive without a score.
+  if conf.spam_threshold and conf.ham_threshold and score then
+    if verdict == 'spam' then
+      if score >= conf.spam_threshold then
+        lua_util.debugm(N, task, 'can autolearn spam: score %s >= %s',
+            score, conf.spam_threshold)
+        learn_spam = true
+      end
+    elseif verdict == 'ham' then
+      if score <= conf.ham_threshold then
+        lua_util.debugm(N, task, 'can autolearn ham: score %s <= %s',
+            score, conf.ham_threshold)
+        learn_ham = true
+      end
+    end
+  end
+
+  if not (learn_spam or learn_ham) then
+    -- Nothing was selected, so there is no point in reading the counters
+    return
+  end
+
+  if conf.check_balance then
+    -- Check balance of learns
+    local pool = task:get_mempool()
+    local spam_learns = pool:get_variable('spam_learns', 'int64') or 0
+    local ham_learns = pool:get_variable('ham_learns', 'int64') or 0
+    local min_balance = conf.min_balance or 0.9
+
+    if spam_learns > 0 or ham_learns > 0 then
+      local max_ratio = 1.0 / min_balance
+
+      if learn_spam then
+        -- +1 in the denominator avoids division by zero on an empty class
+        local spam_learns_ratio = spam_learns / (ham_learns + 1)
+        if spam_learns_ratio > max_ratio then
+          -- Report the comparison that has actually failed (ratio vs max_ratio)
+          lua_util.debugm(N, task,
+              'skip learning spam, balance is not satisfied: %s > %s; %s spam learns; %s ham learns',
+              spam_learns_ratio, max_ratio, spam_learns, ham_learns)
+          learn_spam = false
+        end
+      end
+
+      if learn_ham then
+        local ham_learns_ratio = ham_learns / (spam_learns + 1)
+        if ham_learns_ratio > max_ratio then
+          lua_util.debugm(N, task,
+              'skip learning ham, balance is not satisfied: %s > %s; %s spam learns; %s ham learns',
+              ham_learns_ratio, max_ratio, spam_learns, ham_learns)
+          learn_ham = false
+        end
+      end
+    end
+  end
+
+  if learn_spam then
+    return 'spam'
+  elseif learn_ham then
+    return 'ham'
+  end
+end
+
return exports \ No newline at end of file