From ebf85df69089ca407d280df4cfaeb865e1e455f1 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Wed, 24 Jul 2019 13:23:23 +0100 Subject: [PATCH] [Rework] Use a dedicated library for autolearn --- conf/statistic.conf | 29 +--------------------- lualib/lua_bayes_learn.lua | 49 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 28 deletions(-) create mode 100644 lualib/lua_bayes_learn.lua diff --git a/conf/statistic.conf b/conf/statistic.conf index 8d45e7608..bb76853ca 100644 --- a/conf/statistic.conf +++ b/conf/statistic.conf @@ -41,34 +41,7 @@ classifier "bayes" { symbol = "BAYES_SPAM"; spam = true; } - learn_condition =<<EOD -return function(task, is_spam, is_unlearn) - local learn_type = task:get_request_header('Learn-Type') - - if not (learn_type and tostring(learn_type) == 'bulk') then - local prob = task:get_mempool():get_variable('bayes_prob', 'double') - - if prob then - local in_class = false - local cl - if is_spam then - cl = 'spam' - in_class = prob >= 0.95 - else - cl = 'ham' - in_class = prob <= 0.05 - end - - if in_class then - return false,string.format('already in class %s; probability %.2f%%', - cl, math.abs((prob - 0.5) * 200.0)) - end - end - end - - return true -end -EOD + learn_condition = "return require("lua_bayes_learn").autolearn" .include(try=true; priority=1) "$LOCAL_CONFDIR/local.d/classifier-bayes.conf" .include(try=true; priority=10) "$LOCAL_CONFDIR/override.d/classifier-bayes.conf" diff --git a/lualib/lua_bayes_learn.lua b/lualib/lua_bayes_learn.lua new file mode 100644 index 000000000..70cbb96c0 --- /dev/null +++ b/lualib/lua_bayes_learn.lua @@ -0,0 +1,49 @@ +--[[ +Copyright (c) 2019, Vsevolod Stakhov + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
--]]

-- Helpers that decide whether the bayes classifier should auto-learn a message

local exports = {}

--- Decide whether a message may still be learned as spam or as ham.
-- A request whose `Learn-Type` header equals `bulk` is always accepted.
-- Otherwise the `bayes_prob` mempool variable is read; learning is refused
-- when that value is already >= 0.95 (learning spam) or <= 0.05 (learning ham).
-- When the variable is absent, learning is accepted.
-- @param task object exposing `get_request_header` and `get_mempool`
-- @param is_spam truthy to learn as spam, falsy to learn as ham
-- @param is_unlearn accepted as part of the callback signature; not read here
-- @return `true` when learning may proceed, otherwise `false` followed by a
--   human-readable reason string
function exports.autolearn(task, is_spam, is_unlearn)
  local header = task:get_request_header('Learn-Type')
  if header and tostring(header) == 'bulk' then
    -- Bulk learning skips the probability check entirely
    return true
  end

  local score = task:get_mempool():get_variable('bayes_prob', 'double')
  if not score then
    return true
  end

  local class_name, already_there
  if is_spam then
    class_name, already_there = 'spam', score >= 0.95
  else
    class_name, already_there = 'ham', score <= 0.05
  end

  if not already_there then
    return true
  end

  -- Rescale the distance from 0.5 onto a 0..100 percentage for the message
  local confidence = math.abs((score - 0.5) * 200.0)
  return false, string.format('already in class %s; probability %.2f%%',
    class_name, confidence)
end

return exports