--[[
Copyright (c) 2019, Vsevolod Stakhov

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
]]--

-- Provenance: lualib/lua_bayes_learn.lua as introduced by commit
-- ebf85df69089ca407d280df4cfaeb865e1e455f1 (Vsevolod Stakhov, 24 Jul 2019),
-- "[Rework] Use a dedicated library for autolearn".

-- This file contains functions to simplify bayes classifier auto-learning

local exports = {}

--- Decide whether a message may be learned as the requested class.
--
-- Learning is always allowed when the request carries a `Learn-Type` header
-- whose string form is exactly 'bulk', or when the task's memory pool holds no
-- 'bayes_prob' value. Otherwise the stored value is compared with a fixed
-- threshold for the requested class and learning is refused when the message
-- already falls inside that class.
--
-- @param task object providing `get_request_header(name)` and `get_mempool()`;
--   the pool must provide `get_variable(name, type)`
-- @param is_spam truthy to check against the 'spam' class, falsy for 'ham'
-- @param is_unlearn accepted for the caller's convenience; this function does
--   not read it
-- @return `true` when learning may proceed; otherwise `false` followed by a
--   human readable reason string
exports.autolearn = function(task, is_spam, is_unlearn)
  -- Bulk learning skips the "already classified" check entirely.
  local learn_hdr = task:get_request_header('Learn-Type')
  if learn_hdr and tostring(learn_hdr) == 'bulk' then
    return true
  end

  -- NOTE(review): presumably the classifier's spam probability in [0, 1],
  -- judging by the thresholds below -- confirm against whoever sets it.
  local bayes_prob = task:get_mempool():get_variable('bayes_prob', 'double')
  if not bayes_prob then
    return true
  end

  local class_name, already_there
  if is_spam then
    class_name, already_there = 'spam', bayes_prob >= 0.95
  else
    class_name, already_there = 'ham', bayes_prob <= 0.05
  end

  if not already_there then
    return true
  end

  -- The reported figure is the distance from 0.5 rescaled to 0..100.
  local shown_percent = math.abs((bayes_prob - 0.5) * 200.0)
  return false, string.format(
      'already in class %s; probability %.2f%%',
      class_name, shown_percent)
end

return exports