summaryrefslogtreecommitdiffstats
path: root/lualib/lua_bayes_learn.lua
diff options
context:
space:
mode:
Diffstat (limited to 'lualib/lua_bayes_learn.lua')
-rw-r--r--lualib/lua_bayes_learn.lua49
1 files changed, 49 insertions, 0 deletions
diff --git a/lualib/lua_bayes_learn.lua b/lualib/lua_bayes_learn.lua
new file mode 100644
index 000000000..70cbb96c0
--- /dev/null
+++ b/lualib/lua_bayes_learn.lua
@@ -0,0 +1,49 @@
+--[[
+Copyright (c) 2019, Vsevolod Stakhov <vsevolod@highsecure.ru>
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+]]--
+
+-- This file contains functions to simplify bayes classifier auto-learning
+
+local exports = {} -- module table: functions are attached below, returned at end of file
+
+--- Decide whether a message may be learned by the bayes classifier.
+-- Yields true to allow learning, or false plus a reason string when the
+-- 'bayes_prob' mempool variable already puts the message in the wanted class.
+-- Requests whose Learn-Type header is 'bulk' skip that check entirely.
+-- NOTE(review): is_unlearn is accepted but never read here -- confirm intent.
+exports.autolearn = function(task, is_spam, is_unlearn)
+  local header = task:get_request_header('Learn-Type')
+  if header and tostring(header) == 'bulk' then
+    return true
+  end
+
+  local score = task:get_mempool():get_variable('bayes_prob', 'double')
+  if not score then
+    return true
+  end
+
+  local class_name, confident
+  if is_spam then
+    class_name, confident = 'spam', score >= 0.95
+  else
+    class_name, confident = 'ham', score <= 0.05
+  end
+  if not confident then return true end
+  -- Distance from 0.5 scaled by 200 (a percentage if score lies in 0..1).
+  return false, string.format('already in class %s; probability %.2f%%',
+    class_name, math.abs((score - 0.5) * 200.0))
+end
+
+return exports \ No newline at end of file