diff options
author | Vsevolod Stakhov <vsevolod@rspamd.com> | 2023-12-07 15:01:11 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@rspamd.com> | 2023-12-07 15:01:11 +0000 |
commit | 0d993187c1b1b37cfd99d3212745927eea0bff7a (patch) | |
tree | dd11da9a27a4c4df441db53370e16850d48e02ea /lualib | |
parent | 3a7f4ef0ed9fb2583387c0fbcc7fc28ab403b3bc (diff) | |
download | rspamd-0d993187c1b1b37cfd99d3212745927eea0bff7a.tar.gz rspamd-0d993187c1b1b37cfd99d3212745927eea0bff7a.zip |
[Project] Add bayes learn script
Diffstat (limited to 'lualib')
-rw-r--r-- | lualib/lua_bayes_redis.lua | 15 | ||||
-rw-r--r-- | lualib/redis_scripts/bayes_learn.lua | 25 |
2 files changed, 38 insertions, 2 deletions
diff --git a/lualib/lua_bayes_redis.lua b/lualib/lua_bayes_redis.lua
index 575beff4b..2286295d5 100644
--- a/lualib/lua_bayes_redis.lua
+++ b/lualib/lua_bayes_redis.lua
@@ -42,8 +42,22 @@ local function gen_classify_functor(redis_params, classify_script_id)
 end
 
 local function gen_learn_functor(redis_params, learn_script_id)
-  return function(task, expanded_key, id, is_spam, stat_tokens, callback)
-    -- TODO: write this function
+  -- Returns the learn callback: it runs the learn script through
+  -- lua_redis.exec_redis_script and reports the outcome via `callback`.
+  return function(task, expanded_key, id, is_spam, symbol, is_unlearn, stat_tokens, callback)
+    local function learn_redis_cb(err, data)
+      lua_util.debugm(N, task, 'learn redis cb: %s, %s', err, data)
+      if err then
+        callback(task, false, err)
+      else
+        callback(task, true)
+      end
+    end
+
+    -- The learn script issues SADD/HSET/HINCRBY, so mark the request as a write
+    lua_redis.exec_redis_script(learn_script_id,
+        { task = task, is_write = true, key = expanded_key },
+        learn_redis_cb, { expanded_key, is_spam, symbol, is_unlearn, stat_tokens })
   end
 end
 
diff --git a/lualib/redis_scripts/bayes_learn.lua b/lualib/redis_scripts/bayes_learn.lua
new file mode 100644
index 000000000..2b74fcca9
--- /dev/null
+++ b/lualib/redis_scripts/bayes_learn.lua
@@ -0,0 +1,33 @@
+-- Lua script to perform bayes learning
+-- This script accepts the following parameters:
+-- key1 - prefix for bayes tokens (e.g. for per-user classification)
+-- key2 - boolean is_spam
+-- key3 - string symbol
+-- key4 - boolean is_unlearn
+-- key5 - set of tokens encoded in messagepack array of int64_t
+
+-- Redis hands every KEYS element to the script as a string, and any string
+-- (including 'false' and '0') is truthy in Lua, so flags are compared explicitly.
+-- NOTE(review): assumes the caller serialises booleans as 'true'/'false' or '1'/'0' - confirm
+local function to_boolean(v)
+  return v == 'true' or v == '1'
+end
+
+local prefix = KEYS[1]
+local is_spam = to_boolean(KEYS[2])
+local symbol = KEYS[3]
+local is_unlearn = to_boolean(KEYS[4])
+local input_tokens = cmsgpack.unpack(KEYS[5])
+
+local prefix_underscore = prefix .. '_'
+local hash_key = is_spam and 'S' or 'H'
+local learned_key = is_spam and 'learns_spam' or 'learns_ham'
+local learn_delta = is_unlearn and -1 or 1 -- unlearning reverts a previous learn
+
+redis.call('SADD', symbol .. '_keys', prefix)
+redis.call('HSET', prefix, 'version', '2') -- new schema
+redis.call('HINCRBY', prefix, learned_key, learn_delta) -- increase or decrease learned count
+
+for _, token in ipairs(input_tokens) do
+  redis.call('HINCRBY', prefix_underscore .. tostring(token), hash_key, learn_delta)
+end
\ No newline at end of file