]> source.dussan.org Git - rspamd.git/commitdiff
[Project] Add classify redis script
authorVsevolod Stakhov <vsevolod@rspamd.com>
Sat, 2 Dec 2023 12:36:17 +0000 (12:36 +0000)
committerVsevolod Stakhov <vsevolod@rspamd.com>
Sat, 2 Dec 2023 12:36:17 +0000 (12:36 +0000)
.luacheckrc
lualib/redis_scripts/bayes_classify.lua [new file with mode: 0644]

index 353bee41da244bdbb064c186497dec9498301a82..7e48b8ee0526ac5eda75b16ff36c19a5fff1faf7 100644 (file)
@@ -64,6 +64,7 @@ files['/**/lualib/redis_scripts/**'].globals = {
   'redis',
   'KEYS',
   'cjson',
+  'cmsgpack',
 }
 
 files['/**/src/rspamadm/*'].globals = {
diff --git a/lualib/redis_scripts/bayes_classify.lua b/lualib/redis_scripts/bayes_classify.lua
new file mode 100644 (file)
index 0000000..c2654e4
--- /dev/null
@@ -0,0 +1,26 @@
+-- Lua script to perform bayes classification.
+-- This script accepts the following parameters:
+-- key1 - prefix for bayes tokens (e.g. for per-user classification)
+-- key2 - set of tokens encoded in messagepack array of int64_t
+-- Returns a messagepack-encoded pair { ham_counts, spam_counts }; both arrays
+-- are indexed by the position of the token in the input array.
+
+local prefix = KEYS[1]
+local input_tokens = cmsgpack.unpack(KEYS[2])
+local output_spam = {}
+local output_ham = {}
+
+-- NOTE(review): tokens are decoded into Lua numbers (presumably doubles in
+-- the Lua embedded in Redis), so tostring() may not yield exact, distinct
+-- key names for very large int64 values - confirm against the learn side.
+for i, token in ipairs(input_tokens) do
+  -- redis.call raises on error and otherwise returns one slot per requested
+  -- field (false when the field or the key is absent), so no nil check on
+  -- the reply is needed: tonumber(false) is nil and falls back to 0.
+  local token_data = redis.call('HMGET', prefix .. tostring(token), 'H', 'S')
+
+  output_ham[i] = tonumber(token_data[1]) or 0
+  output_spam[i] = tonumber(token_data[2]) or 0
+end
+
+return cmsgpack.pack({ output_ham, output_spam })
\ No newline at end of file