From 33cf745fb1c772c57f45e14de15dc706ed5284d5 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Fri, 8 Dec 2023 09:33:57 +0000 Subject: [Rework] Use strings for int64_t It seems there is no easy way to use int64 in Redis Lua, hence, we have to use strings. It's much more expensive, but it still has some advantage over the previous schema. --- lualib/redis_scripts/bayes_classify.lua | 6 +++--- lualib/redis_scripts/bayes_learn.lua | 4 ++-- lualib/redis_scripts/bayes_stat.lua | 0 3 files changed, 5 insertions(+), 5 deletions(-) create mode 100644 lualib/redis_scripts/bayes_stat.lua (limited to 'lualib/redis_scripts') diff --git a/lualib/redis_scripts/bayes_classify.lua b/lualib/redis_scripts/bayes_classify.lua index c999609e5..9bef96f14 100644 --- a/lualib/redis_scripts/bayes_classify.lua +++ b/lualib/redis_scripts/bayes_classify.lua @@ -1,10 +1,9 @@ -- Lua script to perform bayes classification -- This script accepts the following parameters: -- key1 - prefix for bayes tokens (e.g. for per-user classification) --- key2 - set of tokens encoded in messagepack array of int64_t +-- key2 - set of tokens encoded in messagepack array of strings local prefix = KEYS[1] -local input_tokens = cmsgpack.unpack(KEYS[2]) local output_spam = {} local output_ham = {} @@ -17,8 +16,9 @@ local prefix_underscore = prefix .. '_' -- This optimisation will save a lot of space for sparse tokens, and in Bayes that assumption is normally held if learned_ham > 0 and learned_spam > 0 then + local input_tokens = cmsgpack.unpack(KEYS[2]) for i, token in ipairs(input_tokens) do - local token_data = redis.call('HMGET', prefix_underscore .. tostring(token), 'H', 'S') + local token_data = redis.call('HMGET', prefix_underscore .. 
token, 'H', 'S') if token_data then local ham_count = token_data[1] diff --git a/lualib/redis_scripts/bayes_learn.lua b/lualib/redis_scripts/bayes_learn.lua index 638254706..7536f6808 100644 --- a/lualib/redis_scripts/bayes_learn.lua +++ b/lualib/redis_scripts/bayes_learn.lua @@ -4,7 +4,7 @@ -- key2 - boolean is_spam -- key3 - string symbol -- key4 - boolean is_unlearn --- key5 - set of tokens encoded in messagepack array of int64_t +-- key5 - set of tokens encoded in messagepack array of strings local prefix = KEYS[1] local is_spam = KEYS[2] == 'true' and true or false @@ -21,5 +21,5 @@ redis.call('HSET', prefix, 'version', '2') -- new schema redis.call('HINCRBY', prefix, learned_key, is_unlearn and -1 or 1) -- increase or decrease learned count for _, token in ipairs(input_tokens) do - redis.call('HINCRBY', prefix_underscore .. tostring(token), hash_key, 1) + redis.call('HINCRBY', prefix_underscore .. token, hash_key, 1) end \ No newline at end of file diff --git a/lualib/redis_scripts/bayes_stat.lua b/lualib/redis_scripts/bayes_stat.lua new file mode 100644 index 000000000..e69de29bb -- cgit v1.2.3