From 235488eac67f055eee9246191ab70eb80fd81a1e Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Wed, 20 Oct 2021 11:31:16 +0100 Subject: [PATCH] [Project] Add preliminary support of CDB bayes dump --- lualib/rspamadm/statistics_dump.lua | 71 +++++++++++++++++++++++----- src/libstat/backends/cdb_backend.cxx | 2 +- 2 files changed, 61 insertions(+), 12 deletions(-) diff --git a/lualib/rspamadm/statistics_dump.lua b/lualib/rspamadm/statistics_dump.lua index 7b1dc581e..33b2a3cc5 100644 --- a/lualib/rspamadm/statistics_dump.lua +++ b/lualib/rspamadm/statistics_dump.lua @@ -19,6 +19,10 @@ local rspamd_logger = require "rspamd_logger" local argparse = require "argparse" local rspamd_zstd = require "rspamd_zstd" local rspamd_text = require "rspamd_text" +local rspamd_util = require "rspamd_util" +local rspamd_cdb = require "rspamd_cdb" +local lua_util = require "lua_util" +local rspamd_i64 = require "rspamd_int64" local ucl = require "ucl" local N = "statistics_dump" @@ -41,8 +45,12 @@ parser:option "-c --config" -- Extract subcommand local dump = parser:command "dump d" :description "Dump bayes statistics" -dump:flag "-j --json" - :description "Json output" +dump:mutex( + dump:flag "-j --json" + :description "Json output", + dump:flag "-C --cdb" + :description "CDB output" +) dump:flag "-c --compress" :description "Compress output" dump:option "-b --batch-size" @@ -51,6 +59,7 @@ dump:option "-b --batch-size" :convert(tonumber) :default(1000) + -- Restore local restore = parser:command "restore r" :description "Restore bayes statistics" @@ -166,10 +175,12 @@ local function redis_map_zip(ar) return data end --- Used to clear plain numeric tables +-- Used to clear tables local clear_fcn = table.clear or function(tbl) - local l = #tbl - for i=1,l do tbl[i] = nil end + local keys = lua_util.keys(tbl) + for _,k in ipairs(keys) do + tbl[k] = nil + end end local compress_ctx @@ -192,7 +203,27 @@ local function dump_out(out, opts, last) end end -local function dump_pattern(conn, pattern, opts, out) +local function dump_cdb(out, opts, last, pattern) + local results = out[pattern] + + if not out.cdb_builder then + -- First invocation + out.cdb_builder = rspamd_cdb.build(string.format('%s.cdb', pattern)) + out.cdb_builder:add('_lrnspam', rspamd_i64.fromstring(results.learns_spam or '0')) + out.cdb_builder:add('_lrnham_', rspamd_i64.fromstring(results.learns_ham or '0')) + end + + for _,o in ipairs(results.elts) do + out.cdb_builder:add(o.key, o.value) + end + + if last then + out.cdb_builder:finalize() + out.cdb_builder = nil + end +end + +local function dump_pattern(conn, pattern, opts, out, key) local cursor = 0 repeat @@ -232,8 +263,16 @@ local function dump_pattern(conn, pattern, opts, out) -- Output keeping track of the commas for i,d in ipairs(tokens) do if cursor == 0 and i == #tokens or not opts.json then - out[#out + 1] = rspamd_logger.slog('"%s": %s\n', d.key, - ucl.to_format(d.data, "json-compact")) + if opts.cdb then + table.insert(out[key].elts, { + key = rspamd_i64.fromstring(string.match(d.key, '%d+')), + value = rspamd_util.pack(' tl::expected /* Now get number of learns */ std::int64_t cdb_key; - static const char learn_spam_key[8] = "lrnspam", learn_ham_key[8] = "lrnham"; + static const char learn_spam_key[9] = "_lrnspam", learn_ham_key[9] = "_lrnham_"; auto check_key = [&](const char *key, std::uint64_t &target) -> tl::expected { memcpy((void *)&cdb_key, key, sizeof(cdb_key)); -- 2.39.5