Browse Source

[Project] Add preliminary support of CDB bayes dump

tags/3.1
Vsevolod Stakhov 2 years ago
parent
commit
235488eac6
2 changed files with 61 additions and 11 deletions
  1. 60
    10
      lualib/rspamadm/statistics_dump.lua
  2. 1
    1
      src/libstat/backends/cdb_backend.cxx

+ 60
- 10
lualib/rspamadm/statistics_dump.lua View File

local argparse = require "argparse" local argparse = require "argparse"
local rspamd_zstd = require "rspamd_zstd" local rspamd_zstd = require "rspamd_zstd"
local rspamd_text = require "rspamd_text" local rspamd_text = require "rspamd_text"
local rspamd_util = require "rspamd_util"
local rspamd_cdb = require "rspamd_cdb"
local lua_util = require "lua_util"
local rspamd_i64 = require "rspamd_int64"
local ucl = require "ucl" local ucl = require "ucl"


local N = "statistics_dump" local N = "statistics_dump"
-- Extract subcommand -- Extract subcommand
local dump = parser:command "dump d" local dump = parser:command "dump d"
:description "Dump bayes statistics" :description "Dump bayes statistics"
dump:flag "-j --json"
:description "Json output"
dump:mutex(
dump:flag "-j --json"
:description "Json output",
dump:flag "-C --cdb"
:description "CDB output"
)
dump:flag "-c --compress" dump:flag "-c --compress"
:description "Compress output" :description "Compress output"
dump:option "-b --batch-size" dump:option "-b --batch-size"
:convert(tonumber) :convert(tonumber)
:default(1000) :default(1000)



-- Restore -- Restore
local restore = parser:command "restore r" local restore = parser:command "restore r"
:description "Restore bayes statistics" :description "Restore bayes statistics"
return data return data
end end


-- Used to clear tables
local clear_fcn = table.clear or function(tbl) local clear_fcn = table.clear or function(tbl)
local l = #tbl
for i=1,l do tbl[i] = nil end
local keys = lua_util.keys(tbl)
for _,k in ipairs(keys) do
tbl[k] = nil
end
end end


local compress_ctx local compress_ctx
end end
end end


local function dump_pattern(conn, pattern, opts, out)
--- Flush the tokens collected for one pattern into a CDB builder.
-- The builder is cached in `out.cdb_builder` between calls. When no builder
-- is cached yet, one is created for the file `<pattern>.cdb` and seeded with
-- the `_lrnspam` / `_lrnham_` records taken from the `learns_spam` /
-- `learns_ham` fields of `out[pattern]` (the string '0' is used when a field
-- is absent).
-- @param out table holding `out[pattern]` (with an `elts` array) and the cached builder
-- @param opts unused by this function
-- @param last when truthy, the builder is finalized and removed from `out`
-- @param pattern key into `out`; also used as the base name of the CDB file
local function dump_cdb(out, opts, last, pattern)
  local bucket = out[pattern]
  local builder = out.cdb_builder

  if not builder then
    -- No builder cached yet: create it and store the learn counters up front
    builder = rspamd_cdb.build(string.format('%s.cdb', pattern))
    out.cdb_builder = builder
    builder:add('_lrnspam', rspamd_i64.fromstring(bucket.learns_spam or '0'))
    builder:add('_lrnham_', rspamd_i64.fromstring(bucket.learns_ham or '0'))
  end

  -- Every collected element becomes one key/value record
  for _, elt in ipairs(bucket.elts) do
    builder:add(elt.key, elt.value)
  end

  if not last then
    return
  end

  -- Final chunk: finish the builder and drop the cached reference
  builder:finalize()
  out.cdb_builder = nil
end

local function dump_pattern(conn, pattern, opts, out, key)
local cursor = 0 local cursor = 0


repeat repeat
-- Output keeping track of the commas -- Output keeping track of the commas
for i,d in ipairs(tokens) do for i,d in ipairs(tokens) do
if cursor == 0 and i == #tokens or not opts.json then if cursor == 0 and i == #tokens or not opts.json then
out[#out + 1] = rspamd_logger.slog('"%s": %s\n', d.key,
ucl.to_format(d.data, "json-compact"))
if opts.cdb then
table.insert(out[key].elts, {
key = rspamd_i64.fromstring(string.match(d.key, '%d+')),
value = rspamd_util.pack('<n<n', tonumber(d.data["S"] or '0') or 0,
tonumber(d.data["H"] or '0'))
})
else
out[#out + 1] = rspamd_logger.slog('"%s": %s\n', d.key,
ucl.to_format(d.data, "json-compact"))
end
else else
out[#out + 1] = rspamd_logger.slog('"%s": %s,\n', d.key, out[#out + 1] = rspamd_logger.slog('"%s": %s,\n', d.key,
ucl.to_format(d.data, "json-compact")) ucl.to_format(d.data, "json-compact"))


-- Do not write the last chunk of out as it will be processed afterwards -- Do not write the last chunk of out as it will be processed afterwards
if not cursor == 0 then if not cursor == 0 then
dump_out(out, opts, false)
clear_fcn(out)
if opts.cdb then
dump_out(out, opts, false)
clear_fcn(out)
else
dump_cdb(out, opts, false, key)
out[key].elts = {}
end
elseif opts.cdb then
dump_cdb(out, opts, true, key)
end end


until cursor == 0 until cursor == 0
if opts.json then if opts.json then
out[#out + 1] = string.format('{"pattern": "%s", "meta": %s, "elts": {\n', out[#out + 1] = string.format('{"pattern": "%s", "meta": %s, "elts": {\n',
k, ucl.to_format(redis_map_zip(additional_keys), 'json-compact')) k, ucl.to_format(redis_map_zip(additional_keys), 'json-compact'))
elseif opts.cdb then
out[k] = redis_map_zip(additional_keys)
out[k].elts = {}
else else
out[#out + 1] = string.format('"%s": %s\n', k, out[#out + 1] = string.format('"%s": %s\n', k,
ucl.to_format(redis_map_zip(additional_keys), 'json-compact')) ucl.to_format(redis_map_zip(additional_keys), 'json-compact'))
end end
dump_pattern(conn, pat, opts, out)
dump_pattern(conn, pat, opts, out, k)
patterns_seen[pat] = true patterns_seen[pat] = true
end end
end end

+ 1
- 1
src/libstat/backends/cdb_backend.cxx View File



/* Now get number of learns */ /* Now get number of learns */
std::int64_t cdb_key; std::int64_t cdb_key;
static const char learn_spam_key[8] = "lrnspam", learn_ham_key[8] = "lrnham";
static const char learn_spam_key[9] = "_lrnspam", learn_ham_key[9] = "_lrnham_";


auto check_key = [&](const char *key, std::uint64_t &target) -> tl::expected<bool, std::string> { auto check_key = [&](const char *key, std::uint64_t &target) -> tl::expected<bool, std::string> {
memcpy((void *)&cdb_key, key, sizeof(cdb_key)); memcpy((void *)&cdb_key, key, sizeof(cdb_key));

Loading…
Cancel
Save