Browse Source

[Rework] Start major stat_convert rework

tags/1.7.0
Vsevolod Stakhov 6 years ago
parent
commit
440b588a74
3 changed files with 153 additions and 184 deletions
  1. 0
    121
      lualib/rspamadm/stat_convert.lua
  2. 7
    9
      src/rspamadm/rspamadm.c
  3. 146
    54
      src/rspamadm/stat_convert.c

+ 0
- 121
lualib/rspamadm/stat_convert.lua View File

@@ -100,126 +100,5 @@ local function convert_learned(cache, server, password, redis_db)
end

--- Entry point of the sqlite3 -> redis statistics converter.
-- Reads users, learn counters and tokens from the sqlite3 database named by
-- `res['source_db']` and pushes them to the redis server `res['redis_host']`
-- under the symbol `res['symbol']`.
--
-- Fields of `res` that are read here:
--   source_db      path of the sqlite3 database to read
--   cache_db       optional learn cache, handed to `convert_learned`
--   redis_host     redis server to connect to
--   redis_password optional, sent with AUTH
--   redis_db       optional, sent with SELECT (converted with `tostring`)
--   symbol         prefix of the redis keys that are written
--   reset_previous when truthy use HSET instead of HINCRBY
--
-- Failures are reported with `print`; the function returns early in that case.
-- @param _ unused
-- @param res table of options (see above)
return function (_, res)
  local db = sqlite3.open(res['source_db'])
  local redis_host = res['redis_host']
  local symbol = res['symbol']
  local redis_password = res['redis_password']
  local redis_db = nil
  local cmd = 'HINCRBY'
  local lim = 1000 -- Flush tokens to redis each `lim` records
  local tokens = {}
  local num = 0
  local total = 0
  local nusers = 0
  local users_map = {}
  local learns = {}

  if res['redis_db'] then
    redis_db = tostring(res['redis_db'])
  end
  if res['reset_previous'] then
    -- Overwrite existing values instead of incrementing them
    cmd = 'HSET'
  end

  if res['cache_db'] then
    if not convert_learned(res['cache_db'], redis_host,
        redis_password, redis_db) then
      print('Cannot convert learned cache to redis')
      return
    end
  end

  if not db then
    print('Cannot open source db: ' .. res['source_db'])
    return
  end

  -- Sends the pending batch of tokens and resets the batch state.
  -- Returns false (after printing the reason) when sending has failed.
  local function flush_tokens()
    local ok, err_str = send_redis(redis_host, symbol, tokens,
        redis_password, redis_db, cmd)

    if not ok then
      -- err_str is not guaranteed to be a string, so do not concatenate it raw
      print('Cannot send tokens to the redis server: ' .. tostring(err_str))
      return false
    end

    tokens = {}
    num = 0

    return true
  end

  db:sql('BEGIN;')
  -- Fill users mapping
  for row in db:rows('SELECT * FROM users;') do
    -- The user with id '0' is mapped to an empty name (no key suffix)
    if row.id == '0' then
      users_map[row.id] = ''
    else
      users_map[row.id] = row.name
    end
    learns[row.id] = row.learns
    nusers = nusers + 1
  end

  -- Workaround for old databases: learn counters stored per language are
  -- accumulated into the counter of user '0'
  for row in db:rows('SELECT * FROM languages') do
    if learns['0'] then
      learns['0'] = learns['0'] + row.learns
    else
      learns['0'] = row.learns
    end
  end

  -- Fill tokens, sending data to redis each `lim` records
  for row in db:rows('SELECT token,value,user FROM tokens;') do
    local user = ''
    if row.user ~= 0 and users_map[row.user] then
      user = users_map[row.user]
    end

    tokens[#tokens + 1] = {row.token, row.value, user}

    num = num + 1
    total = total + 1
    if num >= lim then
      if not flush_tokens() then
        return
      end
    end
  end

  -- Send the last, incomplete batch
  if #tokens > 0 then
    if not flush_tokens() then
      return
    end
  end

  -- Now update all users
  local conn, err = redis.connect_sync({
    host = redis_host,
  })

  if not conn then
    print('Cannot connect to ' .. redis_host .. ' error: ' .. tostring(err))
    return false
  end

  if redis_password then
    conn:add_cmd('AUTH', {redis_password})
  end
  if redis_db then
    conn:add_cmd('SELECT', {redis_db})
  end

  for id, learned in pairs(learns) do
    -- learns['0'] can be created by the languages workaround above even when
    -- the users table has no row with this id, so fall back to the empty name
    local user = users_map[id] or ''

    if not conn:add_cmd(cmd, {symbol .. user, 'learns', learned}) then
      print('Cannot update learns for user: ' .. user)
    end
    if not conn:add_cmd('SADD', {symbol .. '_keys', symbol .. user}) then
      print('Cannot register statistics key for user: ' .. user)
    end
  end
  db:sql('COMMIT;')

  -- Commands added above are executed here in one go
  local ret = conn:exec()

  if ret then
    print(string.format('Migrated %d tokens for %d users for symbol %s',
        total, nusers, symbol))
  else
    print('Error occurred during sending data to redis')
  end
end

+ 7
- 9
src/rspamadm/rspamadm.c View File

@@ -292,7 +292,13 @@ main (gint argc, gchar **argv, gchar **env)
rspamd_main->server_pool = rspamd_mempool_new (rspamd_mempool_suggest_size (),
"rspamadm");

cfg->log_level = G_LOG_LEVEL_WARNING;
/* Setup logger */
if (verbose) {
cfg->log_level = G_LOG_LEVEL_DEBUG;
}
else {
cfg->log_level = G_LOG_LEVEL_INFO;
}

cfg->log_type = RSPAMD_LOG_CONSOLE;
/* Avoid timestamps printing */
@@ -305,14 +311,6 @@ main (gint argc, gchar **argv, gchar **env)
rspamd_config_post_load (cfg,
RSPAMD_CONFIG_INIT_LIBS|RSPAMD_CONFIG_INIT_URL|RSPAMD_CONFIG_INIT_NO_TLD);

/* Setup logger */
if (verbose) {
cfg->log_level = G_LOG_LEVEL_DEBUG;
}
else {
cfg->log_level = G_LOG_LEVEL_INFO;
}

gperf_profiler_init (cfg, "rspamadm");
setproctitle ("rspamdadm");


+ 146
- 54
src/rspamadm/stat_convert.c View File

@@ -17,10 +17,20 @@
#include "rspamadm.h"
#include "lua/lua_common.h"

static gchar *source_db = NULL;
static gchar *redis_host = NULL;
static gchar *symbol = NULL;
#include "contrib/uthash/utlist.h"

/* Common */
static gchar *config_file = NULL;
static gchar *symbol_ham = NULL;
static gchar *symbol_spam = NULL;

/* Inputs */
static gchar *spam_db = NULL;
static gchar *ham_db = NULL;
static gchar *cache_db = NULL;

/* Outputs */
static gchar *redis_host = NULL;
static gchar *redis_db = NULL;
static gchar *redis_password = NULL;
static gboolean reset_previous = FALSE;
@@ -37,20 +47,27 @@ struct rspamadm_command statconvert_command = {
};

static GOptionEntry entries[] = {
{"database", 'd', 0, G_OPTION_ARG_FILENAME, &source_db,
"Input sqlite", NULL},
{"cache", 'c', 0, G_OPTION_ARG_FILENAME, &cache_db,
{"config", 'c', 0, G_OPTION_ARG_FILENAME, &config_file,
"Config file to read data from", NULL},
{"reset", 'r', 0, G_OPTION_ARG_NONE, &reset_previous,
"Reset previous data instead of appending values", NULL},

{"symbol-spam", 0, 0, G_OPTION_ARG_STRING, &symbol_spam,
"Symbol for spam (e.g. BAYES_SPAM)", NULL},
{"symbol-ham", 0, 0, G_OPTION_ARG_STRING, &symbol_ham,
"Symbol for ham (e.g. BAYES_HAM)", NULL},
{"spam-db", 0, 0, G_OPTION_ARG_STRING, &spam_db,
"Input spam file (sqlite3)", NULL},
{"ham-db", 0, 0, G_OPTION_ARG_STRING, &ham_db,
"Input ham file (sqlite3)", NULL},
{"cache", 0, 0, G_OPTION_ARG_FILENAME, &cache_db,
"Input learn cache", NULL},
{"host", 'h', 0, G_OPTION_ARG_STRING, &redis_host,
{"redis-host", 'h', 0, G_OPTION_ARG_STRING, &redis_host,
"Output redis ip (in format ip:port)", NULL},
{"symbol", 's', 0, G_OPTION_ARG_STRING, &symbol,
"Symbol in redis (e.g. BAYES_SPAM)", NULL},
{"dbname", 'D', 0, G_OPTION_ARG_STRING, &redis_db,
"Database in redis (should be numeric)", NULL},
{"password", 'p', 0, G_OPTION_ARG_STRING, &redis_password,
{"redis-password", 'p', 0, G_OPTION_ARG_STRING, &redis_password,
"Password to connect to redis", NULL},
{"reset", 'r', 0, G_OPTION_ARG_NONE, &reset_previous,
"Reset previous data instead of appending values", NULL},
{"redis-db", 'd', 0, G_OPTION_ARG_STRING, &redis_db,
"Redis database (should be numeric)", NULL},
{NULL, 0, 0, G_OPTION_ARG_NONE, NULL, NULL, NULL}
};

@@ -62,15 +79,20 @@ rspamadm_statconvert_help (gboolean full_help)

if (full_help) {
help_str = "Convert statistics from sqlite3 to redis\n\n"
"Usage: rspamadm statconvert -d <sqlite_db> -h <redis_ip> -s <symbol>\n"
"Usage: rspamadm statconvert -c /etc/rspamd.conf [-r]\n"
"Where options are:\n\n"
"-d: input sqlite\n"
"-h: output redis ip (in format ip:port)\n"
"-s: symbol in redis (e.g. BAYES_SPAM)\n"
"-c: also convert data from the learn cache\n"
"-D: output redis database\n"
"-p: redis password\n"
"-r: reset previous data instead of increasing values\n";
"-c: config file to read data from\n"
"-r: reset previous data instead of increasing values\n"
"** Or specify options directly **\n"
"--redis-host: output redis ip (in format ip:port)\n"
"--redis-db: output redis database\n"
"--redis-password: redis password\n"
"--cache: sqlite3 file for learn cache\n"
"--spam-db: sqlite3 input file for spam data\n"
"--ham-db: sqlite3 input file for ham data\n"
"--symbol-spam: symbol in redis for spam (e.g. BAYES_SPAM)\n"
"--symbol-ham: symbol in redis for ham (e.g. BAYES_HAM)\n"
;
}
else {
help_str = "Convert statistics from sqlite3 to redis";
@@ -103,46 +125,116 @@ rspamadm_statconvert (gint argc, gchar **argv)
exit (1);
}

if (!source_db) {
rspamd_fprintf (stderr, "source db is missing\n");
exit (1);
}
if (!redis_host) {
rspamd_fprintf (stderr, "redis host is missing\n");
exit (1);
if (config_file) {
/* Load config file, assuming that it has all information required */
struct ucl_parser *parser;

parser = ucl_parser_new (0);
rspamd_ucl_add_conf_variables (parser, ucl_vars);

if (!ucl_parser_add_file (parser, config_file)) {
msg_err ("ucl parser error: %s", ucl_parser_get_error (parser));
ucl_parser_free (parser);

exit (EXIT_FAILURE);
}

obj = ucl_parser_get_object (parser);
ucl_parser_free (parser);
}
if (!symbol) {
rspamd_fprintf (stderr, "symbol is missing\n");
exit (1);
else {
/* We need to get all information from the command line */
ucl_object_t *classifier, *statfile_ham, *statfile_spam, *tmp, *redis;

/* Check arguments sanity */
if (spam_db == NULL) {
msg_err ("No spam-db specified");
exit (EXIT_FAILURE);
}
if (ham_db == NULL) {
msg_err ("No ham-db specified");
exit (EXIT_FAILURE);
}
if (redis_host == NULL) {
msg_err ("No redis-host specified");
exit (EXIT_FAILURE);
}
if (symbol_ham == NULL) {
msg_err ("No symbol-ham specified");
exit (EXIT_FAILURE);
}
if (symbol_spam == NULL) {
msg_err ("No symbol-spam specified");
exit (EXIT_FAILURE);
}

obj = ucl_object_typed_new (UCL_OBJECT);

classifier = ucl_object_typed_new (UCL_OBJECT);
ucl_object_insert_key (obj, classifier, "classifier", 0, false);
/* Now we need to create "bayes" key in it */
tmp = ucl_object_typed_new (UCL_OBJECT);
ucl_object_insert_key (classifier, tmp, "bayes", 0, false);
classifier = tmp;
ucl_object_insert_key (classifier, ucl_object_fromstring ("sqlite3"),
"backend", 0, false);

if (cache_db != NULL) {
ucl_object_t *cache;

cache = ucl_object_typed_new (UCL_OBJECT);
ucl_object_insert_key (cache, ucl_object_fromstring ("sqlite3"),
"type", 0, false);
ucl_object_insert_key (cache, ucl_object_fromstring (cache_db),
"file", 0, false);

ucl_object_insert_key (classifier, cache, "cache", 0, false);
}

statfile_ham = ucl_object_typed_new (UCL_OBJECT);
ucl_object_insert_key (statfile_ham, ucl_object_fromstring (symbol_ham),
"symbol", 0, false);
ucl_object_insert_key (statfile_ham, ucl_object_frombool (false),
"spam", 0, false);
ucl_object_insert_key (statfile_ham, ucl_object_fromstring (ham_db),
"db", 0, false);

statfile_spam = ucl_object_typed_new (UCL_OBJECT);
ucl_object_insert_key (statfile_spam, ucl_object_fromstring (symbol_spam),
"symbol", 0, false);
ucl_object_insert_key (statfile_spam, ucl_object_frombool (true),
"spam", 0, false);
ucl_object_insert_key (statfile_spam, ucl_object_fromstring (spam_db),
"db", 0, false);

DL_APPEND (statfile_ham, statfile_spam);
ucl_object_insert_key (classifier, statfile_ham,
"statfile", 0, false);

/* Deal with redis */

redis = ucl_object_typed_new (UCL_OBJECT);
ucl_object_insert_key (obj, redis, "redis", 0, false);

ucl_object_insert_key (redis, ucl_object_fromstring (redis_host),
"host", 0, false);

if (redis_db) {
ucl_object_insert_key (redis, ucl_object_fromstring (redis_db),
"db", 0, false);
}

if (redis_password) {
ucl_object_insert_key (redis, ucl_object_fromstring (redis_password),
"password", 0, false);
}
}

L = rspamd_lua_init ();
rspamd_lua_set_path (L, NULL, NULL);

obj = ucl_object_typed_new (UCL_OBJECT);
ucl_object_insert_key (obj, ucl_object_fromstring (source_db),
"source_db", 0, false);
ucl_object_insert_key (obj, ucl_object_fromstring (redis_host),
"redis_host", 0, false);
ucl_object_insert_key (obj, ucl_object_fromstring (symbol),
"symbol", 0, false);
ucl_object_insert_key (obj, ucl_object_frombool (reset_previous),
"reset_previous", 0, false);

if (cache_db != NULL) {
ucl_object_insert_key (obj, ucl_object_fromstring (cache_db),
"cache_db", 0, false);
}

if (redis_password) {
ucl_object_insert_key (obj, ucl_object_fromstring (redis_password),
"redis_password", 0, false);
}

if (redis_db) {
ucl_object_insert_key (obj, ucl_object_fromstring (redis_db),
"redis_db", 0, false);
}

rspamadm_execute_lua_ucl_subr (L,
argc,

Loading…
Cancel
Save