diff options
-rw-r--r-- | lualib/rspamadm/stat_convert.lua | 121 | ||||
-rw-r--r-- | src/rspamadm/rspamadm.c | 16 | ||||
-rw-r--r-- | src/rspamadm/stat_convert.c | 200 |
3 files changed, 153 insertions, 184 deletions
diff --git a/lualib/rspamadm/stat_convert.lua b/lualib/rspamadm/stat_convert.lua index 7b6de9836..d497333a1 100644 --- a/lualib/rspamadm/stat_convert.lua +++ b/lualib/rspamadm/stat_convert.lua @@ -100,126 +100,5 @@ local function convert_learned(cache, server, password, redis_db) end return function (_, res) - local db = sqlite3.open(res['source_db']) - local tokens = {} - local num = 0 - local total = 0 - local nusers = 0 - local lim = 1000 -- Update each 1000 tokens - local users_map = {} - local learns = {} - local redis_password = res['redis_password'] - local redis_db = nil - local cmd = 'HINCRBY' - local ret, err_str - - if res['redis_db'] then - redis_db = tostring(res['redis_db']) - end - if res['reset_previous'] then - cmd = 'HSET' - end - - if res['cache_db'] then - if not convert_learned(res['cache_db'], res['redis_host'], - redis_password, redis_db) then - print('Cannot convert learned cache to redis') - return - end - end - - if not db then - print('Cannot open source db: ' .. res['source_db']) - return - end - - db:sql('BEGIN;') - -- Fill users mapping - for row in db:rows('SELECT * FROM users;') do - if row.id == '0' then - users_map[row.id] = '' - else - users_map[row.id] = row.name - end - learns[row.id] = row.learns - nusers = nusers + 1 - end - - -- Workaround for old databases - for row in db:rows('SELECT * FROM languages') do - if learns['0'] then - learns['0'] = learns['0'] + row.learns - else - learns['0'] = row.learns - end - end - - -- Fill tokens, sending data to redis each `lim` records - for row in db:rows('SELECT token,value,user FROM tokens;') do - local user = '' - if row.user ~= 0 and users_map[row.user] then - user = users_map[row.user] - end - - table.insert(tokens, {row.token, row.value, user}) - - num = num + 1 - total = total + 1 - if num > lim then - ret,err_str = send_redis(res['redis_host'], res['symbol'], - tokens, redis_password, redis_db, cmd) - if not ret then - print('Cannot send tokens to the redis server: ' .. err_str) - return - end - - num = 0 - tokens = {} - end - end - if #tokens > 0 then - ret, err_str = send_redis(res['redis_host'], res['symbol'], tokens, - redis_password, redis_db, cmd) - if not ret then - print('Cannot send tokens to the redis server: ' .. err_str) - return - end - end - -- Now update all users - local conn,err = redis.connect_sync({ - host = res['redis_host'], - }) - - if not conn then - print('Cannot connect to ' .. res['redis_host'] .. ' error: ' .. err) - return false - end - - if redis_password then - conn:add_cmd('AUTH', {redis_password}) - end - if redis_db then - conn:add_cmd('SELECT', {redis_db}) - end - - for id,learned in pairs(learns) do - local user = users_map[id] - if not conn:add_cmd(cmd, {res['symbol'] .. user, 'learns', learned}) then - print('Cannot update learns for user: ' .. user) - end - if not conn:add_cmd('SADD', {res['symbol'] .. '_keys', res['symbol'] .. user}) then - print('Cannot update learns for user: ' .. user) - end - end - db:sql('COMMIT;') - - ret = conn:exec() - - if ret then - print(string.format('Migrated %d tokens for %d users for symbol %s', - total, nusers, res['symbol'])) - else - print('Error occurred during sending data to redis') - end end diff --git a/src/rspamadm/rspamadm.c b/src/rspamadm/rspamadm.c index 73988f848..51c621744 100644 --- a/src/rspamadm/rspamadm.c +++ b/src/rspamadm/rspamadm.c @@ -292,7 +292,13 @@ main (gint argc, gchar **argv, gchar **env) rspamd_main->server_pool = rspamd_mempool_new (rspamd_mempool_suggest_size (), "rspamadm"); - cfg->log_level = G_LOG_LEVEL_WARNING; + /* Setup logger */ + if (verbose) { + cfg->log_level = G_LOG_LEVEL_DEBUG; + } + else { + cfg->log_level = G_LOG_LEVEL_INFO; + } cfg->log_type = RSPAMD_LOG_CONSOLE; /* Avoid timestamps printing */ @@ -305,14 +311,6 @@ main (gint argc, gchar **argv, gchar **env) rspamd_config_post_load (cfg, RSPAMD_CONFIG_INIT_LIBS|RSPAMD_CONFIG_INIT_URL|RSPAMD_CONFIG_INIT_NO_TLD); - /* Setup logger */ - if (verbose) { - cfg->log_level = G_LOG_LEVEL_DEBUG; - } - else { - cfg->log_level = G_LOG_LEVEL_INFO; - } - gperf_profiler_init (cfg, "rspamadm"); setproctitle ("rspamdadm"); diff --git a/src/rspamadm/stat_convert.c b/src/rspamadm/stat_convert.c index cd15fb8a0..c88bc54bb 100644 --- a/src/rspamadm/stat_convert.c +++ b/src/rspamadm/stat_convert.c @@ -17,10 +17,20 @@ #include "rspamadm.h" #include "lua/lua_common.h" -static gchar *source_db = NULL; -static gchar *redis_host = NULL; -static gchar *symbol = NULL; +#include "contrib/uthash/utlist.h" + +/* Common */ +static gchar *config_file = NULL; +static gchar *symbol_ham = NULL; +static gchar *symbol_spam = NULL; + +/* Inputs */ +static gchar *spam_db = NULL; +static gchar *ham_db = NULL; static gchar *cache_db = NULL; + +/* Outputs */ +static gchar *redis_host = NULL; static gchar *redis_db = NULL; static gchar *redis_password = NULL; static gboolean reset_previous = FALSE; @@ -37,20 +47,27 @@ struct rspamadm_command statconvert_command = { }; static GOptionEntry entries[] = { - {"database", 'd', 0, G_OPTION_ARG_FILENAME, &source_db, - "Input sqlite", NULL}, - {"cache", 'c', 0, G_OPTION_ARG_FILENAME, &cache_db, + {"config", 'c', 0, G_OPTION_ARG_FILENAME, &config_file, + "Config file to read data from", NULL}, + {"reset", 'r', 0, G_OPTION_ARG_NONE, &reset_previous, + "Reset previous data instead of appending values", NULL}, + + {"symbol-spam", 0, 0, G_OPTION_ARG_STRING, &symbol_spam, + "Symbol for spam (e.g. BAYES_SPAM)", NULL}, + {"symbol-ham", 0, 0, G_OPTION_ARG_STRING, &symbol_ham, + "Symbol for ham (e.g. BAYES_HAM)", NULL}, + {"spam-db", 0, 0, G_OPTION_ARG_STRING, &spam_db, + "Input spam file (sqlite3)", NULL}, + {"ham-db", 0, 0, G_OPTION_ARG_STRING, &ham_db, + "Input ham file (sqlite3)", NULL}, + {"cache", 0, 0, G_OPTION_ARG_FILENAME, &cache_db, "Input learn cache", NULL}, - {"host", 'h', 0, G_OPTION_ARG_STRING, &redis_host, + {"redis-host", 'h', 0, G_OPTION_ARG_STRING, &redis_host, "Output redis ip (in format ip:port)", NULL}, - {"symbol", 's', 0, G_OPTION_ARG_STRING, &symbol, - "Symbol in redis (e.g. BAYES_SPAM)", NULL}, - {"dbname", 'D', 0, G_OPTION_ARG_STRING, &redis_db, - "Database in redis (should be numeric)", NULL}, - {"password", 'p', 0, G_OPTION_ARG_STRING, &redis_password, + {"redis-password", 'p', 0, G_OPTION_ARG_STRING, &redis_password, "Password to connect to redis", NULL}, - {"reset", 'r', 0, G_OPTION_ARG_NONE, &reset_previous, - "Reset previous data instead of appending values", NULL}, + {"redis-db", 'd', 0, G_OPTION_ARG_STRING, &redis_db, + "Redis database (should be numeric)", NULL}, {NULL, 0, 0, G_OPTION_ARG_NONE, NULL, NULL, NULL} }; @@ -62,15 +79,20 @@ rspamadm_statconvert_help (gboolean full_help) if (full_help) { help_str = "Convert statistics from sqlite3 to redis\n\n" - "Usage: rspamadm statconvert -d <sqlite_db> -h <redis_ip> -s <symbol>\n" + "Usage: rspamadm statconvert -c /etc/rspamd.conf [-r]\n" "Where options are:\n\n" - "-d: input sqlite\n" - "-h: output redis ip (in format ip:port)\n" - "-s: symbol in redis (e.g. BAYES_SPAM)\n" - "-c: also convert data from the learn cache\n" - "-D: output redis database\n" - "-p: redis password\n" - "-r: reset previous data instead of increasing values\n"; + "-c: config file to read data from\n" + "-r: reset previous data instead of increasing values\n" + "** Or specify options directly **\n" + "--redis-host: output redis ip (in format ip:port)\n" + "--redis-db: output redis database\n" + "--redis-password: redis password\n" + "--cache: sqlite3 file for learn cache\n" + "--spam-db: sqlite3 input file for spam data\n" + "--ham-db: sqlite3 input file for ham data\n" + "--symbol-spam: symbol in redis for spam (e.g. BAYES_SPAM)\n" + "--symbol-ham: symbol in redis for ham (e.g. BAYES_HAM)\n" + ; } else { help_str = "Convert statistics from sqlite3 to redis"; @@ -103,46 +125,116 @@ rspamadm_statconvert (gint argc, gchar **argv) exit (1); } - if (!source_db) { - rspamd_fprintf (stderr, "source db is missing\n"); - exit (1); - } - if (!redis_host) { - rspamd_fprintf (stderr, "redis host is missing\n"); - exit (1); + if (config_file) { + /* Load config file, assuming that it has all information required */ + struct ucl_parser *parser; + + parser = ucl_parser_new (0); + rspamd_ucl_add_conf_variables (parser, ucl_vars); + + if (!ucl_parser_add_file (parser, config_file)) { + msg_err ("ucl parser error: %s", ucl_parser_get_error (parser)); + ucl_parser_free (parser); + + exit (EXIT_FAILURE); + } + + obj = ucl_parser_get_object (parser); + ucl_parser_free (parser); } - if (!symbol) { - rspamd_fprintf (stderr, "symbol is missing\n"); - exit (1); + else { + /* We need to get all information from the command line */ + ucl_object_t *classifier, *statfile_ham, *statfile_spam, *tmp, *redis; + + /* Check arguments sanity */ + if (spam_db == NULL) { + msg_err ("No spam-db specified"); + exit (EXIT_FAILURE); + } + if (ham_db == NULL) { + msg_err ("No ham-db specified"); + exit (EXIT_FAILURE); + } + if (redis_host == NULL) { + msg_err ("No redis-host specified"); + exit (EXIT_FAILURE); + } + if (symbol_ham == NULL) { + msg_err ("No symbol-ham specified"); + exit (EXIT_FAILURE); + } + if (symbol_spam == NULL) { + msg_err ("No symbol-spam specified"); + exit (EXIT_FAILURE); + } + + obj = ucl_object_typed_new (UCL_OBJECT); + + classifier = ucl_object_typed_new (UCL_OBJECT); + ucl_object_insert_key (obj, classifier, "classifier", 0, false); + /* Now we need to create "bayes" key in it */ + tmp = ucl_object_typed_new (UCL_OBJECT); + ucl_object_insert_key (classifier, tmp, "bayes", 0, false); + classifier = tmp; + ucl_object_insert_key (classifier, ucl_object_fromstring ("sqlite3"), + "backend", 0, false); + + if (cache_db != NULL) { + ucl_object_t *cache; + + cache = ucl_object_typed_new (UCL_OBJECT); + ucl_object_insert_key (cache, ucl_object_fromstring ("sqlite3"), + "type", 0, false); + ucl_object_insert_key (cache, ucl_object_fromstring (cache_db), + "file", 0, false); + + ucl_object_insert_key (classifier, cache, "cache", 0, false); + } + + statfile_ham = ucl_object_typed_new (UCL_OBJECT); + ucl_object_insert_key (statfile_ham, ucl_object_fromstring (symbol_ham), + "symbol", 0, false); + ucl_object_insert_key (statfile_ham, ucl_object_frombool (false), + "spam", 0, false); + ucl_object_insert_key (statfile_ham, ucl_object_fromstring (ham_db), + "db", 0, false); + + statfile_spam = ucl_object_typed_new (UCL_OBJECT); + ucl_object_insert_key (statfile_spam, ucl_object_fromstring (symbol_spam), + "symbol", 0, false); + ucl_object_insert_key (statfile_spam, ucl_object_frombool (true), + "spam", 0, false); + ucl_object_insert_key (statfile_spam, ucl_object_fromstring (spam_db), + "db", 0, false); + + DL_APPEND (statfile_ham, statfile_spam); + ucl_object_insert_key (classifier, statfile_ham, + "statfile", 0, false); + + /* Deal with redis */ + + redis = ucl_object_typed_new (UCL_OBJECT); + ucl_object_insert_key (obj, redis, "redis", 0, false); + + ucl_object_insert_key (redis, ucl_object_fromstring (redis_host), + "host", 0, false); + + if (redis_db) { + ucl_object_insert_key (redis, ucl_object_fromstring (redis_db), + "db", 0, false); + } + + if (redis_password) { + ucl_object_insert_key (redis, ucl_object_fromstring (redis_password), + "password", 0, false); + } } L = rspamd_lua_init (); rspamd_lua_set_path (L, NULL, NULL); - - obj = ucl_object_typed_new (UCL_OBJECT); - ucl_object_insert_key (obj, ucl_object_fromstring (source_db), - "source_db", 0, false); - ucl_object_insert_key (obj, ucl_object_fromstring (redis_host), - "redis_host", 0, false); - ucl_object_insert_key (obj, ucl_object_fromstring (symbol), - "symbol", 0, false); ucl_object_insert_key (obj, ucl_object_frombool (reset_previous), "reset_previous", 0, false); - if (cache_db != NULL) { - ucl_object_insert_key (obj, ucl_object_fromstring (cache_db), - "cache_db", 0, false); - } - - if (redis_password) { - ucl_object_insert_key (obj, ucl_object_fromstring (redis_password), - "redis_password", 0, false); - } - - if (redis_db) { - ucl_object_insert_key (obj, ucl_object_fromstring (redis_db), - "redis_db", 0, false); - } rspamadm_execute_lua_ucl_subr (L, argc, |