about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- lualib/rspamadm/stat_convert.lua 121
-rw-r--r-- src/rspamadm/rspamadm.c 16
-rw-r--r-- src/rspamadm/stat_convert.c 200
3 files changed, 153 insertions, 184 deletions
diff --git a/lualib/rspamadm/stat_convert.lua b/lualib/rspamadm/stat_convert.lua
index 7b6de9836..d497333a1 100644
--- a/lualib/rspamadm/stat_convert.lua
+++ b/lualib/rspamadm/stat_convert.lua
@@ -100,126 +100,5 @@ local function convert_learned(cache, server, password, redis_db)
end
return function (_, res)
- local db = sqlite3.open(res['source_db'])
- local tokens = {}
- local num = 0
- local total = 0
- local nusers = 0
- local lim = 1000 -- Update each 1000 tokens
- local users_map = {}
- local learns = {}
- local redis_password = res['redis_password']
- local redis_db = nil
- local cmd = 'HINCRBY'
- local ret, err_str
-
- if res['redis_db'] then
- redis_db = tostring(res['redis_db'])
- end
- if res['reset_previous'] then
- cmd = 'HSET'
- end
-
- if res['cache_db'] then
- if not convert_learned(res['cache_db'], res['redis_host'],
- redis_password, redis_db) then
- print('Cannot convert learned cache to redis')
- return
- end
- end
-
- if not db then
- print('Cannot open source db: ' .. res['source_db'])
- return
- end
-
- db:sql('BEGIN;')
- -- Fill users mapping
- for row in db:rows('SELECT * FROM users;') do
- if row.id == '0' then
- users_map[row.id] = ''
- else
- users_map[row.id] = row.name
- end
- learns[row.id] = row.learns
- nusers = nusers + 1
- end
-
- -- Workaround for old databases
- for row in db:rows('SELECT * FROM languages') do
- if learns['0'] then
- learns['0'] = learns['0'] + row.learns
- else
- learns['0'] = row.learns
- end
- end
-
- -- Fill tokens, sending data to redis each `lim` records
- for row in db:rows('SELECT token,value,user FROM tokens;') do
- local user = ''
- if row.user ~= 0 and users_map[row.user] then
- user = users_map[row.user]
- end
-
- table.insert(tokens, {row.token, row.value, user})
-
- num = num + 1
- total = total + 1
- if num > lim then
- ret,err_str = send_redis(res['redis_host'], res['symbol'],
- tokens, redis_password, redis_db, cmd)
- if not ret then
- print('Cannot send tokens to the redis server: ' .. err_str)
- return
- end
-
- num = 0
- tokens = {}
- end
- end
- if #tokens > 0 then
- ret, err_str = send_redis(res['redis_host'], res['symbol'], tokens,
- redis_password, redis_db, cmd)
- if not ret then
- print('Cannot send tokens to the redis server: ' .. err_str)
- return
- end
- end
- -- Now update all users
- local conn,err = redis.connect_sync({
- host = res['redis_host'],
- })
-
- if not conn then
- print('Cannot connect to ' .. res['redis_host'] .. ' error: ' .. err)
- return false
- end
-
- if redis_password then
- conn:add_cmd('AUTH', {redis_password})
- end
- if redis_db then
- conn:add_cmd('SELECT', {redis_db})
- end
-
- for id,learned in pairs(learns) do
- local user = users_map[id]
- if not conn:add_cmd(cmd, {res['symbol'] .. user, 'learns', learned}) then
- print('Cannot update learns for user: ' .. user)
- end
- if not conn:add_cmd('SADD', {res['symbol'] .. '_keys', res['symbol'] .. user}) then
- print('Cannot update learns for user: ' .. user)
- end
- end
- db:sql('COMMIT;')
-
- ret = conn:exec()
-
- if ret then
- print(string.format('Migrated %d tokens for %d users for symbol %s',
- total, nusers, res['symbol']))
- else
- print('Error occurred during sending data to redis')
- end
end
diff --git a/src/rspamadm/rspamadm.c b/src/rspamadm/rspamadm.c
index 73988f848..51c621744 100644
--- a/src/rspamadm/rspamadm.c
+++ b/src/rspamadm/rspamadm.c
@@ -292,7 +292,13 @@ main (gint argc, gchar **argv, gchar **env)
rspamd_main->server_pool = rspamd_mempool_new (rspamd_mempool_suggest_size (),
"rspamadm");
- cfg->log_level = G_LOG_LEVEL_WARNING;
+ /* Setup logger */
+ if (verbose) {
+ cfg->log_level = G_LOG_LEVEL_DEBUG;
+ }
+ else {
+ cfg->log_level = G_LOG_LEVEL_INFO;
+ }
cfg->log_type = RSPAMD_LOG_CONSOLE;
/* Avoid timestamps printing */
@@ -305,14 +311,6 @@ main (gint argc, gchar **argv, gchar **env)
rspamd_config_post_load (cfg,
RSPAMD_CONFIG_INIT_LIBS|RSPAMD_CONFIG_INIT_URL|RSPAMD_CONFIG_INIT_NO_TLD);
- /* Setup logger */
- if (verbose) {
- cfg->log_level = G_LOG_LEVEL_DEBUG;
- }
- else {
- cfg->log_level = G_LOG_LEVEL_INFO;
- }
-
gperf_profiler_init (cfg, "rspamadm");
setproctitle ("rspamdadm");
diff --git a/src/rspamadm/stat_convert.c b/src/rspamadm/stat_convert.c
index cd15fb8a0..c88bc54bb 100644
--- a/src/rspamadm/stat_convert.c
+++ b/src/rspamadm/stat_convert.c
@@ -17,10 +17,20 @@
#include "rspamadm.h"
#include "lua/lua_common.h"
-static gchar *source_db = NULL;
-static gchar *redis_host = NULL;
-static gchar *symbol = NULL;
+#include "contrib/uthash/utlist.h"
+
+/* Common */
+static gchar *config_file = NULL;
+static gchar *symbol_ham = NULL;
+static gchar *symbol_spam = NULL;
+
+/* Inputs */
+static gchar *spam_db = NULL;
+static gchar *ham_db = NULL;
static gchar *cache_db = NULL;
+
+/* Outputs */
+static gchar *redis_host = NULL;
static gchar *redis_db = NULL;
static gchar *redis_password = NULL;
static gboolean reset_previous = FALSE;
@@ -37,20 +47,27 @@ struct rspamadm_command statconvert_command = {
};
static GOptionEntry entries[] = {
- {"database", 'd', 0, G_OPTION_ARG_FILENAME, &source_db,
- "Input sqlite", NULL},
- {"cache", 'c', 0, G_OPTION_ARG_FILENAME, &cache_db,
+ {"config", 'c', 0, G_OPTION_ARG_FILENAME, &config_file,
+ "Config file to read data from", NULL},
+ {"reset", 'r', 0, G_OPTION_ARG_NONE, &reset_previous,
+ "Reset previous data instead of appending values", NULL},
+
+ {"symbol-spam", 0, 0, G_OPTION_ARG_STRING, &symbol_spam,
+ "Symbol for spam (e.g. BAYES_SPAM)", NULL},
+ {"symbol-ham", 0, 0, G_OPTION_ARG_STRING, &symbol_ham,
+ "Symbol for ham (e.g. BAYES_HAM)", NULL},
+ {"spam-db", 0, 0, G_OPTION_ARG_STRING, &spam_db,
+ "Input spam file (sqlite3)", NULL},
+ {"ham-db", 0, 0, G_OPTION_ARG_STRING, &ham_db,
+ "Input ham file (sqlite3)", NULL},
+ {"cache", 0, 0, G_OPTION_ARG_FILENAME, &cache_db,
"Input learn cache", NULL},
- {"host", 'h', 0, G_OPTION_ARG_STRING, &redis_host,
+ {"redis-host", 'h', 0, G_OPTION_ARG_STRING, &redis_host,
"Output redis ip (in format ip:port)", NULL},
- {"symbol", 's', 0, G_OPTION_ARG_STRING, &symbol,
- "Symbol in redis (e.g. BAYES_SPAM)", NULL},
- {"dbname", 'D', 0, G_OPTION_ARG_STRING, &redis_db,
- "Database in redis (should be numeric)", NULL},
- {"password", 'p', 0, G_OPTION_ARG_STRING, &redis_password,
+ {"redis-password", 'p', 0, G_OPTION_ARG_STRING, &redis_password,
"Password to connect to redis", NULL},
- {"reset", 'r', 0, G_OPTION_ARG_NONE, &reset_previous,
- "Reset previous data instead of appending values", NULL},
+ {"redis-db", 'd', 0, G_OPTION_ARG_STRING, &redis_db,
+ "Redis database (should be numeric)", NULL},
{NULL, 0, 0, G_OPTION_ARG_NONE, NULL, NULL, NULL}
};
@@ -62,15 +79,20 @@ rspamadm_statconvert_help (gboolean full_help)
if (full_help) {
help_str = "Convert statistics from sqlite3 to redis\n\n"
- "Usage: rspamadm statconvert -d <sqlite_db> -h <redis_ip> -s <symbol>\n"
+ "Usage: rspamadm statconvert -c /etc/rspamd.conf [-r]\n"
"Where options are:\n\n"
- "-d: input sqlite\n"
- "-h: output redis ip (in format ip:port)\n"
- "-s: symbol in redis (e.g. BAYES_SPAM)\n"
- "-c: also convert data from the learn cache\n"
- "-D: output redis database\n"
- "-p: redis password\n"
- "-r: reset previous data instead of increasing values\n";
+ "-c: config file to read data from\n"
+ "-r: reset previous data instead of increasing values\n"
+ "** Or specify options directly **\n"
+ "--redis-host: output redis ip (in format ip:port)\n"
+ "--redis-db: output redis database\n"
+ "--redis-password: redis password\n"
+ "--cache: sqlite3 file for learn cache\n"
+ "--spam-db: sqlite3 input file for spam data\n"
+ "--ham-db: sqlite3 input file for ham data\n"
+ "--symbol-spam: symbol in redis for spam (e.g. BAYES_SPAM)\n"
+ "--symbol-ham: symbol in redis for ham (e.g. BAYES_HAM)\n"
+ ;
}
else {
help_str = "Convert statistics from sqlite3 to redis";
@@ -103,46 +125,116 @@ rspamadm_statconvert (gint argc, gchar **argv)
exit (1);
}
- if (!source_db) {
- rspamd_fprintf (stderr, "source db is missing\n");
- exit (1);
- }
- if (!redis_host) {
- rspamd_fprintf (stderr, "redis host is missing\n");
- exit (1);
+ if (config_file) {
+ /* Load config file, assuming that it has all information required */
+ struct ucl_parser *parser;
+
+ parser = ucl_parser_new (0);
+ rspamd_ucl_add_conf_variables (parser, ucl_vars);
+
+ if (!ucl_parser_add_file (parser, config_file)) {
+ msg_err ("ucl parser error: %s", ucl_parser_get_error (parser));
+ ucl_parser_free (parser);
+
+ exit (EXIT_FAILURE);
+ }
+
+ obj = ucl_parser_get_object (parser);
+ ucl_parser_free (parser);
}
- if (!symbol) {
- rspamd_fprintf (stderr, "symbol is missing\n");
- exit (1);
+ else {
+ /* We need to get all information from the command line */
+ ucl_object_t *classifier, *statfile_ham, *statfile_spam, *tmp, *redis;
+
+ /* Check arguments sanity */
+ if (spam_db == NULL) {
+ msg_err ("No spam-db specified");
+ exit (EXIT_FAILURE);
+ }
+ if (ham_db == NULL) {
+ msg_err ("No ham-db specified");
+ exit (EXIT_FAILURE);
+ }
+ if (redis_host == NULL) {
+ msg_err ("No redis-host specified");
+ exit (EXIT_FAILURE);
+ }
+ if (symbol_ham == NULL) {
+ msg_err ("No symbol-ham specified");
+ exit (EXIT_FAILURE);
+ }
+ if (symbol_spam == NULL) {
+ msg_err ("No symbol-spam specified");
+ exit (EXIT_FAILURE);
+ }
+
+ obj = ucl_object_typed_new (UCL_OBJECT);
+
+ classifier = ucl_object_typed_new (UCL_OBJECT);
+ ucl_object_insert_key (obj, classifier, "classifier", 0, false);
+ /* Now we need to create "bayes" key in it */
+ tmp = ucl_object_typed_new (UCL_OBJECT);
+ ucl_object_insert_key (classifier, tmp, "bayes", 0, false);
+ classifier = tmp;
+ ucl_object_insert_key (classifier, ucl_object_fromstring ("sqlite3"),
+ "backend", 0, false);
+
+ if (cache_db != NULL) {
+ ucl_object_t *cache;
+
+ cache = ucl_object_typed_new (UCL_OBJECT);
+ ucl_object_insert_key (cache, ucl_object_fromstring ("sqlite3"),
+ "type", 0, false);
+ ucl_object_insert_key (cache, ucl_object_fromstring (cache_db),
+ "file", 0, false);
+
+ ucl_object_insert_key (classifier, cache, "cache", 0, false);
+ }
+
+ statfile_ham = ucl_object_typed_new (UCL_OBJECT);
+ ucl_object_insert_key (statfile_ham, ucl_object_fromstring (symbol_ham),
+ "symbol", 0, false);
+ ucl_object_insert_key (statfile_ham, ucl_object_frombool (false),
+ "spam", 0, false);
+ ucl_object_insert_key (statfile_ham, ucl_object_fromstring (ham_db),
+ "db", 0, false);
+
+ statfile_spam = ucl_object_typed_new (UCL_OBJECT);
+ ucl_object_insert_key (statfile_spam, ucl_object_fromstring (symbol_spam),
+ "symbol", 0, false);
+ ucl_object_insert_key (statfile_spam, ucl_object_frombool (true),
+ "spam", 0, false);
+ ucl_object_insert_key (statfile_spam, ucl_object_fromstring (spam_db),
+ "db", 0, false);
+
+ DL_APPEND (statfile_ham, statfile_spam);
+ ucl_object_insert_key (classifier, statfile_ham,
+ "statfile", 0, false);
+
+ /* Deal with redis */
+
+ redis = ucl_object_typed_new (UCL_OBJECT);
+ ucl_object_insert_key (obj, redis, "redis", 0, false);
+
+ ucl_object_insert_key (redis, ucl_object_fromstring (redis_host),
+ "host", 0, false);
+
+ if (redis_db) {
+ ucl_object_insert_key (redis, ucl_object_fromstring (redis_db),
+ "db", 0, false);
+ }
+
+ if (redis_password) {
+ ucl_object_insert_key (redis, ucl_object_fromstring (redis_password),
+ "password", 0, false);
+ }
}
L = rspamd_lua_init ();
rspamd_lua_set_path (L, NULL, NULL);
-
- obj = ucl_object_typed_new (UCL_OBJECT);
- ucl_object_insert_key (obj, ucl_object_fromstring (source_db),
- "source_db", 0, false);
- ucl_object_insert_key (obj, ucl_object_fromstring (redis_host),
- "redis_host", 0, false);
- ucl_object_insert_key (obj, ucl_object_fromstring (symbol),
- "symbol", 0, false);
ucl_object_insert_key (obj, ucl_object_frombool (reset_previous),
"reset_previous", 0, false);
- if (cache_db != NULL) {
- ucl_object_insert_key (obj, ucl_object_fromstring (cache_db),
- "cache_db", 0, false);
- }
-
- if (redis_password) {
- ucl_object_insert_key (obj, ucl_object_fromstring (redis_password),
- "redis_password", 0, false);
- }
-
- if (redis_db) {
- ucl_object_insert_key (obj, ucl_object_fromstring (redis_db),
- "redis_db", 0, false);
- }
rspamadm_execute_lua_ucl_subr (L,
argc,