source.dussan.org Git - rspamd.git/commitdiff
[Rework] Start major stat_convert rework
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 15 Feb 2018 12:48:33 +0000 (12:48 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 15 Feb 2018 12:48:33 +0000 (12:48 +0000)
lualib/rspamadm/stat_convert.lua
src/rspamadm/rspamadm.c
src/rspamadm/stat_convert.c

index 7b6de9836da009ff5ef22177102c3b5e427e4327..d497333a12f82bbe94a8db8fed5ee303a426f719 100644 (file)
@@ -100,126 +100,5 @@ local function convert_learned(cache, server, password, redis_db)
 end
 
 return function (_, res)
-  local db = sqlite3.open(res['source_db'])
-  local tokens = {}
-  local num = 0
-  local total = 0
-  local nusers = 0
-  local lim = 1000 -- Update each 1000 tokens
-  local users_map = {}
-  local learns = {}
-  local redis_password = res['redis_password']
-  local redis_db = nil
-  local cmd = 'HINCRBY'
-  local ret, err_str
-
-  if res['redis_db'] then
-    redis_db = tostring(res['redis_db'])
-  end
-  if res['reset_previous'] then
-    cmd = 'HSET'
-  end
-
-  if res['cache_db'] then
-    if not convert_learned(res['cache_db'], res['redis_host'],
-      redis_password, redis_db) then
-        print('Cannot convert learned cache to redis')
-        return
-    end
-  end
-
-  if not db then
-    print('Cannot open source db: ' .. res['source_db'])
-    return
-  end
-
-  db:sql('BEGIN;')
-  -- Fill users mapping
-  for row in db:rows('SELECT * FROM users;') do
-    if row.id == '0' then
-      users_map[row.id] = ''
-    else
-      users_map[row.id] = row.name
-    end
-    learns[row.id] = row.learns
-    nusers = nusers + 1
-  end
-
-  -- Workaround for old databases
-  for row in db:rows('SELECT * FROM languages') do
-    if learns['0'] then
-      learns['0'] = learns['0'] + row.learns
-    else
-      learns['0'] = row.learns
-    end
-  end
-
-  -- Fill tokens, sending data to redis each `lim` records
-  for row in db:rows('SELECT token,value,user FROM tokens;') do
-    local user = ''
-    if row.user ~= 0 and users_map[row.user] then
-      user = users_map[row.user]
-    end
-
-    table.insert(tokens, {row.token, row.value, user})
-
-    num = num + 1
-    total = total + 1
-    if num > lim then
-      ret,err_str = send_redis(res['redis_host'], res['symbol'],
-        tokens, redis_password, redis_db, cmd)
-      if not ret then
-        print('Cannot send tokens to the redis server: ' .. err_str)
-        return
-      end
-
-      num = 0
-      tokens = {}
-    end
-  end
-  if #tokens > 0 then
-    ret, err_str = send_redis(res['redis_host'], res['symbol'], tokens,
-      redis_password, redis_db, cmd)
 
-    if not ret then
-      print('Cannot send tokens to the redis server: ' .. err_str)
-      return
-    end
-  end
-  -- Now update all users
-  local conn,err = redis.connect_sync({
-    host = res['redis_host'],
-  })
-
-  if not conn then
-    print('Cannot connect to ' .. res['redis_host'] .. ' error: ' .. err)
-    return false
-  end
-
-  if redis_password then
-    conn:add_cmd('AUTH', {redis_password})
-  end
-  if redis_db then
-    conn:add_cmd('SELECT', {redis_db})
-  end
-
-  for id,learned in pairs(learns) do
-    local user = users_map[id]
-    if not conn:add_cmd(cmd, {res['symbol'] .. user, 'learns', learned}) then
-      print('Cannot update learns for user: ' .. user)
-    end
-    if not conn:add_cmd('SADD', {res['symbol'] .. '_keys', res['symbol'] .. user}) then
-      print('Cannot update learns for user: ' .. user)
-    end
-  end
-  db:sql('COMMIT;')
-
-  ret = conn:exec()
-
-  if ret then
-    print(string.format('Migrated %d tokens for %d users for symbol %s',
-     total, nusers, res['symbol']))
-  else
-    print('Error occurred during sending data to redis')
-  end
 end
index 73988f848fbe6f64b4c5d3cd05f933eac08d61ea..51c6217441ac75b21c4c3919b93ff09566bbd529 100644 (file)
@@ -292,7 +292,13 @@ main (gint argc, gchar **argv, gchar **env)
        rspamd_main->server_pool = rspamd_mempool_new (rspamd_mempool_suggest_size (),
                        "rspamadm");
 
-       cfg->log_level = G_LOG_LEVEL_WARNING;
+       /* Setup logger */
+       if (verbose) {
+               cfg->log_level = G_LOG_LEVEL_DEBUG;
+       }
+       else {
+               cfg->log_level = G_LOG_LEVEL_INFO;
+       }
 
        cfg->log_type = RSPAMD_LOG_CONSOLE;
        /* Avoid timestamps printing */
@@ -305,14 +311,6 @@ main (gint argc, gchar **argv, gchar **env)
        rspamd_config_post_load (cfg,
                        RSPAMD_CONFIG_INIT_LIBS|RSPAMD_CONFIG_INIT_URL|RSPAMD_CONFIG_INIT_NO_TLD);
 
-       /* Setup logger */
-       if (verbose) {
-               cfg->log_level = G_LOG_LEVEL_DEBUG;
-       }
-       else {
-               cfg->log_level = G_LOG_LEVEL_INFO;
-       }
-
        gperf_profiler_init (cfg, "rspamadm");
        setproctitle ("rspamdadm");
 
index cd15fb8a075acb7b7372b4a5dcdb8afc45b718d6..c88bc54bbec3f17d4dd439900d8af2e5e7a8d886 100644 (file)
 #include "rspamadm.h"
 #include "lua/lua_common.h"
 
-static gchar *source_db = NULL;
-static gchar *redis_host = NULL;
-static gchar *symbol = NULL;
+#include "contrib/uthash/utlist.h"
+
+/* Common */
+static gchar *config_file = NULL;
+static gchar *symbol_ham = NULL;
+static gchar *symbol_spam = NULL;
+
+/* Inputs */
+static gchar *spam_db = NULL;
+static gchar *ham_db = NULL;
 static gchar *cache_db = NULL;
+
+/* Outputs */
+static gchar *redis_host = NULL;
 static gchar *redis_db = NULL;
 static gchar *redis_password = NULL;
 static gboolean reset_previous = FALSE;
@@ -37,20 +47,27 @@ struct rspamadm_command statconvert_command = {
 };
 
 static GOptionEntry entries[] = {
-               {"database", 'd', 0, G_OPTION_ARG_FILENAME, &source_db,
-                               "Input sqlite",      NULL},
-               {"cache", 'c', 0, G_OPTION_ARG_FILENAME, &cache_db,
+               {"config", 'c', 0, G_OPTION_ARG_FILENAME, &config_file,
+                               "Config file to read data from",      NULL},
+               {"reset", 'r', 0, G_OPTION_ARG_NONE, &reset_previous,
+                               "Reset previous data instead of appending values", NULL},
+
+               {"symbol-spam", 0, 0, G_OPTION_ARG_STRING, &symbol_spam,
+                               "Symbol for spam (e.g. BAYES_SPAM)", NULL},
+               {"symbol-ham", 0, 0, G_OPTION_ARG_STRING, &symbol_ham,
+                               "Symbol for ham (e.g. BAYES_HAM)", NULL},
+               {"spam-db", 0, 0, G_OPTION_ARG_STRING, &spam_db,
+                               "Input spam file (sqlite3)", NULL},
+               {"ham-db", 0, 0, G_OPTION_ARG_STRING, &ham_db,
+                               "Input ham file (sqlite3)", NULL},
+               {"cache", 0, 0, G_OPTION_ARG_FILENAME, &cache_db,
                                "Input learn cache",      NULL},
-               {"host", 'h', 0, G_OPTION_ARG_STRING, &redis_host,
+               {"redis-host", 'h', 0, G_OPTION_ARG_STRING, &redis_host,
                                "Output redis ip (in format ip:port)", NULL},
-               {"symbol", 's', 0, G_OPTION_ARG_STRING, &symbol,
-                               "Symbol in redis (e.g. BAYES_SPAM)", NULL},
-               {"dbname", 'D', 0, G_OPTION_ARG_STRING, &redis_db,
-                               "Database in redis (should be numeric)", NULL},
-               {"password", 'p', 0, G_OPTION_ARG_STRING, &redis_password,
+               {"redis-password", 'p', 0, G_OPTION_ARG_STRING, &redis_password,
                                "Password to connect to redis", NULL},
-               {"reset", 'r', 0, G_OPTION_ARG_NONE, &reset_previous,
-                               "Reset previous data instead of appending values", NULL},
+               {"redis-db", 'd', 0, G_OPTION_ARG_STRING, &redis_db,
+                               "Redis database (should be numeric)", NULL},
                {NULL,     0,   0, G_OPTION_ARG_NONE, NULL, NULL, NULL}
 };
 
@@ -62,15 +79,20 @@ rspamadm_statconvert_help (gboolean full_help)
 
        if (full_help) {
                help_str = "Convert statistics from sqlite3 to redis\n\n"
-                               "Usage: rspamadm statconvert -d <sqlite_db> -h <redis_ip> -s <symbol>\n"
+                               "Usage: rspamadm statconvert -c /etc/rspamd.conf [-r]\n"
                                "Where options are:\n\n"
-                               "-d: input sqlite\n"
-                               "-h: output redis ip (in format ip:port)\n"
-                               "-s: symbol in redis (e.g. BAYES_SPAM)\n"
-                               "-c: also convert data from the learn cache\n"
-                               "-D: output redis database\n"
-                               "-p: redis password\n"
-                               "-r: reset previous data instead of increasing values\n";
+                               "-c: config file to read data from\n"
+                               "-r: reset previous data instead of increasing values\n"
+                               "** Or specify options directly **\n"
+                               "--redis-host: output redis ip (in format ip:port)\n"
+                               "--redis-db: output redis database\n"
+                               "--redis-password: redis password\n"
+                               "--cache: sqlite3 file for learn cache\n"
+                               "--spam-db: sqlite3 input file for spam data\n"
+                               "--ham-db: sqlite3 input file for ham data\n"
+                               "--symbol-spam: symbol in redis for spam (e.g. BAYES_SPAM)\n"
+                               "--symbol-ham: symbol in redis for ham (e.g. BAYES_HAM)\n"
+                               ;
        }
        else {
                help_str = "Convert statistics from sqlite3 to redis";
@@ -103,46 +125,116 @@ rspamadm_statconvert (gint argc, gchar **argv)
                exit (1);
        }
 
-       if (!source_db) {
-               rspamd_fprintf (stderr, "source db is missing\n");
-               exit (1);
-       }
-       if (!redis_host) {
-               rspamd_fprintf (stderr, "redis host is missing\n");
-               exit (1);
+       if (config_file) {
+               /* Load config file, assuming that it has all information required */
+               struct ucl_parser *parser;
+
+               parser = ucl_parser_new (0);
+               rspamd_ucl_add_conf_variables (parser, ucl_vars);
+
+               if (!ucl_parser_add_file (parser, config_file)) {
+                       msg_err ("ucl parser error: %s", ucl_parser_get_error (parser));
+                       ucl_parser_free (parser);
+
+                       exit (EXIT_FAILURE);
+               }
+
+               obj = ucl_parser_get_object (parser);
+               ucl_parser_free (parser);
        }
-       if (!symbol) {
-               rspamd_fprintf (stderr, "symbol is missing\n");
-               exit (1);
+       else {
+               /* We need to get all information from the command line */
+               ucl_object_t *classifier, *statfile_ham, *statfile_spam, *tmp, *redis;
+
+               /* Check arguments sanity */
+               if (spam_db == NULL) {
+                       msg_err ("No spam-db specified");
+                       exit (EXIT_FAILURE);
+               }
+               if (ham_db == NULL) {
+                       msg_err ("No ham-db specified");
+                       exit (EXIT_FAILURE);
+               }
+               if (redis_host == NULL) {
+                       msg_err ("No redis-host specified");
+                       exit (EXIT_FAILURE);
+               }
+               if (symbol_ham == NULL) {
+                       msg_err ("No symbol-ham specified");
+                       exit (EXIT_FAILURE);
+               }
+               if (symbol_spam == NULL) {
+                       msg_err ("No symbol-spam specified");
+                       exit (EXIT_FAILURE);
+               }
+
+               obj = ucl_object_typed_new (UCL_OBJECT);
+
+               classifier = ucl_object_typed_new (UCL_OBJECT);
+               ucl_object_insert_key (obj, classifier, "classifier", 0, false);
+               /* Now we need to create "bayes" key in it */
+               tmp = ucl_object_typed_new (UCL_OBJECT);
+               ucl_object_insert_key (classifier, tmp, "bayes", 0, false);
+               classifier = tmp;
+               ucl_object_insert_key (classifier, ucl_object_fromstring ("sqlite3"),
+                               "backend", 0, false);
+
+               if (cache_db != NULL) {
+                       ucl_object_t *cache;
+
+                       cache = ucl_object_typed_new (UCL_OBJECT);
+                       ucl_object_insert_key (cache, ucl_object_fromstring ("sqlite3"),
+                                       "type", 0, false);
+                       ucl_object_insert_key (cache, ucl_object_fromstring (cache_db),
+                                       "file", 0, false);
+
+                       ucl_object_insert_key (classifier, cache, "cache", 0, false);
+               }
+
+               statfile_ham = ucl_object_typed_new (UCL_OBJECT);
+               ucl_object_insert_key (statfile_ham, ucl_object_fromstring (symbol_ham),
+                               "symbol", 0, false);
+               ucl_object_insert_key (statfile_ham, ucl_object_frombool (false),
+                               "spam", 0, false);
+               ucl_object_insert_key (statfile_ham, ucl_object_fromstring (ham_db),
+                               "db", 0, false);
+
+               statfile_spam = ucl_object_typed_new (UCL_OBJECT);
+               ucl_object_insert_key (statfile_spam, ucl_object_fromstring (symbol_spam),
+                               "symbol", 0, false);
+               ucl_object_insert_key (statfile_spam, ucl_object_frombool (true),
+                               "spam", 0, false);
+               ucl_object_insert_key (statfile_spam, ucl_object_fromstring (spam_db),
+                               "db", 0, false);
+
+               DL_APPEND (statfile_ham, statfile_spam);
+               ucl_object_insert_key (classifier, statfile_ham,
+                               "statfile", 0, false);
+
+               /* Deal with redis */
+
+               redis = ucl_object_typed_new (UCL_OBJECT);
+               ucl_object_insert_key (obj, redis, "redis", 0, false);
+
+               ucl_object_insert_key (redis, ucl_object_fromstring (redis_host),
+                               "host", 0, false);
+
+               if (redis_db) {
+                       ucl_object_insert_key (redis, ucl_object_fromstring (redis_db),
+                                       "db", 0, false);
+               }
+
+               if (redis_password) {
+                       ucl_object_insert_key (redis, ucl_object_fromstring (redis_password),
+                                       "password", 0, false);
+               }
        }
 
        L = rspamd_lua_init ();
        rspamd_lua_set_path (L, NULL, NULL);
-
-       obj = ucl_object_typed_new (UCL_OBJECT);
-       ucl_object_insert_key (obj, ucl_object_fromstring (source_db),
-                       "source_db", 0, false);
-       ucl_object_insert_key (obj, ucl_object_fromstring (redis_host),
-                       "redis_host", 0, false);
-       ucl_object_insert_key (obj, ucl_object_fromstring (symbol),
-                       "symbol", 0, false);
        ucl_object_insert_key (obj, ucl_object_frombool (reset_previous),
                        "reset_previous", 0, false);
 
-       if (cache_db != NULL) {
-               ucl_object_insert_key (obj, ucl_object_fromstring (cache_db),
-                               "cache_db", 0, false);
-       }
-
-       if (redis_password) {
-               ucl_object_insert_key (obj, ucl_object_fromstring (redis_password),
-                               "redis_password", 0, false);
-       }
-
-       if (redis_db) {
-               ucl_object_insert_key (obj, ucl_object_fromstring (redis_db),
-                               "redis_db", 0, false);
-       }
 
        rspamadm_execute_lua_ucl_subr (L,
                        argc,