about | summary | refs | log | tree | commit | diff | stats
path: root/src/rspamadm/stat_convert.c
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2018-02-15 12:48:33 +0000
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2018-02-15 12:48:33 +0000
commit 440b588a749911bb064055d4084d99b7be35f815 (patch)
tree 34a85a083cb7347fddbc6f7b3eec19d97d50b9d2 /src/rspamadm/stat_convert.c
parent 5ee9d4f25d79dfc1197935d301ed4951b3c8a53e (diff)
download rspamd-440b588a749911bb064055d4084d99b7be35f815.tar.gz
rspamd-440b588a749911bb064055d4084d99b7be35f815.zip
[Rework] Start major stat_convert rework
Diffstat (limited to 'src/rspamadm/stat_convert.c')
-rw-r--r-- src/rspamadm/stat_convert.c 200
1 files changed, 146 insertions, 54 deletions
diff --git a/src/rspamadm/stat_convert.c b/src/rspamadm/stat_convert.c
index cd15fb8a0..c88bc54bb 100644
--- a/src/rspamadm/stat_convert.c
+++ b/src/rspamadm/stat_convert.c
@@ -17,10 +17,20 @@
#include "rspamadm.h"
#include "lua/lua_common.h"
-static gchar *source_db = NULL;
-static gchar *redis_host = NULL;
-static gchar *symbol = NULL;
+#include "contrib/uthash/utlist.h"
+
+/* Common */
+static gchar *config_file = NULL;
+static gchar *symbol_ham = NULL;
+static gchar *symbol_spam = NULL;
+
+/* Inputs */
+static gchar *spam_db = NULL;
+static gchar *ham_db = NULL;
static gchar *cache_db = NULL;
+
+/* Outputs */
+static gchar *redis_host = NULL;
static gchar *redis_db = NULL;
static gchar *redis_password = NULL;
static gboolean reset_previous = FALSE;
@@ -37,20 +47,27 @@ struct rspamadm_command statconvert_command = {
};
static GOptionEntry entries[] = {
- {"database", 'd', 0, G_OPTION_ARG_FILENAME, &source_db,
- "Input sqlite", NULL},
- {"cache", 'c', 0, G_OPTION_ARG_FILENAME, &cache_db,
+ {"config", 'c', 0, G_OPTION_ARG_FILENAME, &config_file,
+ "Config file to read data from", NULL},
+ {"reset", 'r', 0, G_OPTION_ARG_NONE, &reset_previous,
+ "Reset previous data instead of appending values", NULL},
+
+ {"symbol-spam", 0, 0, G_OPTION_ARG_STRING, &symbol_spam,
+ "Symbol for spam (e.g. BAYES_SPAM)", NULL},
+ {"symbol-ham", 0, 0, G_OPTION_ARG_STRING, &symbol_ham,
+ "Symbol for ham (e.g. BAYES_HAM)", NULL},
+ {"spam-db", 0, 0, G_OPTION_ARG_STRING, &spam_db,
+ "Input spam file (sqlite3)", NULL},
+ {"ham-db", 0, 0, G_OPTION_ARG_STRING, &ham_db,
+ "Input ham file (sqlite3)", NULL},
+ {"cache", 0, 0, G_OPTION_ARG_FILENAME, &cache_db,
"Input learn cache", NULL},
- {"host", 'h', 0, G_OPTION_ARG_STRING, &redis_host,
+ {"redis-host", 'h', 0, G_OPTION_ARG_STRING, &redis_host,
"Output redis ip (in format ip:port)", NULL},
- {"symbol", 's', 0, G_OPTION_ARG_STRING, &symbol,
- "Symbol in redis (e.g. BAYES_SPAM)", NULL},
- {"dbname", 'D', 0, G_OPTION_ARG_STRING, &redis_db,
- "Database in redis (should be numeric)", NULL},
- {"password", 'p', 0, G_OPTION_ARG_STRING, &redis_password,
+ {"redis-password", 'p', 0, G_OPTION_ARG_STRING, &redis_password,
"Password to connect to redis", NULL},
- {"reset", 'r', 0, G_OPTION_ARG_NONE, &reset_previous,
- "Reset previous data instead of appending values", NULL},
+ {"redis-db", 'd', 0, G_OPTION_ARG_STRING, &redis_db,
+ "Redis database (should be numeric)", NULL},
{NULL, 0, 0, G_OPTION_ARG_NONE, NULL, NULL, NULL}
};
@@ -62,15 +79,20 @@ rspamadm_statconvert_help (gboolean full_help)
if (full_help) {
help_str = "Convert statistics from sqlite3 to redis\n\n"
- "Usage: rspamadm statconvert -d <sqlite_db> -h <redis_ip> -s <symbol>\n"
+ "Usage: rspamadm statconvert -c /etc/rspamd.conf [-r]\n"
"Where options are:\n\n"
- "-d: input sqlite\n"
- "-h: output redis ip (in format ip:port)\n"
- "-s: symbol in redis (e.g. BAYES_SPAM)\n"
- "-c: also convert data from the learn cache\n"
- "-D: output redis database\n"
- "-p: redis password\n"
- "-r: reset previous data instead of increasing values\n";
+ "-c: config file to read data from\n"
+ "-r: reset previous data instead of increasing values\n"
+ "** Or specify options directly **\n"
+ "--redis-host: output redis ip (in format ip:port)\n"
+ "--redis-db: output redis database\n"
+ "--redis-password: redis password\n"
+ "--cache: sqlite3 file for learn cache\n"
+ "--spam-db: sqlite3 input file for spam data\n"
+ "--ham-db: sqlite3 input file for ham data\n"
+ "--symbol-spam: symbol in redis for spam (e.g. BAYES_SPAM)\n"
+ "--symbol-ham: symbol in redis for ham (e.g. BAYES_HAM)\n"
+ ;
}
else {
help_str = "Convert statistics from sqlite3 to redis";
@@ -103,46 +125,116 @@ rspamadm_statconvert (gint argc, gchar **argv)
exit (1);
}
- if (!source_db) {
- rspamd_fprintf (stderr, "source db is missing\n");
- exit (1);
- }
- if (!redis_host) {
- rspamd_fprintf (stderr, "redis host is missing\n");
- exit (1);
+ if (config_file) {
+ /* Load config file, assuming that it has all information required */
+ struct ucl_parser *parser;
+
+ parser = ucl_parser_new (0);
+ rspamd_ucl_add_conf_variables (parser, ucl_vars);
+
+ if (!ucl_parser_add_file (parser, config_file)) {
+ msg_err ("ucl parser error: %s", ucl_parser_get_error (parser));
+ ucl_parser_free (parser);
+
+ exit (EXIT_FAILURE);
+ }
+
+ obj = ucl_parser_get_object (parser);
+ ucl_parser_free (parser);
}
- if (!symbol) {
- rspamd_fprintf (stderr, "symbol is missing\n");
- exit (1);
+ else {
+ /* We need to get all information from the command line */
+ ucl_object_t *classifier, *statfile_ham, *statfile_spam, *tmp, *redis;
+
+ /* Check arguments sanity */
+ if (spam_db == NULL) {
+ msg_err ("No spam-db specified");
+ exit (EXIT_FAILURE);
+ }
+ if (ham_db == NULL) {
+ msg_err ("No ham-db specified");
+ exit (EXIT_FAILURE);
+ }
+ if (redis_host == NULL) {
+ msg_err ("No redis-host specified");
+ exit (EXIT_FAILURE);
+ }
+ if (symbol_ham == NULL) {
+ msg_err ("No symbol-ham specified");
+ exit (EXIT_FAILURE);
+ }
+ if (symbol_spam == NULL) {
+ msg_err ("No symbol-spam specified");
+ exit (EXIT_FAILURE);
+ }
+
+ obj = ucl_object_typed_new (UCL_OBJECT);
+
+ classifier = ucl_object_typed_new (UCL_OBJECT);
+ ucl_object_insert_key (obj, classifier, "classifier", 0, false);
+ /* Now we need to create "bayes" key in it */
+ tmp = ucl_object_typed_new (UCL_OBJECT);
+ ucl_object_insert_key (classifier, tmp, "bayes", 0, false);
+ classifier = tmp;
+ ucl_object_insert_key (classifier, ucl_object_fromstring ("sqlite3"),
+ "backend", 0, false);
+
+ if (cache_db != NULL) {
+ ucl_object_t *cache;
+
+ cache = ucl_object_typed_new (UCL_OBJECT);
+ ucl_object_insert_key (cache, ucl_object_fromstring ("sqlite3"),
+ "type", 0, false);
+ ucl_object_insert_key (cache, ucl_object_fromstring (cache_db),
+ "file", 0, false);
+
+ ucl_object_insert_key (classifier, cache, "cache", 0, false);
+ }
+
+ statfile_ham = ucl_object_typed_new (UCL_OBJECT);
+ ucl_object_insert_key (statfile_ham, ucl_object_fromstring (symbol_ham),
+ "symbol", 0, false);
+ ucl_object_insert_key (statfile_ham, ucl_object_frombool (false),
+ "spam", 0, false);
+ ucl_object_insert_key (statfile_ham, ucl_object_fromstring (ham_db),
+ "db", 0, false);
+
+ statfile_spam = ucl_object_typed_new (UCL_OBJECT);
+ ucl_object_insert_key (statfile_spam, ucl_object_fromstring (symbol_spam),
+ "symbol", 0, false);
+ ucl_object_insert_key (statfile_spam, ucl_object_frombool (true),
+ "spam", 0, false);
+ ucl_object_insert_key (statfile_spam, ucl_object_fromstring (spam_db),
+ "db", 0, false);
+
+ DL_APPEND (statfile_ham, statfile_spam);
+ ucl_object_insert_key (classifier, statfile_ham,
+ "statfile", 0, false);
+
+ /* Deal with redis */
+
+ redis = ucl_object_typed_new (UCL_OBJECT);
+ ucl_object_insert_key (obj, redis, "redis", 0, false);
+
+ ucl_object_insert_key (redis, ucl_object_fromstring (redis_host),
+ "host", 0, false);
+
+ if (redis_db) {
+ ucl_object_insert_key (redis, ucl_object_fromstring (redis_db),
+ "db", 0, false);
+ }
+
+ if (redis_password) {
+ ucl_object_insert_key (redis, ucl_object_fromstring (redis_password),
+ "password", 0, false);
+ }
}
L = rspamd_lua_init ();
rspamd_lua_set_path (L, NULL, NULL);
-
- obj = ucl_object_typed_new (UCL_OBJECT);
- ucl_object_insert_key (obj, ucl_object_fromstring (source_db),
- "source_db", 0, false);
- ucl_object_insert_key (obj, ucl_object_fromstring (redis_host),
- "redis_host", 0, false);
- ucl_object_insert_key (obj, ucl_object_fromstring (symbol),
- "symbol", 0, false);
ucl_object_insert_key (obj, ucl_object_frombool (reset_previous),
"reset_previous", 0, false);
- if (cache_db != NULL) {
- ucl_object_insert_key (obj, ucl_object_fromstring (cache_db),
- "cache_db", 0, false);
- }
-
- if (redis_password) {
- ucl_object_insert_key (obj, ucl_object_fromstring (redis_password),
- "redis_password", 0, false);
- }
-
- if (redis_db) {
- ucl_object_insert_key (obj, ucl_object_fromstring (redis_db),
- "redis_db", 0, false);
- }
rspamadm_execute_lua_ucl_subr (L,
argc,