From: Vsevolod Stakhov Date: Thu, 4 Jan 2018 19:19:43 +0000 (+0000) Subject: [Feature] Add timeout to rspamc when doing corpus test X-Git-Tag: 1.7.0~307 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=077d9b3c2f22dd2d4e66971e999dc01ac2fa2b44;p=rspamd.git [Feature] Add timeout to rspamc when doing corpus test --- diff --git a/lualib/rspamadm/corpus_test.lua b/lualib/rspamadm/corpus_test.lua index b29fa5602..fa296a0b4 100644 --- a/lualib/rspamadm/corpus_test.lua +++ b/lualib/rspamadm/corpus_test.lua @@ -4,9 +4,10 @@ local lua_util = require "lua_util" local HAM = "HAM" local SPAM = "SPAM" -local function scan_email(n_parellel, path) +local function scan_email(n_parellel, path, timeout) - local rspamc_command = string.format("rspamc -j --compact -n %s %s", n_parellel, path) + local rspamc_command = string.format("rspamc -j --compact -n %s -t %.3f %s", + n_parellel, timeout, path) local result = assert(io.popen(rspamc_command)) result = result:read("*all") return result @@ -93,7 +94,7 @@ return function (_, res) if ham_directory then io.write("Scanning ham corpus...\n") - local ham_results = scan_email(connections, ham_directory) + local ham_results = scan_email(connections, ham_directory, res["timeout"]) ham_results = scan_results_to_logs(ham_results, HAM) no_of_ham = #ham_results diff --git a/lualib/rspamadm/rescore.lua b/lualib/rspamadm/rescore.lua index 538122f68..4f6cc5075 100644 --- a/lualib/rspamadm/rescore.lua +++ b/lualib/rspamadm/rescore.lua @@ -202,7 +202,7 @@ return function (_, res) local logs = rescore_utility.get_all_logs(res["logdir"]) local all_symbols = rescore_utility.get_all_symbols(logs) - local original_symbol_scores = rescore_utility.get_all_symbol_scores() + local original_symbol_scores = rescore_utility.get_all_symbol_scores(res["timeout"]) shuffle(logs) diff --git a/lualib/rspamadm/rescore_utility.lua b/lualib/rspamadm/rescore_utility.lua index 4c6504e76..2390fc565 100644 --- a/lualib/rspamadm/rescore_utility.lua +++ b/lualib/rspamadm/rescore_utility.lua @@ -69,9 +69,9 @@ function utility.get_all_logs(dir_path) return all_logs end -function utility.get_all_symbol_scores() +function utility.get_all_symbol_scores(timeout) - local output = assert(io.popen("rspamc counters -j --compact")) + local output = assert(io.popen("rspamc counters -j --compact -t " .. tostring(timeout))) output = output:read("*all") local parser = ucl.parser() diff --git a/src/rspamadm/corpus_test.c b/src/rspamadm/corpus_test.c index 62aecb148..7e1aa40e9 100644 --- a/src/rspamadm/corpus_test.c +++ b/src/rspamadm/corpus_test.c @@ -22,6 +22,7 @@ static gchar *ham_directory = NULL; static gchar *spam_directory = NULL; static gchar *output_location = "results.log"; static gint connections = 10; +static gdouble timeout = 60.0; static void rspamadm_corpus_test (gint argc, gchar **argv); static const char *rspamadm_corpus_test_help (gboolean full_help); @@ -35,15 +36,17 @@ struct rspamadm_command corpus_test_command = { // TODO add -nparellel and -o options static GOptionEntry entries[] = { - {"ham", 'a', 0, G_OPTION_ARG_FILENAME, &ham_directory, - "Ham directory", NULL}, - {"spam", 's', 0, G_OPTION_ARG_FILENAME, &spam_directory, - "Spam directory", NULL}, - {"output", 'o', 0, G_OPTION_ARG_FILENAME, &output_location, - "Log output location", NULL}, - {"connections", 'n', 0, G_OPTION_ARG_INT, &connections, - "Number of parellel connections [Default: 10]", NULL}, - {NULL, 0, 0, G_OPTION_ARG_NONE, NULL, NULL, NULL} + {"ham", 'a', 0, G_OPTION_ARG_FILENAME, &ham_directory, + "Ham directory", NULL}, + {"spam", 's', 0, G_OPTION_ARG_FILENAME, &spam_directory, + "Spam directory", NULL}, + {"output", 'o', 0, G_OPTION_ARG_FILENAME, &output_location, + "Log output location", NULL}, + {"connections", 'n', 0, G_OPTION_ARG_INT, &connections, + "Number of parellel connections [Default: 10]", NULL}, + {"timeout", 't', 0, G_OPTION_ARG_DOUBLE, &timeout, + "Timeout for connections [Default: 60]", NULL}, + {NULL, 0, 0, G_OPTION_ARG_NONE, NULL, NULL, NULL} }; static const char * @@ -53,13 +56,14 @@ rspamadm_corpus_test_help (gboolean full_help) if (full_help) { help_str = "Create logs files from email corpus\n\n" - "Usage: rspamadm corpus_test [-a ]" - " [-s ]\n" - "Where option are:\n\n" - "-a: path to ham directory\n" - "-s: path to spam directory\n" - "-n: maximum parellel connections\n" - "-o: log output file\n"; + "Usage: rspamadm corpus_test [-a ]" + " [-s ]\n" + "Where option are:\n\n" + "-a: path to ham directory\n" + "-s: path to spam directory\n" + "-n: maximum parallel connections\n" + "-o: log output file\n" + "-t: timeout for rspamc operations (default: 60)\n"; } @@ -79,7 +83,7 @@ rspamadm_corpus_test (gint argc, gchar **argv) ucl_object_t *obj; context = g_option_context_new ( - "corpus_test - Create logs files from email corpus"); + "corpus_test - create logs files from email corpus"); g_option_context_set_summary (context, "Summary:\n Rspamd administration utility version " @@ -102,19 +106,21 @@ rspamadm_corpus_test (gint argc, gchar **argv) obj = ucl_object_typed_new (UCL_OBJECT); ucl_object_insert_key (obj, ucl_object_fromstring (ham_directory), - "ham_directory", 0, false); + "ham_directory", 0, false); ucl_object_insert_key (obj, ucl_object_fromstring (spam_directory), - "spam_directory", 0, false); + "spam_directory", 0, false); ucl_object_insert_key (obj, ucl_object_fromstring (output_location), - "output_location", 0, false); + "output_location", 0, false); ucl_object_insert_key (obj, ucl_object_fromint (connections), - "connections", 0, false); + "connections", 0, false); + ucl_object_insert_key (obj, ucl_object_fromdouble (timeout), + "timeout", 0, false); rspamadm_execute_lua_ucl_subr (L, - argc, - argv, - obj, - "corpus_test"); + argc, + argv, + obj, + "corpus_test"); lua_close (L); ucl_object_unref (obj); diff --git a/src/rspamadm/rescore.c b/src/rspamadm/rescore.c index 48dbc1f1f..ba5619776 100644 --- a/src/rspamadm/rescore.c +++ b/src/rspamadm/rescore.c @@ -29,6 +29,7 @@ static gchar *output = "new.scores"; static gdouble threshold = 15; /* Spam threshold */ static gboolean score_diff = false; /* Print score diff flag */ static gint64 iters = 500; /* Perceptron max iterations */ +gdouble timeout = 60.0; /* TODO: think about adding the config file reading */ @@ -52,6 +53,8 @@ static GOptionEntry entries[] = { "Print score diff", NULL}, {"iters", 'i', 0, G_OPTION_ARG_INT64, &iters, "Max iterations for perceptron [Default: 500]", NULL}, + {"timeout", 't', 0, G_OPTION_ARG_DOUBLE, &timeout, + "Timeout for connections [Default: 60]", NULL}, {NULL, 0, 0, G_OPTION_ARG_NONE, NULL, NULL, NULL} }; @@ -65,9 +68,10 @@ rspamadm_rescore_help (gboolean full_help) { "Usage: rspamadm rescore -l \n" "Where options are:\n\n" "-l: path to logs directory\n" - "-o: Scores output file location\n" - "-d: Print scores diff\n" - "-i: Max iterations for perceptron\n"; + "-o: scores output file location\n" + "-d: print scores diff\n" + "-i: max iterations for perceptron\n" + "-t: timeout for rspamc operations (default: 60)\n"; } else { help_str = "Estimate optimal symbol weights from log files"; } @@ -127,6 +131,8 @@ rspamadm_rescore (gint argc, gchar **argv) { "iters", 0, false); ucl_object_insert_key (obj, ucl_object_frombool (score_diff), "diff", 0, false); + ucl_object_insert_key (obj, ucl_object_fromdouble (timeout), + "timeout", 0, false); rspamadm_execute_lua_ucl_subr (L, argc,