source.dussan.org Git - rspamd.git/commitdiff
[Feature] Add timeout to rspamc when doing corpus test
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 4 Jan 2018 19:19:43 +0000 (19:19 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 4 Jan 2018 19:19:43 +0000 (19:19 +0000)
lualib/rspamadm/corpus_test.lua
lualib/rspamadm/rescore.lua
lualib/rspamadm/rescore_utility.lua
src/rspamadm/corpus_test.c
src/rspamadm/rescore.c

index b29fa56029ea77799f78c09af5110ebe52f50d5f..fa296a0b499993ccce7d2d8611babe768b32f224 100644 (file)
@@ -4,9 +4,10 @@ local lua_util = require "lua_util"
 local HAM = "HAM"
 local SPAM = "SPAM"
 
-local function scan_email(n_parellel, path)
+local function scan_email(n_parellel, path, timeout)
 
-    local rspamc_command = string.format("rspamc -j --compact -n %s %s", n_parellel, path)
+    local rspamc_command = string.format("rspamc -j --compact -n %s -t %.3f %s",
+        n_parellel, timeout, path)
     local result = assert(io.popen(rspamc_command))
     result = result:read("*all")
     return result
@@ -93,7 +94,7 @@ return function (_, res)
 
     if ham_directory then
         io.write("Scanning ham corpus...\n")
-        local ham_results = scan_email(connections, ham_directory)
+        local ham_results = scan_email(connections, ham_directory, res["timeout"])
         ham_results = scan_results_to_logs(ham_results, HAM)
 
         no_of_ham = #ham_results
index 538122f683141b59789919dcd2f5d7e7a169740a..4f6cc5075194886eedcba5f9ab85ab34cf4e467d 100644 (file)
@@ -202,7 +202,7 @@ return function (_, res)
 
        local logs = rescore_utility.get_all_logs(res["logdir"])
        local all_symbols = rescore_utility.get_all_symbols(logs)
-       local original_symbol_scores = rescore_utility.get_all_symbol_scores()
+       local original_symbol_scores = rescore_utility.get_all_symbol_scores(res["timeout"])
 
        shuffle(logs)
 
index 4c6504e765d36e10aaec23423a3cca1a0b4aa07a..2390fc5656a7d81cb142d7d7d59635876bafade9 100644 (file)
@@ -69,9 +69,9 @@ function utility.get_all_logs(dir_path)
        return all_logs
 end
 
-function utility.get_all_symbol_scores()
+function utility.get_all_symbol_scores(timeout)
 
-       local output = assert(io.popen("rspamc counters -j --compact"))
+       local output = assert(io.popen("rspamc counters -j --compact -t " .. tostring(timeout)))
        output = output:read("*all")
 
        local parser = ucl.parser()
index 62aecb148cb7497fb2d5791a39ceca383b164e33..7e1aa40e988a3317bc996cae384a2cabd4ee00c9 100644 (file)
@@ -22,6 +22,7 @@ static gchar *ham_directory = NULL;
 static gchar *spam_directory = NULL;
 static gchar *output_location = "results.log";
 static gint connections = 10;
+static gdouble timeout = 60.0;
 
 static void rspamadm_corpus_test (gint argc, gchar **argv);
 static const char *rspamadm_corpus_test_help (gboolean full_help);
@@ -35,15 +36,17 @@ struct rspamadm_command corpus_test_command = {
 
 // TODO add -nparellel and -o options
 static GOptionEntry entries[] = {
-                       {"ham", 'a', 0, G_OPTION_ARG_FILENAME, &ham_directory,
-                                                                                               "Ham directory", NULL},
-                       {"spam", 's', 0, G_OPTION_ARG_FILENAME, &spam_directory,
-                                                                                               "Spam directory", NULL},
-                       {"output", 'o', 0, G_OPTION_ARG_FILENAME, &output_location,
-                                                                                               "Log output location", NULL},
-                       {"connections", 'n', 0, G_OPTION_ARG_INT, &connections,
-                                               "Number of parellel connections [Default: 10]", NULL},
-                       {NULL,  0,      0, G_OPTION_ARG_NONE, NULL, NULL, NULL}
+               {"ham", 'a', 0, G_OPTION_ARG_FILENAME, &ham_directory,
+                               "Ham directory", NULL},
+               {"spam", 's', 0, G_OPTION_ARG_FILENAME, &spam_directory,
+                               "Spam directory", NULL},
+               {"output", 'o', 0, G_OPTION_ARG_FILENAME, &output_location,
+                               "Log output location", NULL},
+               {"connections", 'n', 0, G_OPTION_ARG_INT, &connections,
+                               "Number of parellel connections [Default: 10]", NULL},
+               {"timeout", 't', 0, G_OPTION_ARG_DOUBLE, &timeout,
+                               "Timeout for connections [Default: 60]", NULL},
+               {NULL,  0,      0, G_OPTION_ARG_NONE, NULL, NULL, NULL}
 };
 
 static const char *
@@ -53,13 +56,14 @@ rspamadm_corpus_test_help (gboolean full_help)
 
        if (full_help) {
                help_str = "Create logs files from email corpus\n\n"
-                                       "Usage: rspamadm corpus_test [-a <ham_directory>]"
-                                                                                       " [-s <spam_directory>]\n"
-                                       "Where option are:\n\n"
-                                       "-a: path to ham directory\n"
-                                       "-s: path to spam directory\n"
-                                       "-n: maximum parellel connections\n"
-                                       "-o: log output file\n";
+                               "Usage: rspamadm corpus_test [-a <ham_directory>]"
+                               " [-s <spam_directory>]\n"
+                               "Where option are:\n\n"
+                               "-a: path to ham directory\n"
+                               "-s: path to spam directory\n"
+                               "-n: maximum parallel connections\n"
+                               "-o: log output file\n"
+                               "-t: timeout for rspamc operations (default: 60)\n";
 
        }
 
@@ -79,7 +83,7 @@ rspamadm_corpus_test (gint argc, gchar **argv)
        ucl_object_t *obj;
 
        context = g_option_context_new (
-                               "corpus_test - Create logs files from email corpus");
+                               "corpus_test - create logs files from email corpus");
 
        g_option_context_set_summary (context, 
                        "Summary:\n Rspamd administration utility version "
@@ -102,19 +106,21 @@ rspamadm_corpus_test (gint argc, gchar **argv)
 
        obj = ucl_object_typed_new (UCL_OBJECT);
        ucl_object_insert_key (obj, ucl_object_fromstring (ham_directory),
-                                                                                       "ham_directory", 0, false);
+                       "ham_directory", 0, false);
        ucl_object_insert_key (obj, ucl_object_fromstring (spam_directory),
-                                                                                       "spam_directory", 0, false);
+                       "spam_directory", 0, false);
        ucl_object_insert_key (obj, ucl_object_fromstring (output_location),
-                                                                                       "output_location", 0, false);
+                       "output_location", 0, false);
        ucl_object_insert_key (obj, ucl_object_fromint (connections),
-                                                                                       "connections", 0, false);
+                       "connections", 0, false);
+       ucl_object_insert_key (obj, ucl_object_fromdouble (timeout),
+                       "timeout", 0, false);
 
        rspamadm_execute_lua_ucl_subr (L,
-                                               argc,
-                                               argv,
-                                               obj,
-                                               "corpus_test");
+                       argc,
+                       argv,
+                       obj,
+                       "corpus_test");
 
        lua_close (L);
        ucl_object_unref (obj);
index 48dbc1f1f0a77a55876b855de85e942335fa3813..ba5619776d226e15d0c3fcf195f6fe06b38b611e 100644 (file)
@@ -29,6 +29,7 @@ static gchar *output = "new.scores";
 static gdouble threshold = 15; /* Spam threshold */
 static gboolean score_diff = false;  /* Print score diff flag */
 static gint64 iters = 500; /* Perceptron max iterations */
+static gdouble timeout = 60.0; /* Timeout for rspamc connections (-t option) */
 
 /* TODO: think about adding the config file reading */
 
@@ -52,6 +53,8 @@ static GOptionEntry entries[] = {
                                "Print score diff",                             NULL},
                {"iters",  'i', 0, G_OPTION_ARG_INT64,    &iters,
                                "Max iterations for perceptron [Default: 500]", NULL},
+               {"timeout", 't', 0, G_OPTION_ARG_DOUBLE, &timeout,
+                               "Timeout for connections [Default: 60]", NULL},
                {NULL,     0,   0, G_OPTION_ARG_NONE, NULL, NULL,       NULL}
 };
 
@@ -65,9 +68,10 @@ rspamadm_rescore_help (gboolean full_help) {
                                "Usage: rspamadm rescore -l <log_directory>\n"
                                "Where options are:\n\n"
                                "-l: path to logs directory\n"
-                               "-o: Scores output file location\n"
-                               "-d: Print scores diff\n"
-                               "-i: Max iterations for perceptron\n";
+                               "-o: scores output file location\n"
+                               "-d: print scores diff\n"
+                               "-i: max iterations for perceptron\n"
+                               "-t: timeout for rspamc operations (default: 60)\n";
        } else {
                help_str = "Estimate optimal symbol weights from log files";
        }
@@ -127,6 +131,8 @@ rspamadm_rescore (gint argc, gchar **argv) {
                        "iters", 0, false);
        ucl_object_insert_key (obj, ucl_object_frombool (score_diff),
                        "diff", 0, false);
+       ucl_object_insert_key (obj, ucl_object_fromdouble (timeout),
+                       "timeout", 0, false);
 
        rspamadm_execute_lua_ucl_subr (L,
                        argc,