source.dussan.org Git - rspamd.git/commitdiff
[Minor] Various improvements to corpus_test script
author: Vsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 24 Apr 2018 09:48:44 +0000 (10:48 +0100)
committer: Vsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 24 Apr 2018 09:48:44 +0000 (10:48 +0100)
lualib/rspamadm/corpus_test.lua
src/rspamadm/corpus_test.c

index b71f96e9eec93c8d8dff3ad4508eb7e5a2236e84..cd9f66155cbe05b2642c83d80eb0e0f94ec068e4 100644 (file)
 local rspamd_logger = require "rspamd_logger"
 local ucl = require "ucl"
 local lua_util = require "lua_util"
+local getopt = require "rspamadm/getopt"
 
 local HAM = "HAM"
 local SPAM = "SPAM"
+local opts
+local default_opts = {
+  connect = 'localhost:11334',
+}
 
 local function scan_email(n_parallel, path, timeout)
 
-    local rspamc_command = string.format("rspamc -j --compact -n %s -t %.3f %s",
-        n_parallel, timeout, path)
-    local result = assert(io.popen(rspamc_command))
-    result = result:read("*all")
-    return result
-end   
+  local rspamc_command = string.format("rspamc --connect %s -j --compact -n %s -t %.3f %s",
+      opts.connect, n_parallel, timeout, path)
+  local result = assert(io.popen(rspamc_command))
+  result = result:read("*all")
+  return result
+end
 
 local function write_results(results, file)
 
-    local f = io.open(file, 'w')
+  local f = io.open(file, 'w')
 
-    for _, result in pairs(results) do
-        local log_line = string.format("%s %.2f %s", result.type, result.score, result.action)
+  for _, result in pairs(results) do
+    local log_line = string.format("%s %.2f %s", result.type, result.score, result.action)
 
-        for _, sym in pairs(result.symbols) do
-            log_line = log_line .. " " .. sym
-        end
+    for _, sym in pairs(result.symbols) do
+      log_line = log_line .. " " .. sym
+    end
 
-        log_line = log_line .. " " .. result.scan_time .. " " .. file .. ':' .. result.filename
+    log_line = log_line .. " " .. result.scan_time .. " " .. file .. ':' .. result.filename
 
-        log_line = log_line .. "\r\n"
+    log_line = log_line .. "\r\n"
 
-        f:write(log_line)
-    end
+    f:write(log_line)
+  end
 
-    f:close()
+  f:close()
 end
 
 local function encoded_json_to_log(result)
-   -- Returns table containing score, action, list of symbols
+  -- Returns table containing score, action, list of symbols
 
-    local filtered_result = {}
-    local parser = ucl.parser()
+  local filtered_result = {}
+  local parser = ucl.parser()
 
-    local is_good, err = parser:parse_string(result)
+  local is_good, err = parser:parse_string(result)
 
-    if not is_good then
-      io.stderr:write(rspamd_logger.slog("Parser error: %1\n", err))
-      return nil
-    end
+  if not is_good then
+    rspamd_logger.errx("Parser error: %1", err)
+    return nil
+  end
 
-    result = parser:get_object()
+  result = parser:get_object()
 
-    filtered_result.score = result.score
-    if not result.action then
-      io.stderr:write(rspamd_logger.slog("Bad JSON: %1\n", result))
-      return nil
-    end
-    local action = result.action:gsub("%s+", "_")
-    filtered_result.action = action
+  filtered_result.score = result.score
+  if not result.action then
+    rspamd_logger.errx("Bad JSON: %1", result)
+    return nil
+  end
+  local action = result.action:gsub("%s+", "_")
+  filtered_result.action = action
 
-    filtered_result.symbols = {}
+  filtered_result.symbols = {}
 
-    for sym, _ in pairs(result.symbols) do
-        table.insert(filtered_result.symbols, sym)
-    end
+  for sym, _ in pairs(result.symbols) do
+    table.insert(filtered_result.symbols, sym)
+  end
 
-    filtered_result.filename = result.filename
-    filtered_result.scan_time = result.scan_time
+  filtered_result.filename = result.filename
+  filtered_result.scan_time = result.scan_time
 
-    return filtered_result   
+  return filtered_result
 end
 
 local function scan_results_to_logs(results, actual_email_type)
 
-    local logs = {}
+  local logs = {}
 
-    results = lua_util.rspamd_str_split(results, "\n")
+  results = lua_util.rspamd_str_split(results, "\n")
 
-    if results[#results] == "" then
-        results[#results] = nil
-    end
+  if results[#results] == "" then
+    results[#results] = nil
+  end
 
-    for _, result in pairs(results) do      
-        result = encoded_json_to_log(result)
-        if result then
-          result['type'] = actual_email_type
-          table.insert(logs, result)
-        end
+  for _, result in pairs(results) do
+    result = encoded_json_to_log(result)
+    if result then
+      result['type'] = actual_email_type
+      table.insert(logs, result)
     end
+  end
 
-    return logs
+  return logs
 end
 
-return function (_, res)
-
-    local ham_directory = res['ham_directory']
-    local spam_directory = res['spam_directory']
-    local connections = res["connections"]
-    local output = res["output_location"]
+return function(args, res)
+  opts = default_opts
+  opts = lua_util.override_defaults(opts, getopt.getopt(args, ''))
+  local ham_directory = res['ham_directory']
+  local spam_directory = res['spam_directory']
+  local connections = res["connections"]
+  local output = res["output_location"]
 
-    local results = {}
+  local results = {}
 
-    local start_time = os.time()
-    local no_of_ham = 0
-    local no_of_spam = 0
+  local start_time = os.time()
+  local no_of_ham = 0
+  local no_of_spam = 0
 
-    if ham_directory then
-        io.write("Scanning ham corpus...\n")
-        local ham_results = scan_email(connections, ham_directory, res["timeout"])
-        ham_results = scan_results_to_logs(ham_results, HAM)
+  if ham_directory then
+    rspamd_logger.messagex("Scanning ham corpus...")
+    local ham_results = scan_email(connections, ham_directory, res["timeout"])
+    ham_results = scan_results_to_logs(ham_results, HAM)
 
-        no_of_ham = #ham_results
+    no_of_ham = #ham_results
 
-        for _, result in pairs(ham_results) do
-            table.insert(results, result)
-        end
+    for _, result in pairs(ham_results) do
+      table.insert(results, result)
     end
+  end
 
-    if spam_directory then
-        io.write("Scanning spam corpus...\n")
-        local spam_results = scan_email(connections, spam_directory, res.timeout)
-        spam_results = scan_results_to_logs(spam_results, SPAM)
+  if spam_directory then
+    rspamd_logger.messagex("Scanning spam corpus...")
+    local spam_results = scan_email(connections, spam_directory, res.timeout)
+    spam_results = scan_results_to_logs(spam_results, SPAM)
 
-        no_of_spam = #spam_results
+    no_of_spam = #spam_results
 
-        for _, result in pairs(spam_results) do
-            table.insert(results, result)
-        end
+    for _, result in pairs(spam_results) do
+      table.insert(results, result)
     end
-
-    io.write(string.format("Writing results to %s\n", output))
-    write_results(results, output)
-
-    io.write("\nStats: \n")
-    local elapsed_time = os.time() - start_time
-    local total_msgs = no_of_ham + no_of_spam
-    io.write(string.format("Elapsed time: %ds\n", os.time() - start_time))
-    io.write(string.format("No of ham: %d\n", no_of_ham))
-    io.write(string.format("No of spam: %d\n", no_of_spam))
-    io.write(string.format("Messages/sec: %-.2f\n", (total_msgs/elapsed_time)))
+  end
+
+  rspamd_logger.messagex("Writing results to %s", output)
+  write_results(results, output)
+
+  rspamd_logger.messagex("Stats: ")
+  local elapsed_time = os.time() - start_time
+  local total_msgs = no_of_ham + no_of_spam
+  rspamd_logger.messagex("Elapsed time: %ss", elapsed_time)
+  rspamd_logger.messagex("No of ham: %s", no_of_ham)
+  rspamd_logger.messagex("No of spam: %s", no_of_spam)
+  rspamd_logger.messagex("Messages/sec: %s", (total_msgs / elapsed_time))
 end
index b6918ae1aca95cd8e4f1dd23310de371be4051b1..d72788d21d013bdd73d3b16657bb485b9575af01 100644 (file)
@@ -34,9 +34,8 @@ struct rspamadm_command corpus_test_command = {
        .run = rspamadm_corpus_test
 };
 
-// TODO add -nparellel and -o options
 static GOptionEntry entries[] = {
-               {"ham", 'a', 0, G_OPTION_ARG_FILENAME, &ham_directory,
+               {"ham", 'h', 0, G_OPTION_ARG_FILENAME, &ham_directory,
                                "Ham directory", NULL},
                {"spam", 's', 0, G_OPTION_ARG_FILENAME, &spam_directory,
                                "Spam directory", NULL},
@@ -50,16 +49,16 @@ static GOptionEntry entries[] = {
 };
 
 static const char *
-rspamadm_corpus_test_help (gboolean full_help) 
+rspamadm_corpus_test_help (gboolean full_help)
 {
        const char *help_str;
 
        if (full_help) {
                help_str = "Create logs files from email corpus\n\n"
-                               "Usage: rspamadm corpus_test [-a <ham_directory>]"
+                               "Usage: rspamadm corpus_test [-h <ham_directory>]"
                                " [-s <spam_directory>]\n"
                                "Where option are:\n\n"
-                               "-a: path to ham directory\n"
+                               "-h: path to ham directory\n"
                                "-s: path to spam directory\n"
                                "-n: maximum parallel connections\n"
                                "-o: log output file\n"
@@ -75,7 +74,7 @@ rspamadm_corpus_test_help (gboolean full_help)
 }
 
 static void
-rspamadm_corpus_test (gint argc, gchar **argv) 
+rspamadm_corpus_test (gint argc, gchar **argv)
 {
        GOptionContext *context;
        GError *error = NULL;
@@ -85,15 +84,15 @@ rspamadm_corpus_test (gint argc, gchar **argv)
        context = g_option_context_new (
                                "corpus_test - create logs files from email corpus");
 
-       g_option_context_set_summary (context, 
+       g_option_context_set_summary (context,
                        "Summary:\n Rspamd administration utility version "
                                                RVERSION
                                                "\n Release id: "
                                                RID);
-       
+
        g_option_context_add_main_entries (context, entries, NULL);
        g_option_context_set_ignore_unknown_options (context, TRUE);
-       
+
        if (!g_option_context_parse (context, &argc, &argv, &error)) {
                rspamd_fprintf (stderr, "option parsing failed: %s\n", error->message);
                g_error_free (error);
@@ -103,7 +102,6 @@ rspamadm_corpus_test (gint argc, gchar **argv)
        L = rspamd_lua_init ();
        rspamd_lua_set_path(L, NULL, ucl_vars);
 
-
        obj = ucl_object_typed_new (UCL_OBJECT);
        ucl_object_insert_key (obj, ucl_object_fromstring (ham_directory),
                        "ham_directory", 0, false);