local rspamd_logger = require "rspamd_logger"
local ucl = require "ucl"
local lua_util = require "lua_util"
+local getopt = require "rspamadm/getopt"
local HAM = "HAM"
local SPAM = "SPAM"
+local opts
+local default_opts = {
+ connect = 'localhost:11334',
+}
local function scan_email(n_parallel, path, timeout)
- local rspamc_command = string.format("rspamc -j --compact -n %s -t %.3f %s",
- n_parallel, timeout, path)
- local result = assert(io.popen(rspamc_command))
- result = result:read("*all")
- return result
-end
+ local rspamc_command = string.format("rspamc --connect %s -j --compact -n %s -t %.3f %s",
+ opts.connect, n_parallel, timeout, path)
+ local result = assert(io.popen(rspamc_command))
+ result = result:read("*all")
+ return result
+end
local function write_results(results, file)
- local f = io.open(file, 'w')
+ local f = io.open(file, 'w')
- for _, result in pairs(results) do
- local log_line = string.format("%s %.2f %s", result.type, result.score, result.action)
+ for _, result in pairs(results) do
+ local log_line = string.format("%s %.2f %s", result.type, result.score, result.action)
- for _, sym in pairs(result.symbols) do
- log_line = log_line .. " " .. sym
- end
+ for _, sym in pairs(result.symbols) do
+ log_line = log_line .. " " .. sym
+ end
- log_line = log_line .. " " .. result.scan_time .. " " .. file .. ':' .. result.filename
+ log_line = log_line .. " " .. result.scan_time .. " " .. file .. ':' .. result.filename
- log_line = log_line .. "\r\n"
+ log_line = log_line .. "\r\n"
- f:write(log_line)
- end
+ f:write(log_line)
+ end
- f:close()
+ f:close()
end
local function encoded_json_to_log(result)
- -- Returns table containing score, action, list of symbols
+ -- Returns table containing score, action, list of symbols
- local filtered_result = {}
- local parser = ucl.parser()
+ local filtered_result = {}
+ local parser = ucl.parser()
- local is_good, err = parser:parse_string(result)
+ local is_good, err = parser:parse_string(result)
- if not is_good then
- io.stderr:write(rspamd_logger.slog("Parser error: %1\n", err))
- return nil
- end
+ if not is_good then
+ rspamd_logger.errx("Parser error: %1", err)
+ return nil
+ end
- result = parser:get_object()
+ result = parser:get_object()
- filtered_result.score = result.score
- if not result.action then
- io.stderr:write(rspamd_logger.slog("Bad JSON: %1\n", result))
- return nil
- end
- local action = result.action:gsub("%s+", "_")
- filtered_result.action = action
+ filtered_result.score = result.score
+ if not result.action then
+ rspamd_logger.errx("Bad JSON: %1", result)
+ return nil
+ end
+ local action = result.action:gsub("%s+", "_")
+ filtered_result.action = action
- filtered_result.symbols = {}
+ filtered_result.symbols = {}
- for sym, _ in pairs(result.symbols) do
- table.insert(filtered_result.symbols, sym)
- end
+ for sym, _ in pairs(result.symbols) do
+ table.insert(filtered_result.symbols, sym)
+ end
- filtered_result.filename = result.filename
- filtered_result.scan_time = result.scan_time
+ filtered_result.filename = result.filename
+ filtered_result.scan_time = result.scan_time
- return filtered_result
+ return filtered_result
end
local function scan_results_to_logs(results, actual_email_type)
- local logs = {}
+ local logs = {}
- results = lua_util.rspamd_str_split(results, "\n")
+ results = lua_util.rspamd_str_split(results, "\n")
- if results[#results] == "" then
- results[#results] = nil
- end
+ if results[#results] == "" then
+ results[#results] = nil
+ end
- for _, result in pairs(results) do
- result = encoded_json_to_log(result)
- if result then
- result['type'] = actual_email_type
- table.insert(logs, result)
- end
+ for _, result in pairs(results) do
+ result = encoded_json_to_log(result)
+ if result then
+ result['type'] = actual_email_type
+ table.insert(logs, result)
end
+ end
- return logs
+ return logs
end
-return function (_, res)
-
- local ham_directory = res['ham_directory']
- local spam_directory = res['spam_directory']
- local connections = res["connections"]
- local output = res["output_location"]
+return function(args, res)
+ opts = default_opts
+ opts = lua_util.override_defaults(opts, getopt.getopt(args, ''))
+ local ham_directory = res['ham_directory']
+ local spam_directory = res['spam_directory']
+ local connections = res["connections"]
+ local output = res["output_location"]
- local results = {}
+ local results = {}
- local start_time = os.time()
- local no_of_ham = 0
- local no_of_spam = 0
+ local start_time = os.time()
+ local no_of_ham = 0
+ local no_of_spam = 0
- if ham_directory then
- io.write("Scanning ham corpus...\n")
- local ham_results = scan_email(connections, ham_directory, res["timeout"])
- ham_results = scan_results_to_logs(ham_results, HAM)
+ if ham_directory then
+ rspamd_logger.messagex("Scanning ham corpus...")
+ local ham_results = scan_email(connections, ham_directory, res["timeout"])
+ ham_results = scan_results_to_logs(ham_results, HAM)
- no_of_ham = #ham_results
+ no_of_ham = #ham_results
- for _, result in pairs(ham_results) do
- table.insert(results, result)
- end
+ for _, result in pairs(ham_results) do
+ table.insert(results, result)
end
+ end
- if spam_directory then
- io.write("Scanning spam corpus...\n")
- local spam_results = scan_email(connections, spam_directory, res.timeout)
- spam_results = scan_results_to_logs(spam_results, SPAM)
+ if spam_directory then
+ rspamd_logger.messagex("Scanning spam corpus...")
+ local spam_results = scan_email(connections, spam_directory, res.timeout)
+ spam_results = scan_results_to_logs(spam_results, SPAM)
- no_of_spam = #spam_results
+ no_of_spam = #spam_results
- for _, result in pairs(spam_results) do
- table.insert(results, result)
- end
+ for _, result in pairs(spam_results) do
+ table.insert(results, result)
end
-
- io.write(string.format("Writing results to %s\n", output))
- write_results(results, output)
-
- io.write("\nStats: \n")
- local elapsed_time = os.time() - start_time
- local total_msgs = no_of_ham + no_of_spam
- io.write(string.format("Elapsed time: %ds\n", os.time() - start_time))
- io.write(string.format("No of ham: %d\n", no_of_ham))
- io.write(string.format("No of spam: %d\n", no_of_spam))
- io.write(string.format("Messages/sec: %-.2f\n", (total_msgs/elapsed_time)))
+ end
+
+ rspamd_logger.messagex("Writing results to %s", output)
+ write_results(results, output)
+
+ rspamd_logger.messagex("Stats: ")
+ local elapsed_time = os.time() - start_time
+ local total_msgs = no_of_ham + no_of_spam
+ rspamd_logger.messagex("Elapsed time: %ss", elapsed_time)
+ rspamd_logger.messagex("No of ham: %s", no_of_ham)
+ rspamd_logger.messagex("No of spam: %s", no_of_spam)
+ rspamd_logger.messagex("Messages/sec: %s", (total_msgs / elapsed_time))
end
.run = rspamadm_corpus_test
};
-// TODO add -nparellel and -o options
static GOptionEntry entries[] = {
- {"ham", 'a', 0, G_OPTION_ARG_FILENAME, &ham_directory,
+ {"ham", 'h', 0, G_OPTION_ARG_FILENAME, &ham_directory,
"Ham directory", NULL},
{"spam", 's', 0, G_OPTION_ARG_FILENAME, &spam_directory,
"Spam directory", NULL},
};
static const char *
-rspamadm_corpus_test_help (gboolean full_help)
+rspamadm_corpus_test_help (gboolean full_help)
{
const char *help_str;
if (full_help) {
help_str = "Create logs files from email corpus\n\n"
- "Usage: rspamadm corpus_test [-a <ham_directory>]"
+ "Usage: rspamadm corpus_test [-h <ham_directory>]"
" [-s <spam_directory>]\n"
"Where option are:\n\n"
- "-a: path to ham directory\n"
+ "-h: path to ham directory\n"
"-s: path to spam directory\n"
"-n: maximum parallel connections\n"
"-o: log output file\n"
}
static void
-rspamadm_corpus_test (gint argc, gchar **argv)
+rspamadm_corpus_test (gint argc, gchar **argv)
{
GOptionContext *context;
GError *error = NULL;
context = g_option_context_new (
"corpus_test - create logs files from email corpus");
- g_option_context_set_summary (context,
+ g_option_context_set_summary (context,
"Summary:\n Rspamd administration utility version "
RVERSION
"\n Release id: "
RID);
-
+
g_option_context_add_main_entries (context, entries, NULL);
g_option_context_set_ignore_unknown_options (context, TRUE);
-
+
if (!g_option_context_parse (context, &argc, &argv, &error)) {
rspamd_fprintf (stderr, "option parsing failed: %s\n", error->message);
g_error_free (error);
L = rspamd_lua_init ();
rspamd_lua_set_path(L, NULL, ucl_vars);
-
obj = ucl_object_typed_new (UCL_OBJECT);
ucl_object_insert_key (obj, ucl_object_fromstring (ham_directory),
"ham_directory", 0, false);