[Project] Convert corpus_test to new format, document all options

This commit is contained in:
Vsevolod Stakhov 2018-05-29 15:24:26 +01:00
parent b72a6879c8
commit 099cb8fb09
5 changed files with 61 additions and 152 deletions

View File

@ -1,19 +1,48 @@
local rspamd_logger = require "rspamd_logger"
local ucl = require "ucl"
local lua_util = require "lua_util"
local getopt = require "getopt"
local argparse = require "argparse"

-- Command line interface of `rspamadm corpus_test`.
-- Numeric options are run through `tonumber`, so a non-numeric value is
-- rejected by the parser instead of travelling further as a raw string.
local parser = argparse()
    :name "rspamadm corpus_test"
    :description "Create logs files from email corpus"
    :help_description_margin(32)

-- Corpus locations; neither option has a default
parser:option "-H --ham"
    :description("Ham directory")
    :argname("<dir>")
parser:option "-S --spam"
    :description("Spam directory")
    :argname("<dir>")

-- Numeric value, validated with tonumber
parser:option "-n --conns"
    :description("Number of parallel connections")
    :argname("<N>")
    :convert(tonumber)
    :default(10)
parser:option "-o --output"
    :description("Output file")
    :argname("<file>")
    :default('results.log')

-- Numeric value (seconds), validated with tonumber
parser:option "-t --timeout"
    :description("Timeout for client connections")
    :argname("<sec>")
    :convert(tonumber)
    :default(60)

-- How the external client is reached and which binary is executed
parser:option "-c --connect"
    :description("Connect to specific host")
    :argname("<host>")
    :default('localhost:11334')
parser:option "-r --rspamc"
    :description("Use specific rspamc path")
    :argname("<path>")
    :default('rspamc')
local HAM = "HAM"
local SPAM = "SPAM"
local opts
local default_opts = {
connect = 'localhost:11334',
}
local function scan_email(n_parallel, path, timeout)
local rspamc_command = string.format("rspamc --connect %s -j --compact -n %s -t %.3f %s",
opts.connect, n_parallel, timeout, path)
local rspamc_command = string.format("%s --connect %s -j --compact -n %s -t %.3f %s",
opts.rspamc, opts.connect, n_parallel, timeout, path)
local result = assert(io.popen(rspamc_command))
result = result:read("*all")
return result
@ -24,7 +53,8 @@ local function write_results(results, file)
local f = io.open(file, 'w')
for _, result in pairs(results) do
local log_line = string.format("%s %.2f %s", result.type, result.score, result.action)
local log_line = string.format("%s %.2f %s",
result.type, result.score, result.action)
for _, sym in pairs(result.symbols) do
log_line = log_line .. " " .. sym
@ -96,13 +126,12 @@ local function scan_results_to_logs(results, actual_email_type)
return logs
end
return function(args, res)
opts = default_opts
opts = lua_util.override_defaults(opts, getopt.getopt(args, ''))
local ham_directory = res['ham_directory']
local spam_directory = res['spam_directory']
local connections = res["connections"]
local output = res["output_location"]
-- Entry point of the command: parses `args` with the argparse parser declared
-- above and stores the result in the file-level `opts` table.
local function handler(args)
opts = parser:parse(args)
-- argparse files every value under the option's long name (`--ham` -> `ham`,
-- `--spam` -> `spam`, `--conns` -> `conns`). The former keys
-- (`ham_directory`, `spam_directory`, `connections`) are never set by this
-- parser, so reading them always produced nil and no corpus was scanned.
local ham_directory = opts['ham']
local spam_directory = opts['spam']
local connections = opts["conns"]
local output = opts["output"]
local results = {}
@ -112,7 +141,7 @@ return function(args, res)
if ham_directory then
rspamd_logger.messagex("Scanning ham corpus...")
local ham_results = scan_email(connections, ham_directory, res["timeout"])
local ham_results = scan_email(connections, ham_directory, opts["timeout"])
ham_results = scan_results_to_logs(ham_results, HAM)
no_of_ham = #ham_results
@ -124,7 +153,7 @@ return function(args, res)
if spam_directory then
rspamd_logger.messagex("Scanning spam corpus...")
local spam_results = scan_email(connections, spam_directory, res.timeout)
local spam_results = scan_email(connections, spam_directory, opts.timeout)
spam_results = scan_results_to_logs(spam_results, SPAM)
no_of_spam = #spam_results
@ -145,3 +174,10 @@ return function(args, res)
rspamd_logger.messagex("No of spam: %s", no_of_spam)
rspamd_logger.messagex("Messages/sec: %s", (total_msgs / elapsed_time))
end
-- Command descriptor exported by this module: the subcommand name, the
-- function that runs it, and the description text taken from the parser.
local command = {
  name = 'corpus_test',
  description = parser._description,
  handler = handler,
}

return command

View File

@ -22,15 +22,17 @@ local parser = argparse()
:name "rspamadm grep"
:description "Search for patterns in rspamd logs"
:help_description_margin(30)
parser:option "-s --string"
:description('Plain string to search (case-insensitive)')
:argname "<str>"
parser:mutex(
parser:option "-s --string"
:description('Plain string to search (case-insensitive)')
:argname "<str>",
parser:option "-p --pattern"
:description('Pattern to search for (regex)')
:argname "<re>"
)
parser:flag "-l --lua"
:description('Use Lua patterns in string search')
parser:option "-p --pattern"
:description('Pattern to search for (regex)')
:args(1)
:argname "<re>"
parser:argument "input":args "*"
:description('Process specified inputs')
:default("stdin")

View File

@ -9,7 +9,6 @@ SET(RSPAMADMSRC rspamadm.c
control.c
confighelp.c
configwizard.c
corpus_test.c
stat_convert.c
signtool.c
lua_repl.c

View File

@ -31,7 +31,6 @@ extern struct rspamadm_command signtool_command;
extern struct rspamadm_command lua_command;
extern struct rspamadm_command dkim_keygen_command;
extern struct rspamadm_command configwizard_command;
extern struct rspamadm_command corpus_test_command;
extern struct rspamadm_command rescore_command;
const struct rspamadm_command *commands[] = {
@ -49,7 +48,6 @@ const struct rspamadm_command *commands[] = {
&lua_command,
&dkim_keygen_command,
&configwizard_command,
&corpus_test_command,
&rescore_command,
NULL
};

View File

@ -1,126 +0,0 @@
/*-
* Copyright 2017 Pragadeesh C
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "rspamadm.h"
#include "config.h"
#include "lua/lua_common.h"
/* Storage for parsed command line options (referenced from `entries` below);
 * the initialisers are the values used when an option is not given. */
static gchar *ham_directory = NULL;
static gchar *spam_directory = NULL;
static gchar *output_location = "results.log";
static gint connections = 10;
static gdouble timeout = 60.0;
/* Forward declarations of the callbacks wired into corpus_test_command */
static void rspamadm_corpus_test (gint argc, gchar **argv,
const struct rspamadm_command *cmd);
static const char *rspamadm_corpus_test_help (gboolean full_help,
const struct rspamadm_command *cmd);
/* Descriptor of the `corpus_test` subcommand: its name plus help/run callbacks */
struct rspamadm_command corpus_test_command = {
.name = "corpus_test",
.flags = 0,
.help = rspamadm_corpus_test_help,
.run = rspamadm_corpus_test
};
/* GOption table mapping each switch to the static variable that receives it;
 * the all-NULL entry terminates the list. */
static GOptionEntry entries[] = {
{"ham", 'h', 0, G_OPTION_ARG_FILENAME, &ham_directory,
"Ham directory", NULL},
{"spam", 's', 0, G_OPTION_ARG_FILENAME, &spam_directory,
"Spam directory", NULL},
{"output", 'o', 0, G_OPTION_ARG_FILENAME, &output_location,
"Log output location", NULL},
{"connections", 'n', 0, G_OPTION_ARG_INT, &connections,
"Number of parellel connections [Default: 10]", NULL},
{"timeout", 't', 0, G_OPTION_ARG_DOUBLE, &timeout,
"Timeout for connections [Default: 60]", NULL},
{NULL, 0, 0, G_OPTION_ARG_NONE, NULL, NULL, NULL}
};
/**
 * Help callback of the `corpus_test` command.
 *
 * @param full_help when true the complete usage text is returned, otherwise
 *                  only the one-line summary
 * @param cmd command descriptor (not used)
 * @return pointer to a static string literal
 */
static const char *
rspamadm_corpus_test_help (gboolean full_help, const struct rspamadm_command *cmd)
{
	/* Short form first: just the one-line summary */
	if (!full_help) {
		return "Create logs files from email corpus";
	}

	return "Create logs files from email corpus\n\n"
			"Usage: rspamadm corpus_test [-h <ham_directory>]"
			" [-s <spam_directory>]\n"
			"Where option are:\n\n"
			"-h: path to ham directory\n"
			"-s: path to spam directory\n"
			"-n: maximum parallel connections\n"
			"-o: log output file\n"
			"-t: timeout for rspamc operations (default: 60)\n";
}
/**
 * Run callback of the `corpus_test` command.
 *
 * Parses the command line into the file-scope option variables, packs them
 * into a UCL object and hands that object, together with the remaining
 * arguments, to the Lua subroutine named "corpus_test".
 *
 * @param argc number of command line arguments
 * @param argv command line arguments (modified in place by option parsing)
 * @param cmd command descriptor (not used)
 */
static void
rspamadm_corpus_test (gint argc, gchar **argv, const struct rspamadm_command *cmd)
{
	GOptionContext *context;
	GError *error = NULL;
	lua_State *L;
	ucl_object_t *obj;

	context = g_option_context_new (
			"corpus_test - create logs files from email corpus");
	g_option_context_set_summary (context,
			"Summary:\n Rspamd administration utility version "
					RVERSION
					"\n Release id: "
					RID);
	g_option_context_add_main_entries (context, entries, NULL);
	/* Unknown switches are left in argv and forwarded to the Lua side */
	g_option_context_set_ignore_unknown_options (context, TRUE);

	if (!g_option_context_parse (context, &argc, &argv, &error)) {
		rspamd_fprintf (stderr, "option parsing failed: %s\n", error->message);
		g_error_free (error);
		g_option_context_free (context);
		exit(1);
	}

	/* The parsed values live in the static variables, so the context itself
	 * is no longer needed; release it instead of leaking it. */
	g_option_context_free (context);

	/* NOTE(review): the Lua state created here is never closed in this
	 * function — presumably reclaimed at process exit; confirm. */
	L = rspamd_lua_init ();
	rspamd_lua_set_path(L, NULL, ucl_vars);

	/* Collect the options under the key names read by the Lua subroutine */
	obj = ucl_object_typed_new (UCL_OBJECT);
	ucl_object_insert_key (obj, ucl_object_fromstring (ham_directory),
			"ham_directory", 0, false);
	ucl_object_insert_key (obj, ucl_object_fromstring (spam_directory),
			"spam_directory", 0, false);
	ucl_object_insert_key (obj, ucl_object_fromstring (output_location),
			"output_location", 0, false);
	ucl_object_insert_key (obj, ucl_object_fromint (connections),
			"connections", 0, false);
	ucl_object_insert_key (obj, ucl_object_fromdouble (timeout),
			"timeout", 0, false);

	rspamadm_execute_lua_ucl_subr (L,
			argc,
			argv,
			obj,
			"corpus_test");

	ucl_object_unref (obj);
}