diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2017-12-07 20:18:49 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-12-07 20:18:49 +0000 |
commit | 09f3015da643b82e24b054f1704aa6783bfc79e8 (patch) | |
tree | 96c522272ea9373433938c2dbc97ec25e3bf703f /src | |
parent | 1e929b744952674120545cbbd1643b6fd1910aab (diff) | |
parent | 703bd13d5bedc30ed9bbeb7180d3cd083fc0e1f4 (diff) | |
download | rspamd-09f3015da643b82e24b054f1704aa6783bfc79e8.tar.gz rspamd-09f3015da643b82e24b054f1704aa6783bfc79e8.zip |
Merge pull request #1946 from cpragadeesh/rescore-filter
[Feature] added corpus_test, rescore commands
Diffstat (limited to 'src')
-rw-r--r-- | src/rspamadm/CMakeLists.txt | 2 | ||||
-rw-r--r-- | src/rspamadm/commands.c | 4 | ||||
-rw-r--r-- | src/rspamadm/corpus_test.c | 139 | ||||
-rw-r--r-- | src/rspamadm/rescore.c | 151 |
4 files changed, 296 insertions, 0 deletions
diff --git a/src/rspamadm/CMakeLists.txt b/src/rspamadm/CMakeLists.txt
index 7dfaad691..fb3f25229 100644
--- a/src/rspamadm/CMakeLists.txt
+++ b/src/rspamadm/CMakeLists.txt
@@ -10,10 +10,12 @@ SET(RSPAMADMSRC rspamadm.c
 		control.c
 		confighelp.c
 		configwizard.c
+		corpus_test.c
 		stat_convert.c
 		signtool.c
 		lua_repl.c
 		dkim_keygen.c
+		rescore.c
 		${CMAKE_BINARY_DIR}/src/workers.c
 		${CMAKE_BINARY_DIR}/src/modules.c
 		${CMAKE_SOURCE_DIR}/src/controller.c
diff --git a/src/rspamadm/commands.c b/src/rspamadm/commands.c
index 1eaa45c81..410306fe3 100644
--- a/src/rspamadm/commands.c
+++ b/src/rspamadm/commands.c
@@ -29,6 +29,8 @@ extern struct rspamadm_command signtool_command;
 extern struct rspamadm_command lua_command;
 extern struct rspamadm_command dkim_keygen_command;
 extern struct rspamadm_command configwizard_command;
+extern struct rspamadm_command corpus_test_command;
+extern struct rspamadm_command rescore_command;
 
 const struct rspamadm_command *commands[] = {
 	&help_command,
@@ -46,6 +48,8 @@ const struct rspamadm_command *commands[] = {
 	&lua_command,
 	&dkim_keygen_command,
 	&configwizard_command,
+	&corpus_test_command,
+	&rescore_command,
 	NULL
 };
 
diff --git a/src/rspamadm/corpus_test.c b/src/rspamadm/corpus_test.c
new file mode 100644
index 000000000..62aecb148
--- /dev/null
+++ b/src/rspamadm/corpus_test.c
@@ -0,0 +1,139 @@
+/*-
+ * Copyright 2017 Pragadeesh C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "rspamadm.h"
+#include "config.h"
+#include "lua/lua_common.h"
+
+/* Option values; the initializers are the defaults used when a flag is absent */
+static gchar *ham_directory = NULL;
+static gchar *spam_directory = NULL;
+static gchar *output_location = "results.log";
+static gint connections = 10;
+
+static void rspamadm_corpus_test (gint argc, gchar **argv);
+static const char *rspamadm_corpus_test_help (gboolean full_help);
+
+struct rspamadm_command corpus_test_command = {
+	.name = "corpus_test",
+	.flags = 0,
+	.help = rspamadm_corpus_test_help,
+	.run = rspamadm_corpus_test
+};
+
+/* Command-line options accepted by `rspamadm corpus_test` */
+static GOptionEntry entries[] = {
+	{"ham", 'a', 0, G_OPTION_ARG_FILENAME, &ham_directory,
+			"Ham directory", NULL},
+	{"spam", 's', 0, G_OPTION_ARG_FILENAME, &spam_directory,
+			"Spam directory", NULL},
+	{"output", 'o', 0, G_OPTION_ARG_FILENAME, &output_location,
+			"Log output location", NULL},
+	{"connections", 'n', 0, G_OPTION_ARG_INT, &connections,
+			"Number of parallel connections [Default: 10]", NULL},
+	{NULL, 0, 0, G_OPTION_ARG_NONE, NULL, NULL, NULL}
+};
+
+/*
+ * Returns the help text for the command: the full usage description when
+ * full_help is set, a one-line summary otherwise.
+ */
+static const char *
+rspamadm_corpus_test_help (gboolean full_help)
+{
+	const char *help_str;
+
+	if (full_help) {
+		help_str = "Create log files from email corpus\n\n"
+				"Usage: rspamadm corpus_test [-a <ham_directory>]"
+				" [-s <spam_directory>] [-n <connections>]"
+				" [-o <output_file>]\n"
+				"Where options are:\n\n"
+				"-a: path to ham directory\n"
+				"-s: path to spam directory\n"
+				"-n: maximum parallel connections\n"
+				"-o: log output file\n";
+	}
+	else {
+		help_str = "Create log files from email corpus";
+	}
+
+	return help_str;
+}
+
+/*
+ * Entry point of `rspamadm corpus_test`: parses the command-line options,
+ * packs them into a UCL object and hands it to the "corpus_test" Lua
+ * subroutine.
+ */
+static void
+rspamadm_corpus_test (gint argc, gchar **argv)
+{
+	GOptionContext *context;
+	GError *error = NULL;
+	lua_State *L;
+	ucl_object_t *obj;
+
+	context = g_option_context_new (
+			"corpus_test - Create log files from email corpus");
+
+	g_option_context_set_summary (context,
+			"Summary:\n Rspamd administration utility version "
+					RVERSION
+					"\n Release id: "
+					RID);
+
+	g_option_context_add_main_entries (context, entries, NULL);
+	g_option_context_set_ignore_unknown_options (context, TRUE);
+
+	if (!g_option_context_parse (context, &argc, &argv, &error)) {
+		rspamd_fprintf (stderr, "option parsing failed: %s\n", error->message);
+		g_error_free (error);
+		exit(1);
+	}
+
+	/* Parsed values are stored in the static variables above */
+	g_option_context_free (context);
+
+	L = rspamd_lua_init ();
+	rspamd_lua_set_path(L, NULL, NULL);
+
+	obj = ucl_object_typed_new (UCL_OBJECT);
+
+	/* Both directories are optional: leave the key out when one is not given */
+	if (ham_directory != NULL) {
+		ucl_object_insert_key (obj, ucl_object_fromstring (ham_directory),
+				"ham_directory", 0, false);
+	}
+	if (spam_directory != NULL) {
+		ucl_object_insert_key (obj, ucl_object_fromstring (spam_directory),
+				"spam_directory", 0, false);
+	}
+
+	ucl_object_insert_key (obj, ucl_object_fromstring (output_location),
+			"output_location", 0, false);
+	ucl_object_insert_key (obj, ucl_object_fromint (connections),
+			"connections", 0, false);
+
+	rspamadm_execute_lua_ucl_subr (L,
+			argc,
+			argv,
+			obj,
+			"corpus_test");
+
+	lua_close (L);
+	ucl_object_unref (obj);
+}
diff --git a/src/rspamadm/rescore.c b/src/rspamadm/rescore.c
new file mode 100644
index 000000000..de5ace272
--- /dev/null
+++ b/src/rspamadm/rescore.c
@@ -0,0 +1,151 @@
+/*-
+ * Copyright 2017 Pragadeesh C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "rspamadm.h"
+#include "config.h"
+#include "lua/lua_common.h"
+
+/* The command is usable only when both WITH_TORCH and WITH_LUAJIT are defined */
+#if !defined(WITH_TORCH) || !defined(WITH_LUAJIT)
+	#define HAS_TORCH false
+#else
+	#define HAS_TORCH true
+#endif
+
+static gchar *logdir = NULL;
+static gchar *output = "new.scores";
+/* Spam threshold; no command-line option sets it, so it is always 15 */
+static gdouble threshold = 15;
+static gboolean score_diff = false; // Print score diff flag
+static gint64 iters = 500; // Perceptron max iterations
+
+static void rspamadm_rescore (gint argc, gchar **argv);
+static const char *rspamadm_rescore_help (gboolean full_help);
+
+struct rspamadm_command rescore_command = {
+	.name = "rescore",
+	.flags = 0,
+	.help = rspamadm_rescore_help,
+	.run = rspamadm_rescore
+};
+
+/* Command-line options accepted by `rspamadm rescore` */
+static GOptionEntry entries[] = {
+	{"logdir", 'l', 0, G_OPTION_ARG_FILENAME, &logdir,
+			"Logs directory", NULL},
+	{"output", 'o', 0, G_OPTION_ARG_FILENAME, &output,
+			"Scores output location", NULL},
+	{"diff", 'd', 0, G_OPTION_ARG_NONE, &score_diff,
+			"Print score diff", NULL},
+	{"iters", 'i', 0, G_OPTION_ARG_INT64, &iters,
+			"Max iterations for perceptron [Default: 500]", NULL},
+	{NULL, 0, 0, G_OPTION_ARG_NONE, NULL, NULL, NULL}
+};
+
+/*
+ * Returns the help text for the command: the full usage description when
+ * full_help is set, a one-line summary otherwise.
+ */
+static const char *
+rspamadm_rescore_help (gboolean full_help)
+{
+	const char *help_str;
+
+	if (full_help) {
+		help_str = "Estimate optimal symbol weights from log files\n\n"
+				"Usage: rspamadm rescore -l <log_directory>\n"
+				"Where options are:\n\n"
+				"-l: path to logs directory\n"
+				"-o: Scores output file location\n"
+				"-d: Print scores diff\n"
+				"-i: Max iterations for perceptron\n";
+	}
+	else {
+		help_str = "Estimate optimal symbol weights from log files";
+	}
+
+	return help_str;
+}
+
+/*
+ * Entry point of `rspamadm rescore`: parses the command-line options, checks
+ * that Torch support is compiled in and that a log directory was given, then
+ * passes the settings as a UCL object to the "rescore" Lua subroutine.
+ */
+static void
+rspamadm_rescore (gint argc, gchar **argv)
+{
+	GOptionContext *context;
+	GError *error = NULL;
+	lua_State *L;
+	ucl_object_t *obj;
+
+	context = g_option_context_new (
+			"rescore - Estimate optimal symbol weights from log files");
+
+	g_option_context_set_summary (context,
+			"Summary:\n Rspamd administration utility version "
+					RVERSION
+					"\n Release id: "
+					RID);
+
+	g_option_context_add_main_entries (context, entries, NULL);
+	g_option_context_set_ignore_unknown_options (context, TRUE);
+
+	if (!g_option_context_parse (context, &argc, &argv, &error)) {
+		rspamd_fprintf (stderr, "option parsing failed: %s\n", error->message);
+		g_error_free (error);
+		exit(1);
+	}
+
+	/* Parsed values are stored in the static variables above */
+	g_option_context_free (context);
+
+	if (!HAS_TORCH) {
+		rspamd_fprintf (stderr, "Torch is not enabled. "
+			"Use -DENABLE_TORCH=ON option while running cmake.\n");
+		exit (1);
+	}
+
+	if (logdir == NULL) {
+		rspamd_fprintf (stderr, "Please specify log directory.\n");
+		exit (1);
+	}
+
+	L = rspamd_lua_init ();
+	rspamd_lua_set_path(L, NULL, NULL);
+
+	obj = ucl_object_typed_new (UCL_OBJECT);
+	ucl_object_insert_key (obj, ucl_object_fromstring (logdir),
+			"logdir", 0, false);
+	ucl_object_insert_key (obj, ucl_object_fromstring (output),
+			"output", 0, false);
+	ucl_object_insert_key (obj, ucl_object_fromdouble (threshold),
+			"threshold", 0, false);
+	ucl_object_insert_key (obj, ucl_object_fromint (iters),
+			"iters", 0, false);
+	ucl_object_insert_key (obj, ucl_object_frombool (score_diff),
+			"diff", 0, false);
+
+	rspamadm_execute_lua_ucl_subr (L,
+			argc,
+			argv,
+			obj,
+			"rescore");
+
+	lua_close (L);
+	ucl_object_unref (obj);
+}
\ No newline at end of file |