aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2017-12-07 20:18:49 +0000
committer GitHub <noreply@github.com> 2017-12-07 20:18:49 +0000
commit 09f3015da643b82e24b054f1704aa6783bfc79e8 (patch)
tree 96c522272ea9373433938c2dbc97ec25e3bf703f /src
parent 1e929b744952674120545cbbd1643b6fd1910aab (diff)
parent 703bd13d5bedc30ed9bbeb7180d3cd083fc0e1f4 (diff)
download rspamd-09f3015da643b82e24b054f1704aa6783bfc79e8.tar.gz
rspamd-09f3015da643b82e24b054f1704aa6783bfc79e8.zip
Merge pull request #1946 from cpragadeesh/rescore-filter
[Feature] added corpus_test, rescore commands
Diffstat (limited to 'src')
-rw-r--r-- src/rspamadm/CMakeLists.txt 2
-rw-r--r-- src/rspamadm/commands.c 4
-rw-r--r-- src/rspamadm/corpus_test.c 121
-rw-r--r-- src/rspamadm/rescore.c 141
4 files changed, 268 insertions, 0 deletions
diff --git a/src/rspamadm/CMakeLists.txt b/src/rspamadm/CMakeLists.txt
index 7dfaad691..fb3f25229 100644
--- a/src/rspamadm/CMakeLists.txt
+++ b/src/rspamadm/CMakeLists.txt
@@ -10,10 +10,12 @@ SET(RSPAMADMSRC rspamadm.c
control.c
confighelp.c
configwizard.c
+ corpus_test.c
stat_convert.c
signtool.c
lua_repl.c
dkim_keygen.c
+ rescore.c
${CMAKE_BINARY_DIR}/src/workers.c
${CMAKE_BINARY_DIR}/src/modules.c
${CMAKE_SOURCE_DIR}/src/controller.c
diff --git a/src/rspamadm/commands.c b/src/rspamadm/commands.c
index 1eaa45c81..410306fe3 100644
--- a/src/rspamadm/commands.c
+++ b/src/rspamadm/commands.c
@@ -29,6 +29,8 @@ extern struct rspamadm_command signtool_command;
extern struct rspamadm_command lua_command;
extern struct rspamadm_command dkim_keygen_command;
extern struct rspamadm_command configwizard_command;
+extern struct rspamadm_command corpus_test_command;
+extern struct rspamadm_command rescore_command;
const struct rspamadm_command *commands[] = {
&help_command,
@@ -46,6 +48,8 @@ const struct rspamadm_command *commands[] = {
&lua_command,
&dkim_keygen_command,
&configwizard_command,
+ &corpus_test_command,
+ &rescore_command,
NULL
};
diff --git a/src/rspamadm/corpus_test.c b/src/rspamadm/corpus_test.c
new file mode 100644
index 000000000..62aecb148
--- /dev/null
+++ b/src/rspamadm/corpus_test.c
@@ -0,0 +1,121 @@
+/*-
+ * Copyright 2017 Pragadeesh C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "rspamadm.h"
+#include "config.h"
+#include "lua/lua_common.h"
+
+static gchar *ham_directory = NULL; /* set by -a/--ham; stays NULL when the option is absent */
+static gchar *spam_directory = NULL; /* set by -s/--spam; stays NULL when the option is absent */
+static gchar *output_location = "results.log"; /* set by -o/--output; default log file name */
+static gint connections = 10; /* set by -n/--connections; default of 10 parallel connections */
+
+static void rspamadm_corpus_test (gint argc, gchar **argv); /* command entry point, defined below */
+static const char *rspamadm_corpus_test_help (gboolean full_help); /* help text provider, defined below */
+
+struct rspamadm_command corpus_test_command = { /* descriptor referenced from commands[] in commands.c */
+ .name = "corpus_test", /* subcommand name as typed after `rspamadm` */
+ .flags = 0,
+ .help = rspamadm_corpus_test_help, /* returns the short or full help string */
+ .run = rspamadm_corpus_test /* parses options and runs the "corpus_test" Lua subroutine */
+};
+
+/* Command-line options of `rspamadm corpus_test`; each entry stores into one of the statics above. */
+static GOptionEntry entries[] = {
+	{"ham", 'a', 0, G_OPTION_ARG_FILENAME, &ham_directory,
+			"Ham directory", NULL},
+	{"spam", 's', 0, G_OPTION_ARG_FILENAME, &spam_directory,
+			"Spam directory", NULL},
+	{"output", 'o', 0, G_OPTION_ARG_FILENAME, &output_location,
+			"Log output location", NULL},
+	{"connections", 'n', 0, G_OPTION_ARG_INT, &connections,
+			"Number of parallel connections [Default: 10]", NULL},
+	{NULL, 0, 0, G_OPTION_ARG_NONE, NULL, NULL, NULL}
+};
+
+/*
+ * Return the help text of the corpus_test command.
+ *
+ * full_help: when true the multi-line usage description is returned,
+ * otherwise the one-line summary.
+ *
+ * The result is a string literal and must not be freed by the caller.
+ */
+static const char *
+rspamadm_corpus_test_help (gboolean full_help)
+{
+	if (!full_help) {
+		return "Create logs files from email corpus";
+	}
+
+	/* The usage line lists every option declared in entries[] */
+	return "Create logs files from email corpus\n\n"
+			"Usage: rspamadm corpus_test [-a <ham_directory>]"
+			" [-s <spam_directory>] [-n <connections>] [-o <output_file>]\n"
+			"Where options are:\n\n"
+			"-a: path to ham directory\n"
+			"-s: path to spam directory\n"
+			"-n: maximum parallel connections\n"
+			"-o: log output file\n";
+}
+
+/*
+ * Entry point of `rspamadm corpus_test`.
+ *
+ * Parses the command-line options, packs their values into a UCL object and
+ * passes that object, together with the remaining arguments, to the
+ * "corpus_test" Lua subroutine.
+ *
+ * argc, argv: command-line arguments; g_option_context_parse() may rewrite
+ * them while consuming the recognised options.
+ *
+ * Terminates the process with exit status 1 when option parsing fails.
+ */
+static void
+rspamadm_corpus_test (gint argc, gchar **argv)
+{
+	GOptionContext *context;
+	GError *error = NULL;
+	lua_State *L;
+	ucl_object_t *obj;
+
+	context = g_option_context_new (
+			"corpus_test - Create logs files from email corpus");
+
+	g_option_context_set_summary (context,
+			"Summary:\n Rspamd administration utility version "
+					RVERSION
+					"\n Release id: "
+					RID);
+
+	g_option_context_add_main_entries (context, entries, NULL);
+	g_option_context_set_ignore_unknown_options (context, TRUE);
+
+	if (!g_option_context_parse (context, &argc, &argv, &error)) {
+		rspamd_fprintf (stderr, "option parsing failed: %s\n", error->message);
+		g_error_free (error);
+		g_option_context_free (context);
+		exit (1);
+	}
+
+	/*
+	 * The parsed values were stored in the file-level option variables, so
+	 * the context can be released as soon as parsing is done.
+	 */
+	g_option_context_free (context);
+
+	L = rspamd_lua_init ();
+	rspamd_lua_set_path (L, NULL, NULL);
+
+	obj = ucl_object_typed_new (UCL_OBJECT);
+
+	/* -a and -s are optional: only forward the directories actually given */
+	if (ham_directory != NULL) {
+		ucl_object_insert_key (obj, ucl_object_fromstring (ham_directory),
+				"ham_directory", 0, false);
+	}
+
+	if (spam_directory != NULL) {
+		ucl_object_insert_key (obj, ucl_object_fromstring (spam_directory),
+				"spam_directory", 0, false);
+	}
+
+	ucl_object_insert_key (obj, ucl_object_fromstring (output_location),
+			"output_location", 0, false);
+	ucl_object_insert_key (obj, ucl_object_fromint (connections),
+			"connections", 0, false);
+
+	rspamadm_execute_lua_ucl_subr (L,
+			argc,
+			argv,
+			obj,
+			"corpus_test");
+
+	lua_close (L);
+	ucl_object_unref (obj);
+}
diff --git a/src/rspamadm/rescore.c b/src/rspamadm/rescore.c
new file mode 100644
index 000000000..de5ace272
--- /dev/null
+++ b/src/rspamadm/rescore.c
@@ -0,0 +1,141 @@
+/*-
+ * Copyright 2017 Pragadeesh C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "rspamadm.h"
+#include "config.h"
+#include "lua/lua_common.h"
+
+/* HAS_TORCH is true only when both WITH_TORCH and WITH_LUAJIT are defined. */
+#if defined(WITH_TORCH) && defined(WITH_LUAJIT)
+	#define HAS_TORCH true
+#else
+	#define HAS_TORCH false
+#endif
+
+static gchar *logdir = NULL; // Logs directory (-l/--logdir); the command exits if it is left unset
+static gchar *output = "new.scores"; // Scores output location (-o/--output)
+static gdouble threshold = 15; // Spam threshold; no entry in entries[] changes it
+static gboolean score_diff = false; // Print score diff flag (-d/--diff)
+static gint64 iters = 500; // Perceptron max iterations (-i/--iters)
+
+static void rspamadm_rescore (gint argc, gchar **argv); /* command entry point, defined below */
+static const char *rspamadm_rescore_help (gboolean full_help); /* help text provider, defined below */
+
+struct rspamadm_command rescore_command = { /* descriptor referenced from commands[] in commands.c */
+ .name = "rescore", /* subcommand name as typed after `rspamadm` */
+ .flags = 0,
+ .help = rspamadm_rescore_help, /* returns the short or full help string */
+ .run = rspamadm_rescore /* parses options and runs the "rescore" Lua subroutine */
+};
+
+/* Command-line options of `rspamadm rescore`; each entry stores into one of the statics above. */
+static GOptionEntry entries[] = {
+	{"logdir", 'l', 0, G_OPTION_ARG_FILENAME, &logdir,
+			"Logs directory", NULL},
+	{"output", 'o', 0, G_OPTION_ARG_FILENAME, &output,
+			"Scores output location", NULL},
+	{"diff", 'd', 0, G_OPTION_ARG_NONE, &score_diff,
+			"Print score diff", NULL},
+	{"iters", 'i', 0, G_OPTION_ARG_INT64, &iters,
+			"Max iterations for perceptron [Default: 500]", NULL},
+	{NULL, 0, 0, G_OPTION_ARG_NONE, NULL, NULL, NULL}
+};
+
+/*
+ * Help text provider of the rescore command.
+ *
+ * full_help: selects the multi-line usage text when true and the one-line
+ * summary otherwise.
+ *
+ * Both results are string literals; the caller must not free them.
+ */
+static const char *
+rspamadm_rescore_help (gboolean full_help)
+{
+	return full_help
+			? "Estimate optimal symbol weights from log files\n\n"
+			  "Usage: rspamadm rescore -l <log_directory>\n"
+			  "Where options are:\n\n"
+			  "-l: path to logs directory\n"
+			  "-o: Scores output file location\n"
+			  "-d: Print scores diff\n"
+			  "-i: Max iterations for perceptron\n"
+			: "Estimate optimal symbol weights from log files";
+}
+
+/*
+ * Entry point of `rspamadm rescore`.
+ *
+ * Parses the command-line options, packs their values into a UCL object and
+ * passes that object, together with the remaining arguments, to the
+ * "rescore" Lua subroutine.
+ *
+ * argc, argv: command-line arguments; g_option_context_parse() may rewrite
+ * them while consuming the recognised options.
+ *
+ * Terminates the process with exit status 1 when option parsing fails, when
+ * HAS_TORCH is false, or when no log directory was given.
+ */
+static void
+rspamadm_rescore (gint argc, gchar **argv)
+{
+	GOptionContext *context;
+	GError *error = NULL;
+	lua_State *L;
+	ucl_object_t *obj;
+
+	context = g_option_context_new (
+			"rescore - Estimate optimal symbol weights from log files");
+
+	g_option_context_set_summary (context,
+			"Summary:\n Rspamd administration utility version "
+					RVERSION
+					"\n Release id: "
+					RID);
+
+	g_option_context_add_main_entries (context, entries, NULL);
+	g_option_context_set_ignore_unknown_options (context, TRUE);
+
+	if (!g_option_context_parse (context, &argc, &argv, &error)) {
+		rspamd_fprintf (stderr, "option parsing failed: %s\n", error->message);
+		g_error_free (error);
+		g_option_context_free (context);
+		exit (1);
+	}
+
+	/*
+	 * The parsed values were stored in the file-level option variables, so
+	 * the context can be released before any of the checks below may exit.
+	 */
+	g_option_context_free (context);
+
+	/* Checked after parsing so that option errors are still reported first */
+	if (!HAS_TORCH) {
+		rspamd_fprintf (stderr, "Torch is not enabled. "
+				"Use -DENABLE_TORCH=ON option while running cmake.\n");
+		exit (1);
+	}
+
+	if (logdir == NULL) {
+		rspamd_fprintf (stderr, "Please specify log directory.\n");
+		exit (1);
+	}
+
+	L = rspamd_lua_init ();
+	rspamd_lua_set_path (L, NULL, NULL);
+
+	obj = ucl_object_typed_new (UCL_OBJECT);
+
+	ucl_object_insert_key (obj, ucl_object_fromstring (logdir),
+			"logdir", 0, false);
+	ucl_object_insert_key (obj, ucl_object_fromstring (output),
+			"output", 0, false);
+	ucl_object_insert_key (obj, ucl_object_fromdouble (threshold),
+			"threshold", 0, false);
+	ucl_object_insert_key (obj, ucl_object_fromint (iters),
+			"iters", 0, false);
+	ucl_object_insert_key (obj, ucl_object_frombool (score_diff),
+			"diff", 0, false);
+
+	rspamadm_execute_lua_ucl_subr (L,
+			argc,
+			argv,
+			obj,
+			"rescore");
+
+	lua_close (L);
+	ucl_object_unref (obj);
+}
\ No newline at end of file