summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2016-01-06 18:18:07 +0000
committerVsevolod Stakhov <vsevolod@highsecure.ru>2016-01-06 18:18:07 +0000
commit96fdcd7cc836df8b6ea99ca0551aab4adfe6179b (patch)
tree8f338637af97e53535e143964b2b4f827e11046b
parente3a8596ecf51168910c71ae2891ebb4450ffcb0f (diff)
downloadrspamd-96fdcd7cc836df8b6ea99ca0551aab4adfe6179b.tar.gz
rspamd-96fdcd7cc836df8b6ea99ca0551aab4adfe6179b.zip
Implement autolearn
-rw-r--r--src/libserver/task.c1
-rw-r--r--src/libstat/stat_api.h7
-rw-r--r--src/libstat/stat_process.c68
3 files changed, 76 insertions, 0 deletions
diff --git a/src/libserver/task.c b/src/libserver/task.c
index 91ed48e86..bee7df22d 100644
--- a/src/libserver/task.c
+++ b/src/libserver/task.c
@@ -455,6 +455,7 @@ rspamd_task_process (struct rspamd_task *task, guint stages)
case RSPAMD_TASK_STAGE_POST_FILTERS:
rspamd_lua_call_post_filters (task);
+ rspamd_stat_check_autolearn (task);
break;
case RSPAMD_TASK_STAGE_LEARN:
diff --git a/src/libstat/stat_api.h b/src/libstat/stat_api.h
index a63ee3734..28fbf2429 100644
--- a/src/libstat/stat_api.h
+++ b/src/libstat/stat_api.h
@@ -68,6 +68,13 @@ rspamd_stat_result_t rspamd_stat_classify (struct rspamd_task *task,
/**
+ * Check if a task should be learned and set the appropriate flags for it
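+ * @param task classified task to check; its learn flags may be modified
+ * @return TRUE if a learn-spam or learn-ham flag was set, FALSE otherwise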
+ */
+gboolean rspamd_stat_check_autolearn (struct rspamd_task *task);
+
+/**
* Learn task as spam or ham, task must be processed prior to this call
* @param task task to learn
* @param spam if TRUE learn spam, otherwise learn ham
diff --git a/src/libstat/stat_process.c b/src/libstat/stat_process.c
index 8d88540aa..6635f8aa7 100644
--- a/src/libstat/stat_process.c
+++ b/src/libstat/stat_process.c
@@ -26,6 +26,7 @@
#include "rspamd.h"
#include "stat_internal.h"
#include "libmime/message.h"
+#include "libmime/filter.h"
#include "libmime/images.h"
#include "libserver/html.h"
#include "lua/lua_common.h"
@@ -642,6 +643,73 @@ rspamd_stat_learn (struct rspamd_task *task,
return ret;
}
+gboolean
+rspamd_stat_check_autolearn (struct rspamd_task *task)
+{
+ struct rspamd_stat_ctx *st_ctx;
+ struct rspamd_classifier *cl;
+ const ucl_object_t *obj;
+ struct metric_result *mres;
+ guint i;
+ gboolean ret = FALSE;
+ /* Preconditions (asserted, not handled): task is already classified and a stat context exists */
+ g_assert (RSPAMD_TASK_IS_CLASSIFIED (task));
+ st_ctx = rspamd_stat_get_ctx ();
+ g_assert (st_ctx != NULL);
+
+ for (i = 0; i < st_ctx->classifiers->len; i ++) {
+ cl = g_ptr_array_index (st_ctx->classifiers, i);
+
+ if (cl->cfg->opts) {
+ obj = ucl_object_find_key (cl->cfg->opts, "autolearn");
+
+ /* Only a boolean "autolearn" value is honoured here; TODO: support range and lua for this option */
+ if (ucl_object_type (obj) == UCL_BOOLEAN) {
+ if (ucl_object_toboolean (obj)) {
+ /*
+ * Default learning algorithm (applied to the DEFAULT_METRIC result):
+ * the first classifier that sets a learn flag ends the scan (break below)
+ * - We learn spam if the freshly recomputed action is METRIC_ACTION_REJECT
+ * - Otherwise we learn ham if score is less than zero
+ */
+ mres = g_hash_table_lookup (task->results, DEFAULT_METRIC);
+
+ if (mres) {
+ mres->action = rspamd_check_action_metric (task,
+ mres->score,
+ &mres->required_score,
+ mres->metric);
+
+ if (mres->action == METRIC_ACTION_REJECT) {
+ task->flags |= RSPAMD_TASK_FLAG_LEARN_SPAM;
+ msg_info_task ("<%s>: autolearn spam for classifier "
+ "'%s' as message's "
+ "action is reject, score: %.2f",
+ task->message_id, cl->cfg->name,
+ mres->score);
+ ret = TRUE;
+ break;
+ }
+ else if (mres->score < 0) {
+ task->flags |= RSPAMD_TASK_FLAG_LEARN_HAM;
+ msg_info_task ("<%s>: autolearn ham for classifier "
+ "'%s' as message's "
+ "score is negative: %.2f",
+ task->message_id, cl->cfg->name,
+ mres->score);
+
+ ret = TRUE;
+ break;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ return ret;
+}
+
/**
* Get the overall statistics for all statfile backends
* @param cfg configuration