Implement autolearn

author Vsevolod Stakhov <vsevolod@highsecure.ru>

Wed, 6 Jan 2016 18:18:07 +0000 (18:18 +0000)

committer Vsevolod Stakhov <vsevolod@highsecure.ru>

Wed, 6 Jan 2016 18:18:07 +0000 (18:18 +0000)
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Wed, 6 Jan 2016 18:18:07 +0000 (18:18 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Wed, 6 Jan 2016 18:18:07 +0000 (18:18 +0000)
diff --git a/src/libserver/task.c b/src/libserver/task.c

index 91ed48e8663c3db50cd3f1445fe2b6f143e1ebf5..bee7df22dd065f8c384c302ba174ae718f529f18 100644 (file)
--- a/src/libserver/task.c
+++ b/src/libserver/task.c
@@ -455,6 +455,7 @@ rspamd_task_process (struct rspamd_task *task, guint stages)
  
         case RSPAMD_TASK_STAGE_POST_FILTERS:
                 rspamd_lua_call_post_filters (task);
+               rspamd_stat_check_autolearn (task);
                 break;
  
         case RSPAMD_TASK_STAGE_LEARN:
diff --git a/src/libstat/stat_api.h b/src/libstat/stat_api.h

index a63ee373480cb03f2a36797433833c4e568f86cc..28fbf2429b6ef676f7a1d9c9b165d830a3323c97 100644 (file)
--- a/src/libstat/stat_api.h
+++ b/src/libstat/stat_api.h
@@ -67,6 +67,13 @@ rspamd_stat_result_t rspamd_stat_classify (struct rspamd_task *task,
                 lua_State *L, guint stage, GError **err);
  
  
+/**
+ * Check if a task should be learned and set the appropriate flags for it
+ * @param task task to check, it must be already classified (this is asserted)
+ * @return TRUE if a learn spam or learn ham flag has been set, FALSE otherwise
+ */
+gboolean rspamd_stat_check_autolearn (struct rspamd_task *task);
+
  /**
   * Learn task as spam or ham, task must be processed prior to this call
   * @param task task to learn
diff --git a/src/libstat/stat_process.c b/src/libstat/stat_process.c

index 8d88540aaa36023eb94fac6ac9a3a7ca138d63ac..6635f8aa748d8ab4d6594f0aa3da7946b3fb2c56 100644 (file)
--- a/src/libstat/stat_process.c
+++ b/src/libstat/stat_process.c
@@ -26,6 +26,7 @@
  #include "rspamd.h"
  #include "stat_internal.h"
  #include "libmime/message.h"
+#include "libmime/filter.h"
  #include "libmime/images.h"
  #include "libserver/html.h"
  #include "lua/lua_common.h"
@@ -642,6 +643,73 @@ rspamd_stat_learn (struct rspamd_task *task,
         return ret;
  }
  
+gboolean /* TRUE if a learn flag has been set on the task, FALSE otherwise */
+rspamd_stat_check_autolearn (struct rspamd_task *task) /* decide whether the task is autolearned as spam or ham */
+{
+       struct rspamd_stat_ctx *st_ctx;
+       struct rspamd_classifier *cl;
+       const ucl_object_t *obj;
+       struct metric_result *mres;
+       guint i;
+       gboolean ret = FALSE;
+
+       g_assert (RSPAMD_TASK_IS_CLASSIFIED (task)); /* the task must be classified before this check */
+       st_ctx = rspamd_stat_get_ctx ();
+       g_assert (st_ctx != NULL);
+
+       for (i = 0; i < st_ctx->classifiers->len; i ++) { /* stops at the first classifier that triggers learning */
+               cl = g_ptr_array_index (st_ctx->classifiers, i);
+
+               if (cl->cfg->opts) { /* classifiers without options are skipped */
+                       obj = ucl_object_find_key (cl->cfg->opts, "autolearn");
+
+                       /* TODO: support range and lua for this option; only a boolean value is honoured for now */
+                       if (ucl_object_type (obj) == UCL_BOOLEAN) {
+                               if (ucl_object_toboolean (obj)) {
+                                       /*
+                                        * Default learning algorithm (based on the DEFAULT_METRIC result):
+                                        *
+                                        * - We learn spam if action is METRIC_ACTION_REJECT
+                                        * - Otherwise we learn ham if score is less than zero
+                                        */
+                                       mres = g_hash_table_lookup (task->results, DEFAULT_METRIC);
+
+                                       if (mres) { /* no result for the default metric: nothing to decide */
+                                               mres->action = rspamd_check_action_metric (task,
+                                                               mres->score,
+                                                               &mres->required_score,
+                                                               mres->metric); /* the computed action is stored back in the result */
+
+                                               if (mres->action == METRIC_ACTION_REJECT) {
+                                                       task->flags |= RSPAMD_TASK_FLAG_LEARN_SPAM;
+                                                       msg_info_task ("<%s>: autolearn spam for classifier "
+                                                                       "'%s' as message's "
+                                                                       "action is reject, score: %.2f",
+                                                                       task->message_id, cl->cfg->name,
+                                                                       mres->score);
+                                                       ret = TRUE;
+                                                       break; /* the flag is task-wide, remaining classifiers are not checked */
+                                               }
+                                               else if (mres->score < 0) {
+                                                       task->flags |= RSPAMD_TASK_FLAG_LEARN_HAM;
+                                                       msg_info_task ("<%s>: autolearn ham for classifier "
+                                                                       "'%s' as message's "
+                                                                       "score is negative: %.2f",
+                                                                       task->message_id, cl->cfg->name,
+                                                                       mres->score);
+
+                                                       ret = TRUE;
+                                                       break; /* the flag is task-wide, remaining classifiers are not checked */
+                                               }
+                                       }
+                               }
+                       }
+               }
+       }
+
+       return ret;
+}
+
  /**
   * Get the overall statistics for all statfile backends
   * @param cfg configuration
author	Vsevolod Stakhov <vsevolod@highsecure.ru>
	Wed, 6 Jan 2016 18:18:07 +0000 (18:18 +0000)
committer	Vsevolod Stakhov <vsevolod@highsecure.ru>
	Wed, 6 Jan 2016 18:18:07 +0000 (18:18 +0000)
src/libserver/task.c		patch \| blob \| history
src/libstat/stat_api.h		patch \| blob \| history
src/libstat/stat_process.c		patch \| blob \| history