diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2016-01-06 18:18:07 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2016-01-06 18:18:07 +0000 |
commit | 96fdcd7cc836df8b6ea99ca0551aab4adfe6179b (patch) | |
tree | 8f338637af97e53535e143964b2b4f827e11046b | |
parent | e3a8596ecf51168910c71ae2891ebb4450ffcb0f (diff) | |
download | rspamd-96fdcd7cc836df8b6ea99ca0551aab4adfe6179b.tar.gz rspamd-96fdcd7cc836df8b6ea99ca0551aab4adfe6179b.zip |
Implement autolearn
-rw-r--r-- | src/libserver/task.c | 1 | ||||
-rw-r--r-- | src/libstat/stat_api.h | 7 | ||||
-rw-r--r-- | src/libstat/stat_process.c | 68 |
3 files changed, 76 insertions, 0 deletions
```diff
diff --git a/src/libserver/task.c b/src/libserver/task.c
index 91ed48e86..bee7df22d 100644
--- a/src/libserver/task.c
+++ b/src/libserver/task.c
@@ -455,6 +455,7 @@ rspamd_task_process (struct rspamd_task *task, guint stages)
 
 	case RSPAMD_TASK_STAGE_POST_FILTERS:
 		rspamd_lua_call_post_filters (task);
+		rspamd_stat_check_autolearn (task);
 		break;
 
 	case RSPAMD_TASK_STAGE_LEARN:
diff --git a/src/libstat/stat_api.h b/src/libstat/stat_api.h
index a63ee3734..28fbf2429 100644
--- a/src/libstat/stat_api.h
+++ b/src/libstat/stat_api.h
@@ -68,6 +68,13 @@ rspamd_stat_result_t rspamd_stat_classify (struct rspamd_task *task,
 
 
 /**
+ * Check if a task should be learned and set the appropriate flags for it
+ * @param task
+ * @return
+ */
+gboolean rspamd_stat_check_autolearn (struct rspamd_task *task);
+
+/**
  * Learn task as spam or ham, task must be processed prior to this call
  * @param task task to learn
  * @param spam if TRUE learn spam, otherwise learn ham
diff --git a/src/libstat/stat_process.c b/src/libstat/stat_process.c
index 8d88540aa..6635f8aa7 100644
--- a/src/libstat/stat_process.c
+++ b/src/libstat/stat_process.c
@@ -26,6 +26,7 @@
 #include "rspamd.h"
 #include "stat_internal.h"
 #include "libmime/message.h"
+#include "libmime/filter.h"
 #include "libmime/images.h"
 #include "libserver/html.h"
 #include "lua/lua_common.h"
@@ -642,6 +643,73 @@ rspamd_stat_learn (struct rspamd_task *task,
 
 	return ret;
 }
+gboolean
+rspamd_stat_check_autolearn (struct rspamd_task *task)
+{
+	struct rspamd_stat_ctx *st_ctx;
+	struct rspamd_classifier *cl;
+	const ucl_object_t *obj;
+	struct metric_result *mres;
+	guint i;
+	gboolean ret = FALSE;
+
+	g_assert (RSPAMD_TASK_IS_CLASSIFIED (task));
+	st_ctx = rspamd_stat_get_ctx ();
+	g_assert (st_ctx != NULL);
+
+	for (i = 0; i < st_ctx->classifiers->len; i ++) {
+		cl = g_ptr_array_index (st_ctx->classifiers, i);
+
+		if (cl->cfg->opts) {
+			obj = ucl_object_find_key (cl->cfg->opts, "autolearn");
+
+			/* TODO: support range and lua for this option */
+			if (ucl_object_type (obj) == UCL_BOOLEAN) {
+				if (ucl_object_toboolean (obj)) {
+					/*
+					 * Default learning algorithm:
+					 *
+					 * - We learn spam if action is ACTION_REJECT
+					 * - We learn ham if score is less than zero
+					 */
+					mres = g_hash_table_lookup (task->results, DEFAULT_METRIC);
+
+					if (mres) {
+						mres->action = rspamd_check_action_metric (task,
+								mres->score,
+								&mres->required_score,
+								mres->metric);
+
+						if (mres->action == METRIC_ACTION_REJECT) {
+							task->flags |= RSPAMD_TASK_FLAG_LEARN_SPAM;
+							msg_info_task ("<%s>: autolearn spam for classifier "
+									"'%s' as message's "
+									"action is reject, score: %.2f",
+									task->message_id, cl->cfg->name,
+									mres->score);
+							ret = TRUE;
+							break;
+						}
+						else if (mres->score < 0) {
+							task->flags |= RSPAMD_TASK_FLAG_LEARN_HAM;
+							msg_info_task ("<%s>: autolearn ham for classifier "
+									"'%s' as message's "
+									"score is negative: %.2f",
+									task->message_id, cl->cfg->name,
+									mres->score);
+
+							ret = TRUE;
+							break;
+						}
+					}
+				}
+			}
+		}
+	}
+
+	return ret;
+}
+
 /**
  * Get the overall statistics for all statfile backends
  * @param cfg configuration
```