From: Vsevolod Stakhov <vsevolod@highsecure.ru>
Date: Wed, 6 Jan 2016 23:03:25 +0000 (+0000)
Subject: Do not autolearn if we have the same class for this message
X-Git-Tag: 1.1.0~113
X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=5876bd0e9b4bea3154d305f013e5c1bd4c713511;p=rspamd.git

Do not autolearn if we have the same class for this message
---

diff --git a/src/libstat/stat_process.c b/src/libstat/stat_process.c
index 4045951eb..1d12c67ba 100644
--- a/src/libstat/stat_process.c
+++ b/src/libstat/stat_process.c
@@ -520,7 +520,7 @@ rspamd_stat_backends_learn (struct rspamd_stat_ctx *st_ctx,
 			}
 
 			if (!task->flags & RSPAMD_TASK_FLAG_UNLEARN) {
-				if (spam != st->stcf->is_spam) {
+				if (!!spam != !!st->stcf->is_spam) {
 					/* If we are not unlearning, then do not touch another class */
 					continue;
 				}
@@ -575,7 +575,7 @@ rspamd_stat_backends_post_learn (struct rspamd_stat_ctx *st_ctx,
 			}
 
 			if (!task->flags & RSPAMD_TASK_FLAG_UNLEARN) {
-				if (spam != st->stcf->is_spam) {
+				if (!!spam != !!st->stcf->is_spam) {
 					/* If we are not unlearning, then do not touch another class */
 					continue;
 				}
@@ -583,7 +583,7 @@ rspamd_stat_backends_post_learn (struct rspamd_stat_ctx *st_ctx,
 				st->backend->inc_learns (task, bk_run, st_ctx);
 			}
 			else {
-				if (spam == st->stcf->is_spam) {
+				if (!!spam == !!st->stcf->is_spam) {
 					st->backend->inc_learns (task, bk_run, st_ctx);
 				}
 				else {
@@ -643,6 +643,37 @@ rspamd_stat_learn (struct rspamd_task *task,
 	return ret;
 }
 
+static gboolean
+rspamd_stat_has_classifier_symbols (struct rspamd_task *task,
+		struct metric_result *mres,
+		struct rspamd_classifier *cl)
+{
+	guint i;
+	gint id;
+	struct rspamd_statfile *st;
+	struct rspamd_stat_ctx *st_ctx;
+	gboolean is_spam;
+
+	st_ctx = rspamd_stat_get_ctx ();
+	is_spam = !!(task->flags & RSPAMD_TASK_FLAG_LEARN_SPAM);
+
+	for (i = 0; i < cl->statfiles_ids->len; i ++) {
+		id = g_array_index (cl->statfiles_ids, gint, i);
+		st = g_ptr_array_index (st_ctx->statfiles, id);
+
+		if (g_hash_table_lookup (mres->symbols, st->stcf->symbol)) {
+			if (is_spam == !!st->stcf->is_spam) {
+				msg_debug_task ("do not autolearn %s as symbol %s is already "
+						"added", st->stcf->symbol);
+
+				return TRUE;
+			}
+		}
+	}
+
+	return FALSE;
+}
+
 gboolean
 rspamd_stat_check_autolearn (struct rspamd_task *task)
 {
@@ -682,23 +713,38 @@ rspamd_stat_check_autolearn (struct rspamd_task *task)
 
 						if (mres->action == METRIC_ACTION_REJECT) {
 							task->flags |= RSPAMD_TASK_FLAG_LEARN_SPAM;
-							msg_info_task ("<%s>: autolearn spam for classifier "
-									"'%s' as message's "
-									"action is reject, score: %.2f",
-									task->message_id, cl->cfg->name,
-									mres->score);
+
 							ret = TRUE;
-							break;
 						}
 						else if (mres->score < 0) {
 							task->flags |= RSPAMD_TASK_FLAG_LEARN_HAM;
-							msg_info_task ("<%s>: autolearn ham for classifier "
-									"'%s' as message's "
-									"score is negative: %.2f",
-									task->message_id, cl->cfg->name,
-									mres->score);
-
 							ret = TRUE;
+						}
+
+						/* Do not autolearn if we have this symbol already */
+						if (ret &&
+								rspamd_stat_has_classifier_symbols (task, mres, cl)) {
+							ret = FALSE;
+							task->flags &= ~(RSPAMD_TASK_FLAG_LEARN_HAM |
+									RSPAMD_TASK_FLAG_LEARN_SPAM);
+						}
+						else {
+							if (task->flags & RSPAMD_TASK_FLAG_LEARN_HAM) {
+								msg_info_task ("<%s>: autolearn ham for classifier "
+										"'%s' as message's "
+										"score is negative: %.2f",
+										task->message_id, cl->cfg->name,
+										mres->score);
+							}
+							else {
+								msg_info_task ("<%s>: autolearn spam for classifier "
+										"'%s' as message's "
+										"action is reject, score: %.2f",
+										task->message_id, cl->cfg->name,
+										mres->score);
+							}
+
+							task->classifier = cl->cfg->name;
 							break;
 						}
 					}