aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2015-02-13 17:03:10 +0000
committerVsevolod Stakhov <vsevolod@highsecure.ru>2015-02-13 17:03:10 +0000
commitcb0ded1e07c56db657cf1008338ef35f8697e6a6 (patch)
treeced2aba44d12be5b7966cd3d7ef7269fc13e27e4
parentfdff2c694ba91d9e96300b1020d577cfbe8fbd3b (diff)
downloadrspamd-cb0ded1e07c56db657cf1008338ef35f8697e6a6.tar.gz
rspamd-cb0ded1e07c56db657cf1008338ef35f8697e6a6.zip
Add logic to ignore duplicate learning requests.
-rw-r--r--src/libstat/stat_process.c23
1 files changed, 22 insertions, 1 deletions
diff --git a/src/libstat/stat_process.c b/src/libstat/stat_process.c
index dc58e0ac4..8492349a4 100644
--- a/src/libstat/stat_process.c
+++ b/src/libstat/stat_process.c
@@ -34,6 +34,7 @@ struct preprocess_cb_data {
GList *classifier_runtimes;
struct rspamd_tokenizer_runtime *tok;
guint results_count;
+ gboolean unlearn;
};
static struct rspamd_tokenizer_runtime *
@@ -450,8 +451,10 @@ rspamd_stat_learn (struct rspamd_task *task, gboolean spam, lua_State *L,
struct preprocess_cb_data cbdata;
GList *cl_runtimes;
GList *cur, *curst;
- gboolean ret = FALSE;
+ gboolean ret = FALSE, unlearn = FALSE;
gulong nrev;
+ rspamd_learn_t learn_res = RSPAMD_LEARN_OK;
+ guint i;
st_ctx = rspamd_stat_get_ctx ();
g_assert (st_ctx != NULL);
@@ -483,6 +486,23 @@ rspamd_stat_learn (struct rspamd_task *task, gboolean spam, lua_State *L,
cur = g_list_next (cur);
}
+ /* Check whether this message has already been learned */
+ for (i = 0; i < st_ctx->caches_count; i ++) {
+ learn_res = st_ctx->caches[i].process (task, spam,
+ st_ctx->caches[i].ctx);
+
+ if (learn_res == RSPAMD_LEARN_INGORE) {
+ /* Do not learn twice */
+ g_set_error (err, rspamd_stat_quark (), 404, "<%s> has been already "
+ "learned as %s, ignore it", task->message_id,
+ spam ? "spam" : "ham");
+ return FALSE;
+ }
+ else if (learn_res == RSPAMD_LEARN_UNLEARN) {
+ unlearn = TRUE;
+ }
+ }
+
/* Initialize classifiers and statfiles runtime */
if ((cl_runtimes = rspamd_stat_preprocess (st_ctx, task, tklist, L,
TRUE, spam, err)) == NULL) {
@@ -507,6 +527,7 @@ rspamd_stat_learn (struct rspamd_task *task, gboolean spam, lua_State *L,
cbdata.classifier_runtimes = cur;
cbdata.task = task;
cbdata.tok = cl_run->tok;
+ cbdata.unlearn = unlearn;
g_tree_foreach (cl_run->tok->tokens, rspamd_stat_learn_token,
&cbdata);