From 4aec836b76851fdedcc5af034b1854de321f76e3 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Fri, 8 Jan 2016 16:08:07 +0000 Subject: [PATCH] Implement the case of incrementing backends for bayes --- src/libserver/cfg_file.h | 6 ++++++ src/libstat/backends/redis_backend.c | 1 + src/libstat/classifiers/bayes.c | 27 ++++++++++++++++----------- 3 files changed, 23 insertions(+), 11 deletions(-) diff --git a/src/libserver/cfg_file.h b/src/libserver/cfg_file.h index b03facd3a..7b49e4866 100644 --- a/src/libserver/cfg_file.h +++ b/src/libserver/cfg_file.h @@ -130,6 +130,12 @@ struct rspamd_tokenizer_config { /* Classifier has all integer values (e.g. bayes) */ #define RSPAMD_FLAG_CLASSIFIER_INTEGER (1 << 0) +/* + * Set if backend for a classifier is intended to increment and not set values + * (e.g. redis) + */ +#define RSPAMD_FLAG_CLASSIFIER_INCREMENTING_BACKEND (1 << 1) + /** * Classifier config definition */ diff --git a/src/libstat/backends/redis_backend.c b/src/libstat/backends/redis_backend.c index a83ab3081..240584a51 100644 --- a/src/libstat/backends/redis_backend.c +++ b/src/libstat/backends/redis_backend.c @@ -609,6 +609,7 @@ rspamd_redis_init (struct rspamd_stat_ctx *ctx, backend->timeout = REDIS_DEFAULT_TIMEOUT; } + stf->clcf->flags |= RSPAMD_FLAG_CLASSIFIER_INCREMENTING_BACKEND; return (gpointer)backend; } diff --git a/src/libstat/classifiers/bayes.c b/src/libstat/classifiers/bayes.c index 694898db5..3204efc16 100644 --- a/src/libstat/classifiers/bayes.c +++ b/src/libstat/classifiers/bayes.c @@ -311,10 +311,13 @@ bayes_learn_spam (struct rspamd_classifier * ctx, gint id; struct rspamd_statfile *st; rspamd_token_t *tok; + gboolean incrementing; g_assert (ctx != NULL); g_assert (tokens != NULL); + incrementing = ctx->cfg->flags & RSPAMD_FLAG_CLASSIFIER_INCREMENTING_BACKEND; + for (i = 0; i < tokens->len; i++) { tok = g_ptr_array_index (tokens, i); @@ -323,24 +326,26 @@ bayes_learn_spam (struct rspamd_classifier * ctx, st = g_ptr_array_index (ctx->ctx->statfiles, id); g_assert (st != NULL); - if (is_spam) { - if (st->stcf->is_spam) { - tok->values[id]++; + if (!!st->stcf->is_spam == !!is_spam) { + if (incrementing) { + tok->values[id] = 1; } - else if (tok->values[id] > 0 && unlearn) { - /* Unlearning */ - tok->values[id]--; + else { + tok->values[id]++; } } - else { - if (!st->stcf->is_spam) { - tok->values[id]++; + else if (tok->values[id] > 0 && unlearn) { + /* Unlearning */ + if (incrementing) { + tok->values[id] = -1; } - else if (tok->values[id] > 0 && unlearn) { - /* Unlearning */ + else { tok->values[id]--; } } + else if (incrementing) { + tok->values[id] = 0; + } } } -- 2.39.5