From dd061a3fd83860f26522f0cc4e44e5be2488614d Mon Sep 17 00:00:00 2001
From: Vsevolod Stakhov
Date: Tue, 19 Apr 2016 09:32:47 +0100
Subject: [PATCH] [Feature] Add min learns to classifiers

Add a `min_learns` option to the classifier configuration. When it is
greater than zero, bayes_classify logs an info message and returns TRUE
before processing any tokens if either the ham or the spam learn counter
is below `min_learns`.

The option is registered under its own `min_learns` key so that it does
not clash with the existing `max_tokens` handler.

---
 src/libserver/cfg_file.h        |  1 +
 src/libserver/cfg_rcl.c         |  6 ++++++
 src/libstat/classifiers/bayes.c | 18 ++++++++++++++++++
 3 files changed, 25 insertions(+)

diff --git a/src/libserver/cfg_file.h b/src/libserver/cfg_file.h
index 67749c276..01183af35 100644
--- a/src/libserver/cfg_file.h
+++ b/src/libserver/cfg_file.h
@@ -153,6 +153,7 @@ struct rspamd_classifier_config {
 	gchar *name;                                    /**< unique name of classifier */
 	guint32 min_tokens;                             /**< minimal number of tokens to process classifier */
 	guint32 max_tokens;                             /**< maximum number of tokens */
+	guint min_learns;                               /**< minimum number of learns for each statfile */
 	guint flags;
 };
 
diff --git a/src/libserver/cfg_rcl.c b/src/libserver/cfg_rcl.c
index e93ba1d45..db4d26b76 100644
--- a/src/libserver/cfg_rcl.c
+++ b/src/libserver/cfg_rcl.c
@@ -2085,6 +2085,12 @@ rspamd_rcl_config_init (struct rspamd_config *cfg)
 			G_STRUCT_OFFSET (struct rspamd_classifier_config, max_tokens),
 			RSPAMD_CL_FLAG_INT_32,
 			"Maximum count of tokens (words) to be considered for statistics");
+	rspamd_rcl_add_default_handler (sub,
+			"min_learns",
+			rspamd_rcl_parse_struct_integer,
+			G_STRUCT_OFFSET (struct rspamd_classifier_config, min_learns),
+			RSPAMD_CL_FLAG_UINT,
+			"Minimum number of learns for each statfile to use this classifier");
 	rspamd_rcl_add_default_handler (sub,
 			"backend",
 			rspamd_rcl_parse_struct_string,
diff --git a/src/libstat/classifiers/bayes.c b/src/libstat/classifiers/bayes.c
index 912fa5c2d..867fe4dc6 100644
--- a/src/libstat/classifiers/bayes.c
+++ b/src/libstat/classifiers/bayes.c
@@ -211,6 +211,24 @@ bayes_classify (struct rspamd_classifier * ctx,
 	memset (&cl, 0, sizeof (cl));
 	cl.task = task;
 
+	/* Check min learns */
+	if (ctx->cfg->min_learns > 0) {
+		if (ctx->ham_learns < ctx->cfg->min_learns) {
+			msg_info_task ("skip classification as ham class has not enough "
+					"learns: %ul, %ud required",
+					ctx->ham_learns, ctx->cfg->min_learns);
+
+			return TRUE;
+		}
+		if (ctx->spam_learns < ctx->cfg->min_learns) {
+			msg_info_task ("skip classification as spam class has not enough "
+					"learns: %ul, %ud required",
+					ctx->spam_learns, ctx->cfg->min_learns);
+
+			return TRUE;
+		}
+	}
+
 	for (i = 0; i < tokens->len; i ++) {
 		tok = g_ptr_array_index (tokens, i);
 
-- 
2.39.5