aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/libserver/cfg_file.h1
-rw-r--r--src/libserver/cfg_rcl.c6
-rw-r--r--src/libstat/classifiers/bayes.c18
3 files changed, 25 insertions, 0 deletions
diff --git a/src/libserver/cfg_file.h b/src/libserver/cfg_file.h
index 67749c276..01183af35 100644
--- a/src/libserver/cfg_file.h
+++ b/src/libserver/cfg_file.h
@@ -153,6 +153,7 @@ struct rspamd_classifier_config {
gchar *name; /**< unique name of classifier */
guint32 min_tokens; /**< minimal number of tokens to process classifier */
guint32 max_tokens; /**< maximum number of tokens */
+ guint min_learns; /**< minimum number of learns for each statfile */
guint flags;
};
diff --git a/src/libserver/cfg_rcl.c b/src/libserver/cfg_rcl.c
index e93ba1d45..db4d26b76 100644
--- a/src/libserver/cfg_rcl.c
+++ b/src/libserver/cfg_rcl.c
@@ -2086,6 +2086,12 @@ rspamd_rcl_config_init (struct rspamd_config *cfg)
RSPAMD_CL_FLAG_INT_32,
"Maximum count of tokens (words) to be considered for statistics");
rspamd_rcl_add_default_handler (sub,
+ "min_learns",
+ rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET (struct rspamd_classifier_config, min_learns),
+ RSPAMD_CL_FLAG_UINT,
+ "Minimum number of learns for each statfile to use this classifier");
+ rspamd_rcl_add_default_handler (sub,
"backend",
rspamd_rcl_parse_struct_string,
G_STRUCT_OFFSET (struct rspamd_classifier_config, backend),
diff --git a/src/libstat/classifiers/bayes.c b/src/libstat/classifiers/bayes.c
index 912fa5c2d..867fe4dc6 100644
--- a/src/libstat/classifiers/bayes.c
+++ b/src/libstat/classifiers/bayes.c
@@ -211,6 +211,24 @@ bayes_classify (struct rspamd_classifier * ctx,
memset (&cl, 0, sizeof (cl));
cl.task = task;
+ /* If cfg->min_learns is non-zero, log and return TRUE without classifying when ham_learns or spam_learns is below it */
+ if (ctx->cfg->min_learns > 0) {
+ if (ctx->ham_learns < ctx->cfg->min_learns) {
+ msg_info_task ("skip classification as ham class has not enough "
+ "learns: %ul, %ud required",
+ ctx->ham_learns, ctx->cfg->min_learns);
+
+ return TRUE;
+ }
+ if (ctx->spam_learns < ctx->cfg->min_learns) {
+ msg_info_task ("skip classification as spam class has not enough "
+ "learns: %ul, %ud required",
+ ctx->spam_learns, ctx->cfg->min_learns);
+
+ return TRUE;
+ }
+ }
+
for (i = 0; i < tokens->len; i ++) {
tok = g_ptr_array_index (tokens, i);