summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2016-04-19 09:32:47 +0100
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2016-04-19 09:32:47 +0100
commit dd061a3fd83860f26522f0cc4e44e5be2488614d (patch)
tree 08d8cd122894bcf2cde90f02ec3cde2a9be86ef0 /src
parent 8778b569f84e566f4c1c059a3e42e47181fe2beb (diff)
download rspamd-dd061a3fd83860f26522f0cc4e44e5be2488614d.tar.gz
rspamd-dd061a3fd83860f26522f0cc4e44e5be2488614d.zip
[Feature] Add min learns to classifiers
Diffstat (limited to 'src')
-rw-r--r-- src/libserver/cfg_file.h 1
-rw-r--r-- src/libserver/cfg_rcl.c 6
-rw-r--r-- src/libstat/classifiers/bayes.c 18
3 files changed, 25 insertions, 0 deletions
diff --git a/src/libserver/cfg_file.h b/src/libserver/cfg_file.h
index 67749c276..01183af35 100644
--- a/src/libserver/cfg_file.h
+++ b/src/libserver/cfg_file.h
@@ -153,6 +153,7 @@ struct rspamd_classifier_config {
gchar *name; /**< unique name of classifier */
guint32 min_tokens; /**< minimal number of tokens to process classifier */
guint32 max_tokens; /**< maximum number of tokens */
+ guint min_learns; /**< minimum number of learns for each statfile */
guint flags;
};
diff --git a/src/libserver/cfg_rcl.c b/src/libserver/cfg_rcl.c
index e93ba1d45..db4d26b76 100644
--- a/src/libserver/cfg_rcl.c
+++ b/src/libserver/cfg_rcl.c
@@ -2086,6 +2086,12 @@ rspamd_rcl_config_init (struct rspamd_config *cfg)
RSPAMD_CL_FLAG_INT_32,
"Maximum count of tokens (words) to be considered for statistics");
rspamd_rcl_add_default_handler (sub,
+ "min_learns", /* key must name the min_learns field this handler fills (was a duplicate "max_tokens") */
+ rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET (struct rspamd_classifier_config, min_learns),
+ RSPAMD_CL_FLAG_UINT,
+ "Minimum number of learns for each statfile to use this classifier");
+ rspamd_rcl_add_default_handler (sub,
"backend",
rspamd_rcl_parse_struct_string,
G_STRUCT_OFFSET (struct rspamd_classifier_config, backend),
diff --git a/src/libstat/classifiers/bayes.c b/src/libstat/classifiers/bayes.c
index 912fa5c2d..867fe4dc6 100644
--- a/src/libstat/classifiers/bayes.c
+++ b/src/libstat/classifiers/bayes.c
@@ -211,6 +211,24 @@ bayes_classify (struct rspamd_classifier * ctx,
memset (&cl, 0, sizeof (cl));
cl.task = task;
+ /* Check min learns */
+ if (ctx->cfg->min_learns > 0) {
+ if (ctx->ham_learns < ctx->cfg->min_learns) {
+ msg_info_task ("skip classification as ham class has not enough "
+ "learns: %ul, %ud required",
+ ctx->ham_learns, ctx->cfg->min_learns);
+
+ return TRUE;
+ }
+ if (ctx->spam_learns < ctx->cfg->min_learns) {
+ msg_info_task ("skip classification as spam class has not enough "
+ "learns: %ul, %ud required",
+ ctx->spam_learns, ctx->cfg->min_learns);
+
+ return TRUE;
+ }
+ }
+
for (i = 0; i < tokens->len; i ++) {
tok = g_ptr_array_index (tokens, i);