diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2016-04-19 09:32:47 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2016-04-19 09:32:47 +0100 |
commit | dd061a3fd83860f26522f0cc4e44e5be2488614d (patch) | |
tree | 08d8cd122894bcf2cde90f02ec3cde2a9be86ef0 /src | |
parent | 8778b569f84e566f4c1c059a3e42e47181fe2beb (diff) | |
download | rspamd-dd061a3fd83860f26522f0cc4e44e5be2488614d.tar.gz rspamd-dd061a3fd83860f26522f0cc4e44e5be2488614d.zip |
[Feature] Add min learns to classifiers
Diffstat (limited to 'src')
-rw-r--r-- | src/libserver/cfg_file.h | 1 | ||||
-rw-r--r-- | src/libserver/cfg_rcl.c | 6 | ||||
-rw-r--r-- | src/libstat/classifiers/bayes.c | 18 |
3 files changed, 25 insertions, 0 deletions
diff --git a/src/libserver/cfg_file.h b/src/libserver/cfg_file.h
index 67749c276..01183af35 100644
--- a/src/libserver/cfg_file.h
+++ b/src/libserver/cfg_file.h
@@ -153,6 +153,7 @@ struct rspamd_classifier_config {
 	gchar *name; /**< unique name of classifier */
 	guint32 min_tokens; /**< minimal number of tokens to process classifier */
 	guint32 max_tokens; /**< maximum number of tokens */
+	guint min_learns; /**< minimum number of learns for each statfile */
 	guint flags;
 };
 
diff --git a/src/libserver/cfg_rcl.c b/src/libserver/cfg_rcl.c
index e93ba1d45..db4d26b76 100644
--- a/src/libserver/cfg_rcl.c
+++ b/src/libserver/cfg_rcl.c
@@ -2086,6 +2086,12 @@ rspamd_rcl_config_init (struct rspamd_config *cfg)
 			RSPAMD_CL_FLAG_INT_32,
 			"Maximum count of tokens (words) to be considered for statistics");
 	rspamd_rcl_add_default_handler (sub,
+			"min_learns",
+			rspamd_rcl_parse_struct_integer,
+			G_STRUCT_OFFSET (struct rspamd_classifier_config, min_learns),
+			RSPAMD_CL_FLAG_UINT,
+			"Minimum number of learns for each statfile to use this classifier");
+	rspamd_rcl_add_default_handler (sub,
 			"backend",
 			rspamd_rcl_parse_struct_string,
 			G_STRUCT_OFFSET (struct rspamd_classifier_config, backend),
diff --git a/src/libstat/classifiers/bayes.c b/src/libstat/classifiers/bayes.c
index 912fa5c2d..867fe4dc6 100644
--- a/src/libstat/classifiers/bayes.c
+++ b/src/libstat/classifiers/bayes.c
@@ -211,6 +211,24 @@ bayes_classify (struct rspamd_classifier * ctx,
 	memset (&cl, 0, sizeof (cl));
 	cl.task = task;
 
+	/* Check min learns */
+	if (ctx->cfg->min_learns > 0) {
+		if (ctx->ham_learns < ctx->cfg->min_learns) {
+			msg_info_task ("skip classification as ham class has not enough "
+					"learns: %ul, %ud required",
+					ctx->ham_learns, ctx->cfg->min_learns);
+
+			return TRUE;
+		}
+		if (ctx->spam_learns < ctx->cfg->min_learns) {
+			msg_info_task ("skip classification as spam class has not enough "
+					"learns: %ul, %ud required",
+					ctx->spam_learns, ctx->cfg->min_learns);
+
+			return TRUE;
+		}
+	}
+
 	for (i = 0; i < tokens->len; i ++) {
 		tok = g_ptr_array_index (tokens, i);
 