source.dussan.org Git - rspamd.git/commitdiff
[Feature] Add min learns to classifiers
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 19 Apr 2016 08:32:47 +0000 (09:32 +0100)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 19 Apr 2016 08:32:47 +0000 (09:32 +0100)
src/libserver/cfg_file.h
src/libserver/cfg_rcl.c
src/libstat/classifiers/bayes.c

index 67749c27641dc7971f3030a1bf38944b31a750b4..01183af352452b5d837b6e5b1aa38156a5ef5818 100644 (file)
@@ -153,6 +153,7 @@ struct rspamd_classifier_config {
        gchar *name;                                                                    /**< unique name of classifier                                                  */
        guint32 min_tokens;                                                             /**< minimal number of tokens to process classifier     */
        guint32 max_tokens;                                                             /**< maximum number of tokens                                                   */
+       guint min_learns;                                                               /**< minimum number of learns for each statfile                 */
        guint flags;
 };
 
index e93ba1d45459d2de6ff78f5d9499aba464f180ce..db4d26b7685313861aba0ca1f235fd49d211e0c1 100644 (file)
@@ -2085,6 +2085,12 @@ rspamd_rcl_config_init (struct rspamd_config *cfg)
                        G_STRUCT_OFFSET (struct rspamd_classifier_config, max_tokens),
                        RSPAMD_CL_FLAG_INT_32,
                        "Maximum count of tokens (words) to be considered for statistics");
+       rspamd_rcl_add_default_handler (sub,
+                       "min_learns", /* key must name the field set below, not duplicate "max_tokens" */
+                       rspamd_rcl_parse_struct_integer,
+                       G_STRUCT_OFFSET (struct rspamd_classifier_config, min_learns),
+                       RSPAMD_CL_FLAG_UINT,
+                       "Minimum number of learns for each statfile to use this classifier");
        rspamd_rcl_add_default_handler (sub,
                        "backend",
                        rspamd_rcl_parse_struct_string,
index 912fa5c2d28737f8f09422309a467b94b6764b87..867fe4dc649da3e55398158ae2e9764a54df4291 100644 (file)
@@ -211,6 +211,24 @@ bayes_classify (struct rspamd_classifier * ctx,
        memset (&cl, 0, sizeof (cl));
        cl.task = task;
 
+       /* Check min learns */
+       if (ctx->cfg->min_learns > 0) {
+               if (ctx->ham_learns < ctx->cfg->min_learns) {
+                       msg_info_task ("skip classification as ham class has not enough "
+                                       "learns: %ul, %ud required",
+                                       ctx->ham_learns, ctx->cfg->min_learns);
+
+                       return TRUE;
+               }
+               if (ctx->spam_learns < ctx->cfg->min_learns) {
+                       msg_info_task ("skip classification as spam class has not enough "
+                                       "learns: %ul, %ud required",
+                                       ctx->spam_learns, ctx->cfg->min_learns);
+
+                       return TRUE;
+               }
+       }
+
        for (i = 0; i < tokens->len; i ++) {
                tok = g_ptr_array_index (tokens, i);