gchar *name; /**< unique name of classifier */
guint32 min_tokens; /**< minimal number of tokens to process classifier */
guint32 max_tokens; /**< maximum number of tokens */
+ guint min_learns; /**< minimum number of learns for each statfile */
guint flags;
};
G_STRUCT_OFFSET (struct rspamd_classifier_config, max_tokens),
RSPAMD_CL_FLAG_INT_32,
"Maximum count of tokens (words) to be considered for statistics");
+ /*
+  * Register the "min_learns" classifier option: the key must name the
+  * field it fills, i.e. the min_learns member of
+  * struct rspamd_classifier_config.
+  */
+ rspamd_rcl_add_default_handler (sub,
+     "min_learns",
+     rspamd_rcl_parse_struct_integer,
+     G_STRUCT_OFFSET (struct rspamd_classifier_config, min_learns),
+     RSPAMD_CL_FLAG_UINT,
+     "Minimum number of learns for each statfile to use this classifier");
rspamd_rcl_add_default_handler (sub,
"backend",
rspamd_rcl_parse_struct_string,
memset (&cl, 0, sizeof (cl));
cl.task = task;
+ /*
+  * When a minimum learn count is configured (min_learns > 0), do not
+  * classify until both the ham and the spam class have reached it;
+  * log which class is short and return TRUE.
+  */
+ if (ctx->cfg->min_learns > 0 && ctx->ham_learns < ctx->cfg->min_learns) {
+     msg_info_task ("skip classification as ham class has not enough "
+         "learns: %ul, %ud required",
+         ctx->ham_learns, ctx->cfg->min_learns);
+
+     return TRUE;
+ }
+
+ if (ctx->cfg->min_learns > 0 && ctx->spam_learns < ctx->cfg->min_learns) {
+     msg_info_task ("skip classification as spam class has not enough "
+         "learns: %ul, %ud required",
+         ctx->spam_learns, ctx->cfg->min_learns);
+
+     return TRUE;
+ }
+
for (i = 0; i < tokens->len; i ++) {
tok = g_ptr_array_index (tokens, i);