gchar *name; /**< unique name of classifier */
guint32 min_tokens; /**< minimal number of tokens to process classifier */
guint32 max_tokens; /**< maximum number of tokens */
+ guint min_token_hits; /**< minimum number of hits for a token to be considered */
+ gdouble min_prob_strength; /**< use only tokens with probability in [0.5 - MPS, 0.5 + MPS] */
guint min_learns; /**< minimum number of learns for each statfile */
guint flags;
};
G_STRUCT_OFFSET (struct rspamd_classifier_config, min_tokens),
RSPAMD_CL_FLAG_INT_32,
"Minimum count of tokens (words) to be considered for statistics");
+ rspamd_rcl_add_default_handler (sub, /* register the "min_token_hits" option on `sub` */
+ "min_token_hits",
+ rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET (struct rspamd_classifier_config, min_token_hits), /* field this option is bound to */
+ RSPAMD_CL_FLAG_UINT, /* matches the guint declaration of min_token_hits */
+ "Minimum number of hits for a token to be considered");
+ /* Bind "min_prob_strength" to its own gdouble field. The offset must name
+  * min_prob_strength: pointing the double parser at the guint
+  * min_token_hits field would target the wrong (and smaller) member and
+  * leave min_prob_strength unconfigurable. */
+ rspamd_rcl_add_default_handler (sub,
+ "min_prob_strength",
+ rspamd_rcl_parse_struct_double,
+ G_STRUCT_OFFSET (struct rspamd_classifier_config, min_prob_strength),
+ 0,
+ "Use only tokens with probability in [0.5 - MPS, 0.5 + MPS]");
rspamd_rcl_add_default_handler (sub,
"max_tokens",
rspamd_rcl_parse_struct_integer,
c =
rspamd_mempool_alloc0 (cfg->cfg_pool,
sizeof (struct rspamd_classifier_config));
+ c->min_prob_strength = 0.343; /* initial value for the newly allocated config; rationale for 0.343 is not visible here */
+ c->min_token_hits = 2; /* initial value for the newly allocated config */
}
+
if (c->labels == NULL) {
c->labels = g_hash_table_new_full (rspamd_str_hash,
rspamd_str_equal,