@@ -174,6 +174,8 @@ struct rspamd_classifier_config { | |||
gchar *name; /**< unique name of classifier */ | |||
guint32 min_tokens; /**< minimal number of tokens to process classifier */ | |||
guint32 max_tokens; /**< maximum number of tokens */ | |||
guint min_token_hits; /**< minimum number of hits for a token to be considered */ | |||
gdouble min_prob_strength; /**< use only tokens with probability in [0.5 - MPS, 0.5 + MPS] */ | |||
guint min_learns; /**< minimum number of learns for each statfile */ | |||
guint flags; | |||
}; |
@@ -2147,6 +2147,18 @@ rspamd_rcl_config_init (struct rspamd_config *cfg, GHashTable *skip_sections) | |||
G_STRUCT_OFFSET (struct rspamd_classifier_config, min_tokens), | |||
RSPAMD_CL_FLAG_INT_32, | |||
"Minimum count of tokens (words) to be considered for statistics"); | |||
rspamd_rcl_add_default_handler (sub, | |||
"min_token_hits", | |||
rspamd_rcl_parse_struct_integer, | |||
G_STRUCT_OFFSET (struct rspamd_classifier_config, min_token_hits), | |||
RSPAMD_CL_FLAG_UINT, | |||
"Minimum number of hits for a token to be considered"); | |||
rspamd_rcl_add_default_handler (sub, | |||
"min_prob_strength", | |||
rspamd_rcl_parse_struct_double, | |||
G_STRUCT_OFFSET (struct rspamd_classifier_config, min_token_hits), | |||
0, | |||
"Use only tokens with probability in [0.5 - MPS, 0.5 + MPS]"); | |||
rspamd_rcl_add_default_handler (sub, | |||
"max_tokens", | |||
rspamd_rcl_parse_struct_integer, |
@@ -971,7 +971,10 @@ rspamd_config_new_classifier (struct rspamd_config *cfg, | |||
c = | |||
rspamd_mempool_alloc0 (cfg->cfg_pool, | |||
sizeof (struct rspamd_classifier_config)); | |||
c->min_prob_strength = 0.343; | |||
c->min_token_hits = 2; | |||
} | |||
if (c->labels == NULL) { | |||
c->labels = g_hash_table_new_full (rspamd_str_hash, | |||
rspamd_str_equal, |