From: Vsevolod Stakhov Date: Mon, 26 Jan 2015 13:22:59 +0000 (+0000) Subject: Implement min and max tokens. X-Git-Tag: 0.9.0~833 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=1503e994ecace47a1cfc48a681744ac57a439a4a;p=rspamd.git Implement min and max tokens. --- diff --git a/src/libserver/cfg_file.h b/src/libserver/cfg_file.h index 611f1c371..a0eb149df 100644 --- a/src/libserver/cfg_file.h +++ b/src/libserver/cfg_file.h @@ -154,6 +154,8 @@ struct rspamd_classifier_config { GList *pre_callbacks; /**< list of callbacks that are called before classification */ GList *post_callbacks; /**< list of callbacks that are called after classification */ gchar *name; /**< unique name of classifier */ + guint32 min_tokens; /**< minimal number of tokens to process classifier */ + guint32 max_tokens; /**< maximum number of tokens */ }; struct rspamd_worker_bind_conf { diff --git a/src/libserver/cfg_rcl.c b/src/libserver/cfg_rcl.c index 60ec7f641..53cd0fb0a 100644 --- a/src/libserver/cfg_rcl.c +++ b/src/libserver/cfg_rcl.c @@ -930,7 +930,7 @@ rspamd_rcl_classifier_handler (struct rspamd_config *cfg, { const ucl_object_t *val, *cur; ucl_object_iter_t it = NULL; - const gchar *key, *type; + const gchar *key; struct rspamd_classifier_config *ccf; gboolean res = TRUE; struct rspamd_rcl_section *stat_section; @@ -1361,6 +1361,16 @@ rspamd_rcl_config_init (void) rspamd_rcl_parse_struct_string, G_STRUCT_OFFSET (struct rspamd_classifier_config, tokenizer), 0); + rspamd_rcl_add_default_handler (sub, + "min_tokens", + rspamd_rcl_parse_struct_integer, + G_STRUCT_OFFSET (struct rspamd_classifier_config, min_tokens), + RSPAMD_CL_FLAG_INT_32); + rspamd_rcl_add_default_handler (sub, + "max_tokens", + rspamd_rcl_parse_struct_integer, + G_STRUCT_OFFSET (struct rspamd_classifier_config, max_tokens), + RSPAMD_CL_FLAG_INT_32); /* * Statfile defaults diff --git a/src/libstat/stat_process.c b/src/libstat/stat_process.c index 524640e4e..b20ad7fa7 100644 --- a/src/libstat/stat_process.c +++ b/src/libstat/stat_process.c @@ -37,7 +37,9 @@ struct rspamd_tokenizer_runtime { }; struct preprocess_cb_data { + struct rspamd_task *task; GList *classifier_runtimes; + struct rspamd_tokenizer_runtime *tok; guint results_count; }; @@ -59,6 +61,18 @@ preprocess_init_stat_token (gpointer k, gpointer v, gpointer d) while (cur) { cl_runtime = (struct rspamd_classifier_runtime *)cur->data; + + if (cl_runtime->clcf->min_tokens > 0 && + (guint32)g_tree_nnodes (cbdata->tok->tokens) < cl_runtime->clcf->min_tokens) { + /* Skip this classifier */ + msg_debug ("<%s> contains less tokens than required for %s classifier: " + "%ud < %ud", cbdata->task->message_id, cl_runtime->clcf->name, + g_tree_nnodes (cbdata->tok->tokens), + cl_runtime->clcf->min_tokens); + cur = g_list_next (cur); + continue; + } + res = &g_array_index (t->results, struct rspamd_token_result, i); curst = res->cl_runtime->st_runtime; @@ -72,6 +86,16 @@ preprocess_init_stat_token (gpointer k, gpointer v, gpointer d) if (st_runtime->backend->process_token (t, res, st_runtime->backend->ctx)) { cl_runtime->processed_tokens ++; + + if (cl_runtime->clcf->max_tokens > 0 && + cl_runtime->processed_tokens > cl_runtime->clcf->max_tokens) { + msg_debug ("<%s> contains more tokens than allowed for %s classifier: " + "%ud > %ud", cbdata->task, cl_runtime->clcf->name, + cl_runtime->processed_tokens, + cl_runtime->clcf->max_tokens); + + return TRUE; + } } i ++; @@ -192,9 +216,11 @@ rspamd_stat_preprocess (struct rspamd_stat_ctx *st_ctx, cbdata.results_count = result_size; cbdata.classifier_runtimes = cl_runtimes; + cbdata.task = task; /* Allocate token results */ LL_FOREACH (tklist, tok) { + cbdata.tok = tok; g_tree_foreach (tok->tokens, preprocess_init_stat_token, &cbdata); } }