aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/libserver/cfg_file.h2
-rw-r--r--src/libserver/cfg_rcl.c12
-rw-r--r--src/libstat/stat_process.c26
3 files changed, 39 insertions, 1 deletions
diff --git a/src/libserver/cfg_file.h b/src/libserver/cfg_file.h
index 611f1c371..a0eb149df 100644
--- a/src/libserver/cfg_file.h
+++ b/src/libserver/cfg_file.h
@@ -154,6 +154,8 @@ struct rspamd_classifier_config {
GList *pre_callbacks; /**< list of callbacks that are called before classification */
GList *post_callbacks; /**< list of callbacks that are called after classification */
gchar *name; /**< unique name of classifier */
+ guint32 min_tokens; /**< minimal number of tokens to process classifier */
+ guint32 max_tokens; /**< maximum number of tokens */
};
struct rspamd_worker_bind_conf {
diff --git a/src/libserver/cfg_rcl.c b/src/libserver/cfg_rcl.c
index 60ec7f641..53cd0fb0a 100644
--- a/src/libserver/cfg_rcl.c
+++ b/src/libserver/cfg_rcl.c
@@ -930,7 +930,7 @@ rspamd_rcl_classifier_handler (struct rspamd_config *cfg,
{
const ucl_object_t *val, *cur;
ucl_object_iter_t it = NULL;
- const gchar *key, *type;
+ const gchar *key;
struct rspamd_classifier_config *ccf;
gboolean res = TRUE;
struct rspamd_rcl_section *stat_section;
@@ -1361,6 +1361,16 @@ rspamd_rcl_config_init (void)
rspamd_rcl_parse_struct_string,
G_STRUCT_OFFSET (struct rspamd_classifier_config, tokenizer),
0);
+ rspamd_rcl_add_default_handler (sub,
+ "min_tokens",
+ rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET (struct rspamd_classifier_config, min_tokens),
+ RSPAMD_CL_FLAG_INT_32);
+ rspamd_rcl_add_default_handler (sub,
+ "max_tokens",
+ rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET (struct rspamd_classifier_config, max_tokens),
+ RSPAMD_CL_FLAG_INT_32);
/*
* Statfile defaults
diff --git a/src/libstat/stat_process.c b/src/libstat/stat_process.c
index 524640e4e..b20ad7fa7 100644
--- a/src/libstat/stat_process.c
+++ b/src/libstat/stat_process.c
@@ -37,7 +37,9 @@ struct rspamd_tokenizer_runtime {
};
struct preprocess_cb_data { /* user data handed to preprocess_init_stat_token through g_tree_foreach */
+ struct rspamd_task *task; /* task being preprocessed; its message_id is used in debug logging */
GList *classifier_runtimes; /* classifier runtimes list, assigned from cl_runtimes by rspamd_stat_preprocess */
+ struct rspamd_tokenizer_runtime *tok; /* tokenizer runtime whose token tree is currently being traversed */
guint results_count; /* assigned from result_size by rspamd_stat_preprocess */
};
@@ -59,6 +61,18 @@ preprocess_init_stat_token (gpointer k, gpointer v, gpointer d)
while (cur) {
cl_runtime = (struct rspamd_classifier_runtime *)cur->data;
+
+ if (cl_runtime->clcf->min_tokens > 0 &&
+ (guint32)g_tree_nnodes (cbdata->tok->tokens) < cl_runtime->clcf->min_tokens) {
+ /* Skip this classifier */
+ msg_debug ("<%s> contains less tokens than required for %s classifier: "
+ "%ud < %ud", cbdata->task->message_id, cl_runtime->clcf->name,
+ g_tree_nnodes (cbdata->tok->tokens),
+ cl_runtime->clcf->min_tokens);
+ cur = g_list_next (cur);
+ continue;
+ }
+
res = &g_array_index (t->results, struct rspamd_token_result, i);
curst = res->cl_runtime->st_runtime;
@@ -72,6 +86,16 @@ preprocess_init_stat_token (gpointer k, gpointer v, gpointer d)
if (st_runtime->backend->process_token (t, res,
st_runtime->backend->ctx)) {
cl_runtime->processed_tokens ++;
+
+ if (cl_runtime->clcf->max_tokens > 0 && /* 0 means "no upper limit" */
+ cl_runtime->processed_tokens > cl_runtime->clcf->max_tokens) {
+ msg_debug ("<%s> contains more tokens than allowed for %s classifier: "
+ "%ud > %ud", cbdata->task->message_id, cl_runtime->clcf->name, /* %s needs the message id string, not the task pointer */
+ cl_runtime->processed_tokens,
+ cl_runtime->clcf->max_tokens);
+
+ return TRUE; /* a TRUE result from the traverse callback stops g_tree_foreach */
+ }
}
i ++;
@@ -192,9 +216,11 @@ rspamd_stat_preprocess (struct rspamd_stat_ctx *st_ctx,
cbdata.results_count = result_size;
cbdata.classifier_runtimes = cl_runtimes;
+ cbdata.task = task;
/* Allocate token results */
LL_FOREACH (tklist, tok) {
+ cbdata.tok = tok;
g_tree_foreach (tok->tokens, preprocess_init_stat_token, &cbdata);
}
}