GList *pre_callbacks; /**< list of callbacks that are called before classification */
GList *post_callbacks; /**< list of callbacks that are called after classification */
gchar *name; /**< unique name of classifier */
+ guint32 min_tokens; /**< minimal number of tokens to process classifier */
+ guint32 max_tokens; /**< maximum number of tokens */
};
struct rspamd_worker_bind_conf {
{
const ucl_object_t *val, *cur;
ucl_object_iter_t it = NULL;
- const gchar *key, *type;
+ const gchar *key;
struct rspamd_classifier_config *ccf;
gboolean res = TRUE;
struct rspamd_rcl_section *stat_section;
rspamd_rcl_parse_struct_string,
G_STRUCT_OFFSET (struct rspamd_classifier_config, tokenizer),
0);
+ rspamd_rcl_add_default_handler (sub,
+ "min_tokens",
+ rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET (struct rspamd_classifier_config, min_tokens),
+ RSPAMD_CL_FLAG_INT_32);
+ rspamd_rcl_add_default_handler (sub,
+ "max_tokens",
+ rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET (struct rspamd_classifier_config, max_tokens),
+ RSPAMD_CL_FLAG_INT_32);
/*
* Statfile defaults
};
struct preprocess_cb_data {
+ struct rspamd_task *task; /**< task set by the caller; its message_id is used in debug log messages */
GList *classifier_runtimes;
+ struct rspamd_tokenizer_runtime *tok; /**< tokenizer runtime being traversed; caller updates it before each g_tree_foreach over tok->tokens */
guint results_count;
};
while (cur) {
cl_runtime = (struct rspamd_classifier_runtime *)cur->data;
+
+ if (cl_runtime->clcf->min_tokens > 0 &&
+ (guint32)g_tree_nnodes (cbdata->tok->tokens) < cl_runtime->clcf->min_tokens) {
+ /* Skip this classifier */
+ msg_debug ("<%s> contains less tokens than required for %s classifier: "
+ "%ud < %ud", cbdata->task->message_id, cl_runtime->clcf->name,
+ g_tree_nnodes (cbdata->tok->tokens),
+ cl_runtime->clcf->min_tokens);
+ cur = g_list_next (cur);
+ continue;
+ }
+
res = &g_array_index (t->results, struct rspamd_token_result, i);
curst = res->cl_runtime->st_runtime;
if (st_runtime->backend->process_token (t, res,
st_runtime->backend->ctx)) {
cl_runtime->processed_tokens ++;
+
+ /*
+  * Enforce the per-classifier token limit; max_tokens == 0 disables it.
+  * The first %s must be the message id string (as in the min_tokens
+  * message), not the task pointer itself.
+  */
+ if (cl_runtime->clcf->max_tokens > 0 &&
+ cl_runtime->processed_tokens > cl_runtime->clcf->max_tokens) {
+ msg_debug ("<%s> contains more tokens than allowed for %s classifier: "
+ "%ud > %ud", cbdata->task->message_id, cl_runtime->clcf->name,
+ cl_runtime->processed_tokens,
+ cl_runtime->clcf->max_tokens);
+
+ return TRUE;
+ }
}
i ++;
cbdata.results_count = result_size;
cbdata.classifier_runtimes = cl_runtimes;
+ cbdata.task = task;
/* Allocate token results */
LL_FOREACH (tklist, tok) {
+ cbdata.tok = tok;
g_tree_foreach (tok->tokens, preprocess_init_stat_token, &cbdata);
}
}