Browse Source

Implement min and max tokens.

tags/0.9.0
Vsevolod Stakhov 9 years ago
parent
commit
1503e994ec
3 changed files with 39 additions and 1 deletions
  1. 2
    0
      src/libserver/cfg_file.h
  2. 11
    1
      src/libserver/cfg_rcl.c
  3. 26
    0
      src/libstat/stat_process.c

+ 2
- 0
src/libserver/cfg_file.h View File

@@ -154,6 +154,8 @@ struct rspamd_classifier_config {
GList *pre_callbacks; /**< list of callbacks that are called before classification */
GList *post_callbacks; /**< list of callbacks that are called after classification */
gchar *name; /**< unique name of classifier */
guint32 min_tokens; /**< minimal number of tokens to process classifier */
guint32 max_tokens; /**< maximum number of tokens */
};

struct rspamd_worker_bind_conf {

+ 11
- 1
src/libserver/cfg_rcl.c View File

@@ -930,7 +930,7 @@ rspamd_rcl_classifier_handler (struct rspamd_config *cfg,
{
const ucl_object_t *val, *cur;
ucl_object_iter_t it = NULL;
const gchar *key, *type;
const gchar *key;
struct rspamd_classifier_config *ccf;
gboolean res = TRUE;
struct rspamd_rcl_section *stat_section;
@@ -1361,6 +1361,16 @@ rspamd_rcl_config_init (void)
rspamd_rcl_parse_struct_string,
G_STRUCT_OFFSET (struct rspamd_classifier_config, tokenizer),
0);
rspamd_rcl_add_default_handler (sub,
"min_tokens",
rspamd_rcl_parse_struct_integer,
G_STRUCT_OFFSET (struct rspamd_classifier_config, min_tokens),
RSPAMD_CL_FLAG_INT_32);
rspamd_rcl_add_default_handler (sub,
"max_tokens",
rspamd_rcl_parse_struct_integer,
G_STRUCT_OFFSET (struct rspamd_classifier_config, max_tokens),
RSPAMD_CL_FLAG_INT_32);

/*
* Statfile defaults

+ 26
- 0
src/libstat/stat_process.c View File

@@ -37,7 +37,9 @@ struct rspamd_tokenizer_runtime {
};

/**
 * User data handed to preprocess_init_stat_token() through g_tree_foreach()
 * while the tokens of each tokenizer runtime are being traversed.
 */
struct preprocess_cb_data {
struct rspamd_task *task; /**< task being processed; its message_id is used in debug messages */
GList *classifier_runtimes; /**< list of classifier runtimes (filled from cl_runtimes by rspamd_stat_preprocess) */
struct rspamd_tokenizer_runtime *tok; /**< tokenizer runtime whose token tree is currently being traversed */
guint results_count; /**< set from result_size by rspamd_stat_preprocess */
};

@@ -59,6 +61,18 @@ preprocess_init_stat_token (gpointer k, gpointer v, gpointer d)

while (cur) {
cl_runtime = (struct rspamd_classifier_runtime *)cur->data;

if (cl_runtime->clcf->min_tokens > 0 &&
(guint32)g_tree_nnodes (cbdata->tok->tokens) < cl_runtime->clcf->min_tokens) {
/* Skip this classifier */
msg_debug ("<%s> contains less tokens than required for %s classifier: "
"%ud < %ud", cbdata->task->message_id, cl_runtime->clcf->name,
g_tree_nnodes (cbdata->tok->tokens),
cl_runtime->clcf->min_tokens);
cur = g_list_next (cur);
continue;
}

res = &g_array_index (t->results, struct rspamd_token_result, i);

curst = res->cl_runtime->st_runtime;
@@ -72,6 +86,16 @@ preprocess_init_stat_token (gpointer k, gpointer v, gpointer d)
if (st_runtime->backend->process_token (t, res,
st_runtime->backend->ctx)) {
cl_runtime->processed_tokens ++;

/*
 * Enforce the per-classifier max_tokens limit (0 means "no limit").
 * This function is used as a g_tree_foreach() callback, so returning
 * TRUE stops the traversal of the current token tree.
 */
if (cl_runtime->clcf->max_tokens > 0 &&
cl_runtime->processed_tokens > cl_runtime->clcf->max_tokens) {
/* "<%s>" expects the message id string, not the task pointer itself */
msg_debug ("<%s> contains more tokens than allowed for %s classifier: "
"%ud > %ud", cbdata->task->message_id, cl_runtime->clcf->name,
cl_runtime->processed_tokens,
cl_runtime->clcf->max_tokens);

return TRUE;
}
}

i ++;
@@ -192,9 +216,11 @@ rspamd_stat_preprocess (struct rspamd_stat_ctx *st_ctx,

cbdata.results_count = result_size;
cbdata.classifier_runtimes = cl_runtimes;
cbdata.task = task;

/* Allocate token results */
LL_FOREACH (tklist, tok) {
cbdata.tok = tok;
g_tree_foreach (tok->tokens, preprocess_init_stat_token, &cbdata);
}
}

Loading…
Cancel
Save