source.dussan.org Git - rspamd.git/commitdiff
Implement min and max tokens.
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Mon, 26 Jan 2015 13:22:59 +0000 (13:22 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Mon, 26 Jan 2015 13:22:59 +0000 (13:22 +0000)
src/libserver/cfg_file.h
src/libserver/cfg_rcl.c
src/libstat/stat_process.c

index 611f1c371de0e8f777b8680af1f6afa874f1c677..a0eb149df47511f1da6fae3d13fdaee48890cf75 100644 (file)
@@ -154,6 +154,8 @@ struct rspamd_classifier_config {
        GList *pre_callbacks;                           /**< list of callbacks that are called before classification */
        GList *post_callbacks;                          /**< list of callbacks that are called after classification */
        gchar *name;                                                                    /**< unique name of classifier                                                  */
+       guint32 min_tokens;                                                             /**< minimal number of tokens to process classifier     */
+       guint32 max_tokens;                                                             /**< maximum number of tokens                                                   */
 };
 
 struct rspamd_worker_bind_conf {
index 60ec7f641f796fbe355e9d72534129972ffb10bc..53cd0fb0acd51eb70e1c241f1a0aaf58188c8908 100644 (file)
@@ -930,7 +930,7 @@ rspamd_rcl_classifier_handler (struct rspamd_config *cfg,
 {
        const ucl_object_t *val, *cur;
        ucl_object_iter_t it = NULL;
-       const gchar *key, *type;
+       const gchar *key;
        struct rspamd_classifier_config *ccf;
        gboolean res = TRUE;
        struct rspamd_rcl_section *stat_section;
@@ -1361,6 +1361,16 @@ rspamd_rcl_config_init (void)
                rspamd_rcl_parse_struct_string,
                G_STRUCT_OFFSET (struct rspamd_classifier_config, tokenizer),
                0);
+       rspamd_rcl_add_default_handler (sub,
+               "min_tokens",
+               rspamd_rcl_parse_struct_integer,
+               G_STRUCT_OFFSET (struct rspamd_classifier_config, min_tokens),
+               RSPAMD_CL_FLAG_INT_32);
+       rspamd_rcl_add_default_handler (sub,
+               "max_tokens",
+               rspamd_rcl_parse_struct_integer,
+               G_STRUCT_OFFSET (struct rspamd_classifier_config, max_tokens),
+               RSPAMD_CL_FLAG_INT_32);
 
        /*
         * Statfile defaults
index 524640e4e6e317db2c8522550fb9c12052bc7dc7..b20ad7fa70354b45c6165d65862fd8c65d2094c8 100644 (file)
@@ -37,7 +37,9 @@ struct rspamd_tokenizer_runtime {
 };
 
 struct preprocess_cb_data { /* user data handed to preprocess_init_stat_token through g_tree_foreach */
+       struct rspamd_task *task; /**< task being processed; its message_id is used in debug log messages */
        GList *classifier_runtimes; /**< classifier runtimes list, filled in by rspamd_stat_preprocess */
+       struct rspamd_tokenizer_runtime *tok; /**< tokenizer runtime whose tokens tree is currently being traversed */
        guint results_count; /**< set from result_size by rspamd_stat_preprocess; presumably the number of per-token results - TODO confirm */
 };
 
@@ -59,6 +61,18 @@ preprocess_init_stat_token (gpointer k, gpointer v, gpointer d)
 
        while (cur) {
                cl_runtime = (struct rspamd_classifier_runtime *)cur->data;
+
+               if (cl_runtime->clcf->min_tokens > 0 &&
+                               (guint32)g_tree_nnodes (cbdata->tok->tokens) < cl_runtime->clcf->min_tokens) {
+                       /* Skip this classifier */
+                       msg_debug ("<%s> contains less tokens than required for %s classifier: "
+                                       "%ud < %ud", cbdata->task->message_id, cl_runtime->clcf->name,
+                                       g_tree_nnodes (cbdata->tok->tokens),
+                                       cl_runtime->clcf->min_tokens);
+                       cur = g_list_next (cur);
+                       continue;
+               }
+
                res = &g_array_index (t->results, struct rspamd_token_result, i);
 
                curst = res->cl_runtime->st_runtime;
@@ -72,6 +86,16 @@ preprocess_init_stat_token (gpointer k, gpointer v, gpointer d)
                        if (st_runtime->backend->process_token (t, res,
                                        st_runtime->backend->ctx)) {
                                cl_runtime->processed_tokens ++;
+
+                               if (cl_runtime->clcf->max_tokens > 0 &&
+                                               cl_runtime->processed_tokens > cl_runtime->clcf->max_tokens) {
+                                       msg_debug ("<%s> contains more tokens than allowed for %s classifier: "
+                                                       "%ud > %ud", cbdata->task, cl_runtime->clcf->name,
+                                                       cl_runtime->processed_tokens,
+                                                       cl_runtime->clcf->max_tokens);
+
+                                       return TRUE;
+                               }
                        }
 
                        i ++;
@@ -192,9 +216,11 @@ rspamd_stat_preprocess (struct rspamd_stat_ctx *st_ctx,
 
                cbdata.results_count = result_size;
                cbdata.classifier_runtimes = cl_runtimes;
+               cbdata.task = task;
 
                /* Allocate token results */
                LL_FOREACH (tklist, tok) {
+                       cbdata.tok = tok;
                        g_tree_foreach (tok->tokens, preprocess_init_stat_token, &cbdata);
                }
        }