Fix statistics processing

author Vsevolod Stakhov <vsevolod@highsecure.ru>

Tue, 5 Jan 2016 18:55:56 +0000 (18:55 +0000)

committer Vsevolod Stakhov <vsevolod@highsecure.ru>

Tue, 5 Jan 2016 18:55:56 +0000 (18:55 +0000)
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 5 Jan 2016 18:55:56 +0000 (18:55 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 5 Jan 2016 18:55:56 +0000 (18:55 +0000)
diff --git a/src/libserver/task.h b/src/libserver/task.h

index 359b2f41f284010b0355e88989f867a549e1698b..ed18d99d0795348b532f5310fb6448c0ade4eec0 100644 (file)
--- a/src/libserver/task.h
+++ b/src/libserver/task.h
@@ -149,8 +149,7 @@ struct rspamd_task {
         GHashTable *raw_headers;                                                /**< list of raw headers                                                        */
         GHashTable *results;                                                    /**< hash table of metric_result indexed by
                                                                                                          *    metric's name                                                                     */
-       GHashTable *tokens;                                                             /**< hash table of tokens indexed by tokenizer
-                                                                                                        *    pointer                                                                           */
+       GPtrArray *tokens;                                                              /**< statistics tokens */
         InternetAddressList *rcpt_mime;                                 /**< list of all recipients                                                     */
         InternetAddressList *rcpt_envelope;                             /**< list of all recipients                                                     */
         InternetAddressList *from_mime;
@@ -158,7 +157,7 @@ struct rspamd_task {
  
         GList *messages;                                                                /**< list of messages that would be reported            */
         struct rspamd_re_runtime *re_rt;                                /**< regexp runtime                                                                     */
-       GList *cl_runtimes;                                                             /**< classifiers runtime                                                        */
+       GPtrArray *stat_runtimes;                                               /**< backend runtime                                                    */
         struct rspamd_config *cfg;                                              /**< pointer to config object                                           */
         GError *err;
         rspamd_mempool_t *task_pool;                                    /**< memory pool for task                                                       */
diff --git a/src/libstat/stat_config.c b/src/libstat/stat_config.c

index 1cf19d412550d7f97ed3c661b4303d29598a50d9..32d09f4a99c61244bd2c49c62db306e98d550caa 100644 (file)
--- a/src/libstat/stat_config.c
+++ b/src/libstat/stat_config.c
@@ -135,6 +135,9 @@ rspamd_stat_init (struct rspamd_config *cfg)
                 cl->cfg = clf;
                 cl->ctx = stat_ctx;
                 cl->statfiles_ids = g_array_new (FALSE, FALSE, sizeof (gint));
+               cl->subrs = rspamd_stat_get_classifier (clf->name);
+               g_assert (cl->subrs != NULL);
+               cl->subrs->init_func (cfg->cfg_pool, cl);
  
                 /* Init classifier cache */
                 if (clf->opts) {
diff --git a/src/libstat/stat_internal.h b/src/libstat/stat_internal.h

index edced84deda7000102ac435744c25a8d44f56c19..591c473814916b391c9842dc59b0d5ffe9ca6a39 100644 (file)
--- a/src/libstat/stat_internal.h
+++ b/src/libstat/stat_internal.h
@@ -46,6 +46,7 @@ struct rspamd_classifier {
         gulong spam_learns;
         gulong ham_learns;
         struct rspamd_classifier_config *cfg;
+       struct rspamd_stat_classifier *subrs;
  };
  
  struct rspamd_statfile {
diff --git a/src/libstat/stat_process.c b/src/libstat/stat_process.c

index 1506f4d489557d3b6c914520d15711977be572ce..8a426972713bd4b7a498247acbd4b22ef5dd42b8 100644 (file)
--- a/src/libstat/stat_process.c
+++ b/src/libstat/stat_process.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2015, Vsevolod Stakhov
+/* Copyright (c) 2015-2016, Vsevolod Stakhov
   * All rights reserved.
   *
   * Redistribution and use in source and binary forms, with or without
@@ -37,19 +37,8 @@
  
  static const gint similarity_treshold = 80;
  
-#if 0
-struct preprocess_cb_data {
-       struct rspamd_task *task;
-       GList *classifier_runtimes;
-       struct rspamd_tokenizer_runtime *tok;
-       guint results_count;
-       gboolean unlearn;
-       gboolean spam;
-};
-
  static void
  rspamd_stat_tokenize_header (struct rspamd_task *task,
-               struct rspamd_tokenizer_runtime *tok,
                 const gchar *name, const gchar *prefix, GArray *ar)
  {
         struct raw_header *rh, *cur;
@@ -82,8 +71,8 @@ rspamd_stat_tokenize_header (struct rspamd_task *task,
  }
  
  static void
-rspamd_stat_tokenize_parts_metadata (struct rspamd_task *task,
-               struct rspamd_tokenizer_runtime *tok)
+rspamd_stat_tokenize_parts_metadata (struct rspamd_stat_ctx *st_ctx,
+               struct rspamd_task *task)
  {
         struct rspamd_image *img;
         struct mime_part *part;
@@ -165,16 +154,17 @@ rspamd_stat_tokenize_parts_metadata (struct rspamd_task *task,
         cur = g_list_first (task->cfg->classify_headers);
  
         while (cur) {
-               rspamd_stat_tokenize_header (task, tok, cur->data, "UA:", ar);
+               rspamd_stat_tokenize_header (task, cur->data, "UA:", ar);
  
                 cur = g_list_next (cur);
         }
  
-       tok->tokenizer->tokenize_func (tok,
+       st_ctx->tokenizer->tokenize_func (st_ctx,
                         task->task_pool,
                         ar,
                         TRUE,
-                       "META:");
+                       "META:",
+                       task->tokens);
  
         g_array_free (ar, TRUE);
  }
@@ -184,24 +174,36 @@ rspamd_stat_tokenize_parts_metadata (struct rspamd_task *task,
   */
  static void
  rspamd_stat_process_tokenize (struct rspamd_stat_ctx *st_ctx,
-               struct rspamd_task *task, struct rspamd_tokenizer_runtime *tok)
+               struct rspamd_task *task)
  {
         struct mime_text_part *part;
         GArray *words;
         gchar *sub;
-       guint i;
+       guint i, reserved_len = 0;
         gint *pdiff;
-       gboolean compat;
  
-       compat = tok->tokenizer->is_compat (tok);
+       for (i = 0; i < task->text_parts->len; i++) {
+               part = g_ptr_array_index (task->text_parts, i);
+
+               if (!IS_PART_EMPTY (part) && part->normalized_words != NULL) {
+                       reserved_len += part->normalized_words->len;
+               }
+               /* XXX: normal window size */
+               reserved_len += 5;
+       }
+
+       task->tokens = g_ptr_array_sized_new (reserved_len);
+       rspamd_mempool_add_destructor (task->task_pool,
+                       rspamd_ptr_array_free_hard, task->tokens);
         pdiff = rspamd_mempool_get_variable (task->task_pool, "parts_distance");
  
         for (i = 0; i < task->text_parts->len; i ++) {
                 part = g_ptr_array_index (task->text_parts, i);
  
                 if (!IS_PART_EMPTY (part) && part->normalized_words != NULL) {
-                       tok->tokenizer->tokenize_func (tok, task->task_pool,
-                                       part->normalized_words, IS_PART_UTF (part), NULL);
+                       st_ctx->tokenizer->tokenize_func (st_ctx, task->task_pool,
+                                       part->normalized_words, IS_PART_UTF (part),
+                                       NULL, task->tokens);
                 }
  
  
@@ -220,324 +222,118 @@ rspamd_stat_process_tokenize (struct rspamd_stat_ctx *st_ctx,
         }
  
         if (sub != NULL) {
-               words = rspamd_tokenize_text (sub, strlen (sub), TRUE, NULL, NULL, compat,
+               words = rspamd_tokenize_text (sub, strlen (sub), TRUE, NULL, NULL, FALSE,
                                 NULL);
                 if (words != NULL) {
-                       tok->tokenizer->tokenize_func (tok,
+                       st_ctx->tokenizer->tokenize_func (st_ctx,
                                         task->task_pool,
                                         words,
                                         TRUE,
-                                       "SUBJECT");
+                                       "SUBJECT",
+                                       task->tokens);
                         g_array_free (words, TRUE);
                 }
         }
  
-       rspamd_stat_tokenize_parts_metadata (task, tok);
+       rspamd_stat_tokenize_parts_metadata (st_ctx, task);
  }
  
-static struct rspamd_tokenizer_runtime *
-rspamd_stat_get_tokenizer_runtime (struct rspamd_tokenizer_config *cf,
-               struct rspamd_stat_ctx *st_ctx,
-               struct rspamd_task *task,
-               struct rspamd_classifier_runtime *cl_runtime,
-               gpointer conf, gsize conf_len)
+static void
+rspamd_stat_preprocess (struct rspamd_stat_ctx *st_ctx,
+               struct rspamd_task *task, gboolean learn)
  {
-       struct rspamd_tokenizer_runtime *tok = NULL;
-       const gchar *name;
-
-       if (cf == NULL || cf->name == NULL) {
-               name = RSPAMD_DEFAULT_TOKENIZER;
-               cf->name = name;
-       }
-       else {
-               name = cf->name;
-       }
-
-       tok = rspamd_mempool_alloc (task->task_pool, sizeof (*tok));
-       tok->tokenizer = rspamd_stat_get_tokenizer (name);
-       tok->tkcf = cf;
-
-       if (tok->tokenizer == NULL) {
-               return NULL;
-       }
-
-       if (!tok->tokenizer->load_config (task->task_pool, tok, conf, conf_len)) {
-               return NULL;
-       }
+       guint i;
+       struct rspamd_statfile *st;
+       gpointer bk_run;
  
-       tok->tokens = g_tree_new (token_node_compare_func);
+       rspamd_stat_process_tokenize (st_ctx, task);
+       task->stat_runtimes = g_ptr_array_sized_new (st_ctx->statfiles->len);
         rspamd_mempool_add_destructor (task->task_pool,
-                       (rspamd_mempool_destruct_t)g_tree_destroy, tok->tokens);
-       tok->name = name;
-       rspamd_stat_process_tokenize (st_ctx, task, tok);
-       cl_runtime->tok = tok;
-
-       return tok;
-}
+                       rspamd_ptr_array_free_hard, task->stat_runtimes);
  
-static gboolean
-preprocess_init_stat_token (gpointer k, gpointer v, gpointer d)
-{
-       rspamd_token_t *t = (rspamd_token_t *)v;
-       struct preprocess_cb_data *cbdata = (struct preprocess_cb_data *)d;
-       struct rspamd_statfile_runtime *st_runtime;
-       struct rspamd_classifier_runtime *cl_runtime;
-       struct rspamd_token_result *res;
-       GList *cur, *curst;
-       struct rspamd_task *task;
-       gint i = 0;
+       for (i = 0; i < st_ctx->statfiles->len; i ++) {
+               st = g_ptr_array_index (st_ctx->statfiles, i);
+               g_assert (st != NULL);
  
-       task = cbdata->task;
-       t->results = g_array_sized_new (FALSE, TRUE,
-                       sizeof (struct rspamd_token_result), cbdata->results_count);
-       g_array_set_size (t->results, cbdata->results_count);
-       rspamd_mempool_add_destructor (cbdata->task->task_pool,
-                       rspamd_array_free_hard, t->results);
-
-       cur = g_list_first (cbdata->classifier_runtimes);
+               bk_run = st->backend->runtime (task, st->stcf, learn, st->bkcf);
  
-       while (cur) {
-               cl_runtime = (struct rspamd_classifier_runtime *)cur->data;
-
-               if (cl_runtime->clcf->min_tokens > 0 &&
-                               (guint32)g_tree_nnodes (cbdata->tok->tokens) < cl_runtime->clcf->min_tokens) {
-                       /* Skip this classifier */
-                       cur = g_list_next (cur);
-                       cl_runtime->skipped = TRUE;
-                       continue;
+               if (bk_run == NULL) {
+                       msg_err_task ("cannot init backend %s for statfile %s",
+                                       st->backend->name, st->stcf->symbol);
                 }
  
-               curst = cl_runtime->st_runtime;
-
-               while (curst) {
-
-                       st_runtime = (struct rspamd_statfile_runtime *)curst->data;
-                       res = &g_array_index (t->results, struct rspamd_token_result, i);
-                       res->cl_runtime = cl_runtime;
-                       res->st_runtime = st_runtime;
-
-                       if (cl_runtime->backend->process_token (cbdata->task, t, res,
-                                       cl_runtime->backend->ctx)) {
-
-                               if (cl_runtime->clcf->max_tokens > 0 &&
-                                               cl_runtime->processed_tokens > cl_runtime->clcf->max_tokens) {
-                                       msg_debug_task ("message contains more tokens than allowed for %s classifier: "
-                                                       "%uL > %ud", cl_runtime->clcf->name,
-                                                       cl_runtime->processed_tokens,
-                                                       cl_runtime->clcf->max_tokens);
-
-                                       return TRUE;
-                               }
-                       }
-
-                       i ++;
-                       curst = g_list_next (curst);
-               }
-
-               cur = g_list_next (cur);
+               g_ptr_array_add (task->stat_runtimes, bk_run);
         }
-
-
-       return FALSE;
  }
  
-static GList*
-rspamd_stat_preprocess (struct rspamd_stat_ctx *st_ctx,
-               struct rspamd_task *task,
-               lua_State *L,
-               gint op,
-               gboolean spam,
-               const gchar *classifier,
-               GError **err)
+static void
+rspamd_stat_backends_process (struct rspamd_stat_ctx *st_ctx,
+               struct rspamd_task *task)
  {
-       struct rspamd_classifier_config *clcf;
-       struct rspamd_statfile_config *stcf;
-       struct rspamd_classifier_runtime *cl_runtime;
-       struct rspamd_statfile_runtime *st_runtime;
-       struct rspamd_stat_backend *bk;
-       gpointer backend_runtime, tok_config;
-       GList *cur, *st_list = NULL, *curst;
-       GList *cl_runtimes = NULL;
-       guint result_size = 0, start_pos = 0, end_pos = 0;
-       gsize conf_len;
-       struct preprocess_cb_data cbdata;
-
-       cur = g_list_first (task->cfg->classifiers);
-
-       while (cur) {
-               clcf = (struct rspamd_classifier_config *)cur->data;
-               st_list = NULL;
-
-               if (classifier != NULL &&
-                                       (clcf->name == NULL || strcmp (clcf->name, classifier) != 0)) {
-                       /* Skip this classifier */
-                       msg_debug_task ("skip classifier %s, as we are requested to check %s only",
-                                       clcf->name, classifier);
-                       cur = g_list_next (cur);
-                       continue;
-               }
-
-               if (clcf->pre_callbacks != NULL) {
-                       st_list = rspamd_lua_call_cls_pre_callbacks (clcf, task, FALSE,
-                                       FALSE, L);
-               }
-               if (st_list != NULL) {
-                       rspamd_mempool_add_destructor (task->task_pool,
-                                       (rspamd_mempool_destruct_t)g_list_free, st_list);
-               }
-               else {
-                       st_list = clcf->statfiles;
-               }
-
-               /* Now init runtime values */
-               cl_runtime = rspamd_mempool_alloc0 (task->task_pool, sizeof (*cl_runtime));
-               cl_runtime->cl = rspamd_stat_get_classifier (clcf->classifier);
-
-               if (cl_runtime->cl == NULL) {
-                       g_set_error (err, rspamd_stat_quark(), 500,
-                                       "classifier %s is not defined", clcf->classifier);
-                       g_list_free (cl_runtimes);
-                       return NULL;
-               }
-
-               cl_runtime->clcf = clcf;
-
-               bk = rspamd_stat_get_backend (clcf->backend);
-               if (bk == NULL) {
-                       g_set_error (err, rspamd_stat_quark(), 500,
-                                       "backend %s is not defined", clcf->backend);
-                       g_list_free (cl_runtimes);
-                       return NULL;
-               }
-
-               cl_runtime->backend = bk;
-
-               curst = st_list;
-               while (curst != NULL) {
-                       stcf = (struct rspamd_statfile_config *)curst->data;
-
-                       /* On learning skip statfiles that do not belong to class */
-                       if (op == RSPAMD_LEARN_OP && (spam != stcf->is_spam)) {
-                               curst = g_list_next (curst);
-                               continue;
-                       }
-
-                       backend_runtime = bk->runtime (task, stcf, op != RSPAMD_CLASSIFY_OP,
-                                       bk->ctx);
-
-                       if (backend_runtime == NULL) {
-                               if (op != RSPAMD_CLASSIFY_OP) {
-                                       /* Assume backend absence as fatal error */
-                                       g_set_error (err, rspamd_stat_quark(), 500,
-                                                       "cannot open backend for statfile %s", stcf->symbol);
-                                       g_list_free (cl_runtimes);
-
-                                       return NULL;
-                               }
-                               else {
-                                       /* Just skip this element */
-                                       msg_warn ("backend of type %s does not exist: %s",
-                                                       clcf->backend, stcf->symbol);
-                                       curst = g_list_next (curst);
-                                       continue;
-                               }
-                       }
-
-                       tok_config = bk->load_tokenizer_config (backend_runtime,
-                                       &conf_len);
-
-                       if (cl_runtime->tok == NULL) {
-                               cl_runtime->tok = rspamd_stat_get_tokenizer_runtime (clcf->tokenizer,
-                                               st_ctx, task, cl_runtime, tok_config, conf_len);
-
-                               if (cl_runtime->tok == NULL) {
-                                       g_set_error (err, rspamd_stat_quark(), 500,
-                                                       "cannot initialize tokenizer for statfile %s", stcf->symbol);
-                                       g_list_free (cl_runtimes);
+       guint i;
+       struct rspamd_statfile *st;
+       struct rspamd_classifier *cl;
+       gpointer bk_run;
  
-                                       return NULL;
-                               }
-                       }
+       g_assert (task->stat_runtimes != NULL);
  
-                       if (!cl_runtime->tok->tokenizer->compatible_config (
-                                       cl_runtime->tok, tok_config, conf_len)) {
-                               g_set_error (err, rspamd_stat_quark(), 500,
-                                               "incompatible tokenizer for statfile %s", stcf->symbol);
-                               g_list_free (cl_runtimes);
+       for (i = 0; i < st_ctx->statfiles->len; i++) {
+               st = g_ptr_array_index (st_ctx->statfiles, i);
+               bk_run = g_ptr_array_index (task->stat_runtimes, i);
+               cl = st->classifier;
+               g_assert (st != NULL);
  
-                               return NULL;
-                       }
+               if (bk_run != NULL) {
+                       st->backend->process_tokens (task, task->tokens, i, bk_run);
  
-                       st_runtime = rspamd_mempool_alloc0 (task->task_pool,
-                                       sizeof (*st_runtime));
-                       st_runtime->st = stcf;
-                       st_runtime->backend_runtime = backend_runtime;
-
-                       if (stcf->is_spam) {
-                               cl_runtime->total_spam += bk->total_learns (task, backend_runtime,
-                                               bk->ctx);
+                       if (st->stcf->is_spam) {
+                               cl->spam_learns = st->backend->total_learns (task,
+                                               bk_run,
+                                               st_ctx);
                         }
                         else {
-                               cl_runtime->total_ham += bk->total_learns (task, backend_runtime,
-                                               bk->ctx);
+                               cl->ham_learns = st->backend->total_learns (task,
+                                               bk_run,
+                                               st_ctx);
                         }
-
-                       cl_runtime->st_runtime = g_list_prepend (cl_runtime->st_runtime,
-                                       st_runtime);
-                       result_size ++;
-
-                       curst = g_list_next (curst);
-                       end_pos ++;
-               }
-
-               if (cl_runtime->st_runtime != NULL) {
-                       rspamd_mempool_add_destructor (task->task_pool,
-                                       (rspamd_mempool_destruct_t)g_list_free,
-                                       cl_runtime->st_runtime);
-                       cl_runtimes = g_list_prepend (cl_runtimes, cl_runtime);
                 }
+       }
+}
  
-               /* Set positions in the results array */
-               cl_runtime->start_pos = start_pos;
-               cl_runtime->end_pos = end_pos;
+static void
+rspamd_stat_backends_post_process (struct rspamd_stat_ctx *st_ctx,
+               struct rspamd_task *task)
+{
+       guint i;
+       struct rspamd_statfile *st;
+       gpointer bk_run;
  
-               msg_debug_task ("added runtime for %s classifier from %ud to %ud",
-                               clcf->name, start_pos, end_pos);
+       g_assert (task->stat_runtimes != NULL);
  
-               start_pos = end_pos;
+       for (i = 0; i < st_ctx->statfiles->len; i++) {
+               st = g_ptr_array_index (st_ctx->statfiles, i);
+               bk_run = g_ptr_array_index (task->stat_runtimes, i);
+               g_assert (st != NULL);
  
-               /* Next classifier */
-               cur = g_list_next (cur);
+               if (bk_run != NULL) {
+                       st->backend->finalize_process (task, bk_run, st_ctx);
+               }
         }
+}
  
-       if (cl_runtimes != NULL) {
-               /* Reverse list as we have used g_list_prepend */
-               cl_runtimes = g_list_reverse (cl_runtimes);
-               rspamd_mempool_add_destructor (task->task_pool,
-                               (rspamd_mempool_destruct_t) g_list_free,
-                               cl_runtimes);
-               cur = g_list_first (cl_runtimes);
-
-               while (cur) {
-                       cl_runtime = cur->data;
+static void
+rspamd_stat_classifiers_process (struct rspamd_stat_ctx *st_ctx,
+               struct rspamd_task *task)
+{
+       guint i;
+       struct rspamd_classifier *cl;
  
-                       cbdata.results_count = result_size;
-                       cbdata.classifier_runtimes = cl_runtimes;
-                       cbdata.task = task;
-                       cbdata.tok = cl_runtime->tok;
-                       g_tree_foreach (cbdata.tok->tokens, preprocess_init_stat_token,
-                                       &cbdata);
+       for (i = 0; i < st_ctx->classifiers->len; i++) {
+               cl = g_ptr_array_index (st_ctx->classifiers, i);
+               g_assert (cl != NULL);
  
-                       cur = g_list_next (cur);
-               }
+               cl->subrs->classify_func (cl, task->tokens, task);
         }
-       else if (classifier != NULL) {
-               /* We likely cannot find any classifier with this name */
-               g_set_error (err, rspamd_stat_quark (), 404,
-                               "cannot find classifier %s", classifier);
-       }
-
-       return cl_runtimes;
  }
  
  rspamd_stat_result_t
@@ -545,102 +341,30 @@ rspamd_stat_classify (struct rspamd_task *task, lua_State *L, guint stage,
                 GError **err)
  {
         struct rspamd_stat_ctx *st_ctx;
-       struct rspamd_statfile_runtime *st_run;
-       struct rspamd_classifier_runtime *cl_run;
-       GList *cl_runtimes;
-       GList *cur, *curst;
-       gboolean ret = RSPAMD_STAT_PROCESS_OK;
+       rspamd_stat_result_t ret = RSPAMD_STAT_PROCESS_OK;
  
         st_ctx = rspamd_stat_get_ctx ();
         g_assert (st_ctx != NULL);
-       cl_runtimes = task->cl_runtimes;
  
-       if (stage == RSPAMD_TASK_STAGE_CLASSIFIERS_PRE) {
-               /* Initialize classifiers and statfiles runtime */
-               if (task->cl_runtimes == NULL) {
-                       if ((cl_runtimes = rspamd_stat_preprocess (st_ctx, task, L,
-                                       RSPAMD_CLASSIFY_OP, FALSE, NULL, err)) == NULL) {
-                               return RSPAMD_STAT_PROCESS_OK;
-                       }
-
-                       task->cl_runtimes = cl_runtimes;
-                       cur = cl_runtimes;
-
-                       /* Finalize backend so it can load tokens delayed if needed */
-                       while (cur) {
-                               cl_run = (struct rspamd_classifier_runtime *) cur->data;
-                               curst = cl_run->st_runtime;
-
-                               while (curst) {
-                                       st_run = curst->data;
-                                       cl_run->backend->finalize_process (task,
-                                                       st_run->backend_runtime,
-                                                       cl_run->backend->ctx);
-                                       curst = g_list_next (curst);
-                               }
  
-                               cur = g_list_next (cur);
-                       }
-               }
+       if (stage == RSPAMD_TASK_STAGE_CLASSIFIERS_PRE) {
+               /* Preprocess tokens */
+               rspamd_stat_preprocess (st_ctx, task, FALSE);
         }
         else if (stage == RSPAMD_TASK_STAGE_CLASSIFIERS) {
-               cur = cl_runtimes;
-
-               /* The first stage of classification */
-               while (cur) {
-                       cl_run = (struct rspamd_classifier_runtime *) cur->data;
-                       cl_run->stage = RSPAMD_STAT_STAGE_PRE;
-
-                       if (cl_run->cl) {
-                               cl_run->clctx = cl_run->cl->init_func (task->task_pool,
-                                               cl_run->clcf);
-
-                               if (cl_run->clctx != NULL) {
-                                       cl_run->cl->classify_func (cl_run->clctx, cl_run->tok->tokens,
-                                                       cl_run, task);
-                               }
-                       }
-
-                       cur = g_list_next (cur);
-               }
+               /* Process backends */
+               rspamd_stat_backends_process (st_ctx, task);
         }
         else if (stage == RSPAMD_TASK_STAGE_CLASSIFIERS_POST) {
-               cur = cl_runtimes;
-               /* The second stage of classification */
-               while (cur) {
-                       cl_run = (struct rspamd_classifier_runtime *) cur->data;
-                       cl_run->stage = RSPAMD_STAT_STAGE_POST;
-
-                       if (cl_run->skipped) {
-                               cur = g_list_next (cur);
-                               continue;
-                       }
-
-                       cl_run = (struct rspamd_classifier_runtime *) cur->data;
-                       cl_run->stage = RSPAMD_STAT_STAGE_POST;
-
-                       if (cl_run->skipped) {
-                               cur = g_list_next (cur);
-                               continue;
-                       }
-
-                       if (cl_run->cl) {
-                               if (cl_run->clctx != NULL) {
-                                       if (cl_run->cl->classify_func (cl_run->clctx,
-                                                       cl_run->tok->tokens,
-                                                       cl_run, task)) {
-                                               ret = RSPAMD_STAT_PROCESS_OK;
-                                       }
-                               }
-                       }
-
-                       cur = g_list_next (cur);
-               }
+               /* Process classifiers */
+               rspamd_stat_backends_post_process (st_ctx, task);
+               rspamd_stat_classifiers_process (st_ctx, task);
         }
  
         return ret;
  }
  
+#if 0
  static gboolean
  rspamd_stat_learn_token (gpointer k, gpointer v, gpointer d)
  {
@@ -911,4 +635,26 @@ rspamd_stat_result_t rspamd_stat_statistics (struct rspamd_task *task,
  
         return RSPAMD_STAT_PROCESS_OK;
  }
+#else
+/* TODO: finish learning */
+rspamd_stat_result_t rspamd_stat_learn (struct rspamd_task *task,
+               gboolean spam, lua_State *L, const gchar *classifier,
+               GError **err)
+{
+       return RSPAMD_STAT_PROCESS_ERROR;
+}
+
+/**
+ * Get the overall statistics for all statfile backends
+ * @param cfg configuration
+ * @param total_learns the total number of learns is stored here
+ * @return array of statistical information
+ */
+rspamd_stat_result_t rspamd_stat_statistics (struct rspamd_task *task,
+               struct rspamd_config *cfg,
+               guint64 *total_learns,
+               ucl_object_t **res)
+{
+       return RSPAMD_STAT_PROCESS_ERROR;
+}
  #endif
author	Vsevolod Stakhov <vsevolod@highsecure.ru>
	Tue, 5 Jan 2016 18:55:56 +0000 (18:55 +0000)
committer	Vsevolod Stakhov <vsevolod@highsecure.ru>
	Tue, 5 Jan 2016 18:55:56 +0000 (18:55 +0000)
src/libserver/task.h		patch \| blob \| history
src/libstat/stat_config.c		patch \| blob \| history
src/libstat/stat_internal.h		patch \| blob \| history
src/libstat/stat_process.c		patch \| blob \| history