From 603f3664584a2942b718a16a2c303212fdc4871a Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Tue, 5 Jan 2016 15:20:28 +0000 Subject: [PATCH] Start rework --- src/libstat/backends/backends.h | 6 ++- src/libstat/classifiers/bayes.c | 16 ++------ src/libstat/classifiers/classifiers.h | 25 +++++------- src/libstat/stat_config.c | 56 +++++++++++++++++++++++++-- src/libstat/stat_internal.h | 27 ++++++++++--- 5 files changed, 90 insertions(+), 40 deletions(-) diff --git a/src/libstat/backends/backends.h b/src/libstat/backends/backends.h index 4ac59655c..42eba8d61 100644 --- a/src/libstat/backends/backends.h +++ b/src/libstat/backends/backends.h @@ -42,7 +42,8 @@ struct rspamd_task; struct rspamd_stat_backend { const char *name; - gpointer (*init)(struct rspamd_stat_ctx *ctx, struct rspamd_config *cfg); + gpointer (*init)(struct rspamd_stat_ctx *ctx, struct rspamd_config *cfg, + struct rspamd_statfile *st); gpointer (*runtime)(struct rspamd_task *task, struct rspamd_statfile_config *stcf, gboolean learn, gpointer ctx); gboolean (*process_token)(struct rspamd_task *task, struct token_node_s *tok, @@ -67,7 +68,8 @@ struct rspamd_stat_backend { }; #define RSPAMD_STAT_BACKEND_DEF(name) \ - gpointer rspamd_##name##_init (struct rspamd_stat_ctx *ctx, struct rspamd_config *cfg); \ + gpointer rspamd_##name##_init (struct rspamd_stat_ctx *ctx, \ + struct rspamd_config *cfg, struct rspamd_statfile *st); \ gpointer rspamd_##name##_runtime (struct rspamd_task *task, \ struct rspamd_statfile_config *stcf, \ gboolean learn, gpointer ctx); \ diff --git a/src/libstat/classifiers/bayes.c b/src/libstat/classifiers/bayes.c index 180aa4658..a271a424a 100644 --- a/src/libstat/classifiers/bayes.c +++ b/src/libstat/classifiers/bayes.c @@ -191,21 +191,13 @@ bayes_normalize_prob (gdouble x) return a*x4 + b*x3 + c*x2 + d*xx; } -struct classifier_ctx * -bayes_init (rspamd_mempool_t *pool, struct rspamd_classifier_config *cfg) +void +bayes_init (rspamd_mempool_t *pool, struct rspamd_classifier *cl) { - struct classifier_ctx *ctx = - rspamd_mempool_alloc (pool, sizeof (struct classifier_ctx)); - - ctx->pool = pool; - ctx->cfg = cfg; - ctx->debug = FALSE; - - return ctx; } gboolean -bayes_classify (struct classifier_ctx * ctx, +bayes_classify (struct rspamd_classifier * ctx, GTree *input, struct rspamd_classifier_runtime *rt, struct rspamd_task *task) @@ -357,7 +349,7 @@ bayes_learn_ham_callback (gpointer key, gpointer value, gpointer data) } gboolean -bayes_learn_spam (struct classifier_ctx * ctx, +bayes_learn_spam (struct rspamd_classifier * ctx, GTree *input, struct rspamd_classifier_runtime *rt, struct rspamd_task *task, diff --git a/src/libstat/classifiers/classifiers.h b/src/libstat/classifiers/classifiers.h index 9a30039df..62abb0052 100644 --- a/src/libstat/classifiers/classifiers.h +++ b/src/libstat/classifiers/classifiers.h @@ -9,39 +9,32 @@ struct rspamd_classifier_config; struct rspamd_task; - -/* Common classifier structure */ -struct classifier_ctx { - rspamd_mempool_t *pool; - GHashTable *results; - gboolean debug; - struct rspamd_classifier_config *cfg; -}; +struct rspamd_classifier; struct token_node_s; struct rspamd_classifier_runtime; struct rspamd_stat_classifier { char *name; - struct classifier_ctx * (*init_func)(rspamd_mempool_t *pool, - struct rspamd_classifier_config *cf); - gboolean (*classify_func)(struct classifier_ctx * ctx, + void (*init_func)(rspamd_mempool_t *pool, + struct rspamd_classifier *cl); + gboolean (*classify_func)(struct rspamd_classifier * ctx, GTree *input, struct rspamd_classifier_runtime *rt, struct rspamd_task *task); - gboolean (*learn_spam_func)(struct classifier_ctx * ctx, + gboolean (*learn_spam_func)(struct rspamd_classifier * ctx, GTree *input, struct rspamd_classifier_runtime *rt, struct rspamd_task *task, gboolean is_spam, GError **err); }; /* Bayes algorithm */ -struct classifier_ctx * bayes_init (rspamd_mempool_t *pool, - struct rspamd_classifier_config *cf); -gboolean bayes_classify (struct classifier_ctx * ctx, +void bayes_init (rspamd_mempool_t *pool, + struct rspamd_classifier *); +gboolean bayes_classify (struct rspamd_classifier * ctx, GTree *input, struct rspamd_classifier_runtime *rt, struct rspamd_task *task); -gboolean bayes_learn_spam (struct classifier_ctx * ctx, +gboolean bayes_learn_spam (struct rspamd_classifier * ctx, GTree *input, struct rspamd_classifier_runtime *rt, struct rspamd_task *task, diff --git a/src/libstat/stat_config.c b/src/libstat/stat_config.c index dbfe16c27..a8afc1358 100644 --- a/src/libstat/stat_config.c +++ b/src/libstat/stat_config.c @@ -93,6 +93,11 @@ void rspamd_stat_init (struct rspamd_config *cfg) { guint i; + GList *cur, *curst; + struct rspamd_classifier_config *clf; + struct rspamd_statfile_config *stf; + struct rspamd_stat_backend *bk; + struct rspamd_statfile *st; if (stat_ctx == NULL) { stat_ctx = g_slice_alloc0 (sizeof (*stat_ctx)); @@ -107,11 +112,52 @@ rspamd_stat_init (struct rspamd_config *cfg) stat_ctx->caches = stat_caches; stat_ctx->caches_count = G_N_ELEMENTS (stat_caches); stat_ctx->cfg = cfg; + REF_RETAIN (stat_ctx->cfg); + + /* Create statfiles from the classifiers */ + cur = cfg->classifiers; + + while (cur) { + clf = cur->data; + bk = rspamd_stat_get_backend (clf->backend); + g_assert (bk != NULL); + + /* XXX: + * Here we get the first classifier tokenizer config as the only one + * We NO LONGER support multiple tokenizers per rspamd instance + */ + if (stat_ctx->tkcf == NULL) { + stat_ctx->tokenizer = rspamd_stat_get_tokenizer (clf->tokenizer); + g_assert (stat_ctx->tokenizer != NULL); + stat_ctx->tkcf = stat_ctx->tokenizer->get_config (cfg->cfg_pool, + clf->tokenizer, NULL); + } - /* Init backends */ - for (i = 0; i < stat_ctx->backends_count; i ++) { - stat_ctx->backends[i].ctx = stat_ctx->backends[i].init (stat_ctx, cfg); - msg_debug_config ("added backend %s", stat_ctx->backends[i].name); + curst = clf->statfiles; + + while (curst) { + stf = curst->data; + st = g_slice_alloc0 (sizeof (*st)); + st->clcf = clf; + st->stcf = stf; + st->tkcf = stat_ctx->tkcf; + st->bkcf = stat_ctx->backends[i].init (stat_ctx, cfg, st); + msg_debug_config ("added backend %s", stat_ctx->backends[i].name); + + if (st->bkcf == NULL) { + msg_err_config ("cannot init backend %s for statfile %s", + clf->backend, stf->symbol); + + g_slice_free1 (sizeof (*st), st); + } + else { + g_ptr_array_add (stat_ctx->statfiles, st); + } + + curst = curst->next; + } + + cur = cur->next; } /* Init caches */ @@ -135,6 +181,8 @@ rspamd_stat_close (void) msg_debug_config ("closed backend %s", stat_ctx->backends[i].name); } } + + REF_RELEASE (stat_ctx->cfg); } struct rspamd_stat_ctx * diff --git a/src/libstat/stat_internal.h b/src/libstat/stat_internal.h index 640196788..18d412ce3 100644 --- a/src/libstat/stat_internal.h +++ b/src/libstat/stat_internal.h @@ -69,10 +69,20 @@ struct rspamd_classifier_runtime { gboolean skipped; }; -struct rspamd_token_result { - double value; - struct rspamd_statfile_runtime *st_runtime; - struct rspamd_classifier_runtime *cl_runtime; +/* Common classifier structure */ +struct rspamd_classifier { + struct rspamd_stat_cache *cache; + gpointer cachecf; + GArray *statfiles_ids; + struct rspamd_classifier_config *cfg; +}; + +struct rspamd_statfile { + gint id; + struct rspamd_statfile_config *stcf; + struct rspamd_classifier *classifier; + struct rspamd_stat_backend *backend; + gpointer bkcf; }; #define RSPAMD_MAX_TOKEN_LEN 16 @@ -80,10 +90,11 @@ typedef struct token_node_s { guchar data[RSPAMD_MAX_TOKEN_LEN]; guint window_idx; guint datalen; - GArray *results; + gdouble values[1]; } rspamd_token_t; struct rspamd_stat_ctx { + /* Subroutines for all objects */ struct rspamd_stat_classifier *classifiers; guint classifiers_count; struct rspamd_stat_tokenizer *tokenizers; @@ -93,8 +104,12 @@ struct rspamd_stat_ctx { struct rspamd_stat_cache *caches; guint caches_count; - guint statfiles; + /* Runtime configuration */ + GPtrArray *statfiles; /* struct statfile */ struct rspamd_config *cfg; + /* Global tokenizer */ + struct rspamd_stat_tokenizer *tokenizer; + gpointer tkcf; }; typedef enum rspamd_learn_cache_result { -- 2.39.5