source.dussan.org Git - rspamd.git/commitdiff
Start rework
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 5 Jan 2016 15:20:28 +0000 (15:20 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 5 Jan 2016 15:20:28 +0000 (15:20 +0000)
src/libstat/backends/backends.h
src/libstat/classifiers/bayes.c
src/libstat/classifiers/classifiers.h
src/libstat/stat_config.c
src/libstat/stat_internal.h

index 4ac59655ccfcdd9e2e21ed5bc0c476cb2ef3ef77..42eba8d61dfdbe21296a27b46a45cb4d3ed7e7dd 100644 (file)
@@ -42,7 +42,8 @@ struct rspamd_task;
 
 struct rspamd_stat_backend {
        const char *name;
-       gpointer (*init)(struct rspamd_stat_ctx *ctx, struct rspamd_config *cfg);
+       gpointer (*init)(struct rspamd_stat_ctx *ctx, struct rspamd_config *cfg,
+                       struct rspamd_statfile *st);
        gpointer (*runtime)(struct rspamd_task *task,
                        struct rspamd_statfile_config *stcf, gboolean learn, gpointer ctx);
        gboolean (*process_token)(struct rspamd_task *task, struct token_node_s *tok,
@@ -67,7 +68,8 @@ struct rspamd_stat_backend {
 };
 
 #define RSPAMD_STAT_BACKEND_DEF(name) \
-               gpointer rspamd_##name##_init (struct rspamd_stat_ctx *ctx, struct rspamd_config *cfg); \
+               gpointer rspamd_##name##_init (struct rspamd_stat_ctx *ctx, \
+                       struct rspamd_config *cfg, struct rspamd_statfile *st); \
                gpointer rspamd_##name##_runtime (struct rspamd_task *task, \
                                struct rspamd_statfile_config *stcf, \
                                gboolean learn, gpointer ctx); \
index 180aa465855d0d1d6e3005188916a5e4cbaf53f1..a271a424a0784eacaa65b5a36572a62fb0b6540c 100644 (file)
@@ -191,21 +191,13 @@ bayes_normalize_prob (gdouble x)
        return a*x4 + b*x3 + c*x2 + d*xx;
 }
 
-struct classifier_ctx *
-bayes_init (rspamd_mempool_t *pool, struct rspamd_classifier_config *cfg)
+void
+bayes_init (rspamd_mempool_t *pool, struct rspamd_classifier *cl) /* NOTE(review): body is empty after this change; pool and cl are unused here */
 {
-       struct classifier_ctx *ctx =
-               rspamd_mempool_alloc (pool, sizeof (struct classifier_ctx));
-
-       ctx->pool = pool;
-       ctx->cfg = cfg;
-       ctx->debug = FALSE;
-
-       return ctx;
 }
 
 gboolean
-bayes_classify (struct classifier_ctx * ctx,
+bayes_classify (struct rspamd_classifier * ctx,
        GTree *input,
        struct rspamd_classifier_runtime *rt,
        struct rspamd_task *task)
@@ -357,7 +349,7 @@ bayes_learn_ham_callback (gpointer key, gpointer value, gpointer data)
 }
 
 gboolean
-bayes_learn_spam (struct classifier_ctx * ctx,
+bayes_learn_spam (struct rspamd_classifier * ctx,
        GTree *input,
        struct rspamd_classifier_runtime *rt,
        struct rspamd_task *task,
index 9a30039df767e47486e3654d45adb734b7deef3e..62abb00521e6169228409ca7736a6fdda58dd6da 100644 (file)
@@ -9,39 +9,32 @@
 
 struct rspamd_classifier_config;
 struct rspamd_task;
-
-/* Common classifier structure */
-struct classifier_ctx {
-       rspamd_mempool_t *pool;
-       GHashTable *results;
-       gboolean debug;
-       struct rspamd_classifier_config *cfg;
-};
+struct rspamd_classifier;
 
 struct token_node_s;
 struct rspamd_classifier_runtime;
 
 struct rspamd_stat_classifier {
        char *name;
-       struct classifier_ctx * (*init_func)(rspamd_mempool_t *pool,
-               struct rspamd_classifier_config *cf);
-       gboolean (*classify_func)(struct classifier_ctx * ctx,
+       void (*init_func)(rspamd_mempool_t *pool,
+               struct rspamd_classifier *cl); /* returns nothing: unlike the replaced signature, no context pointer comes back */
+       gboolean (*classify_func)(struct rspamd_classifier * ctx,
                GTree *input, struct rspamd_classifier_runtime *rt,
                struct rspamd_task *task);
-       gboolean (*learn_spam_func)(struct classifier_ctx * ctx,
+       gboolean (*learn_spam_func)(struct rspamd_classifier * ctx,
                GTree *input, struct rspamd_classifier_runtime *rt,
                struct rspamd_task *task, gboolean is_spam,
                GError **err);
 };
 
 /* Bayes algorithm */
-struct classifier_ctx * bayes_init (rspamd_mempool_t *pool,
-       struct rspamd_classifier_config *cf);
-gboolean bayes_classify (struct classifier_ctx * ctx,
+void bayes_init (rspamd_mempool_t *pool,
+       struct rspamd_classifier *cl); /* parameter named to match the definition and init_func */
+gboolean bayes_classify (struct rspamd_classifier * ctx,
        GTree *input,
        struct rspamd_classifier_runtime *rt,
        struct rspamd_task *task);
-gboolean bayes_learn_spam (struct classifier_ctx * ctx,
+gboolean bayes_learn_spam (struct rspamd_classifier * ctx,
        GTree *input,
        struct rspamd_classifier_runtime *rt,
        struct rspamd_task *task,
index dbfe16c27323d932c74d5438860c1eef6e6c34dc..a8afc1358566104a588871fdf323ca95ccffdc7f 100644 (file)
@@ -93,6 +93,11 @@ void
 rspamd_stat_init (struct rspamd_config *cfg)
 {
        guint i;
+       GList *cur, *curst;
+       struct rspamd_classifier_config *clf;
+       struct rspamd_statfile_config *stf;
+       struct rspamd_stat_backend *bk;
+       struct rspamd_statfile *st;
 
        if (stat_ctx == NULL) {
                stat_ctx = g_slice_alloc0 (sizeof (*stat_ctx));
@@ -107,11 +112,52 @@ rspamd_stat_init (struct rspamd_config *cfg)
        stat_ctx->caches = stat_caches;
        stat_ctx->caches_count = G_N_ELEMENTS (stat_caches);
        stat_ctx->cfg = cfg;
+       REF_RETAIN (stat_ctx->cfg);
+
+       /* Create statfiles from the classifiers */
+       cur = cfg->classifiers;
+
+       while (cur) {
+               clf = cur->data;
+               bk = rspamd_stat_get_backend (clf->backend);
+               g_assert (bk != NULL);
+
+               /* XXX:
+                * Here we get the first classifier tokenizer config as the only one
+                * We NO LONGER support multiple tokenizers per rspamd instance
+                */
+               if (stat_ctx->tkcf == NULL) {
+                       stat_ctx->tokenizer = rspamd_stat_get_tokenizer (clf->tokenizer);
+                       g_assert (stat_ctx->tokenizer != NULL);
+                       stat_ctx->tkcf = stat_ctx->tokenizer->get_config (cfg->cfg_pool,
+                                       clf->tokenizer, NULL);
+               }
 
-       /* Init backends */
-       for (i = 0; i < stat_ctx->backends_count; i ++) {
-               stat_ctx->backends[i].ctx = stat_ctx->backends[i].init (stat_ctx, cfg);
-               msg_debug_config ("added backend %s", stat_ctx->backends[i].name);
+               curst = clf->statfiles;
+
+               while (curst) { /* build one rspamd_statfile per statfile of this classifier */
+                       stf = curst->data;
+                       st = g_slice_alloc0 (sizeof (*st));
+                       st->clcf = clf;
+                       st->stcf = stf;
+                       st->tkcf = stat_ctx->tkcf;
+                       /* Use the backend looked up for this classifier (bk), not backends[i] */
+                       st->bkcf = bk->init (stat_ctx, cfg, st);
+
+                       if (st->bkcf == NULL) {
+                               msg_err_config ("cannot init backend %s for statfile %s",
+                                               clf->backend, stf->symbol);
+                               g_slice_free1 (sizeof (*st), st);
+                       }
+                       else {
+                               msg_debug_config ("added backend %s", bk->name);
+                               g_ptr_array_add (stat_ctx->statfiles, st);
+                       }
+
+                       curst = curst->next;
+               }
+
+               cur = cur->next;
        }
 
        /* Init caches */
@@ -135,6 +181,8 @@ rspamd_stat_close (void)
                        msg_debug_config ("closed backend %s", stat_ctx->backends[i].name);
                }
        }
+
+       REF_RELEASE (stat_ctx->cfg);
 }
 
 struct rspamd_stat_ctx *
index 64019678836260c6f37aed6d18352b2f6eb66635..18d412ce31b28cfaf40a6407bc1190fde6a6a6fb 100644 (file)
@@ -69,10 +69,20 @@ struct rspamd_classifier_runtime {
        gboolean skipped;
 };
 
-struct rspamd_token_result {
-       double value;
-       struct rspamd_statfile_runtime *st_runtime;
-       struct rspamd_classifier_runtime *cl_runtime;
+/* Common classifier structure: passed to a classifier's init_func, classify_func and learn_spam_func */
+struct rspamd_classifier {
+       struct rspamd_stat_cache *cache; /* presumably the learn cache for this classifier - TODO confirm */
+       gpointer cachecf; /* opaque data that goes with cache - TODO confirm what it holds */
+       GArray *statfiles_ids; /* presumably ids of this classifier's statfiles - TODO confirm element type */
+       struct rspamd_classifier_config *cfg; /* classifier configuration */
+};
+
+struct rspamd_statfile { /* NOTE(review): rspamd_stat_init also writes st->clcf and st->tkcf, which are not declared here */
+       gint id; /* presumably the statfile's numeric identifier - TODO confirm how it is assigned */
+       struct rspamd_statfile_config *stcf; /* statfile configuration; rspamd_stat_init takes it from clf->statfiles */
+       struct rspamd_classifier *classifier; /* presumably the owning classifier - TODO confirm */
+       struct rspamd_stat_backend *backend; /* backend subroutines for this statfile - TODO confirm who sets it */
+       gpointer bkcf; /* result of the backend's init(); rspamd_stat_init treats NULL as failure */
 };
 
 #define RSPAMD_MAX_TOKEN_LEN 16
@@ -80,10 +90,11 @@ typedef struct token_node_s {
        guchar data[RSPAMD_MAX_TOKEN_LEN];
        guint window_idx;
        guint datalen;
-       GArray *results;
+       gdouble values[1];
 } rspamd_token_t;
 
 struct rspamd_stat_ctx {
+       /* Subroutines for all objects */
        struct rspamd_stat_classifier *classifiers;
        guint classifiers_count;
        struct rspamd_stat_tokenizer *tokenizers;
@@ -93,8 +104,12 @@ struct rspamd_stat_ctx {
        struct rspamd_stat_cache *caches;
        guint caches_count;
 
-       guint statfiles;
+       /* Runtime configuration */
+       GPtrArray *statfiles; /* struct statfile */
        struct rspamd_config *cfg;
+       /* Global tokenizer */
+       struct rspamd_stat_tokenizer *tokenizer;
+       gpointer tkcf;
 };
 
 typedef enum rspamd_learn_cache_result {