aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/libstat/backends/backends.h6
-rw-r--r--src/libstat/classifiers/bayes.c16
-rw-r--r--src/libstat/classifiers/classifiers.h25
-rw-r--r--src/libstat/stat_config.c56
-rw-r--r--src/libstat/stat_internal.h27
5 files changed, 90 insertions, 40 deletions
diff --git a/src/libstat/backends/backends.h b/src/libstat/backends/backends.h
index 4ac59655c..42eba8d61 100644
--- a/src/libstat/backends/backends.h
+++ b/src/libstat/backends/backends.h
@@ -42,7 +42,8 @@ struct rspamd_task;
struct rspamd_stat_backend {
const char *name;
- gpointer (*init)(struct rspamd_stat_ctx *ctx, struct rspamd_config *cfg);
+ gpointer (*init)(struct rspamd_stat_ctx *ctx, struct rspamd_config *cfg,
+ struct rspamd_statfile *st);
gpointer (*runtime)(struct rspamd_task *task,
struct rspamd_statfile_config *stcf, gboolean learn, gpointer ctx);
gboolean (*process_token)(struct rspamd_task *task, struct token_node_s *tok,
@@ -67,7 +68,8 @@ struct rspamd_stat_backend {
};
#define RSPAMD_STAT_BACKEND_DEF(name) \
- gpointer rspamd_##name##_init (struct rspamd_stat_ctx *ctx, struct rspamd_config *cfg); \
+ gpointer rspamd_##name##_init (struct rspamd_stat_ctx *ctx, \
+ struct rspamd_config *cfg, struct rspamd_statfile *st); \
gpointer rspamd_##name##_runtime (struct rspamd_task *task, \
struct rspamd_statfile_config *stcf, \
gboolean learn, gpointer ctx); \
diff --git a/src/libstat/classifiers/bayes.c b/src/libstat/classifiers/bayes.c
index 180aa4658..a271a424a 100644
--- a/src/libstat/classifiers/bayes.c
+++ b/src/libstat/classifiers/bayes.c
@@ -191,21 +191,13 @@ bayes_normalize_prob (gdouble x)
return a*x4 + b*x3 + c*x2 + d*xx;
}
-struct classifier_ctx *
-bayes_init (rspamd_mempool_t *pool, struct rspamd_classifier_config *cfg)
+void
+bayes_init (rspamd_mempool_t *pool, struct rspamd_classifier *cl)
{
- struct classifier_ctx *ctx =
- rspamd_mempool_alloc (pool, sizeof (struct classifier_ctx));
-
- ctx->pool = pool;
- ctx->cfg = cfg;
- ctx->debug = FALSE;
-
- return ctx;
}
gboolean
-bayes_classify (struct classifier_ctx * ctx,
+bayes_classify (struct rspamd_classifier * ctx,
GTree *input,
struct rspamd_classifier_runtime *rt,
struct rspamd_task *task)
@@ -357,7 +349,7 @@ bayes_learn_ham_callback (gpointer key, gpointer value, gpointer data)
}
gboolean
-bayes_learn_spam (struct classifier_ctx * ctx,
+bayes_learn_spam (struct rspamd_classifier * ctx,
GTree *input,
struct rspamd_classifier_runtime *rt,
struct rspamd_task *task,
diff --git a/src/libstat/classifiers/classifiers.h b/src/libstat/classifiers/classifiers.h
index 9a30039df..62abb0052 100644
--- a/src/libstat/classifiers/classifiers.h
+++ b/src/libstat/classifiers/classifiers.h
@@ -9,39 +9,32 @@
struct rspamd_classifier_config;
struct rspamd_task;
-
-/* Common classifier structure */
-struct classifier_ctx {
- rspamd_mempool_t *pool;
- GHashTable *results;
- gboolean debug;
- struct rspamd_classifier_config *cfg;
-};
+struct rspamd_classifier;
struct token_node_s;
struct rspamd_classifier_runtime;
struct rspamd_stat_classifier {
char *name;
- struct classifier_ctx * (*init_func)(rspamd_mempool_t *pool,
- struct rspamd_classifier_config *cf);
- gboolean (*classify_func)(struct classifier_ctx * ctx,
+ void (*init_func)(rspamd_mempool_t *pool,
+ struct rspamd_classifier *cl);
+ gboolean (*classify_func)(struct rspamd_classifier * ctx,
GTree *input, struct rspamd_classifier_runtime *rt,
struct rspamd_task *task);
- gboolean (*learn_spam_func)(struct classifier_ctx * ctx,
+ gboolean (*learn_spam_func)(struct rspamd_classifier * ctx,
GTree *input, struct rspamd_classifier_runtime *rt,
struct rspamd_task *task, gboolean is_spam,
GError **err);
};
/* Bayes algorithm */
-struct classifier_ctx * bayes_init (rspamd_mempool_t *pool,
- struct rspamd_classifier_config *cf);
-gboolean bayes_classify (struct classifier_ctx * ctx,
+void bayes_init (rspamd_mempool_t *pool,
+ struct rspamd_classifier *);
+gboolean bayes_classify (struct rspamd_classifier * ctx,
GTree *input,
struct rspamd_classifier_runtime *rt,
struct rspamd_task *task);
-gboolean bayes_learn_spam (struct classifier_ctx * ctx,
+gboolean bayes_learn_spam (struct rspamd_classifier * ctx,
GTree *input,
struct rspamd_classifier_runtime *rt,
struct rspamd_task *task,
diff --git a/src/libstat/stat_config.c b/src/libstat/stat_config.c
index dbfe16c27..a8afc1358 100644
--- a/src/libstat/stat_config.c
+++ b/src/libstat/stat_config.c
@@ -93,6 +93,11 @@ void
rspamd_stat_init (struct rspamd_config *cfg)
{
guint i;
+ GList *cur, *curst;
+ struct rspamd_classifier_config *clf;
+ struct rspamd_statfile_config *stf;
+ struct rspamd_stat_backend *bk;
+ struct rspamd_statfile *st;
if (stat_ctx == NULL) {
stat_ctx = g_slice_alloc0 (sizeof (*stat_ctx));
@@ -107,11 +112,52 @@ rspamd_stat_init (struct rspamd_config *cfg)
stat_ctx->caches = stat_caches;
stat_ctx->caches_count = G_N_ELEMENTS (stat_caches);
stat_ctx->cfg = cfg;
+ REF_RETAIN (stat_ctx->cfg);
+
+ /* Create statfiles from the classifiers */
+ cur = cfg->classifiers;
+
+ while (cur) {
+ clf = cur->data;
+ bk = rspamd_stat_get_backend (clf->backend);
+ g_assert (bk != NULL);
+
+ /* XXX:
+ * The tokenizer config of the first classifier is used as the only one:
+ * multiple tokenizers per rspamd instance are NO LONGER supported.
+ */
+ if (stat_ctx->tkcf == NULL) {
+ stat_ctx->tokenizer = rspamd_stat_get_tokenizer (clf->tokenizer);
+ g_assert (stat_ctx->tokenizer != NULL);
+ stat_ctx->tkcf = stat_ctx->tokenizer->get_config (cfg->cfg_pool,
+ clf->tokenizer, NULL);
+ }
- /* Init backends */
- for (i = 0; i < stat_ctx->backends_count; i ++) {
- stat_ctx->backends[i].ctx = stat_ctx->backends[i].init (stat_ctx, cfg);
- msg_debug_config ("added backend %s", stat_ctx->backends[i].name);
+ curst = clf->statfiles;
+
+ while (curst) {
+ stf = curst->data;
+ st = g_slice_alloc0 (sizeof (*st));
+ st->clcf = clf;
+ st->stcf = stf;
+ st->tkcf = stat_ctx->tkcf;
+ st->bkcf = stat_ctx->backends[i].init (stat_ctx, cfg, st);
+ msg_debug_config ("added backend %s", stat_ctx->backends[i].name);
+
+ if (st->bkcf == NULL) {
+ msg_err_config ("cannot init backend %s for statfile %s",
+ clf->backend, stf->symbol);
+
+ g_slice_free1 (sizeof (*st), st);
+ }
+ else {
+ g_ptr_array_add (stat_ctx->statfiles, st);
+ }
+
+ curst = curst->next;
+ }
+
+ cur = cur->next;
}
/* Init caches */
@@ -135,6 +181,8 @@ rspamd_stat_close (void)
msg_debug_config ("closed backend %s", stat_ctx->backends[i].name);
}
}
+
+ REF_RELEASE (stat_ctx->cfg);
}
struct rspamd_stat_ctx *
diff --git a/src/libstat/stat_internal.h b/src/libstat/stat_internal.h
index 640196788..18d412ce3 100644
--- a/src/libstat/stat_internal.h
+++ b/src/libstat/stat_internal.h
@@ -69,10 +69,20 @@ struct rspamd_classifier_runtime {
gboolean skipped;
};
-struct rspamd_token_result {
- double value;
- struct rspamd_statfile_runtime *st_runtime;
- struct rspamd_classifier_runtime *cl_runtime;
+/* Common classifier structure */
+struct rspamd_classifier {
+ struct rspamd_stat_cache *cache;
+ gpointer cachecf;
+ GArray *statfiles_ids;
+ struct rspamd_classifier_config *cfg;
+};
+
+struct rspamd_statfile {
+ gint id;
+ struct rspamd_statfile_config *stcf;
+ struct rspamd_classifier *classifier;
+ struct rspamd_stat_backend *backend;
+ gpointer bkcf;
};
#define RSPAMD_MAX_TOKEN_LEN 16
@@ -80,10 +90,11 @@ typedef struct token_node_s {
guchar data[RSPAMD_MAX_TOKEN_LEN];
guint window_idx;
guint datalen;
- GArray *results;
+ gdouble values[1];
} rspamd_token_t;
struct rspamd_stat_ctx {
+ /* Subroutines for all objects */
struct rspamd_stat_classifier *classifiers;
guint classifiers_count;
struct rspamd_stat_tokenizer *tokenizers;
@@ -93,8 +104,12 @@ struct rspamd_stat_ctx {
struct rspamd_stat_cache *caches;
guint caches_count;
- guint statfiles;
+ /* Runtime configuration */
+ GPtrArray *statfiles; /* struct statfile */
struct rspamd_config *cfg;
+ /* Global tokenizer */
+ struct rspamd_stat_tokenizer *tokenizer;
+ gpointer tkcf;
};
typedef enum rspamd_learn_cache_result {