about | summary | refs | log | tree | commit | diff | stats
path: root/src/libstat
diff options
context:
space:
mode:
author: Vsevolod Stakhov <vsevolod@highsecure.ru>, 2015-01-24 22:42:41 +0000
committer: Vsevolod Stakhov <vsevolod@highsecure.ru>, 2015-01-24 22:42:41 +0000
commit: 5cac9fc0e5d6b918d817d019be85e04543ba27f2 (patch)
tree: 8392fd993d4092777e58b2157440e84e488b3de8 /src/libstat
parent: de474c927f3bdf4b809cb275a79c0a0a42bc501c (diff)
download: rspamd-5cac9fc0e5d6b918d817d019be85e04543ba27f2.tar.gz
rspamd-5cac9fc0e5d6b918d817d019be85e04543ba27f2.zip
Add preprocessing routine for classifiers.
Diffstat (limited to 'src/libstat')
-rw-r--r-- src/libstat/stat_internal.h 6
-rw-r--r-- src/libstat/stat_process.c 105
2 files changed, 98 insertions, 13 deletions
diff --git a/src/libstat/stat_internal.h b/src/libstat/stat_internal.h
index 6587f595e..54f7e13d9 100644
--- a/src/libstat/stat_internal.h
+++ b/src/libstat/stat_internal.h
@@ -31,18 +31,20 @@
struct rspamd_statfile_runtime {
struct rspamd_statfile_config *st;
- gpointer statfile_data;
+ gpointer backend_runtime;
guint64 hits;
guint64 total_hits;
};
struct rspamd_classifier_runtime {
+ struct rspamd_classifier_config *clcf;
+ struct rspamd_stat_classifier *cl;
double ham_prob;
double spam_prob;
guint64 total_spam;
guint64 total_ham;
guint64 processed_tokens;
- gsize max_tokens;
+ GList *st_runtime;
};
struct rspamd_token_result {
diff --git a/src/libstat/stat_process.c b/src/libstat/stat_process.c
index 10a6079c1..fcc068e5b 100644
--- a/src/libstat/stat_process.c
+++ b/src/libstat/stat_process.c
@@ -36,42 +36,123 @@ struct rspamd_tokenizer_runtime {
struct rspamd_tokenizer_runtime *next;
};
+struct preprocess_cb_data {
+ GList *classifier_runtimes;
+ guint results_count;
+};
+
static gboolean
preprocess_init_stat_token (gpointer k, gpointer v, gpointer d)
{
rspamd_token_t *t = (rspamd_token_t *)v;
- struct rspamd_stat_ctx *st_ctx = (struct rspamd_stat_ctx *)d;
+ struct preprocess_cb_data *cbdata = (struct preprocess_cb_data *)d;
t->results = g_array_sized_new (FALSE, TRUE,
- sizeof (struct rspamd_token_result), st_ctx->statfiles);
+ sizeof (struct rspamd_token_result), cbdata->results_count);
+
+ /* TODO: add filling of results array */
return FALSE;
}
-static gboolean
+static GList*
rspamd_stat_preprocess (struct rspamd_stat_ctx *st_ctx,
struct rspamd_task *task, struct rspamd_tokenizer_runtime *tklist,
- GError **err)
+ lua_State *L, GError **err)
{
- struct rspamd_stat_classifier *cls;
struct rspamd_classifier_config *clcf;
- GList *cur;
+ struct rspamd_statfile_config *stcf;
struct rspamd_tokenizer_runtime *tok;
+ struct rspamd_classifier_runtime *cl_runtime;
+ struct rspamd_statfile_runtime *st_runtime;
+ struct rspamd_stat_backend *bk;
+ gpointer backend_runtime;
+ GList *cur, *st_list = NULL, *curst;
+ GList *cl_runtimes = NULL;
+ guint result_size = 0;
+ struct preprocess_cb_data cbdata;
cur = g_list_first (task->cfg->classifiers);
while (cur) {
clcf = (struct rspamd_classifier_config *)cur->data;
+ if (clcf->pre_callbacks != NULL) {
+ st_list = rspamd_lua_call_cls_pre_callbacks (clcf, task, FALSE,
+ FALSE, L);
+ }
+ if (st_list != NULL) {
+ rspamd_mempool_add_destructor (task->task_pool,
+ (rspamd_mempool_destruct_t)g_list_free, st_list);
+ }
+ else {
+ st_list = clcf->statfiles;
+ }
+
+ /* Now init runtime values */
+ cl_runtime = rspamd_mempool_alloc0 (task->task_pool, sizeof (*cl_runtime));
+ cl_runtime->cl = rspamd_stat_get_classifier (clcf->classifier);
+
+ if (cl_runtime->cl == NULL) {
+ g_set_error (err, rspamd_stat_quark(), 500,
+ "classifier %s is not defined", clcf->classifier);
+ g_list_free (cl_runtimes);
+ return NULL;
+ }
+
+ cl_runtime->clcf = clcf;
+
+ curst = clcf->statfiles;
+ while (curst != NULL) {
+ stcf = (struct rspamd_statfile_config *)curst->data;
- cur = cur->next;
+ bk = rspamd_stat_get_backend (stcf->backend);
+
+ if (bk == NULL) {
+ msg_warn ("backend of type %s is not defined", stcf->backend);
+ curst = g_list_next (curst);
+ continue;
+ }
+
+ backend_runtime = bk->runtime (stcf, bk->ctx);
+
+ st_runtime = rspamd_mempool_alloc0 (task->task_pool,
+ sizeof (*st_runtime));
+ st_runtime->st = stcf;
+ st_runtime->backend_runtime = backend_runtime;
+
+ cl_runtime->st_runtime = g_list_prepend (cl_runtime->st_runtime,
+ st_runtime);
+ result_size ++;
+
+ curst = g_list_next (curst);
+ }
+
+ if (cl_runtime->st_runtime != NULL) {
+ rspamd_mempool_add_destructor (task->task_pool,
+ (rspamd_mempool_destruct_t)g_list_free,
+ cl_runtime->st_runtime);
+ cl_runtimes = g_list_prepend (cl_runtimes, cl_runtime);
+ }
+
+ cur = g_list_next (cur);
}
- LL_FOREACH (tklist, tok) {
- g_tree_foreach (tok->tokens, preprocess_init_stat_token, st_ctx);
+ if (cl_runtimes != NULL) {
+ rspamd_mempool_add_destructor (task->task_pool,
+ (rspamd_mempool_destruct_t)g_list_free,
+ cl_runtimes);
+
+ cbdata.results_count = result_size;
+ cbdata.classifier_runtimes = cl_runtimes;
+
+ /* Allocate token results */
+ LL_FOREACH (tklist, tok) {
+ g_tree_foreach (tok->tokens, preprocess_init_stat_token, &cbdata);
+ }
}
- return TRUE;
+ return cl_runtimes;
}
static struct rspamd_tokenizer_runtime *
@@ -162,6 +243,7 @@ rspamd_stat_classify (struct rspamd_task *task, lua_State *L, GError **err)
GList *cur;
struct rspamd_stat_ctx *st_ctx;
struct rspamd_tokenizer_runtime *tklist = NULL, *tok;
+ GList *cl_runtimes;
st_ctx = rspamd_stat_get_ctx ();
@@ -195,7 +277,8 @@ rspamd_stat_classify (struct rspamd_task *task, lua_State *L, GError **err)
}
/* Initialize classifiers and statfiles runtime */
- if (!rspamd_stat_preprocess (st_ctx, task, tklist, err)) {
+ if ((cl_runtimes = rspamd_stat_preprocess (st_ctx, task, tklist, L, err))
+ == NULL) {
return FALSE;
}