aboutsummaryrefslogtreecommitdiffstats
path: root/src/libstat
diff options
context:
space:
mode:
author: Vsevolod Stakhov <vsevolod@highsecure.ru> 2015-07-27 13:51:50 +0100
committer: Vsevolod Stakhov <vsevolod@highsecure.ru> 2015-07-27 13:51:50 +0100
commit: 6f7007aca294790c8cb2bb3b784005dc1a2118a4 (patch)
tree: aa2c60980dbd06a3df1281b6fbfb999b3e0978bd /src/libstat
parent: 043c5b7bcdb055c7f45034bd0a83408773c35bfd (diff)
download: rspamd-6f7007aca294790c8cb2bb3b784005dc1a2118a4.tar.gz
rspamd-6f7007aca294790c8cb2bb3b784005dc1a2118a4.zip
Fix stat processing.
Diffstat (limited to 'src/libstat')
-rw-r--r-- src/libstat/stat_config.c 6
-rw-r--r-- src/libstat/stat_process.c 82
-rw-r--r-- src/libstat/tokenizers/tokenizers.h 4
3 files changed, 18 insertions, 74 deletions
diff --git a/src/libstat/stat_config.c b/src/libstat/stat_config.c
index 9ae1b5bf8..9e65b68a5 100644
--- a/src/libstat/stat_config.c
+++ b/src/libstat/stat_config.c
@@ -46,14 +46,16 @@ static struct rspamd_stat_tokenizer stat_tokenizers[] = {
.get_config = rspamd_tokenizer_osb_get_config,
.compatible_config = rspamd_tokenizer_osb_compatible_config,
.tokenize_func = rspamd_tokenizer_osb,
- .load_config = rspamd_tokenizer_osb_load_config
+ .load_config = rspamd_tokenizer_osb_load_config,
+ .is_compat = rspamd_tokenizer_osb_is_compat
},
{
.name = "osb",
.get_config = rspamd_tokenizer_osb_get_config,
.compatible_config = rspamd_tokenizer_osb_compatible_config,
.tokenize_func = rspamd_tokenizer_osb,
- .load_config = rspamd_tokenizer_osb_load_config
+ .load_config = rspamd_tokenizer_osb_load_config,
+ .is_compat = rspamd_tokenizer_osb_is_compat
},
};
diff --git a/src/libstat/stat_process.c b/src/libstat/stat_process.c
index e6d7c90c1..d6350e0e0 100644
--- a/src/libstat/stat_process.c
+++ b/src/libstat/stat_process.c
@@ -72,7 +72,6 @@ rspamd_stat_tokenize_header (struct rspamd_task *task,
tok->tokenizer->tokenize_func (tok,
task->task_pool,
ar,
- tok->tokens,
TRUE,
prefix);
@@ -91,6 +90,9 @@ rspamd_stat_process_tokenize (struct rspamd_stat_ctx *st_ctx,
GArray *words;
gchar *sub;
guint i;
+ gboolean compat;
+
+ compat = tok->tokenizer->is_compat (tok);
for (i = 0; i < task->text_parts->len; i ++) {
part = g_ptr_array_index (task->text_parts, i);
@@ -98,11 +100,11 @@ rspamd_stat_process_tokenize (struct rspamd_stat_ctx *st_ctx,
if (!IS_PART_EMPTY (part) && part->words != NULL) {
if (compat) {
tok->tokenizer->tokenize_func (tok, task->task_pool,
- part->words, tok->tokens, IS_PART_UTF (part), NULL);
+ part->words, IS_PART_UTF (part), NULL);
}
else {
tok->tokenizer->tokenize_func (tok, task->task_pool,
- part->normalized_words, tok->tokens, IS_PART_UTF (part), NULL);
+ part->normalized_words, IS_PART_UTF (part), NULL);
}
}
@@ -142,7 +144,7 @@ rspamd_stat_get_tokenizer_runtime (struct rspamd_tokenizer_config *cf,
struct rspamd_classifier_runtime *cl_runtime,
gpointer conf, gsize conf_len)
{
- struct rspamd_tokenizer_runtime *tok = NULL, *cur;
+ struct rspamd_tokenizer_runtime *tok = NULL;
const gchar *name;
if (cf == NULL || cf->name == NULL) {
@@ -171,7 +173,7 @@ rspamd_stat_get_tokenizer_runtime (struct rspamd_tokenizer_config *cf,
(rspamd_mempool_destruct_t)g_tree_destroy, tok->tokens);
tok->name = name;
rspamd_stat_process_tokenize (st_ctx, task, tok);
- g_hash_table_insert (cl_runtime, tok, tok);
+ cl_runtime->tok = tok;
return tok;
}
@@ -258,10 +260,8 @@ rspamd_stat_preprocess (struct rspamd_stat_ctx *st_ctx,
gpointer backend_runtime, tok_config;
GList *cur, *st_list = NULL, *curst;
GList *cl_runtimes = NULL;
- GHashTableIter it;
guint result_size = 0, start_pos = 0, end_pos = 0;
gsize conf_len;
- struct rspamd_tokenizer_runtime *tok_runtime;
struct preprocess_cb_data cbdata;
cur = g_list_first (task->cfg->classifiers);
@@ -419,51 +419,19 @@ rspamd_stat_preprocess (struct rspamd_stat_ctx *st_ctx,
rspamd_stat_result_t
rspamd_stat_classify (struct rspamd_task *task, lua_State *L, GError **err)
{
- struct rspamd_stat_classifier *cls;
- struct rspamd_classifier_config *clcf;
struct rspamd_stat_ctx *st_ctx;
struct rspamd_statfile_runtime *st_run;
- struct rspamd_tokenizer_runtime *tklist = NULL, *tok;
struct rspamd_classifier_runtime *cl_run;
struct classifier_ctx *cl_ctx;
GList *cl_runtimes;
GList *cur, *curst;
- gboolean ret = RSPAMD_STAT_PROCESS_OK, compat = TRUE;
- const ucl_object_t *obj;
+ gboolean ret = RSPAMD_STAT_PROCESS_OK;
st_ctx = rspamd_stat_get_ctx ();
g_assert (st_ctx != NULL);
- cur = g_list_first (task->cfg->classifiers);
-
- /* Tokenization */
- while (cur) {
- clcf = (struct rspamd_classifier_config *)cur->data;
- cls = rspamd_stat_get_classifier (clcf->classifier);
-
- if (cls == NULL) {
- g_set_error (err, rspamd_stat_quark (), 500, "type %s is not defined"
- "for classifiers", clcf->classifier);
- return RSPAMD_STAT_PROCESS_ERROR;
- }
-
- tok = rspamd_stat_get_tokenizer_runtime (clcf->tokenizer, task->task_pool,
- &tklist);
-
- if (tok == NULL) {
- g_set_error (err, rspamd_stat_quark (), 500, "type %s is not defined"
- "for tokenizers", clcf->tokenizer ?
- clcf->tokenizer->name : "unknown");
- return RSPAMD_STAT_PROCESS_ERROR;
- }
-
- rspamd_stat_process_tokenize (clcf->tokenizer, st_ctx, task, tok);
-
- cur = g_list_next (cur);
- }
-
/* Initialize classifiers and statfiles runtime */
- if ((cl_runtimes = rspamd_stat_preprocess (st_ctx, task, tklist, L,
+ if ((cl_runtimes = rspamd_stat_preprocess (st_ctx, task, L,
RSPAMD_CLASSIFY_OP, FALSE, err)) == NULL) {
return RSPAMD_STAT_PROCESS_OK;
}
@@ -586,15 +554,11 @@ rspamd_stat_result_t
rspamd_stat_learn (struct rspamd_task *task, gboolean spam, lua_State *L,
GError **err)
{
- struct rspamd_stat_classifier *cls;
- struct rspamd_classifier_config *clcf;
struct rspamd_stat_ctx *st_ctx;
- struct rspamd_tokenizer_runtime *tklist = NULL, *tok;
struct rspamd_classifier_runtime *cl_run;
struct rspamd_statfile_runtime *st_run;
struct classifier_ctx *cl_ctx;
struct preprocess_cb_data cbdata;
- const ucl_object_t *obj;
GList *cl_runtimes;
GList *cur, *curst;
gboolean ret = RSPAMD_STAT_PROCESS_ERROR, unlearn = FALSE;
@@ -608,32 +572,6 @@ rspamd_stat_learn (struct rspamd_task *task, gboolean spam, lua_State *L,
cur = g_list_first (task->cfg->classifiers);
- /* Tokenization */
- while (cur) {
- clcf = (struct rspamd_classifier_config *)cur->data;
- cls = rspamd_stat_get_classifier (clcf->classifier);
-
- if (cls == NULL) {
- g_set_error (err, rspamd_stat_quark (), 500, "type %s is not defined"
- "for classifiers", clcf->classifier);
- return RSPAMD_STAT_PROCESS_ERROR;
- }
-
- tok = rspamd_stat_get_tokenizer_runtime (clcf->tokenizer, task->task_pool,
- &tklist);
-
- if (tok == NULL) {
- g_set_error (err, rspamd_stat_quark (), 500, "type %s is not defined"
- "for tokenizers", clcf->tokenizer ?
- clcf->tokenizer->name : "unknown");
- return RSPAMD_STAT_PROCESS_ERROR;
- }
-
- rspamd_stat_process_tokenize (clcf->tokenizer, st_ctx, task, tok);
-
- cur = g_list_next (cur);
- }
-
/* Check whether we have learned that file */
for (i = 0; i < st_ctx->caches_count; i ++) {
learn_res = st_ctx->caches[i].process (task, spam,
@@ -652,7 +590,7 @@ rspamd_stat_learn (struct rspamd_task *task, gboolean spam, lua_State *L,
}
/* Initialize classifiers and statfiles runtime */
- if ((cl_runtimes = rspamd_stat_preprocess (st_ctx, task, tklist, L,
+ if ((cl_runtimes = rspamd_stat_preprocess (st_ctx, task, L,
unlearn ? RSPAMD_UNLEARN_OP : RSPAMD_LEARN_OP, spam, err)) == NULL) {
return RSPAMD_STAT_PROCESS_ERROR;
}
diff --git a/src/libstat/tokenizers/tokenizers.h b/src/libstat/tokenizers/tokenizers.h
index a2ff388ef..ed7ba4bcc 100644
--- a/src/libstat/tokenizers/tokenizers.h
+++ b/src/libstat/tokenizers/tokenizers.h
@@ -19,6 +19,7 @@ struct rspamd_stat_tokenizer {
gpointer ptr, gsize len);
gboolean (*load_config) (struct rspamd_tokenizer_runtime *rt,
gpointer ptr, gsize len);
+ gboolean (*is_compat) (struct rspamd_tokenizer_runtime *rt);
gint (*tokenize_func)(struct rspamd_tokenizer_runtime *rt,
rspamd_mempool_t *pool,
GArray *words,
@@ -53,6 +54,9 @@ gboolean
rspamd_tokenizer_osb_load_config (struct rspamd_tokenizer_runtime *rt,
gpointer ptr, gsize len);
+gboolean
+rspamd_tokenizer_osb_is_compat (struct rspamd_tokenizer_runtime *rt);
+
#endif
/*
* vi:ts=4