diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2017-07-11 08:14:42 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2017-07-11 08:16:37 +0100 |
commit | 54f90a7cd9bcfefdec22748c33e2f85000d5a357 (patch) | |
tree | a483dbd54111b8568ad4511cef7a3cf6381e2db9 /src/libstat | |
parent | cfc78aeb4a7fe473a27e025facd29dfde2bf5d6d (diff) | |
download | rspamd-54f90a7cd9bcfefdec22748c33e2f85000d5a357.tar.gz rspamd-54f90a7cd9bcfefdec22748c33e2f85000d5a357.zip |
[Minor] Move stats signatures generation to tokenization stage
Diffstat (limited to 'src/libstat')
-rw-r--r-- | src/libstat/backends/redis_backend.c | 35 | ||||
-rw-r--r-- | src/libstat/stat_process.c | 22 |
2 files changed, 27 insertions, 30 deletions
diff --git a/src/libstat/backends/redis_backend.c b/src/libstat/backends/redis_backend.c index aa4c4a5e7..cb20c9656 100644 --- a/src/libstat/backends/redis_backend.c +++ b/src/libstat/backends/redis_backend.c @@ -18,6 +18,7 @@ #include "stat_internal.h" #include "upstream.h" #include "lua/lua_common.h" +#include "libserver/mempool_vars_internal.h" #ifdef WITH_HIREDIS #include "hiredis.h" @@ -359,7 +360,6 @@ rspamd_redis_tokens_to_query (struct rspamd_task *task, rspamd_fstring_t *out; rspamd_token_t *tok; gchar n0[512], n1[64]; - rspamd_cryptobox_hash_state_t hst; guint i, l0, l1, cmd_len, prefix_len; gint ret; @@ -383,11 +383,6 @@ rspamd_redis_tokens_to_query (struct rspamd_task *task, } out->len = 0; - - if (rt->ctx->enable_signatures) { - /* Generate hash for tokens */ - rspamd_cryptobox_hash_init (&hst, NULL, 0); - } } else { if (rt->ctx->new_schema) { @@ -431,12 +426,6 @@ rspamd_redis_tokens_to_query (struct rspamd_task *task, tok->values[idx]); } - if (rt->ctx->enable_signatures) { - /* Generate hash for tokens */ - rspamd_cryptobox_hash_update (&hst, (guchar *)&tok->data, - sizeof (tok->data)); - } - if (rt->ctx->new_schema) { /* * HINCRBY <prefix_token> <0|1> <value> @@ -608,26 +597,11 @@ rspamd_redis_tokens_to_query (struct rspamd_task *task, rspamd_printf_fstring (&out, "*1\r\n$4\r\nEXEC\r\n"); } - if (learn && rt->ctx->enable_signatures) { - guchar hout[rspamd_cryptobox_HASHBYTES]; - gchar *b32_hout; - - rspamd_cryptobox_hash_final (&hst, hout); - b32_hout = rspamd_encode_base32 (hout, sizeof (hout)); - /* - * We need to strip it to 32 characters providing ~160 bits of - * hash distribution - */ - b32_hout[32] = '\0'; - rspamd_mempool_set_variable (task->task_pool, "bayes_signature", - b32_hout, g_free); - } - return out; } static void -rspamd_redis_generate_learn_signature (struct rspamd_task *task, +rspamd_redis_store_stat_signature (struct rspamd_task *task, struct redis_stat_runtime *rt, GPtrArray *tokens, const gchar *prefix) @@ -638,7 +612,8 @@ rspamd_redis_generate_learn_signature (struct rspamd_task *task, rspamd_fstring_t *out; out = rspamd_fstring_sized_new (1024); - sig = rspamd_mempool_get_variable (task->task_pool, "bayes_signature"); + sig = rspamd_mempool_get_variable (task->task_pool, + RSPAMD_MEMPOOL_STAT_SIGNATURE); if (sig == NULL) { msg_err_task ("cannot get bayes signature"); @@ -1707,7 +1682,7 @@ rspamd_redis_learn_tokens (struct rspamd_task *task, GPtrArray *tokens, /* Add signature if needed */ if (rt->ctx->enable_signatures) { - rspamd_redis_generate_learn_signature (task, rt, tokens, + rspamd_redis_store_stat_signature (task, rt, tokens, "RSIG"); } diff --git a/src/libstat/stat_process.c b/src/libstat/stat_process.c index 6411580f6..9ca26cd83 100644 --- a/src/libstat/stat_process.c +++ b/src/libstat/stat_process.c @@ -298,11 +298,15 @@ rspamd_stat_process_tokenize (struct rspamd_stat_ctx *st_ctx, struct rspamd_task *task) { struct rspamd_mime_text_part *part; + rspamd_cryptobox_hash_state_t hst; rspamd_stat_token_t *tok; + rspamd_token_t *st_tok; GArray *words; gchar *sub = NULL; guint i, reserved_len = 0; gdouble *pdiff; + guchar hout[rspamd_cryptobox_HASHBYTES]; + gchar *b32_hout; for (i = 0; i < task->text_parts->len; i++) { part = g_ptr_array_index (task->text_parts, i); @@ -363,6 +367,24 @@ rspamd_stat_process_tokenize (struct rspamd_stat_ctx *st_ctx, } rspamd_stat_tokenize_parts_metadata (st_ctx, task); + + /* Produce signature */ + rspamd_cryptobox_hash_init (&hst, NULL, 0); + + PTR_ARRAY_FOREACH (task->tokens, i, st_tok) { + rspamd_cryptobox_hash_update (&hst, (guchar *)&st_tok->data, + sizeof (st_tok->data)); + } + + rspamd_cryptobox_hash_final (&hst, hout); + b32_hout = rspamd_encode_base32 (hout, sizeof (hout)); + /* + * We need to strip it to 32 characters providing ~160 bits of + * hash distribution + */ + b32_hout[32] = '\0'; + rspamd_mempool_set_variable (task->task_pool, RSPAMD_MEMPOOL_STAT_SIGNATURE, + b32_hout, g_free); } static void |