From 54f90a7cd9bcfefdec22748c33e2f85000d5a357 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Tue, 11 Jul 2017 08:14:42 +0100 Subject: [PATCH] [Minor] Move stats signatures generation to tokenization stage --- src/libserver/mempool_vars_internal.h | 1 + src/libstat/backends/redis_backend.c | 35 ++++----------------------- src/libstat/stat_process.c | 22 +++++++++++++++++ 3 files changed, 28 insertions(+), 30 deletions(-) diff --git a/src/libserver/mempool_vars_internal.h b/src/libserver/mempool_vars_internal.h index 94f7f8f02..229d56d48 100644 --- a/src/libserver/mempool_vars_internal.h +++ b/src/libserver/mempool_vars_internal.h @@ -32,5 +32,6 @@ #define RSPAMD_MEMPOOL_DKIM_SIGNATURE "dkim-signature" #define RSPAMD_MEMPOOL_DMARC_CHECKS "dmarc_checks" #define RSPAMD_MEMPOOL_DKIM_BH_CACHE "dkim_bh_cache" +#define RSPAMD_MEMPOOL_STAT_SIGNATURE "stat_signature" #endif diff --git a/src/libstat/backends/redis_backend.c b/src/libstat/backends/redis_backend.c index aa4c4a5e7..cb20c9656 100644 --- a/src/libstat/backends/redis_backend.c +++ b/src/libstat/backends/redis_backend.c @@ -18,6 +18,7 @@ #include "stat_internal.h" #include "upstream.h" #include "lua/lua_common.h" +#include "libserver/mempool_vars_internal.h" #ifdef WITH_HIREDIS #include "hiredis.h" @@ -359,7 +360,6 @@ rspamd_redis_tokens_to_query (struct rspamd_task *task, rspamd_fstring_t *out; rspamd_token_t *tok; gchar n0[512], n1[64]; - rspamd_cryptobox_hash_state_t hst; guint i, l0, l1, cmd_len, prefix_len; gint ret; @@ -383,11 +383,6 @@ rspamd_redis_tokens_to_query (struct rspamd_task *task, } out->len = 0; - - if (rt->ctx->enable_signatures) { - /* Generate hash for tokens */ - rspamd_cryptobox_hash_init (&hst, NULL, 0); - } } else { if (rt->ctx->new_schema) { @@ -431,12 +426,6 @@ rspamd_redis_tokens_to_query (struct rspamd_task *task, tok->values[idx]); } - if (rt->ctx->enable_signatures) { - /* Generate hash for tokens */ - rspamd_cryptobox_hash_update (&hst, (guchar *)&tok->data, - sizeof (tok->data)); - } - if (rt->ctx->new_schema) { /* * HINCRBY <0|1> @@ -608,26 +597,11 @@ rspamd_redis_tokens_to_query (struct rspamd_task *task, rspamd_printf_fstring (&out, "*1\r\n$4\r\nEXEC\r\n"); } - if (learn && rt->ctx->enable_signatures) { - guchar hout[rspamd_cryptobox_HASHBYTES]; - gchar *b32_hout; - - rspamd_cryptobox_hash_final (&hst, hout); - b32_hout = rspamd_encode_base32 (hout, sizeof (hout)); - /* - * We need to strip it to 32 characters providing ~160 bits of - * hash distribution - */ - b32_hout[32] = '\0'; - rspamd_mempool_set_variable (task->task_pool, "bayes_signature", - b32_hout, g_free); - } - return out; } static void -rspamd_redis_generate_learn_signature (struct rspamd_task *task, +rspamd_redis_store_stat_signature (struct rspamd_task *task, struct redis_stat_runtime *rt, GPtrArray *tokens, const gchar *prefix) @@ -638,7 +612,8 @@ rspamd_redis_generate_learn_signature (struct rspamd_task *task, rspamd_fstring_t *out; out = rspamd_fstring_sized_new (1024); - sig = rspamd_mempool_get_variable (task->task_pool, "bayes_signature"); + sig = rspamd_mempool_get_variable (task->task_pool, + RSPAMD_MEMPOOL_STAT_SIGNATURE); if (sig == NULL) { msg_err_task ("cannot get bayes signature"); @@ -1707,7 +1682,7 @@ rspamd_redis_learn_tokens (struct rspamd_task *task, GPtrArray *tokens, /* Add signature if needed */ if (rt->ctx->enable_signatures) { - rspamd_redis_generate_learn_signature (task, rt, tokens, + rspamd_redis_store_stat_signature (task, rt, tokens, "RSIG"); } diff --git a/src/libstat/stat_process.c b/src/libstat/stat_process.c index 6411580f6..9ca26cd83 100644 --- a/src/libstat/stat_process.c +++ b/src/libstat/stat_process.c @@ -298,11 +298,15 @@ rspamd_stat_process_tokenize (struct rspamd_stat_ctx *st_ctx, struct rspamd_task *task) { struct rspamd_mime_text_part *part; + rspamd_cryptobox_hash_state_t hst; rspamd_stat_token_t *tok; + rspamd_token_t *st_tok; GArray *words; gchar *sub = NULL; guint i, reserved_len = 0; gdouble *pdiff; + guchar hout[rspamd_cryptobox_HASHBYTES]; + gchar *b32_hout; for (i = 0; i < task->text_parts->len; i++) { part = g_ptr_array_index (task->text_parts, i); @@ -363,6 +367,24 @@ rspamd_stat_process_tokenize (struct rspamd_stat_ctx *st_ctx, } rspamd_stat_tokenize_parts_metadata (st_ctx, task); + + /* Produce signature */ + rspamd_cryptobox_hash_init (&hst, NULL, 0); + + PTR_ARRAY_FOREACH (task->tokens, i, st_tok) { + rspamd_cryptobox_hash_update (&hst, (guchar *)&st_tok->data, + sizeof (st_tok->data)); + } + + rspamd_cryptobox_hash_final (&hst, hout); + b32_hout = rspamd_encode_base32 (hout, sizeof (hout)); + /* + * We need to strip it to 32 characters providing ~160 bits of + * hash distribution + */ + b32_hout[32] = '\0'; + rspamd_mempool_set_variable (task->task_pool, RSPAMD_MEMPOOL_STAT_SIGNATURE, + b32_hout, g_free); } static void -- 2.39.5