summary | refs | log | tree | commit | diff | stats
path: root/src/libstat
diff options
context:
space:
mode:
author: Vsevolod Stakhov <vsevolod@highsecure.ru> 2017-07-11 08:14:42 +0100
committer: Vsevolod Stakhov <vsevolod@highsecure.ru> 2017-07-11 08:16:37 +0100
commit: 54f90a7cd9bcfefdec22748c33e2f85000d5a357 (patch)
tree: a483dbd54111b8568ad4511cef7a3cf6381e2db9 /src/libstat
parent: cfc78aeb4a7fe473a27e025facd29dfde2bf5d6d (diff)
download: rspamd-54f90a7cd9bcfefdec22748c33e2f85000d5a357.tar.gz
download: rspamd-54f90a7cd9bcfefdec22748c33e2f85000d5a357.zip
[Minor] Move stats signatures generation to tokenization stage
Diffstat (limited to 'src/libstat')
-rw-r--r--  src/libstat/backends/redis_backend.c  35
-rw-r--r--  src/libstat/stat_process.c  22
2 files changed, 27 insertions, 30 deletions
diff --git a/src/libstat/backends/redis_backend.c b/src/libstat/backends/redis_backend.c
index aa4c4a5e7..cb20c9656 100644
--- a/src/libstat/backends/redis_backend.c
+++ b/src/libstat/backends/redis_backend.c
@@ -18,6 +18,7 @@
#include "stat_internal.h"
#include "upstream.h"
#include "lua/lua_common.h"
+#include "libserver/mempool_vars_internal.h"
#ifdef WITH_HIREDIS
#include "hiredis.h"
@@ -359,7 +360,6 @@ rspamd_redis_tokens_to_query (struct rspamd_task *task,
rspamd_fstring_t *out;
rspamd_token_t *tok;
gchar n0[512], n1[64];
- rspamd_cryptobox_hash_state_t hst;
guint i, l0, l1, cmd_len, prefix_len;
gint ret;
@@ -383,11 +383,6 @@ rspamd_redis_tokens_to_query (struct rspamd_task *task,
}
out->len = 0;
-
- if (rt->ctx->enable_signatures) {
- /* Generate hash for tokens */
- rspamd_cryptobox_hash_init (&hst, NULL, 0);
- }
}
else {
if (rt->ctx->new_schema) {
@@ -431,12 +426,6 @@ rspamd_redis_tokens_to_query (struct rspamd_task *task,
tok->values[idx]);
}
- if (rt->ctx->enable_signatures) {
- /* Generate hash for tokens */
- rspamd_cryptobox_hash_update (&hst, (guchar *)&tok->data,
- sizeof (tok->data));
- }
-
if (rt->ctx->new_schema) {
/*
* HINCRBY <prefix_token> <0|1> <value>
@@ -608,26 +597,11 @@ rspamd_redis_tokens_to_query (struct rspamd_task *task,
rspamd_printf_fstring (&out, "*1\r\n$4\r\nEXEC\r\n");
}
- if (learn && rt->ctx->enable_signatures) {
- guchar hout[rspamd_cryptobox_HASHBYTES];
- gchar *b32_hout;
-
- rspamd_cryptobox_hash_final (&hst, hout);
- b32_hout = rspamd_encode_base32 (hout, sizeof (hout));
- /*
- * We need to strip it to 32 characters providing ~160 bits of
- * hash distribution
- */
- b32_hout[32] = '\0';
- rspamd_mempool_set_variable (task->task_pool, "bayes_signature",
- b32_hout, g_free);
- }
-
return out;
}
static void
-rspamd_redis_generate_learn_signature (struct rspamd_task *task,
+rspamd_redis_store_stat_signature (struct rspamd_task *task,
struct redis_stat_runtime *rt,
GPtrArray *tokens,
const gchar *prefix)
@@ -638,7 +612,8 @@ rspamd_redis_generate_learn_signature (struct rspamd_task *task,
rspamd_fstring_t *out;
out = rspamd_fstring_sized_new (1024);
- sig = rspamd_mempool_get_variable (task->task_pool, "bayes_signature");
+ sig = rspamd_mempool_get_variable (task->task_pool,
+ RSPAMD_MEMPOOL_STAT_SIGNATURE);
if (sig == NULL) {
msg_err_task ("cannot get bayes signature");
@@ -1707,7 +1682,7 @@ rspamd_redis_learn_tokens (struct rspamd_task *task, GPtrArray *tokens,
/* Add signature if needed */
if (rt->ctx->enable_signatures) {
- rspamd_redis_generate_learn_signature (task, rt, tokens,
+ rspamd_redis_store_stat_signature (task, rt, tokens,
"RSIG");
}
diff --git a/src/libstat/stat_process.c b/src/libstat/stat_process.c
index 6411580f6..9ca26cd83 100644
--- a/src/libstat/stat_process.c
+++ b/src/libstat/stat_process.c
@@ -298,11 +298,15 @@ rspamd_stat_process_tokenize (struct rspamd_stat_ctx *st_ctx,
struct rspamd_task *task)
{
struct rspamd_mime_text_part *part;
+ rspamd_cryptobox_hash_state_t hst;
rspamd_stat_token_t *tok;
+ rspamd_token_t *st_tok;
GArray *words;
gchar *sub = NULL;
guint i, reserved_len = 0;
gdouble *pdiff;
+ guchar hout[rspamd_cryptobox_HASHBYTES];
+ gchar *b32_hout;
for (i = 0; i < task->text_parts->len; i++) {
part = g_ptr_array_index (task->text_parts, i);
@@ -363,6 +367,24 @@ rspamd_stat_process_tokenize (struct rspamd_stat_ctx *st_ctx,
}
rspamd_stat_tokenize_parts_metadata (st_ctx, task);
+
+ /* Produce signature */
+ rspamd_cryptobox_hash_init (&hst, NULL, 0);
+
+ PTR_ARRAY_FOREACH (task->tokens, i, st_tok) {
+ rspamd_cryptobox_hash_update (&hst, (guchar *)&st_tok->data,
+ sizeof (st_tok->data));
+ }
+
+ rspamd_cryptobox_hash_final (&hst, hout);
+ b32_hout = rspamd_encode_base32 (hout, sizeof (hout));
+ /*
+ * We need to strip it to 32 characters providing ~160 bits of
+ * hash distribution
+ */
+ b32_hout[32] = '\0';
+ rspamd_mempool_set_variable (task->task_pool, RSPAMD_MEMPOOL_STAT_SIGNATURE,
+ b32_hout, g_free);
}
static void