source.dussan.org Git - rspamd.git/commitdiff
[Minor] Move stats signatures generation to tokenization stage
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 11 Jul 2017 07:14:42 +0000 (08:14 +0100)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 11 Jul 2017 07:16:37 +0000 (08:16 +0100)
src/libserver/mempool_vars_internal.h
src/libstat/backends/redis_backend.c
src/libstat/stat_process.c

index 94f7f8f02f6e5ee48caddbcad2e1f558aa6564f3..229d56d48dfa1d5f606162dbd14cc4e8f22293f0 100644 (file)
@@ -32,5 +32,6 @@
 #define RSPAMD_MEMPOOL_DKIM_SIGNATURE "dkim-signature"
 #define RSPAMD_MEMPOOL_DMARC_CHECKS "dmarc_checks"
 #define RSPAMD_MEMPOOL_DKIM_BH_CACHE "dkim_bh_cache"
+#define RSPAMD_MEMPOOL_STAT_SIGNATURE "stat_signature"
 
 #endif
index aa4c4a5e742b4bffcd73cad4488c57661a8f2cc4..cb20c9656498c54142fa35f1a6b7c50c635db2c3 100644 (file)
@@ -18,6 +18,7 @@
 #include "stat_internal.h"
 #include "upstream.h"
 #include "lua/lua_common.h"
+#include "libserver/mempool_vars_internal.h"
 
 #ifdef WITH_HIREDIS
 #include "hiredis.h"
@@ -359,7 +360,6 @@ rspamd_redis_tokens_to_query (struct rspamd_task *task,
        rspamd_fstring_t *out;
        rspamd_token_t *tok;
        gchar n0[512], n1[64];
-       rspamd_cryptobox_hash_state_t hst;
        guint i, l0, l1, cmd_len, prefix_len;
        gint ret;
 
@@ -383,11 +383,6 @@ rspamd_redis_tokens_to_query (struct rspamd_task *task,
                }
 
                out->len = 0;
-
-               if (rt->ctx->enable_signatures) {
-                       /* Generate hash for tokens */
-                       rspamd_cryptobox_hash_init (&hst, NULL, 0);
-               }
        }
        else {
                if (rt->ctx->new_schema) {
@@ -431,12 +426,6 @@ rspamd_redis_tokens_to_query (struct rspamd_task *task,
                                                tok->values[idx]);
                        }
 
-                       if (rt->ctx->enable_signatures) {
-                               /* Generate hash for tokens */
-                               rspamd_cryptobox_hash_update (&hst, (guchar *)&tok->data,
-                                               sizeof (tok->data));
-                       }
-
                        if (rt->ctx->new_schema) {
                                /*
                                 * HINCRBY <prefix_token> <0|1> <value>
@@ -608,26 +597,11 @@ rspamd_redis_tokens_to_query (struct rspamd_task *task,
                rspamd_printf_fstring (&out, "*1\r\n$4\r\nEXEC\r\n");
        }
 
-       if (learn && rt->ctx->enable_signatures) {
-               guchar hout[rspamd_cryptobox_HASHBYTES];
-               gchar *b32_hout;
-
-               rspamd_cryptobox_hash_final (&hst, hout);
-               b32_hout = rspamd_encode_base32 (hout, sizeof (hout));
-               /*
-                * We need to strip it to 32 characters providing ~160 bits of
-                * hash distribution
-                */
-               b32_hout[32] = '\0';
-               rspamd_mempool_set_variable (task->task_pool, "bayes_signature",
-                               b32_hout, g_free);
-       }
-
        return out;
 }
 
 static void
-rspamd_redis_generate_learn_signature (struct rspamd_task *task,
+rspamd_redis_store_stat_signature (struct rspamd_task *task,
                struct redis_stat_runtime *rt,
                GPtrArray *tokens,
                const gchar *prefix)
@@ -638,7 +612,8 @@ rspamd_redis_generate_learn_signature (struct rspamd_task *task,
        rspamd_fstring_t *out;
 
        out = rspamd_fstring_sized_new (1024);
-       sig = rspamd_mempool_get_variable (task->task_pool, "bayes_signature");
+       sig = rspamd_mempool_get_variable (task->task_pool,
+                       RSPAMD_MEMPOOL_STAT_SIGNATURE);
 
        if (sig == NULL) {
                msg_err_task ("cannot get bayes signature");
@@ -1707,7 +1682,7 @@ rspamd_redis_learn_tokens (struct rspamd_task *task, GPtrArray *tokens,
 
                /* Add signature if needed */
                if (rt->ctx->enable_signatures) {
-                       rspamd_redis_generate_learn_signature (task, rt, tokens,
+                       rspamd_redis_store_stat_signature (task, rt, tokens,
                                        "RSIG");
                }
 
index 6411580f6c08de34c23949506384e7dc41c078c1..9ca26cd83fb47378418ea37af3b51872ef625e4a 100644 (file)
@@ -298,11 +298,15 @@ rspamd_stat_process_tokenize (struct rspamd_stat_ctx *st_ctx,
                struct rspamd_task *task)
 {
        struct rspamd_mime_text_part *part;
+       rspamd_cryptobox_hash_state_t hst;
        rspamd_stat_token_t *tok;
+       rspamd_token_t *st_tok;
        GArray *words;
        gchar *sub = NULL;
        guint i, reserved_len = 0;
        gdouble *pdiff;
+       guchar hout[rspamd_cryptobox_HASHBYTES];
+       gchar *b32_hout;
 
        for (i = 0; i < task->text_parts->len; i++) {
                part = g_ptr_array_index (task->text_parts, i);
@@ -363,6 +367,24 @@ rspamd_stat_process_tokenize (struct rspamd_stat_ctx *st_ctx,
        }
 
        rspamd_stat_tokenize_parts_metadata (st_ctx, task);
+
+       /* Produce signature */
+       rspamd_cryptobox_hash_init (&hst, NULL, 0);
+
+       PTR_ARRAY_FOREACH (task->tokens, i, st_tok) {
+               rspamd_cryptobox_hash_update (&hst, (guchar *)&st_tok->data,
+                               sizeof (st_tok->data));
+       }
+
+       rspamd_cryptobox_hash_final (&hst, hout);
+       b32_hout = rspamd_encode_base32 (hout, sizeof (hout));
+       /*
+        * We need to strip it to 32 characters providing ~160 bits of
+        * hash distribution
+        */
+       b32_hout[32] = '\0';
+       rspamd_mempool_set_variable (task->task_pool, RSPAMD_MEMPOOL_STAT_SIGNATURE,
+                       b32_hout, g_free);
 }
 
 static void