source.dussan.org Git - rspamd.git/commitdiff
Fix tokenizers and mmapped file.
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Mon, 27 Jul 2015 13:48:46 +0000 (14:48 +0100)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Mon, 27 Jul 2015 13:48:46 +0000 (14:48 +0100)
src/libstat/backends/backends.h
src/libstat/backends/mmaped_file.c
src/libstat/stat_internal.h
src/libstat/stat_process.c
src/libstat/tokenizers/osb.c
src/libstat/tokenizers/tokenizers.h

index c25ab648e54c49f4fac96eb7ba10c1492dc23a5e..4ac59655ccfcdd9e2e21ed5bc0c476cb2ef3ef77 100644 (file)
@@ -99,7 +99,7 @@ struct rspamd_stat_backend {
                                gpointer ctx); \
                ucl_object_t * rspamd_##name##_get_stat (gpointer runtime, \
                                gpointer ctx); \
-               void rspamd_##name##_load_tokenizer_config (gpointer runtime, \
+               gpointer rspamd_##name##_load_tokenizer_config (gpointer runtime, \
                                gsize *len); \
                void rspamd_##name##_close (gpointer ctx)
 
index fb3c4fd43cbc968900c0e9a51e58b7c9271cb0be..bbd106187eebebdcf3913468a768e6a14e835425 100644 (file)
@@ -120,7 +120,8 @@ rspamd_mmaped_file_t * rspamd_mmaped_file_is_open (
 rspamd_mmaped_file_t * rspamd_mmaped_file_open (rspamd_mmaped_file_ctx * pool,
                const gchar *filename, size_t size, struct rspamd_statfile_config *stcf);
 gint rspamd_mmaped_file_create (rspamd_mmaped_file_ctx * pool,
-               const gchar *filename, size_t size, struct rspamd_statfile_config *stcf);
+               const gchar *filename, size_t size, struct rspamd_statfile_config *stcf,
+               rspamd_mempool_t *mempool);
 
 double
 rspamd_mmaped_file_get_block (rspamd_mmaped_file_ctx * pool,
@@ -452,7 +453,7 @@ rspamd_mmaped_file_reindex (rspamd_mmaped_file_ctx * pool,
        }
 
        /* Now create new file with required size */
-       if (rspamd_mmaped_file_create (pool, filename, size, stcf) != 0) {
+       if (rspamd_mmaped_file_create (pool, filename, size, stcf, pool->pool) != 0) {
                msg_err ("cannot create new file");
                g_free (backup);
                return NULL;
@@ -543,8 +544,6 @@ rspamd_mmaped_file_open (rspamd_mmaped_file_ctx * pool,
 {
        struct stat st;
        rspamd_mmaped_file_t *new_file;
-       struct rspamd_stat_tokenizer *tokenizer;
-       struct stat_file_header *header;
 
        if ((new_file = rspamd_mmaped_file_is_open (pool, stcf)) != NULL) {
                return new_file;
@@ -615,22 +614,7 @@ rspamd_mmaped_file_open (rspamd_mmaped_file_ctx * pool,
 
        rspamd_mmaped_file_preload (new_file);
 
-       /* Check tokenizer compatibility */
-       header = new_file->map;
        g_assert (stcf->clcf != NULL);
-       g_assert (stcf->clcf->tokenizer != NULL);
-       tokenizer = rspamd_stat_get_tokenizer (stcf->clcf->tokenizer->name);
-       g_assert (tokenizer != NULL);
-
-       if (!tokenizer->compatible_config (stcf->clcf->tokenizer, header->unused,
-                       header->tokenizer_conf_len)) {
-               msg_err ("mmapped statfile %s is not compatible with the tokenizer "
-                               "defined", new_file->filename);
-               munmap (new_file->map, st.st_size);
-               g_slice_free1 (sizeof (*new_file), new_file);
-
-               return NULL;
-       }
 
        g_hash_table_insert (pool->files, stcf, new_file);
 
@@ -664,7 +648,7 @@ rspamd_mmaped_file_close_file (rspamd_mmaped_file_ctx * pool,
 
 gint
 rspamd_mmaped_file_create (rspamd_mmaped_file_ctx * pool, const gchar *filename,
-               size_t size, struct rspamd_statfile_config *stcf)
+               size_t size, struct rspamd_statfile_config *stcf, rspamd_mempool_t *mempool)
 {
        struct stat_file_header header = {
                .magic = {'r', 's', 'd'},
@@ -722,7 +706,7 @@ rspamd_mmaped_file_create (rspamd_mmaped_file_ctx * pool, const gchar *filename,
        g_assert (stcf->clcf->tokenizer != NULL);
        tokenizer = rspamd_stat_get_tokenizer (stcf->clcf->tokenizer->name);
        g_assert (tokenizer != NULL);
-       tok_conf = tokenizer->get_config (stcf->clcf->tokenizer, &tok_conf_len);
+       tok_conf = tokenizer->get_config (mempool, stcf->clcf->tokenizer, &tok_conf_len);
        header.tokenizer_conf_len = tok_conf_len;
        g_assert (tok_conf_len < sizeof (header.unused) - sizeof (guint64));
        memcpy (header.unused, tok_conf, tok_conf_len);
@@ -819,20 +803,25 @@ rspamd_mmaped_file_init (struct rspamd_stat_ctx *ctx, struct rspamd_config *cfg)
                clf = cur->data;
 
                curst = clf->statfiles;
-               while (curst) {
-                       stf = curst->data;
 
+               if (clf->backend == NULL) {
                        /*
                         * By default, all statfiles are treated as mmaped files
                         */
-                       if (stf->backend == NULL ||
-                                       strcmp (stf->backend, MMAPED_BACKEND_TYPE) == 0) {
+                       clf->backend = MMAPED_BACKEND_TYPE;
+               }
+
+               if (strcmp (clf->backend, MMAPED_BACKEND_TYPE) == 0) {
+                       while (curst) {
+                               stf = curst->data;
                                /*
                                 * Check configuration sanity
                                 */
                                filenameo = ucl_object_find_key (stf->opts, "filename");
+
                                if (filenameo == NULL || ucl_object_type (filenameo) != UCL_STRING) {
                                        filenameo = ucl_object_find_key (stf->opts, "path");
+
                                        if (filenameo == NULL || ucl_object_type (filenameo) != UCL_STRING) {
                                                msg_err ("statfile %s has no filename defined", stf->symbol);
                                                curst = curst->next;
@@ -843,6 +832,7 @@ rspamd_mmaped_file_init (struct rspamd_stat_ctx *ctx, struct rspamd_config *cfg)
                                filename = ucl_object_tostring (filenameo);
 
                                sizeo = ucl_object_find_key (stf->opts, "size");
+
                                if (sizeo == NULL || ucl_object_type (sizeo) != UCL_INT) {
                                        msg_err ("statfile %s has no size defined", stf->symbol);
                                        curst = curst->next;
@@ -854,9 +844,9 @@ rspamd_mmaped_file_init (struct rspamd_stat_ctx *ctx, struct rspamd_config *cfg)
                                rspamd_mmaped_file_open (new, filename, size, stf);
 
                                ctx->statfiles ++;
-                       }
 
-                       curst = curst->next;
+                               curst = curst->next;
+                       }
                }
 
                cur = g_list_next (cur);
@@ -927,7 +917,7 @@ rspamd_mmaped_file_runtime (struct rspamd_task *task,
                size = ucl_object_toint (sizeo);
 
                if (learn) {
-                       rspamd_mmaped_file_create (ctx, filename, size, stcf);
+                       rspamd_mmaped_file_create (ctx, filename, size, stcf, task->task_pool);
                }
 
                mf = rspamd_mmaped_file_open (ctx, filename, size, stcf);
@@ -1095,3 +1085,20 @@ rspamd_mmaped_file_finalize_process (struct rspamd_task *task, gpointer runtime,
                gpointer ctx)
 {
 }
+
+gpointer
+rspamd_mmaped_file_load_tokenizer_config (gpointer runtime,
+               gsize *len)
+{ /* Return the tokenizer config bytes kept in the statfile header; nothing is copied */
+       rspamd_mmaped_file_t *mf = runtime; /* runtime is used as the mmapped file handle */
+       struct stat_file_header *header;
+
+       g_assert (mf != NULL);
+       header = mf->map; /* the mapping is read as a struct stat_file_header */
+
+       if (len) { /* len is optional; when given it receives the stored config length */
+               *len = header->tokenizer_conf_len;
+       }
+
+       return header->unused; /* field that rspamd_mmaped_file_create() fills with the tokenizer config */
+}
index 64790cddc00287fa6e117e757677a38966d3ae83..e70fc298ab9bd959e7105f4f25b694bd2ad9a54e 100644 (file)
@@ -39,6 +39,7 @@ struct rspamd_tokenizer_runtime {
        GTree *tokens;
        const gchar *name;
        struct rspamd_stat_tokenizer *tokenizer;
+       struct rspamd_tokenizer_config *tkcf;
        gpointer config;
        gsize conf_len;
 };
index d6350e0e0c70fde23efd7abab6189bcb89e2e6b7..3ec579049aef9747689a31a0aba8176e98744baa 100644 (file)
@@ -162,12 +162,13 @@ rspamd_stat_get_tokenizer_runtime (struct rspamd_tokenizer_config *cf,
                return NULL;
        }
 
-       if (!tok->tokenizer->load_config (tok, conf, conf_len)) {
+       if (!tok->tokenizer->load_config (task->task_pool, tok, conf, conf_len)) {
                return NULL;
        }
 
        tok->config = conf;
        tok->conf_len = conf_len;
+       tok->tkcf = cf;
        tok->tokens = g_tree_new (token_node_compare_func);
        rspamd_mempool_add_destructor (task->task_pool,
                        (rspamd_mempool_destruct_t)g_tree_destroy, tok->tokens);
index 5916adb858e2b5bc9fc6015f46ec16a0a31cff3e..40dec0d820d25ad04be5607ba045e6b1ebca74c7 100644 (file)
@@ -154,20 +154,29 @@ rspamd_tokenizer_osb_config_from_ucl (rspamd_mempool_t * pool,
 }
 
 gpointer
-rspamd_tokenizer_osb_get_config (struct rspamd_tokenizer_config *cf,
+rspamd_tokenizer_osb_get_config (rspamd_mempool_t *pool,
+               struct rspamd_tokenizer_config *cf,
                gsize *len)
 {
        struct rspamd_osb_tokenizer_config *osb_cf, *def;
 
        if (cf != NULL && cf->opts != NULL) {
-               osb_cf = rspamd_tokenizer_osb_config_from_ucl (NULL, cf->opts);
+               osb_cf = rspamd_tokenizer_osb_config_from_ucl (pool, cf->opts);
        }
        else {
                def = rspamd_tokenizer_osb_default_config ();
-               osb_cf = g_slice_alloc (sizeof (*osb_cf));
+               osb_cf = rspamd_mempool_alloc (pool, sizeof (*osb_cf));
                memcpy (osb_cf, def, sizeof (*osb_cf));
+               /* Do not write sipkey to statfile */
+       }
+
+       if (osb_cf->ht == RSPAMD_OSB_HASH_SIPHASH) {
+               msg_info ("siphash key is not stored into statfiles, so you'd need to "
+                               "keep it inside the configuration");
        }
 
+       memset (osb_cf->sk, 0, sizeof (osb_cf->sk));
+
        if (len != NULL) {
                *len = sizeof (*osb_cf);
        }
@@ -176,13 +185,14 @@ rspamd_tokenizer_osb_get_config (struct rspamd_tokenizer_config *cf,
 }
 
 gboolean
-rspamd_tokenizer_osb_compatible_config (struct rspamd_tokenizer_config *cf,
+rspamd_tokenizer_osb_compatible_config (struct rspamd_tokenizer_runtime *rt,
                        gpointer ptr, gsize len)
 {
        struct rspamd_osb_tokenizer_config *osb_cf, *test_cf;
        gboolean ret = FALSE;
 
-       test_cf = rspamd_tokenizer_osb_get_config (cf, NULL);
+       test_cf = rt->config;
+       g_assert (test_cf != NULL);
 
        if (len == sizeof (*osb_cf)) {
                osb_cf = ptr;
@@ -193,7 +203,8 @@ rspamd_tokenizer_osb_compatible_config (struct rspamd_tokenizer_config *cf,
                else {
                        if (osb_cf->version == DEFAULT_OSB_VERSION) {
                                /* We can compare them directly now */
-                               ret = memcmp (osb_cf, test_cf, sizeof (*osb_cf)) == 0;
+                               ret = (memcmp (osb_cf, test_cf, sizeof (*osb_cf)
+                                               - sizeof (osb_cf->sk))) == 0;
                        }
                }
        }
@@ -208,10 +219,9 @@ rspamd_tokenizer_osb_compatible_config (struct rspamd_tokenizer_config *cf,
 }
 
 gint
-rspamd_tokenizer_osb (struct rspamd_tokenizer_config *cf,
+rspamd_tokenizer_osb (struct rspamd_tokenizer_runtime *rt,
        rspamd_mempool_t * pool,
        GArray * input,
-       GTree * tree,
        gboolean is_utf,
        const gchar *prefix)
 {
@@ -221,6 +231,7 @@ rspamd_tokenizer_osb (struct rspamd_tokenizer_config *cf,
        guint64 *hashpipe, cur, seed;
        guint32 h1, h2;
        guint processed = 0, i, w, window_size;
+       GTree *tree = rt->tokens;
 
        g_assert (tree != NULL);
 
@@ -228,13 +239,7 @@ rspamd_tokenizer_osb (struct rspamd_tokenizer_config *cf,
                return FALSE;
        }
 
-       if (cf != NULL && cf->opts != NULL) {
-               osb_cf = rspamd_tokenizer_osb_config_from_ucl (pool, cf->opts);
-       }
-       else {
-               osb_cf = rspamd_tokenizer_osb_default_config ();
-       }
-
+       osb_cf = rt->config;
        window_size = osb_cf->window_size;
 
        if (prefix) {
@@ -334,6 +339,32 @@ rspamd_tokenizer_osb (struct rspamd_tokenizer_config *cf,
        return TRUE;
 }
 
-/*
- * vi:ts=4
- */
+
+gboolean
+rspamd_tokenizer_osb_load_config (rspamd_mempool_t *pool,
+               struct rspamd_tokenizer_runtime *rt,
+               gpointer ptr, gsize len)
+{ /* Attach an OSB config to rt: built from rt->tkcf->opts when ptr is NULL, otherwise ptr itself */
+       struct rspamd_osb_tokenizer_config *osb_cf;
+
+       if (ptr == NULL) { /* no stored config supplied by the caller */
+               osb_cf = rspamd_tokenizer_osb_config_from_ucl (pool, rt->tkcf->opts); /* NOTE(review): reads rt->tkcf, so it must be set before this call - confirm caller ordering */
+       }
+       else {
+               g_assert (len == sizeof (*osb_cf)); /* NOTE(review): a size mismatch trips the assertion rather than returning FALSE - confirm this is intended */
+               osb_cf = ptr; /* used in place, not copied */
+       }
+
+       rt->config = osb_cf;
+       rt->conf_len = sizeof (*osb_cf);
+
+       return TRUE; /* no failure path: every branch above ends here */
+}
+
+gboolean
+rspamd_tokenizer_osb_is_compat (struct rspamd_tokenizer_runtime *rt)
+{ /* TRUE when the ht field of the runtime's OSB config equals RSPAMD_OSB_HASH_COMPAT */
+       struct rspamd_osb_tokenizer_config *osb_cf = rt->config; /* rt->config is dereferenced unchecked; rspamd_tokenizer_osb_load_config() assigns it */
+
+       return (osb_cf->ht == RSPAMD_OSB_HASH_COMPAT);
+}
index ed7ba4bccf385281835396debcc0f5214010316d..050f6d7b1bbc006b44e20c94bf472a2b136acfd3 100644 (file)
@@ -14,10 +14,12 @@ struct rspamd_tokenizer_runtime;
 /* Common tokenizer structure */
 struct rspamd_stat_tokenizer {
        gchar *name;
-       gpointer (*get_config) (struct rspamd_tokenizer_config *cf, gsize *len);
+       gpointer (*get_config) (rspamd_mempool_t *pool,
+                       struct rspamd_tokenizer_config *cf, gsize *len);
        gboolean (*compatible_config) (struct rspamd_tokenizer_runtime *rt,
                        gpointer ptr, gsize len);
-       gboolean (*load_config) (struct rspamd_tokenizer_runtime *rt,
+       gboolean (*load_config) (rspamd_mempool_t *pool,
+                       struct rspamd_tokenizer_runtime *rt,
                        gpointer ptr, gsize len);
        gboolean (*is_compat) (struct rspamd_tokenizer_runtime *rt);
        gint (*tokenize_func)(struct rspamd_tokenizer_runtime *rt,
@@ -43,7 +45,8 @@ gint rspamd_tokenizer_osb (struct rspamd_tokenizer_runtime *rt,
        gboolean is_utf,
        const gchar *prefix);
 
-gpointer rspamd_tokenizer_osb_get_config (struct rspamd_tokenizer_config *cf,
+gpointer rspamd_tokenizer_osb_get_config (rspamd_mempool_t *pool,
+               struct rspamd_tokenizer_config *cf,
                gsize *len);
 
 gboolean
@@ -51,7 +54,8 @@ rspamd_tokenizer_osb_compatible_config (struct rspamd_tokenizer_runtime *rt,
                        gpointer ptr, gsize len);
 
 gboolean
-rspamd_tokenizer_osb_load_config (struct rspamd_tokenizer_runtime *rt,
+rspamd_tokenizer_osb_load_config (rspamd_mempool_t *pool,
+               struct rspamd_tokenizer_runtime *rt,
                gpointer ptr, gsize len);
 
 gboolean