]> source.dussan.org Git - rspamd.git/commitdiff
Rework mmapped file backend
authorVsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 5 Jan 2016 16:37:40 +0000 (16:37 +0000)
committerVsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 5 Jan 2016 16:37:40 +0000 (16:37 +0000)
src/libstat/backends/mmaped_file.c

index 2daf6e6cb4ea42f8ab0780507e4768d872c73ed7..651e39f38c1225491f000678f19041fd32b353d4 100644 (file)
 
 /* Section types */
 #define STATFILE_SECTION_COMMON 1
-#define STATFILE_SECTION_HEADERS 2
-#define STATFILE_SECTION_URLS 3
-#define STATFILE_SECTION_REGEXP 4
-
-#define DEFAULT_STATFILE_INVALIDATE_TIME 30
-#define DEFAULT_STATFILE_INVALIDATE_JITTER 30
-
-#define MMAPED_BACKEND_TYPE "mmap"
 
 /**
  * Common statfile header
@@ -90,7 +82,8 @@ typedef struct {
 #else
        gchar filename[MAXPATHLEN];             /**< name of file                                               */
 #endif
-       gint fd;                                    /**< descriptor                                                     */
+       rspamd_mempool_t *pool;
+       gint fd;                                /**< descriptor                                                 */
        void *map;                              /**< mmaped area                                                */
        off_t seek_pos;                         /**< current seek position                              */
        struct stat_file_section cur_section;   /**< current section                                    */
@@ -98,34 +91,23 @@ typedef struct {
        struct rspamd_statfile_config *cf;
 } rspamd_mmaped_file_t;
 
-/**
- * Statfiles pool
- */
-typedef struct  {
-       GHashTable *files;                     /**< hash table of opened files indexed by name  */
-       rspamd_mempool_t *pool;                 /**< memory pool object                                 */
-       rspamd_mempool_mutex_t *lock;               /**< mutex                                                          */
-       gboolean mlock_ok;                      /**< whether it is possible to use mlock (2) to avoid statfiles unloading */
-} rspamd_mmaped_file_ctx;
 
 #define RSPAMD_STATFILE_VERSION {'1', '2'}
 #define BACKUP_SUFFIX ".old"
 
 static void rspamd_mmaped_file_set_block_common (rspamd_mempool_t *pool,
-          rspamd_mmaped_file_ctx *statfiles_pool, rspamd_mmaped_file_t *file,
+          rspamd_mmaped_file_t *file,
           guint32 h1, guint32 h2, double value);
 
-rspamd_mmaped_file_t * rspamd_mmaped_file_is_open (
-               rspamd_mmaped_file_ctx * pool, struct rspamd_statfile_config *stcf);
-rspamd_mmaped_file_t * rspamd_mmaped_file_open (rspamd_mmaped_file_ctx *statfiles_pool,
-               const gchar *filename, size_t size, struct rspamd_statfile_config *stcf);
-gint rspamd_mmaped_file_create (rspamd_mmaped_file_ctx *statfile_pool,
-               const gchar *filename, size_t size, struct rspamd_statfile_config *stcf,
+rspamd_mmaped_file_t * rspamd_mmaped_file_open (rspamd_mempool_t *pool,
+               const gchar *filename, size_t size,
+               struct rspamd_statfile_config *stcf);
+gint rspamd_mmaped_file_create (const gchar *filename, size_t size,
+               struct rspamd_statfile_config *stcf,
                rspamd_mempool_t *pool);
 
 double
-rspamd_mmaped_file_get_block (rspamd_mmaped_file_ctx * pool,
-       rspamd_mmaped_file_t * file,
+rspamd_mmaped_file_get_block (rspamd_mmaped_file_t * file,
        guint32 h1,
        guint32 h2)
 {
@@ -159,8 +141,8 @@ rspamd_mmaped_file_get_block (rspamd_mmaped_file_ctx * pool,
 
 static void
 rspamd_mmaped_file_set_block_common (rspamd_mempool_t *pool,
-          rspamd_mmaped_file_ctx *statfiles_pool, rspamd_mmaped_file_t *file,
-          guint32 h1, guint32 h2, double value)
+               rspamd_mmaped_file_t *file,
+               guint32 h1, guint32 h2, double value)
 {
        struct stat_file_block *block, *to_expire = NULL;
        struct stat_file_header *header;
@@ -240,24 +222,14 @@ rspamd_mmaped_file_set_block_common (rspamd_mempool_t *pool,
 
 void
 rspamd_mmaped_file_set_block (rspamd_mempool_t *pool,
-               rspamd_mmaped_file_ctx * statfile_pool,
                rspamd_mmaped_file_t * file,
                guint32 h1,
                guint32 h2,
                double value)
 {
-       rspamd_mmaped_file_set_block_common (pool, statfile_pool, file, h1, h2, value);
-}
-
-rspamd_mmaped_file_t *
-rspamd_mmaped_file_is_open (rspamd_mmaped_file_ctx * pool,
-               struct rspamd_statfile_config *stcf)
-{
-       return g_hash_table_lookup (pool->files, stcf);
+       rspamd_mmaped_file_set_block_common (pool, file, h1, h2, value);
 }
 
-
-
 gboolean
 rspamd_mmaped_file_set_revision (rspamd_mmaped_file_t *file, guint64 rev, time_t time)
 {
@@ -421,11 +393,11 @@ rspamd_mmaped_file_check (rspamd_mempool_t *pool, rspamd_mmaped_file_t * file)
 
 
 static rspamd_mmaped_file_t *
-rspamd_mmaped_file_reindex (rspamd_mmaped_file_ctx *statfiles_pool,
-       const gchar *filename,
-       size_t old_size,
-       size_t size,
-       struct rspamd_statfile_config *stcf)
+rspamd_mmaped_file_reindex (rspamd_mempool_t *pool,
+               const gchar *filename,
+               size_t old_size,
+               size_t size,
+               struct rspamd_statfile_config *stcf)
 {
        gchar *backup, *lock;
        gint fd, lock_fd;
@@ -433,7 +405,6 @@ rspamd_mmaped_file_reindex (rspamd_mmaped_file_ctx *statfiles_pool,
        u_char *map, *pos;
        struct stat_file_block *block;
        struct stat_file_header *header, *nh;
-       rspamd_mempool_t *pool = statfiles_pool->pool;
 
        if (size <
                sizeof (struct stat_file_header) + sizeof (struct stat_file_section) +
@@ -454,11 +425,11 @@ rspamd_mmaped_file_reindex (rspamd_mmaped_file_ctx *statfiles_pool,
                        if (!rspamd_file_lock (lock_fd, FALSE)) {
                                g_free (lock);
 
-                               return rspamd_mmaped_file_open (statfiles_pool, filename, size, stcf);
+                               return rspamd_mmaped_file_open (pool, filename, size, stcf);
                        }
                }
                else {
-                       return rspamd_mmaped_file_open (statfiles_pool, filename, size, stcf);
+                       return rspamd_mmaped_file_open (pool, filename, size, stcf);
                }
        }
 
@@ -467,7 +438,7 @@ rspamd_mmaped_file_reindex (rspamd_mmaped_file_ctx *statfiles_pool,
                unlink (lock);
                g_free (lock);
 
-               return rspamd_mmaped_file_open (statfiles_pool, filename, size, stcf);
+               return rspamd_mmaped_file_open (pool, filename, size, stcf);
        }
 
 
@@ -485,7 +456,7 @@ rspamd_mmaped_file_reindex (rspamd_mmaped_file_ctx *statfiles_pool,
        }
 
        /* Now create new file with required size */
-       if (rspamd_mmaped_file_create (statfiles_pool, filename, size, stcf, statfiles_pool->pool) != 0) {
+       if (rspamd_mmaped_file_create (filename, size, stcf, pool) != 0) {
                msg_err_pool ("cannot create new file");
                g_free (backup);
                unlink (lock);
@@ -497,7 +468,7 @@ rspamd_mmaped_file_reindex (rspamd_mmaped_file_ctx *statfiles_pool,
        }
        /* Now open new file and start copying */
        fd = open (backup, O_RDONLY);
-       new = rspamd_mmaped_file_open (statfiles_pool, filename, size, stcf);
+       new = rspamd_mmaped_file_open (pool, filename, size, stcf);
 
        if (fd == -1 || new == NULL) {
                if (fd != -1) {
@@ -532,8 +503,8 @@ rspamd_mmaped_file_reindex (rspamd_mmaped_file_ctx *statfiles_pool,
        while (old_size - (pos - map) >= sizeof (struct stat_file_block)) {
                block = (struct stat_file_block *)pos;
                if (block->hash1 != 0 && block->value != 0) {
-                       rspamd_mmaped_file_set_block_common (statfiles_pool->pool,
-                                       statfiles_pool, new, block->hash1,
+                       rspamd_mmaped_file_set_block_common (pool,
+                                       new, block->hash1,
                                        block->hash2, block->value);
                }
                pos += sizeof (block);
@@ -593,17 +564,13 @@ rspamd_mmaped_file_preload (rspamd_mmaped_file_t *file)
 }
 
 rspamd_mmaped_file_t *
-rspamd_mmaped_file_open (rspamd_mmaped_file_ctx *statfiles_pool,
+rspamd_mmaped_file_open (rspamd_mempool_t *pool,
                const gchar *filename, size_t size,
                struct rspamd_statfile_config *stcf)
 {
        struct stat st;
        rspamd_mmaped_file_t *new_file;
-       rspamd_mempool_t *pool = statfiles_pool->pool;
 
-       if ((new_file = rspamd_mmaped_file_is_open (statfiles_pool, stcf)) != NULL) {
-               return new_file;
-       }
 
        if (stat (filename, &st) == -1) {
                msg_info_pool ("cannot stat file %s, error %s, %d", filename, strerror (
@@ -615,7 +582,7 @@ rspamd_mmaped_file_open (rspamd_mmaped_file_ctx *statfiles_pool,
                && size > sizeof (struct stat_file)) {
                msg_warn_pool ("need to reindex statfile old size: %Hz, new size: %Hz",
                        (size_t)st.st_size, size);
-               return rspamd_mmaped_file_reindex (statfiles_pool, filename, st.st_size, size, stcf);
+               return rspamd_mmaped_file_reindex (pool, filename, st.st_size, size, stcf);
        }
        else if (size < sizeof (struct stat_file)) {
                msg_err_pool ("requested to shrink statfile to %Hz but it is too small",
@@ -648,17 +615,10 @@ rspamd_mmaped_file_open (rspamd_mmaped_file_ctx *statfiles_pool,
        rspamd_strlcpy (new_file->filename, filename, sizeof (new_file->filename));
        new_file->len = st.st_size;
        /* Try to lock pages in RAM */
-       if (statfiles_pool->mlock_ok) {
-               if (mlock (new_file->map, new_file->len) == -1) {
-                       msg_warn_pool (
-                               "mlock of statfile failed, maybe you need to increase RLIMIT_MEMLOCK limit for a process: %s",
-                               strerror (errno));
-                       statfiles_pool->mlock_ok = FALSE;
-               }
-       }
+
        /* Acquire lock for this operation */
        rspamd_file_lock (new_file->fd, FALSE);
-       if (rspamd_mmaped_file_check (statfiles_pool->pool, new_file) == -1) {
+       if (rspamd_mmaped_file_check (pool, new_file) == -1) {
                rspamd_file_unlock (new_file->fd, FALSE);
                munmap (new_file->map, st.st_size);
                g_slice_free1 (sizeof (*new_file), new_file);
@@ -673,23 +633,13 @@ rspamd_mmaped_file_open (rspamd_mmaped_file_ctx *statfiles_pool,
 
        g_assert (stcf->clcf != NULL);
 
-       g_hash_table_insert (statfiles_pool->files, stcf, new_file);
-
        return new_file;
 }
 
 gint
-rspamd_mmaped_file_close_file (rspamd_mmaped_file_ctx *statfile_pool,
+rspamd_mmaped_file_close_file (rspamd_mempool_t *pool,
        rspamd_mmaped_file_t * file)
 {
-       rspamd_mmaped_file_t *pos;
-       rspamd_mempool_t *pool = statfile_pool->pool;
-
-       if ((pos = rspamd_mmaped_file_is_open (statfile_pool, file->cf)) == NULL) {
-               msg_info_pool ("file %s is not opened", file->filename);
-               return -1;
-       }
-
        if (file->map) {
                msg_info_pool ("syncing statfile %s", file->filename);
                msync (file->map, file->len, MS_ASYNC);
@@ -705,8 +655,10 @@ rspamd_mmaped_file_close_file (rspamd_mmaped_file_ctx *statfile_pool,
 }
 
 gint
-rspamd_mmaped_file_create (rspamd_mmaped_file_ctx *statfile_pool, const gchar *filename,
-               size_t size, struct rspamd_statfile_config *stcf, rspamd_mempool_t *pool)
+rspamd_mmaped_file_create (const gchar *filename,
+               size_t size,
+               struct rspamd_statfile_config *stcf,
+               rspamd_mempool_t *pool)
 {
        struct stat_file_header header = {
                .magic = {'r', 's', 'd'},
@@ -727,11 +679,6 @@ rspamd_mmaped_file_create (rspamd_mmaped_file_ctx *statfile_pool, const gchar *f
        gpointer tok_conf;
        gsize tok_conf_len;
 
-       if (rspamd_mmaped_file_is_open (statfile_pool, stcf) != NULL) {
-               msg_info_pool ("file %s is already opened", filename);
-               return 0;
-       }
-
        if (size <
                sizeof (struct stat_file_header) + sizeof (struct stat_file_section) +
                sizeof (block)) {
@@ -838,103 +785,51 @@ rspamd_mmaped_file_create (rspamd_mmaped_file_ctx *statfile_pool, const gchar *f
 }
 
 gpointer
-rspamd_mmaped_file_init (struct rspamd_stat_ctx *ctx, struct rspamd_config *cfg)
+rspamd_mmaped_file_init (struct rspamd_stat_ctx *ctx,
+               struct rspamd_config *cfg, struct rspamd_statfile *st)
 {
-       rspamd_mmaped_file_ctx *new;
-       struct rspamd_classifier_config *clf;
-       struct rspamd_statfile_config *stf;
-       GList *cur, *curst;
+       struct rspamd_statfile_config *stf = st->stcf;
+       rspamd_mmaped_file_t *mf;
        const ucl_object_t *filenameo, *sizeo;
        const gchar *filename;
        gsize size;
 
-       new = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (rspamd_mmaped_file_ctx));
-       new->pool = rspamd_mempool_new (rspamd_mempool_suggest_size (), "statfiles");
-       memcpy (new->pool->tag.uid, cfg->cfg_pool->tag.uid, sizeof (new->pool->tag.uid));
-       new->lock = rspamd_mempool_get_mutex (new->pool);
-       new->mlock_ok = cfg->mlock_statfile_pool;
-       new->files = g_hash_table_new (g_direct_hash, g_direct_equal);
-
-       /* Iterate over all classifiers and load matching statfiles */
-       cur = cfg->classifiers;
-
-       while (cur) {
-               clf = cur->data;
+       filenameo = ucl_object_find_key (stf->opts, "filename");
 
-               curst = clf->statfiles;
+       if (filenameo == NULL || ucl_object_type (filenameo) != UCL_STRING) {
+               filenameo = ucl_object_find_key (stf->opts, "path");
 
-               if (clf->backend == NULL) {
-                       /*
-                        * By default, all statfiles are treated as mmaped files
-                        */
-                       clf->backend = MMAPED_BACKEND_TYPE;
+               if (filenameo == NULL || ucl_object_type (filenameo) != UCL_STRING) {
+                       msg_err_config ("statfile %s has no filename defined", stf->symbol);
+                       return NULL;
                }
+       }
 
-               if (strcmp (clf->backend, MMAPED_BACKEND_TYPE) == 0) {
-                       while (curst) {
-                               stf = curst->data;
-                               /*
-                                * Check configuration sanity
-                                */
-                               filenameo = ucl_object_find_key (stf->opts, "filename");
-
-                               if (filenameo == NULL || ucl_object_type (filenameo) != UCL_STRING) {
-                                       filenameo = ucl_object_find_key (stf->opts, "path");
-
-                                       if (filenameo == NULL || ucl_object_type (filenameo) != UCL_STRING) {
-                                               msg_err_config ("statfile %s has no filename defined", stf->symbol);
-                                               curst = curst->next;
-                                               continue;
-                                       }
-                               }
-
-                               filename = ucl_object_tostring (filenameo);
-
-                               sizeo = ucl_object_find_key (stf->opts, "size");
-
-                               if (sizeo == NULL || ucl_object_type (sizeo) != UCL_INT) {
-                                       msg_err_config ("statfile %s has no size defined", stf->symbol);
-                                       curst = curst->next;
-                                       continue;
-                               }
-
-                               size = ucl_object_toint (sizeo);
-
-                               rspamd_mmaped_file_open (new, filename, size, stf);
-
-                               ctx->statfiles ++;
+       filename = ucl_object_tostring (filenameo);
 
-                               curst = curst->next;
-                       }
-               }
+       sizeo = ucl_object_find_key (stf->opts, "size");
 
-               cur = g_list_next (cur);
+       if (sizeo == NULL || ucl_object_type (sizeo) != UCL_INT) {
+               msg_err_config ("statfile %s has no size defined", stf->symbol);
+               return NULL;
        }
 
-       return (gpointer)new;
+       size = ucl_object_toint (sizeo);
+       mf = rspamd_mmaped_file_open (cfg->cfg_pool, filename, size, stf);
+       mf->pool = cfg->cfg_pool;
+
+       return (gpointer)mf;
 }
 
 void
 rspamd_mmaped_file_close (gpointer p)
 {
-       rspamd_mmaped_file_ctx *ctx = (rspamd_mmaped_file_ctx *)p;
-       GHashTableIter it;
-       gpointer k, v;
-       rspamd_mmaped_file_t *mf;
-
-       g_assert (ctx != NULL);
+       rspamd_mmaped_file_t *mf = p;
 
-       rspamd_mempool_lock_mutex (ctx->lock);
-       g_hash_table_iter_init (&it, ctx->files);
+       g_assert (p != NULL);
 
-       while (g_hash_table_iter_next (&it, &k, &v)) {
-               mf = v;
-               rspamd_mmaped_file_close_file (ctx, mf);
-       }
+       rspamd_mmaped_file_close_file (mf->pool, mf);
 
-       g_hash_table_unref (ctx->files);
-       rspamd_mempool_unlock_mutex (ctx->lock);
-       /* XXX: we don't delete pool here to avoid deadlocks */
 }
 
 gpointer
@@ -943,44 +838,7 @@ rspamd_mmaped_file_runtime (struct rspamd_task *task,
                gboolean learn,
                gpointer p)
 {
-       rspamd_mmaped_file_ctx *ctx = (rspamd_mmaped_file_ctx *)p;
-       rspamd_mmaped_file_t *mf;
-       const ucl_object_t *filenameo, *sizeo;
-       const gchar *filename;
-       gsize size;
-
-       g_assert (ctx != NULL);
-
-       mf = rspamd_mmaped_file_is_open (ctx, stcf);
-
-       if (mf == NULL) {
-               /* Create file here */
-
-               filenameo = ucl_object_find_key (stcf->opts, "filename");
-               if (filenameo == NULL || ucl_object_type (filenameo) != UCL_STRING) {
-                       filenameo = ucl_object_find_key (stcf->opts, "path");
-                       if (filenameo == NULL || ucl_object_type (filenameo) != UCL_STRING) {
-                               msg_err_task ("statfile %s has no filename defined", stcf->symbol);
-                               return NULL;
-                       }
-               }
-
-               filename = ucl_object_tostring (filenameo);
-
-               sizeo = ucl_object_find_key (stcf->opts, "size");
-               if (sizeo == NULL || ucl_object_type (sizeo) != UCL_INT) {
-                       msg_err_task ("statfile %s has no size defined", stcf->symbol);
-                       return NULL;
-               }
-
-               size = ucl_object_toint (sizeo);
-
-               if (learn) {
-                       rspamd_mmaped_file_create (ctx, filename, size, stcf, task->task_pool);
-               }
-
-               mf = rspamd_mmaped_file_open (ctx, filename, size, stcf);
-       }
+       rspamd_mmaped_file_t *mf = p;
 
        return (gpointer)mf;
 }
@@ -990,24 +848,14 @@ rspamd_mmaped_file_process_token (struct rspamd_task *task, rspamd_token_t *tok,
                struct rspamd_token_result *res,
                gpointer p)
 {
-       rspamd_mmaped_file_ctx *ctx = (rspamd_mmaped_file_ctx *)p;
-       rspamd_mmaped_file_t *mf;
+       rspamd_mmaped_file_t *mf = p;
        guint32 h1, h2;
 
        g_assert (res != NULL);
        g_assert (p != NULL);
-       g_assert (res->st_runtime != NULL);
        g_assert (tok != NULL);
        g_assert (tok->datalen >= sizeof (guint32) * 2);
 
-       mf = (rspamd_mmaped_file_t *)res->st_runtime->backend_runtime;
-
-       if (mf == NULL) {
-               /* Statfile is does not exist, so all values are zero */
-               res->value = 0.0;
-               return FALSE;
-       }
-
        memcpy (&h1, tok->data, sizeof (h1));
        memcpy (&h2, tok->data + sizeof (h1), sizeof (h2));
        res->value = rspamd_mmaped_file_get_block (ctx, mf, h1, h2);
@@ -1024,24 +872,14 @@ rspamd_mmaped_file_learn_token (struct rspamd_task *task, rspamd_token_t *tok,
                struct rspamd_token_result *res,
                gpointer p)
 {
-       rspamd_mmaped_file_ctx *ctx = (rspamd_mmaped_file_ctx *)p;
-       rspamd_mmaped_file_t *mf;
+       rspamd_mmaped_file_t *mf = p;
        guint32 h1, h2;
 
        g_assert (res != NULL);
        g_assert (p != NULL);
-       g_assert (res->st_runtime != NULL);
        g_assert (tok != NULL);
        g_assert (tok->datalen >= sizeof (guint32) * 2);
 
-       mf = (rspamd_mmaped_file_t *)res->st_runtime->backend_runtime;
-
-       if (mf == NULL) {
-               /* Statfile is does not exist, so all values are zero */
-               res->value = 0.0;
-               return FALSE;
-       }
-
        memcpy (&h1, tok->data, sizeof (h1));
        memcpy (&h2, tok->data + sizeof (h1), sizeof (h2));
        rspamd_mmaped_file_set_block (task->task_pool, ctx, mf, h1, h2, res->value);