diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-01-17 21:53:49 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-01-18 23:05:15 +0000 |
commit | 665166c376a54f52b070e891780ca6209bbaa2d1 (patch) | |
tree | 034b58919947b10b1c5adb85796bc8d1a8ea0ed7 /src/libserver | |
parent | 5d9fc64987e526b3a3cccd3dcb5f980ccc8b83cb (diff) | |
download | rspamd-665166c376a54f52b070e891780ca6209bbaa2d1.tar.gz rspamd-665166c376a54f52b070e891780ca6209bbaa2d1.zip |
Start refactoring of statistics in rspamd.
Diffstat (limited to 'src/libserver')
-rw-r--r-- | src/libserver/cfg_file.h | 47 | ||||
-rw-r--r-- | src/libserver/cfg_rcl.c | 28 | ||||
-rw-r--r-- | src/libserver/cfg_utils.c | 6 | ||||
-rw-r--r-- | src/libserver/statfile.c | 1083 | ||||
-rw-r--r-- | src/libserver/statfile.h | 310 |
5 files changed, 3 insertions, 1471 deletions
diff --git a/src/libserver/cfg_file.h b/src/libserver/cfg_file.h index 5844a945f..1beb51055 100644 --- a/src/libserver/cfg_file.h +++ b/src/libserver/cfg_file.h @@ -125,43 +125,6 @@ struct rspamd_symbols_group { GList *symbols; }; -/** - * Statfile section definition - */ -struct rspamd_statfile_section { - guint32 code; /**< section's code */ - guint64 size; /**< size of section */ - double weight; /**< weight coefficient for section */ -}; - -/** - * Statfile autolearn parameters - */ -struct statfile_autolearn_params { - const gchar *metric; /**< metric name for autolearn triggering */ - double threshold_min; /**< threshold mark */ - double threshold_max; /**< threshold mark */ - GList *symbols; /**< list of symbols */ -}; - -/** - * Sync affinity - */ -enum sync_affinity { - AFFINITY_NONE = 0, - AFFINITY_MASTER, - AFFINITY_SLAVE -}; - -/** - * Binlog params - */ -struct statfile_binlog_params { - enum sync_affinity affinity; - time_t rotate_time; - gchar *master_addr; - guint16 master_port; -}; typedef double (*statfile_normalize_func)(struct rspamd_config *cfg, long double score, void *params); @@ -171,15 +134,7 @@ typedef double (*statfile_normalize_func)(struct rspamd_config *cfg, */ struct rspamd_statfile_config { gchar *symbol; /**< symbol of statfile */ - gchar *path; /**< filesystem pattern (with %r or %f) */ gchar *label; /**< label of this statfile */ - gsize size; /**< size of statfile */ - GList *sections; /**< list of sections in statfile */ - struct statfile_autolearn_params *autolearn; /**< autolearn params */ - struct statfile_binlog_params *binlog; /**< binlog params */ - statfile_normalize_func normalizer; /**< function that is used as normaliser */ - void *normalizer_data; /**< normalizer function params */ - gchar *normalizer_str; /**< source string (for dump) */ ucl_object_t *opts; /**< other options */ gboolean is_spam; /**< spam flag */ }; @@ -193,7 +148,7 @@ struct rspamd_classifier_config { gchar *metric; /**< metric of this classifier */ struct classifier *classifier; /**< classifier interface */ struct tokenizer *tokenizer; /**< tokenizer used for classifier */ - GHashTable *opts; /**< other options */ + ucl_object_t *opts; /**< other options */ GList *pre_callbacks; /**< list of callbacks that are called before classification */ GList *post_callbacks; /**< list of callbacks that are called after classification */ }; diff --git a/src/libserver/cfg_rcl.c b/src/libserver/cfg_rcl.c index 6c77292aa..921464219 100644 --- a/src/libserver/cfg_rcl.c +++ b/src/libserver/cfg_rcl.c @@ -889,14 +889,6 @@ rspamd_rcl_statfile_handler (struct rspamd_config *cfg, const ucl_object_t *obj, return FALSE; } - if (st->path == NULL) { - g_set_error (err, - CFG_RCL_ERROR, - EINVAL, - "statfile must have a path defined"); - return FALSE; - } - st->opts = (ucl_object_t *)obj; val = ucl_object_find_key (obj, "spam"); @@ -967,7 +959,7 @@ rspamd_rcl_classifier_handler (struct rspamd_config *cfg, if (found == NULL) { ccf = rspamd_config_new_classifier (cfg, NULL); - ccf->classifier = get_classifier (type); + ccf->classifier = rspamd_stat_get_classifier (type); } else { ccf = found; @@ -997,13 +989,7 @@ rspamd_rcl_classifier_handler (struct rspamd_config *cfg, } else if (g_ascii_strcasecmp (key, "tokenizer") == 0 && val->type == UCL_STRING) { - ccf->tokenizer = get_tokenizer (ucl_object_tostring (val)); - } - else { - /* Just insert a value of option to the hash */ - g_hash_table_insert (ccf->opts, - (gpointer)key, - (gpointer)ucl_object_tostring_forced (val)); + ccf->tokenizer = rspamd_stat_get_tokenizer (ucl_object_tostring (val)); } } } @@ -1404,21 +1390,11 @@ rspamd_rcl_config_init (void) G_STRUCT_OFFSET (struct rspamd_statfile_config, symbol), 0); rspamd_rcl_add_default_handler (ssub, - "path", - rspamd_rcl_parse_struct_string, - G_STRUCT_OFFSET (struct rspamd_statfile_config, path), - RSPAMD_CL_FLAG_STRING_PATH); - rspamd_rcl_add_default_handler (ssub, "label", rspamd_rcl_parse_struct_string, G_STRUCT_OFFSET (struct rspamd_statfile_config, label), 0); rspamd_rcl_add_default_handler (ssub, - "size", - rspamd_rcl_parse_struct_integer, - G_STRUCT_OFFSET (struct rspamd_statfile_config, size), - RSPAMD_CL_FLAG_INT_SIZE); - rspamd_rcl_add_default_handler (ssub, "spam", rspamd_rcl_parse_struct_boolean, G_STRUCT_OFFSET (struct rspamd_statfile_config, is_spam), diff --git a/src/libserver/cfg_utils.c b/src/libserver/cfg_utils.c index b53a2690c..c9a9555b1 100644 --- a/src/libserver/cfg_utils.c +++ b/src/libserver/cfg_utils.c @@ -498,12 +498,6 @@ rspamd_config_new_classifier (struct rspamd_config *cfg, rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (struct rspamd_classifier_config)); } - if (c->opts == NULL) { - c->opts = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); - rspamd_mempool_add_destructor (cfg->cfg_pool, - (rspamd_mempool_destruct_t) g_hash_table_destroy, - c->opts); - } if (c->labels == NULL) { c->labels = g_hash_table_new_full (rspamd_str_hash, rspamd_str_equal, diff --git a/src/libserver/statfile.c b/src/libserver/statfile.c deleted file mode 100644 index 066671a95..000000000 --- a/src/libserver/statfile.c +++ /dev/null @@ -1,1083 +0,0 @@ -/* - * Copyright (c) 2009-2012, Vsevolod Stakhov - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" - -#include "statfile.h" -#include "main.h" - -#define RSPAMD_STATFILE_VERSION {'1', '2'} -#define BACKUP_SUFFIX ".old" - -/* Maximum number of statistics files */ -#define STATFILES_MAX 255 -static void statfile_pool_set_block_common ( - statfile_pool_t * pool, stat_file_t * file, - guint32 h1, guint32 h2, - time_t t, double value, - gboolean from_now); - -static gint -cmpstatfile (const void *a, const void *b) -{ - const stat_file_t *s1 = a, *s2 = b; - - return g_ascii_strcasecmp (s1->filename, s2->filename); -} - -/* Convert statfile version 1.0 to statfile version 1.2, saving backup */ -struct stat_file_header_10 { - u_char magic[3]; /**< magic signature ('r' 's' 'd') */ - u_char version[2]; /**< version of statfile */ - u_char padding[3]; /**< padding */ - guint64 create_time; /**< create time (time_t->guint64) */ -}; - -static gboolean -convert_statfile_10 (stat_file_t * file) -{ - gchar *backup_name; - struct stat st; - struct stat_file_header header = { - .magic = {'r', 's', 'd'}, - .version = RSPAMD_STATFILE_VERSION, - .padding = {0, 0, 0}, - .revision = 0, - .rev_time = 0 - }; - - - /* Format backup name */ - backup_name = g_strdup_printf ("%s.%s", file->filename, BACKUP_SUFFIX); - - msg_info ("convert old statfile %s to version %c.%c, backup in %s", - file->filename, - header.version[0], - header.version[1], - backup_name); - - if (stat (backup_name, &st) != -1) { - msg_info ("replace old %s", backup_name); - unlink (backup_name); - } - - rename (file->filename, backup_name); - g_free (backup_name); - - /* XXX: maybe race condition here */ - rspamd_file_unlock (file->fd, FALSE); - close (file->fd); - if ((file->fd = - open (file->filename, O_RDWR | O_TRUNC | O_CREAT, - S_IWUSR | S_IRUSR)) == -1) { - msg_info ("cannot create file %s, error %d, %s", - file->filename, - errno, - strerror (errno)); - return FALSE; - } - rspamd_file_lock (file->fd, FALSE); - /* Now make new header and copy it to new file */ - if (write (file->fd, &header, sizeof (header)) == -1) { - msg_info ("cannot write to file %s, error %d, %s", - file->filename, - errno, - strerror (errno)); - return FALSE; - } - /* Now write old map to new file */ - if (write (file->fd, - ((u_char *)file->map + sizeof (struct stat_file_header_10)), - file->len - sizeof (struct stat_file_header_10)) == -1) { - msg_info ("cannot write to file %s, error %d, %s", - file->filename, - errno, - strerror (errno)); - return FALSE; - } - /* Unmap old memory and map new */ - munmap (file->map, file->len); - file->len = file->len + sizeof (struct stat_file_header) - - sizeof (struct stat_file_header_10); -#ifdef HAVE_MMAP_NOCORE - if ((file->map = - mmap (NULL, file->len, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_NOCORE, - file->fd, 0)) == MAP_FAILED) { -#else - if ((file->map = - mmap (NULL, file->len, PROT_READ | PROT_WRITE, MAP_SHARED, file->fd, - 0)) == MAP_FAILED) { -#endif - msg_info ("cannot mmap file %s, error %d, %s", - file->filename, - errno, - strerror (errno)); - return FALSE; - } - - return TRUE; -} - -/* Check whether specified file is statistic file and calculate its len in blocks */ -static gint -statfile_pool_check (stat_file_t * file) -{ - struct stat_file *f; - gchar *c; - static gchar valid_version[] = RSPAMD_STATFILE_VERSION; - - - if (!file || !file->map) { - return -1; - } - - if (file->len < sizeof (struct stat_file)) { - msg_info ("file %s is too short to be stat file: %z", - file->filename, - file->len); - return -1; - } - - f = (struct stat_file *)file->map; - c = f->header.magic; - /* Check magic and version */ - if (*c++ != 'r' || *c++ != 's' || *c++ != 'd') { - msg_info ("file %s is invalid stat file", file->filename); - return -1; - } - /* Now check version and convert old version to new one (that can be used for sync */ - if (*c == 1 && *(c + 1) == 0) { - if (!convert_statfile_10 (file)) { - return -1; - } - f = (struct stat_file *)file->map; - } - else if (memcmp (c, valid_version, sizeof (valid_version)) != 0) { - /* Unknown version */ - msg_info ("file %s has invalid version %c.%c", - file->filename, - '0' + *c, - '0' + *(c + 1)); - return -1; - } - - /* Check first section and set new offset */ - file->cur_section.code = f->section.code; - file->cur_section.length = f->section.length; - if (file->cur_section.length * sizeof (struct stat_file_block) > - file->len) { - msg_info ("file %s is truncated: %z, must be %z", - file->filename, - file->len, - file->cur_section.length * sizeof (struct stat_file_block)); - return -1; - } - file->seek_pos = sizeof (struct stat_file) - - sizeof (struct stat_file_block); - - return 0; -} - - -statfile_pool_t * -statfile_pool_new (rspamd_mempool_t *pool, gboolean use_mlock) -{ - statfile_pool_t *new; - - new = rspamd_mempool_alloc0 (pool, sizeof (statfile_pool_t)); - new->pool = rspamd_mempool_new (rspamd_mempool_suggest_size ()); - new->files = - rspamd_mempool_alloc0 (new->pool, STATFILES_MAX * sizeof (stat_file_t)); - new->lock = rspamd_mempool_get_mutex (new->pool); - new->mlock_ok = use_mlock; - - return new; -} - -static stat_file_t * -statfile_pool_reindex (statfile_pool_t * pool, - gchar *filename, - size_t old_size, - size_t size) -{ - gchar *backup; - gint fd; - stat_file_t *new; - u_char *map, *pos; - struct stat_file_block *block; - struct stat_file_header *header; - - if (size < - sizeof (struct stat_file_header) + sizeof (struct stat_file_section) + - sizeof (block)) { - msg_err ("file %s is too small to carry any statistic: %z", - filename, - size); - return NULL; - } - - /* First of all rename old file */ - rspamd_mempool_lock_mutex (pool->lock); - - backup = g_strconcat (filename, ".old", NULL); - if (rename (filename, backup) == -1) { - msg_err ("cannot rename %s to %s: %s", filename, backup, strerror ( - errno)); - g_free (backup); - rspamd_mempool_unlock_mutex (pool->lock); - return NULL; - } - - rspamd_mempool_unlock_mutex (pool->lock); - - /* Now create new file with required size */ - if (statfile_pool_create (pool, filename, size) != 0) { - msg_err ("cannot create new file"); - g_free (backup); - return NULL; - } - /* Now open new file and start copying */ - fd = open (backup, O_RDONLY); - new = statfile_pool_open (pool, filename, size, TRUE); - - if (fd == -1 || new == NULL) { - msg_err ("cannot open file: %s", strerror (errno)); - g_free (backup); - return NULL; - } - - /* Now start reading blocks from old statfile */ - if ((map = - mmap (NULL, old_size, PROT_READ, MAP_SHARED, fd, 0)) == MAP_FAILED) { - msg_err ("cannot mmap file: %s", strerror (errno)); - close (fd); - g_free (backup); - return NULL; - } - - pos = map + (sizeof (struct stat_file) - sizeof (struct stat_file_block)); - while (old_size - (pos - map) >= sizeof (struct stat_file_block)) { - block = (struct stat_file_block *)pos; - if (block->hash1 != 0 && block->value != 0) { - statfile_pool_set_block_common (pool, - new, - block->hash1, - block->hash2, - 0, - block->value, - FALSE); - } - pos += sizeof (block); - } - - header = (struct stat_file_header *)map; - statfile_set_revision (new, header->revision, header->rev_time); - - munmap (map, old_size); - close (fd); - unlink (backup); - g_free (backup); - - return new; - -} - -/* - * Pre-load mmaped file into memory - */ -static void -statfile_preload (stat_file_t *file) -{ - guint8 *pos, *end; - volatile guint8 t; - gsize size; - - pos = (guint8 *)file->map; - end = (guint8 *)file->map + file->len; - - if (madvise (pos, end - pos, MADV_SEQUENTIAL) == -1) { - msg_info ("madvise failed: %s", strerror (errno)); - } - else { - /* Load pages of file */ -#ifdef HAVE_GETPAGESIZE - size = getpagesize (); -#else - size = sysconf (_SC_PAGESIZE); -#endif - while (pos < end) { - t = *pos; - (void)t; - pos += size; - } - } -} - -stat_file_t * -statfile_pool_open (statfile_pool_t * pool, - gchar *filename, - size_t size, - gboolean forced) -{ - struct stat st; - stat_file_t *new_file; - - if ((new_file = statfile_pool_is_open (pool, filename)) != NULL) { - return new_file; - } - - if (pool->opened >= STATFILES_MAX - 1) { - msg_err ("reached hard coded limit of statfiles opened: %d", - STATFILES_MAX); - return NULL; - } - - if (stat (filename, &st) == -1) { - msg_info ("cannot stat file %s, error %s, %d", filename, strerror ( - errno), errno); - return NULL; - } - - rspamd_mempool_lock_mutex (pool->lock); - if (!forced && - labs (size - st.st_size) > (long)sizeof (struct stat_file) * 2 - && size > sizeof (struct stat_file)) { - rspamd_mempool_unlock_mutex (pool->lock); - msg_warn ("need to reindex statfile old size: %Hz, new size: %Hz", - (size_t)st.st_size, size); - return statfile_pool_reindex (pool, filename, st.st_size, size); - } - else if (size < sizeof (struct stat_file)) { - msg_err ("requested to shrink statfile to %Hz but it is too small", - size); - } - - new_file = &pool->files[pool->opened++]; - bzero (new_file, sizeof (stat_file_t)); - if ((new_file->fd = open (filename, O_RDWR)) == -1) { - msg_info ("cannot open file %s, error %d, %s", - filename, - errno, - strerror (errno)); - rspamd_mempool_unlock_mutex (pool->lock); - pool->opened--; - return NULL; - } - - if ((new_file->map = - mmap (NULL, st.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, - new_file->fd, 0)) == MAP_FAILED) { - close (new_file->fd); - rspamd_mempool_unlock_mutex (pool->lock); - msg_info ("cannot mmap file %s, error %d, %s", - filename, - errno, - strerror (errno)); - pool->opened--; - return NULL; - - } - - rspamd_strlcpy (new_file->filename, filename, sizeof (new_file->filename)); - new_file->len = st.st_size; - /* Try to lock pages in RAM */ - if (pool->mlock_ok) { - if (mlock (new_file->map, new_file->len) == -1) { - msg_warn ( - "mlock of statfile failed, maybe you need to increase RLIMIT_MEMLOCK limit for a process: %s", - strerror (errno)); - pool->mlock_ok = FALSE; - } - } - /* Acquire lock for this operation */ - rspamd_file_lock (new_file->fd, FALSE); - if (statfile_pool_check (new_file) == -1) { - pool->opened--; - rspamd_mempool_unlock_mutex (pool->lock); - rspamd_file_unlock (new_file->fd, FALSE); - munmap (new_file->map, st.st_size); - return NULL; - } - rspamd_file_unlock (new_file->fd, FALSE); - - new_file->open_time = time (NULL); - new_file->access_time = new_file->open_time; - new_file->lock = rspamd_mempool_get_mutex (pool->pool); - - statfile_preload (new_file); - - rspamd_mempool_unlock_mutex (pool->lock); - - return statfile_pool_is_open (pool, filename); -} - -gint -statfile_pool_close (statfile_pool_t * pool, - stat_file_t * file, - gboolean keep_sorted) -{ - stat_file_t *pos; - - if ((pos = statfile_pool_is_open (pool, file->filename)) == NULL) { - msg_info ("file %s is not opened", file->filename); - return -1; - } - - rspamd_mempool_lock_mutex (pool->lock); - - if (file->map) { - msg_info ("syncing statfile %s", file->filename); - msync (file->map, file->len, MS_ASYNC); - munmap (file->map, file->len); - } - if (file->fd != -1) { - close (file->fd); - } - /* Move the remain statfiles */ - memmove (pos, ((guint8 *)pos) + sizeof (stat_file_t), - (--pool->opened - (pos - pool->files)) * sizeof (stat_file_t)); - - rspamd_mempool_unlock_mutex (pool->lock); - - return 0; -} - -gint -statfile_pool_create (statfile_pool_t * pool, gchar *filename, size_t size) -{ - struct stat_file_header header = { - .magic = {'r', 's', 'd'}, - .version = RSPAMD_STATFILE_VERSION, - .padding = {0, 0, 0}, - .revision = 0, - .rev_time = 0, - .used_blocks = 0 - }; - struct stat_file_section section = { - .code = STATFILE_SECTION_COMMON, - }; - struct stat_file_block block = { 0, 0, 0 }; - gint fd; - guint buflen = 0, nblocks; - gchar *buf = NULL; - - if (statfile_pool_is_open (pool, filename) != NULL) { - msg_info ("file %s is already opened", filename); - return 0; - } - - if (size < - sizeof (struct stat_file_header) + sizeof (struct stat_file_section) + - sizeof (block)) { - msg_err ("file %s is too small to carry any statistic: %z", - filename, - size); - return -1; - } - - rspamd_mempool_lock_mutex (pool->lock); - nblocks = - (size - sizeof (struct stat_file_header) - - sizeof (struct stat_file_section)) / sizeof (struct stat_file_block); - header.total_blocks = nblocks; - - if ((fd = - open (filename, O_RDWR | O_TRUNC | O_CREAT, S_IWUSR | S_IRUSR)) == -1) { - msg_info ("cannot create file %s, error %d, %s", - filename, - errno, - strerror (errno)); - rspamd_mempool_unlock_mutex (pool->lock); - return -1; - } - - rspamd_fallocate (fd, - 0, - sizeof (header) + sizeof (section) + sizeof (block) * nblocks); - - header.create_time = (guint64) time (NULL); - if (write (fd, &header, sizeof (header)) == -1) { - msg_info ("cannot write header to file %s, error %d, %s", - filename, - errno, - strerror (errno)); - close (fd); - rspamd_mempool_unlock_mutex (pool->lock); - return -1; - } - - section.length = (guint64) nblocks; - if (write (fd, §ion, sizeof (section)) == -1) { - msg_info ("cannot write section header to file %s, error %d, %s", - filename, - errno, - strerror (errno)); - close (fd); - rspamd_mempool_unlock_mutex (pool->lock); - return -1; - } - - /* Buffer for write 256 blocks at once */ - if (nblocks > 256) { - buflen = sizeof (block) * 256; - buf = g_malloc0 (buflen); - } - - while (nblocks) { - if (nblocks > 256) { - /* Just write buffer */ - if (write (fd, buf, buflen) == -1) { - msg_info ("cannot write blocks buffer to file %s, error %d, %s", - filename, - errno, - strerror (errno)); - close (fd); - rspamd_mempool_unlock_mutex (pool->lock); - g_free (buf); - return -1; - } - nblocks -= 256; - } - else { - if (write (fd, &block, sizeof (block)) == -1) { - msg_info ("cannot write block to file %s, error %d, %s", - filename, - errno, - strerror (errno)); - close (fd); - if (buf) { - g_free (buf); - } - rspamd_mempool_unlock_mutex (pool->lock); - return -1; - } - nblocks--; - } - } - - close (fd); - rspamd_mempool_unlock_mutex (pool->lock); - - if (buf) { - g_free (buf); - } - - return 0; -} - -void -statfile_pool_delete (statfile_pool_t * pool) -{ - gint i; - - for (i = 0; i < pool->opened; i++) { - statfile_pool_close (pool, &pool->files[i], FALSE); - } - rspamd_mempool_delete (pool->pool); -} - -void -statfile_pool_lock_file (statfile_pool_t * pool, stat_file_t * file) -{ - - rspamd_mempool_lock_mutex (file->lock); -} - -void -statfile_pool_unlock_file (statfile_pool_t * pool, stat_file_t * file) -{ - - rspamd_mempool_unlock_mutex (file->lock); -} - -double -statfile_pool_get_block (statfile_pool_t * pool, - stat_file_t * file, - guint32 h1, - guint32 h2, - time_t now) -{ - struct stat_file_block *block; - guint i, blocknum; - u_char *c; - - - file->access_time = now; - if (!file->map) { - return 0; - } - - blocknum = h1 % file->cur_section.length; - c = (u_char *) file->map + file->seek_pos + blocknum * - sizeof (struct stat_file_block); - block = (struct stat_file_block *)c; - - for (i = 0; i < CHAIN_LENGTH; i++) { - if (i + blocknum >= file->cur_section.length) { - break; - } - if (block->hash1 == h1 && block->hash2 == h2) { - return block->value; - } - c += sizeof (struct stat_file_block); - block = (struct stat_file_block *)c; - } - - - return 0; -} - -static void -statfile_pool_set_block_common (statfile_pool_t * pool, - stat_file_t * file, - guint32 h1, - guint32 h2, - time_t t, - double value, - gboolean from_now) -{ - struct stat_file_block *block, *to_expire = NULL; - struct stat_file_header *header; - guint i, blocknum; - u_char *c; - double min = G_MAXDOUBLE; - - if (from_now) { - file->access_time = t; - } - if (!file->map) { - return; - } - - blocknum = h1 % file->cur_section.length; - header = (struct stat_file_header *)file->map; - c = (u_char *) file->map + file->seek_pos + blocknum * - sizeof (struct stat_file_block); - block = (struct stat_file_block *)c; - - for (i = 0; i < CHAIN_LENGTH; i++) { - if (i + blocknum >= file->cur_section.length) { - /* Need to expire some block in chain */ - msg_info ("chain %ud is full in statfile %s, starting expire", - blocknum, - file->filename); - break; - } - /* First try to find block in chain */ - if (block->hash1 == h1 && block->hash2 == h2) { - block->value = value; - return; - } - /* Check whether we have a free block in chain */ - if (block->hash1 == 0 && block->hash2 == 0) { - /* Write new block here */ - msg_debug ("found free block %ud in chain %ud, set h1=%ud, h2=%ud", - i, - blocknum, - h1, - h2); - block->hash1 = h1; - block->hash2 = h2; - block->value = value; - header->used_blocks++; - - return; - } - - /* Expire block with minimum value otherwise */ - if (block->value < min) { - to_expire = block; - min = block->value; - } - c += sizeof (struct stat_file_block); - block = (struct stat_file_block *)c; - } - - /* Try expire some block */ - if (to_expire) { - block = to_expire; - } - else { - /* Expire first block in chain */ - c = (u_char *) file->map + file->seek_pos + blocknum * - sizeof (struct stat_file_block); - block = (struct stat_file_block *)c; - } - - block->hash1 = h1; - block->hash2 = h2; - block->value = value; -} - -void -statfile_pool_set_block (statfile_pool_t * pool, - stat_file_t * file, - guint32 h1, - guint32 h2, - time_t now, - double value) -{ - statfile_pool_set_block_common (pool, file, h1, h2, now, value, TRUE); -} - -stat_file_t * -statfile_pool_is_open (statfile_pool_t * pool, gchar *filename) -{ - static stat_file_t f, *ret; - rspamd_strlcpy (f.filename, filename, sizeof (f.filename)); - ret = lfind (&f, - pool->files, - (size_t *)&pool->opened, - sizeof (stat_file_t), - cmpstatfile); - return ret; -} - -guint32 -statfile_pool_get_section (statfile_pool_t * pool, stat_file_t * file) -{ - - return file->cur_section.code; -} - -gboolean -statfile_pool_set_section (statfile_pool_t * pool, - stat_file_t * file, - guint32 code, - gboolean from_begin) -{ - struct stat_file_section *sec; - off_t cur_offset; - - - /* Try to find section */ - if (from_begin) { - cur_offset = sizeof (struct stat_file_header); - } - else { - cur_offset = file->seek_pos - sizeof (struct stat_file_section); - } - while (cur_offset < (off_t)file->len) { - sec = (struct stat_file_section *)((gchar *)file->map + cur_offset); - if (sec->code == code) { - file->cur_section.code = code; - file->cur_section.length = sec->length; - file->seek_pos = cur_offset + sizeof (struct stat_file_section); - return TRUE; - } - cur_offset += sec->length; - } - - return FALSE; -} - -gboolean -statfile_pool_add_section (statfile_pool_t * pool, - stat_file_t * file, - guint32 code, - guint64 length) -{ - struct stat_file_section sect; - struct stat_file_block block = { 0, 0, 0 }; - - if (lseek (file->fd, 0, SEEK_END) == -1) { - msg_info ("cannot lseek file %s, error %d, %s", - file->filename, - errno, - strerror (errno)); - return FALSE; - } - - sect.code = code; - sect.length = length; - - if (write (file->fd, §, sizeof (sect)) == -1) { - msg_info ("cannot write block to file %s, error %d, %s", - file->filename, - errno, - strerror (errno)); - return FALSE; - } - - while (length--) { - if (write (file->fd, &block, sizeof (block)) == -1) { - msg_info ("cannot write block to file %s, error %d, %s", - file->filename, - errno, - strerror (errno)); - return FALSE; - } - } - - /* Lock statfile to remap memory */ - statfile_pool_lock_file (pool, file); - munmap (file->map, file->len); - fsync (file->fd); - file->len += length; - - if ((file->map = - mmap (NULL, file->len, PROT_READ | PROT_WRITE, MAP_SHARED, file->fd, - 0)) == NULL) { - msg_info ("cannot mmap file %s, error %d, %s", - file->filename, - errno, - strerror (errno)); - return FALSE; - } - statfile_pool_unlock_file (pool, file); - - return TRUE; - -} - -guint32 -statfile_get_section_by_name (const gchar *name) -{ - if (g_ascii_strcasecmp (name, "common") == 0) { - return STATFILE_SECTION_COMMON; - } - else if (g_ascii_strcasecmp (name, "header") == 0) { - return STATFILE_SECTION_HEADERS; - } - else if (g_ascii_strcasecmp (name, "url") == 0) { - return STATFILE_SECTION_URLS; - } - else if (g_ascii_strcasecmp (name, "regexp") == 0) { - return STATFILE_SECTION_REGEXP; - } - - return 0; -} - -gboolean -statfile_set_revision (stat_file_t *file, guint64 rev, time_t time) -{ - struct stat_file_header *header; - - if (file == NULL || file->map == NULL) { - return FALSE; - } - - header = (struct stat_file_header *)file->map; - - header->revision = rev; - header->rev_time = time; - - return TRUE; -} - -gboolean -statfile_inc_revision (stat_file_t *file) -{ - struct stat_file_header *header; - - if (file == NULL || file->map == NULL) { - return FALSE; - } - - header = (struct stat_file_header *)file->map; - - header->revision++; - - return TRUE; -} - -gboolean -statfile_get_revision (stat_file_t *file, guint64 *rev, time_t *time) -{ - struct stat_file_header *header; - - if (file == NULL || file->map == NULL) { - return FALSE; - } - - header = (struct stat_file_header *)file->map; - - if (rev != NULL) { - *rev = header->revision; - } - if (time != NULL) { - *time = header->rev_time; - } - - return TRUE; -} - -guint64 -statfile_get_used_blocks (stat_file_t *file) -{ - struct stat_file_header *header; - - if (file == NULL || file->map == NULL) { - return (guint64) - 1; - } - - header = (struct stat_file_header *)file->map; - - return header->used_blocks; -} - -guint64 -statfile_get_total_blocks (stat_file_t *file) -{ - struct stat_file_header *header; - - if (file == NULL || file->map == NULL) { - return (guint64) - 1; - } - - header = (struct stat_file_header *)file->map; - - /* If total blocks is 0 we have old version of header, so set total blocks correctly */ - if (header->total_blocks == 0) { - header->total_blocks = file->cur_section.length; - } - - return header->total_blocks; -} - -static void -statfile_pool_invalidate_callback (gint fd, short what, void *ud) -{ - statfile_pool_t *pool = ud; - stat_file_t *file; - gint i; - - msg_info ("invalidating %d statfiles", pool->opened); - - for (i = 0; i < pool->opened; i++) { - file = &pool->files[i]; - msync (file->map, file->len, MS_ASYNC); - } - -} - - -void -statfile_pool_plan_invalidate (statfile_pool_t *pool, - time_t seconds, - time_t jitter) -{ - gboolean pending; - - - if (pool->invalidate_event != NULL) { - pending = evtimer_pending (pool->invalidate_event, NULL); - if (pending) { - /* Replan event */ - pool->invalidate_tv.tv_sec = seconds + - g_random_int_range (0, jitter); - pool->invalidate_tv.tv_usec = 0; - evtimer_add (pool->invalidate_event, &pool->invalidate_tv); - } - } - else { - pool->invalidate_event = - rspamd_mempool_alloc (pool->pool, sizeof (struct event)); - pool->invalidate_tv.tv_sec = seconds + g_random_int_range (0, jitter); - pool->invalidate_tv.tv_usec = 0; - evtimer_set (pool->invalidate_event, - statfile_pool_invalidate_callback, - pool); - evtimer_add (pool->invalidate_event, &pool->invalidate_tv); - msg_info ("invalidate of statfile pool is planned in %d seconds", - (gint)pool->invalidate_tv.tv_sec); - } -} - - -stat_file_t * -get_statfile_by_symbol (statfile_pool_t *pool, - struct rspamd_classifier_config *ccf, - const gchar *symbol, - struct rspamd_statfile_config **st, - gboolean try_create) -{ - stat_file_t *res = NULL; - GList *cur; - - if (pool == NULL || ccf == NULL || symbol == NULL) { - msg_err ("invalid input arguments"); - return NULL; - } - - cur = g_list_first (ccf->statfiles); - while (cur) { - *st = cur->data; - if (strcmp (symbol, (*st)->symbol) == 0) { - break; - } - *st = NULL; - cur = g_list_next (cur); - } - if (*st == NULL) { - msg_info ("cannot find statfile with symbol %s", symbol); - return NULL; - } - - if ((res = statfile_pool_is_open (pool, (*st)->path)) == NULL) { - if ((res = - statfile_pool_open (pool, (*st)->path, (*st)->size, - FALSE)) == NULL) { - msg_warn ("cannot open %s", (*st)->path); - if (try_create) { - if (statfile_pool_create (pool, (*st)->path, - (*st)->size) == -1) { - msg_err ("cannot create statfile %s", (*st)->path); - return NULL; - } - res = - statfile_pool_open (pool, (*st)->path, (*st)->size, FALSE); - if (res == NULL) { - msg_err ("cannot open statfile %s after creation", - (*st)->path); - } - } - } - } - - return res; -} - -void -statfile_pool_lockall (statfile_pool_t *pool) -{ - stat_file_t *file; - gint i; - - if (pool->mlock_ok) { - for (i = 0; i < pool->opened; i++) { - file = &pool->files[i]; - if (mlock (file->map, file->len) == -1) { - msg_warn ( - "mlock of statfile failed, maybe you need to increase RLIMIT_MEMLOCK limit for a process: %s", - strerror (errno)); - pool->mlock_ok = FALSE; - return; - } - } - } - /* Do not try to lock if mlock failed */ -} - diff --git a/src/libserver/statfile.h b/src/libserver/statfile.h deleted file mode 100644 index f7f632703..000000000 --- a/src/libserver/statfile.h +++ /dev/null @@ -1,310 +0,0 @@ -/** - * @file statfile.h - * Describes common methods for accessing statistics files and caching them in memory - */ - -#ifndef RSPAMD_STATFILE_H -#define RSPAMD_STATFILE_H - -#include "config.h" -#include "mem_pool.h" -#include "hash.h" - -#define CHAIN_LENGTH 128 - -/* Section types */ -#define STATFILE_SECTION_COMMON 1 -#define STATFILE_SECTION_HEADERS 2 -#define STATFILE_SECTION_URLS 3 -#define STATFILE_SECTION_REGEXP 4 - -#define DEFAULT_STATFILE_INVALIDATE_TIME 30 -#define DEFAULT_STATFILE_INVALIDATE_JITTER 30 - -/** - * Common statfile header - */ -struct stat_file_header { - u_char magic[3]; /**< magic signature ('r' 's' 'd') */ - u_char version[2]; /**< version of statfile */ - u_char padding[3]; /**< padding */ - guint64 create_time; /**< create time (time_t->guint64) */ - guint64 revision; /**< revision number */ - guint64 rev_time; /**< revision time */ - guint64 used_blocks; /**< used blocks number */ - guint64 total_blocks; /**< total number of blocks */ - u_char unused[239]; /**< some bytes that can be used in future */ -}; - -/** - * Section header - */ -struct stat_file_section { - guint64 code; /**< section's code */ - guint64 length; /**< section's length in blocks */ -}; - -/** - * Block of data in statfile - */ -struct stat_file_block { - guint32 hash1; /**< hash1 (also acts as index) */ - guint32 hash2; /**< hash2 */ - double value; /**< double value */ -}; - -/** - * Statistic file - */ -struct stat_file { - struct stat_file_header header; /**< header */ - struct stat_file_section section; /**< first section */ - struct stat_file_block blocks[1]; /**< first block of data */ -}; - -/** - * Common view of statfile object - */ -typedef struct stat_file_s { -#ifdef HAVE_PATH_MAX - gchar filename[PATH_MAX]; /**< name of file */ -#else - gchar filename[MAXPATHLEN]; /**< name of file */ -#endif - gint fd; /**< descriptor */ - void *map; /**< mmaped area */ - off_t seek_pos; /**< current seek position */ - struct stat_file_section cur_section; /**< current section */ - time_t open_time; /**< time when file was opened */ - time_t access_time; /**< last access time */ - size_t len; /**< length of file(in bytes) */ - rspamd_mempool_mutex_t *lock; /**< mutex */ -} stat_file_t; - -/** - * Statfiles pool - */ -typedef struct statfile_pool_s { - stat_file_t *files; /**< hash table of opened files indexed by name */ - void **maps; /**< shared hash table of mmaped areas indexed by name */ - gint opened; /**< number of opened files */ - rspamd_mempool_t *pool; /**< memory pool object */ - rspamd_mempool_mutex_t *lock; /**< mutex */ - struct event *invalidate_event; /**< event for pool invalidation */ - struct timeval invalidate_tv; - gboolean mlock_ok; /**< whether it is possible to use mlock (2) to avoid statfiles unloading */ -} statfile_pool_t; - -/* Forwarded declarations */ -struct rspamd_classifier_config; -struct rspamd_statfile_config; - -/** - * Create new statfile pool - * @param max_size maximum size - * @return statfile pool object - */ -statfile_pool_t * statfile_pool_new (rspamd_mempool_t *pool, - gboolean use_mlock); - -/** - * Open statfile and attach it to pool - * @param pool statfile pool object - * @param filename name of statfile to open - * @return 0 if specified statfile is attached and -1 in case of error - */ -stat_file_t * statfile_pool_open (statfile_pool_t *pool, - gchar *filename, - size_t len, - gboolean forced); - -/** - * Create new statfile but DOES NOT attach it to pool, use @see statfile_pool_open for attaching - * @param pool statfile pool object - * @param filename name of statfile to create - * @param len length of new statfile - * @return 0 if file was created and -1 in case of error - */ -gint statfile_pool_create (statfile_pool_t *pool, gchar *filename, size_t len); - -/** - * Close specified statfile - * @param pool statfile pool object - * @param filename name of statfile to close - * @param remove_hash remove filename from opened files hash also - * @return 0 if file was closed and -1 if statfile was not opened - */ -gint statfile_pool_close (statfile_pool_t *pool, - stat_file_t *file, - gboolean keep_sorted); - -/** - * Delete statfile pool and close all attached statfiles - * @param pool statfile pool object - */ -void statfile_pool_delete (statfile_pool_t *pool); - -/** - * Try to lock all statfiles in memory - * @param pool statfile pool object - */ -void statfile_pool_lockall (statfile_pool_t *pool); - -/** - * Lock specified file for exclusive use (eg. learning) - * @param pool statfile pool object - * @param filename name of statfile - */ -void statfile_pool_lock_file (statfile_pool_t *pool, stat_file_t *file); - -/** - * Unlock specified file - * @param pool statfile pool object - * @param filename name of statfile - */ -void statfile_pool_unlock_file (statfile_pool_t *pool, stat_file_t *file); - -/** - * Get block from statfile with h1 and h2 values, use time argument for current time - * @param pool statfile pool object - * @param filename name of statfile - * @param h1 h1 in file - * @param h2 h2 in file - * @param now current time - * @return block value or 0 if block is not found - */ -double statfile_pool_get_block (statfile_pool_t *pool, - stat_file_t *file, - guint32 h1, - guint32 h2, - time_t now); - -/** - * Set specified block in statfile - * @param pool statfile pool object - * @param filename name of statfile - * @param h1 h1 in file - * @param h2 h2 in file - * @param now current time - * @param value value of block - */ -void statfile_pool_set_block (statfile_pool_t *pool, - stat_file_t *file, - guint32 h1, - guint32 h2, - time_t now, - double value); - -/** - * Check whether statfile is opened - * @param pool statfile pool object - * @param filename name of statfile - * @return TRUE if specified statfile is opened and FALSE otherwise - */ -stat_file_t * statfile_pool_is_open (statfile_pool_t *pool, gchar *filename); - -/** - * Returns current statfile section - * @param pool statfile pool object - * @param filename name of statfile - * @return code of section or 0 if file is not opened - */ -guint32 statfile_pool_get_section (statfile_pool_t *pool, stat_file_t *file); - -/** - * Go to other section of statfile - * @param pool statfile pool object - * @param filename name of statfile - * @param code code of section to seek to - * @param from_begin search for section from begin of file if true - * @return TRUE if section was set and FALSE otherwise - */ -gboolean statfile_pool_set_section (statfile_pool_t *pool, - stat_file_t *file, - guint32 code, - gboolean from_begin); - -/** - * Add new section to statfile - * @param pool statfile pool object - * @param filename name of statfile - * @param code code of section to seek to - * @param length length in blocks of new section - * @return TRUE if section was successfully added and FALSE in case of error - */ -gboolean statfile_pool_add_section (statfile_pool_t *pool, - stat_file_t *file, - guint32 code, - guint64 length); - - -/** - * Return code of section identified by name - * @param name name of section - * @return code of section or 0 if name of section is unknown - */ -guint32 statfile_get_section_by_name (const gchar *name); - -/** - * Set statfile revision and revision time - * @param filename name of statfile - * @param revision number of revision - * @param time time of revision - * @return TRUE if revision was set - */ -gboolean statfile_set_revision (stat_file_t *file, guint64 rev, time_t time); - -/** - * Increment statfile revision and revision time - * @param filename name of statfile - * @param time time of revision - * @return TRUE if revision was set - */ -gboolean statfile_inc_revision (stat_file_t *file); - -/** - * Set statfile revision and revision time - * @param filename name of statfile - * @param revision saved number of revision - * @param time saved time of revision - * @return TRUE if revision was saved in rev and time - */ -gboolean statfile_get_revision (stat_file_t *file, guint64 *rev, time_t *time); - -/** - * Get statfile used blocks - * @param file file to get number of used blocks - * @return number of used blocks or (guint64)-1 in case of error - */ -guint64 statfile_get_used_blocks (stat_file_t *file); - -/** - * Get statfile total blocks - * @param file file to get number of used blocks - * @return number of used blocks or (guint64)-1 in case of error - */ -guint64 statfile_get_total_blocks (stat_file_t *file); - - -/** - * Plan statfile pool invalidation - */ -void statfile_pool_plan_invalidate (statfile_pool_t *pool, - time_t seconds, - time_t jitter); - -/** - * Get a statfile by symbol - * @param pool pool object - * @param ccf ccf classifier config - * @param symbol symbol to search - * @param st statfile to get - * @param try_create whether we need to create statfile if it is absent - */ -stat_file_t * get_statfile_by_symbol (statfile_pool_t *pool, - struct rspamd_classifier_config *ccf, - const gchar *symbol, - struct rspamd_statfile_config **st, - gboolean try_create); - -#endif |