diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-01-17 21:53:49 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-01-18 23:05:15 +0000 |
commit | 665166c376a54f52b070e891780ca6209bbaa2d1 (patch) | |
tree | 034b58919947b10b1c5adb85796bc8d1a8ea0ed7 /src/libstat | |
parent | 5d9fc64987e526b3a3cccd3dcb5f980ccc8b83cb (diff) | |
download | rspamd-665166c376a54f52b070e891780ca6209bbaa2d1.tar.gz rspamd-665166c376a54f52b070e891780ca6209bbaa2d1.zip |
Start refactoring of statistics in rspamd.
Diffstat (limited to 'src/libstat')
-rw-r--r-- | src/libstat/CMakeLists.txt | 5 | ||||
-rw-r--r-- | src/libstat/backends.h | 43 | ||||
-rw-r--r-- | src/libstat/backends/backends.c | 47 | ||||
-rw-r--r-- | src/libstat/backends/mmaped_file.c | 1083 | ||||
-rw-r--r-- | src/libstat/backends/mmaped_file.h | 310 | ||||
-rw-r--r-- | src/libstat/classifiers.h | 2 | ||||
-rw-r--r-- | src/libstat/classifiers/classifiers.c | 2 | ||||
-rw-r--r-- | src/libstat/stat_config.c | 30 | ||||
-rw-r--r-- | src/libstat/tokenizers.h | 2 | ||||
-rw-r--r-- | src/libstat/tokenizers/tokenizers.c | 5 |
10 files changed, 1522 insertions, 7 deletions
diff --git a/src/libstat/CMakeLists.txt b/src/libstat/CMakeLists.txt index 810570f20..f1692de63 100644 --- a/src/libstat/CMakeLists.txt +++ b/src/libstat/CMakeLists.txt @@ -1,11 +1,14 @@ # Librspamdserver SET(LIBSTATSRC - ) + stat_config.c) SET(TOKENIZERSSRC tokenizers/tokenizers.c tokenizers/osb.c) SET(CLASSIFIERSSRC classifiers/classifiers.c classifiers/bayes.c) + +SET(BACKENDSSRC backends/backends.c + backends/mmaped_file.c) ADD_LIBRARY(rspamd-stat ${LINK_TYPE} ${LIBSTATSRC} ${TOKENIZERSSRC} ${CLASSIFIERSSRC}) IF(NOT DEBIAN_BUILD) diff --git a/src/libstat/backends.h b/src/libstat/backends.h new file mode 100644 index 000000000..04710b4b2 --- /dev/null +++ b/src/libstat/backends.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2015, Vsevolod Stakhov + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef BACKENDS_H_ +#define BACKENDS_H_ + +#include "config.h" +#include "cfg_file.h" + +#define RSPAMD_DEFAULT_BACKEND "mmap" + +struct rspamd_stat_backend { + const char *name; + gpointer (*init)(rspamd_mempool_t *pool, struct rspamd_statfile_config *cfg); + gpointer ctx; +}; + +extern struct rspamd_stat_backend statfile_backends[]; + +struct rspamd_stat_backend *rspamd_stat_get_backend (const char *name); + +#endif /* BACKENDS_H_ */ diff --git a/src/libstat/backends/backends.c b/src/libstat/backends/backends.c new file mode 100644 index 000000000..815a66dbd --- /dev/null +++ b/src/libstat/backends/backends.c @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2015, Vsevolod Stakhov + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "main.h" +#include "backends.h" +#include "mmaped_file.h" + +struct rspamd_stat_backend statfile_backends[] = { + {RSPAMD_DEFAULT_BACKEND, } +}; + + +struct rspamd_stat_backend * +rspamd_stat_get_backend (const char *name) +{ + guint i; + + for (i = 0; i < G_N_ELEMENTS (statfile_backends); i++) { + if (strcmp (statfile_backends[i].name, name) == 0) { + return &statfile_backends[i]; + } + } + + return NULL; +} diff --git a/src/libstat/backends/mmaped_file.c b/src/libstat/backends/mmaped_file.c new file mode 100644 index 000000000..066671a95 --- /dev/null +++ b/src/libstat/backends/mmaped_file.c @@ -0,0 +1,1083 @@ +/* + * Copyright (c) 2009-2012, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" + +#include "statfile.h" +#include "main.h" + +#define RSPAMD_STATFILE_VERSION {'1', '2'} +#define BACKUP_SUFFIX ".old" + +/* Maximum number of statistics files */ +#define STATFILES_MAX 255 +static void statfile_pool_set_block_common ( + statfile_pool_t * pool, stat_file_t * file, + guint32 h1, guint32 h2, + time_t t, double value, + gboolean from_now); + +static gint +cmpstatfile (const void *a, const void *b) +{ + const stat_file_t *s1 = a, *s2 = b; + + return g_ascii_strcasecmp (s1->filename, s2->filename); +} + +/* Convert statfile version 1.0 to statfile version 1.2, saving backup */ +struct stat_file_header_10 { + u_char magic[3]; /**< magic signature ('r' 's' 'd') */ + u_char version[2]; /**< version of statfile */ + u_char padding[3]; /**< padding */ + guint64 create_time; /**< create time (time_t->guint64) */ +}; + +static gboolean +convert_statfile_10 (stat_file_t * file) +{ + gchar *backup_name; + struct stat st; + struct stat_file_header header = { + .magic = {'r', 's', 'd'}, + .version = RSPAMD_STATFILE_VERSION, + .padding = {0, 0, 0}, + .revision = 0, + .rev_time = 0 + }; + + + /* Format backup name */ + backup_name = g_strdup_printf ("%s.%s", file->filename, BACKUP_SUFFIX); + + msg_info ("convert old statfile %s to version %c.%c, backup in %s", + file->filename, + header.version[0], + header.version[1], + backup_name); + + if (stat (backup_name, &st) != -1) { + msg_info ("replace old %s", backup_name); + unlink 
(backup_name); + } + + rename (file->filename, backup_name); + g_free (backup_name); + + /* XXX: maybe race condition here */ + rspamd_file_unlock (file->fd, FALSE); + close (file->fd); + if ((file->fd = + open (file->filename, O_RDWR | O_TRUNC | O_CREAT, + S_IWUSR | S_IRUSR)) == -1) { + msg_info ("cannot create file %s, error %d, %s", + file->filename, + errno, + strerror (errno)); + return FALSE; + } + rspamd_file_lock (file->fd, FALSE); + /* Now make new header and copy it to new file */ + if (write (file->fd, &header, sizeof (header)) == -1) { + msg_info ("cannot write to file %s, error %d, %s", + file->filename, + errno, + strerror (errno)); + return FALSE; + } + /* Now write old map to new file */ + if (write (file->fd, + ((u_char *)file->map + sizeof (struct stat_file_header_10)), + file->len - sizeof (struct stat_file_header_10)) == -1) { + msg_info ("cannot write to file %s, error %d, %s", + file->filename, + errno, + strerror (errno)); + return FALSE; + } + /* Unmap old memory and map new */ + munmap (file->map, file->len); + file->len = file->len + sizeof (struct stat_file_header) - + sizeof (struct stat_file_header_10); +#ifdef HAVE_MMAP_NOCORE + if ((file->map = + mmap (NULL, file->len, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_NOCORE, + file->fd, 0)) == MAP_FAILED) { +#else + if ((file->map = + mmap (NULL, file->len, PROT_READ | PROT_WRITE, MAP_SHARED, file->fd, + 0)) == MAP_FAILED) { +#endif + msg_info ("cannot mmap file %s, error %d, %s", + file->filename, + errno, + strerror (errno)); + return FALSE; + } + + return TRUE; +} + +/* Check whether specified file is statistic file and calculate its len in blocks */ +static gint +statfile_pool_check (stat_file_t * file) +{ + struct stat_file *f; + gchar *c; + static gchar valid_version[] = RSPAMD_STATFILE_VERSION; + + + if (!file || !file->map) { + return -1; + } + + if (file->len < sizeof (struct stat_file)) { + msg_info ("file %s is too short to be stat file: %z", + file->filename, + file->len); + 
return -1; + } + + f = (struct stat_file *)file->map; + c = f->header.magic; + /* Check magic and version */ + if (*c++ != 'r' || *c++ != 's' || *c++ != 'd') { + msg_info ("file %s is invalid stat file", file->filename); + return -1; + } + /* Now check version and convert old version to new one (that can be used for sync */ + if (*c == 1 && *(c + 1) == 0) { + if (!convert_statfile_10 (file)) { + return -1; + } + f = (struct stat_file *)file->map; + } + else if (memcmp (c, valid_version, sizeof (valid_version)) != 0) { + /* Unknown version */ + msg_info ("file %s has invalid version %c.%c", + file->filename, + '0' + *c, + '0' + *(c + 1)); + return -1; + } + + /* Check first section and set new offset */ + file->cur_section.code = f->section.code; + file->cur_section.length = f->section.length; + if (file->cur_section.length * sizeof (struct stat_file_block) > + file->len) { + msg_info ("file %s is truncated: %z, must be %z", + file->filename, + file->len, + file->cur_section.length * sizeof (struct stat_file_block)); + return -1; + } + file->seek_pos = sizeof (struct stat_file) - + sizeof (struct stat_file_block); + + return 0; +} + + +statfile_pool_t * +statfile_pool_new (rspamd_mempool_t *pool, gboolean use_mlock) +{ + statfile_pool_t *new; + + new = rspamd_mempool_alloc0 (pool, sizeof (statfile_pool_t)); + new->pool = rspamd_mempool_new (rspamd_mempool_suggest_size ()); + new->files = + rspamd_mempool_alloc0 (new->pool, STATFILES_MAX * sizeof (stat_file_t)); + new->lock = rspamd_mempool_get_mutex (new->pool); + new->mlock_ok = use_mlock; + + return new; +} + +static stat_file_t * +statfile_pool_reindex (statfile_pool_t * pool, + gchar *filename, + size_t old_size, + size_t size) +{ + gchar *backup; + gint fd; + stat_file_t *new; + u_char *map, *pos; + struct stat_file_block *block; + struct stat_file_header *header; + + if (size < + sizeof (struct stat_file_header) + sizeof (struct stat_file_section) + + sizeof (block)) { + msg_err ("file %s is too small to carry 
any statistic: %z", + filename, + size); + return NULL; + } + + /* First of all rename old file */ + rspamd_mempool_lock_mutex (pool->lock); + + backup = g_strconcat (filename, ".old", NULL); + if (rename (filename, backup) == -1) { + msg_err ("cannot rename %s to %s: %s", filename, backup, strerror ( + errno)); + g_free (backup); + rspamd_mempool_unlock_mutex (pool->lock); + return NULL; + } + + rspamd_mempool_unlock_mutex (pool->lock); + + /* Now create new file with required size */ + if (statfile_pool_create (pool, filename, size) != 0) { + msg_err ("cannot create new file"); + g_free (backup); + return NULL; + } + /* Now open new file and start copying */ + fd = open (backup, O_RDONLY); + new = statfile_pool_open (pool, filename, size, TRUE); + + if (fd == -1 || new == NULL) { + msg_err ("cannot open file: %s", strerror (errno)); + g_free (backup); + return NULL; + } + + /* Now start reading blocks from old statfile */ + if ((map = + mmap (NULL, old_size, PROT_READ, MAP_SHARED, fd, 0)) == MAP_FAILED) { + msg_err ("cannot mmap file: %s", strerror (errno)); + close (fd); + g_free (backup); + return NULL; + } + + pos = map + (sizeof (struct stat_file) - sizeof (struct stat_file_block)); + while (old_size - (pos - map) >= sizeof (struct stat_file_block)) { + block = (struct stat_file_block *)pos; + if (block->hash1 != 0 && block->value != 0) { + statfile_pool_set_block_common (pool, + new, + block->hash1, + block->hash2, + 0, + block->value, + FALSE); + } + pos += sizeof (block); + } + + header = (struct stat_file_header *)map; + statfile_set_revision (new, header->revision, header->rev_time); + + munmap (map, old_size); + close (fd); + unlink (backup); + g_free (backup); + + return new; + +} + +/* + * Pre-load mmaped file into memory + */ +static void +statfile_preload (stat_file_t *file) +{ + guint8 *pos, *end; + volatile guint8 t; + gsize size; + + pos = (guint8 *)file->map; + end = (guint8 *)file->map + file->len; + + if (madvise (pos, end - pos, 
MADV_SEQUENTIAL) == -1) { + msg_info ("madvise failed: %s", strerror (errno)); + } + else { + /* Load pages of file */ +#ifdef HAVE_GETPAGESIZE + size = getpagesize (); +#else + size = sysconf (_SC_PAGESIZE); +#endif + while (pos < end) { + t = *pos; + (void)t; + pos += size; + } + } +} + +stat_file_t * +statfile_pool_open (statfile_pool_t * pool, + gchar *filename, + size_t size, + gboolean forced) +{ + struct stat st; + stat_file_t *new_file; + + if ((new_file = statfile_pool_is_open (pool, filename)) != NULL) { + return new_file; + } + + if (pool->opened >= STATFILES_MAX - 1) { + msg_err ("reached hard coded limit of statfiles opened: %d", + STATFILES_MAX); + return NULL; + } + + if (stat (filename, &st) == -1) { + msg_info ("cannot stat file %s, error %s, %d", filename, strerror ( + errno), errno); + return NULL; + } + + rspamd_mempool_lock_mutex (pool->lock); + if (!forced && + labs (size - st.st_size) > (long)sizeof (struct stat_file) * 2 + && size > sizeof (struct stat_file)) { + rspamd_mempool_unlock_mutex (pool->lock); + msg_warn ("need to reindex statfile old size: %Hz, new size: %Hz", + (size_t)st.st_size, size); + return statfile_pool_reindex (pool, filename, st.st_size, size); + } + else if (size < sizeof (struct stat_file)) { + msg_err ("requested to shrink statfile to %Hz but it is too small", + size); + } + + new_file = &pool->files[pool->opened++]; + bzero (new_file, sizeof (stat_file_t)); + if ((new_file->fd = open (filename, O_RDWR)) == -1) { + msg_info ("cannot open file %s, error %d, %s", + filename, + errno, + strerror (errno)); + rspamd_mempool_unlock_mutex (pool->lock); + pool->opened--; + return NULL; + } + + if ((new_file->map = + mmap (NULL, st.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, + new_file->fd, 0)) == MAP_FAILED) { + close (new_file->fd); + rspamd_mempool_unlock_mutex (pool->lock); + msg_info ("cannot mmap file %s, error %d, %s", + filename, + errno, + strerror (errno)); + pool->opened--; + return NULL; + + } + + rspamd_strlcpy 
(new_file->filename, filename, sizeof (new_file->filename)); + new_file->len = st.st_size; + /* Try to lock pages in RAM */ + if (pool->mlock_ok) { + if (mlock (new_file->map, new_file->len) == -1) { + msg_warn ( + "mlock of statfile failed, maybe you need to increase RLIMIT_MEMLOCK limit for a process: %s", + strerror (errno)); + pool->mlock_ok = FALSE; + } + } + /* Acquire lock for this operation */ + rspamd_file_lock (new_file->fd, FALSE); + if (statfile_pool_check (new_file) == -1) { + pool->opened--; + rspamd_mempool_unlock_mutex (pool->lock); + rspamd_file_unlock (new_file->fd, FALSE); + munmap (new_file->map, st.st_size); + return NULL; + } + rspamd_file_unlock (new_file->fd, FALSE); + + new_file->open_time = time (NULL); + new_file->access_time = new_file->open_time; + new_file->lock = rspamd_mempool_get_mutex (pool->pool); + + statfile_preload (new_file); + + rspamd_mempool_unlock_mutex (pool->lock); + + return statfile_pool_is_open (pool, filename); +} + +gint +statfile_pool_close (statfile_pool_t * pool, + stat_file_t * file, + gboolean keep_sorted) +{ + stat_file_t *pos; + + if ((pos = statfile_pool_is_open (pool, file->filename)) == NULL) { + msg_info ("file %s is not opened", file->filename); + return -1; + } + + rspamd_mempool_lock_mutex (pool->lock); + + if (file->map) { + msg_info ("syncing statfile %s", file->filename); + msync (file->map, file->len, MS_ASYNC); + munmap (file->map, file->len); + } + if (file->fd != -1) { + close (file->fd); + } + /* Move the remain statfiles */ + memmove (pos, ((guint8 *)pos) + sizeof (stat_file_t), + (--pool->opened - (pos - pool->files)) * sizeof (stat_file_t)); + + rspamd_mempool_unlock_mutex (pool->lock); + + return 0; +} + +gint +statfile_pool_create (statfile_pool_t * pool, gchar *filename, size_t size) +{ + struct stat_file_header header = { + .magic = {'r', 's', 'd'}, + .version = RSPAMD_STATFILE_VERSION, + .padding = {0, 0, 0}, + .revision = 0, + .rev_time = 0, + .used_blocks = 0 + }; + struct 
stat_file_section section = { + .code = STATFILE_SECTION_COMMON, + }; + struct stat_file_block block = { 0, 0, 0 }; + gint fd; + guint buflen = 0, nblocks; + gchar *buf = NULL; + + if (statfile_pool_is_open (pool, filename) != NULL) { + msg_info ("file %s is already opened", filename); + return 0; + } + + if (size < + sizeof (struct stat_file_header) + sizeof (struct stat_file_section) + + sizeof (block)) { + msg_err ("file %s is too small to carry any statistic: %z", + filename, + size); + return -1; + } + + rspamd_mempool_lock_mutex (pool->lock); + nblocks = + (size - sizeof (struct stat_file_header) - + sizeof (struct stat_file_section)) / sizeof (struct stat_file_block); + header.total_blocks = nblocks; + + if ((fd = + open (filename, O_RDWR | O_TRUNC | O_CREAT, S_IWUSR | S_IRUSR)) == -1) { + msg_info ("cannot create file %s, error %d, %s", + filename, + errno, + strerror (errno)); + rspamd_mempool_unlock_mutex (pool->lock); + return -1; + } + + rspamd_fallocate (fd, + 0, + sizeof (header) + sizeof (section) + sizeof (block) * nblocks); + + header.create_time = (guint64) time (NULL); + if (write (fd, &header, sizeof (header)) == -1) { + msg_info ("cannot write header to file %s, error %d, %s", + filename, + errno, + strerror (errno)); + close (fd); + rspamd_mempool_unlock_mutex (pool->lock); + return -1; + } + + section.length = (guint64) nblocks; + if (write (fd, &section, sizeof (section)) == -1) { + msg_info ("cannot write section header to file %s, error %d, %s", + filename, + errno, + strerror (errno)); + close (fd); + rspamd_mempool_unlock_mutex (pool->lock); + return -1; + } + + /* Buffer for write 256 blocks at once */ + if (nblocks > 256) { + buflen = sizeof (block) * 256; + buf = g_malloc0 (buflen); + } + + while (nblocks) { + if (nblocks > 256) { + /* Just write buffer */ + if (write (fd, buf, buflen) == -1) { + msg_info ("cannot write blocks buffer to file %s, error %d, %s", + filename, + errno, + strerror (errno)); + close (fd); + 
rspamd_mempool_unlock_mutex (pool->lock); + g_free (buf); + return -1; + } + nblocks -= 256; + } + else { + if (write (fd, &block, sizeof (block)) == -1) { + msg_info ("cannot write block to file %s, error %d, %s", + filename, + errno, + strerror (errno)); + close (fd); + if (buf) { + g_free (buf); + } + rspamd_mempool_unlock_mutex (pool->lock); + return -1; + } + nblocks--; + } + } + + close (fd); + rspamd_mempool_unlock_mutex (pool->lock); + + if (buf) { + g_free (buf); + } + + return 0; +} + +void +statfile_pool_delete (statfile_pool_t * pool) +{ + gint i; + + for (i = 0; i < pool->opened; i++) { + statfile_pool_close (pool, &pool->files[i], FALSE); + } + rspamd_mempool_delete (pool->pool); +} + +void +statfile_pool_lock_file (statfile_pool_t * pool, stat_file_t * file) +{ + + rspamd_mempool_lock_mutex (file->lock); +} + +void +statfile_pool_unlock_file (statfile_pool_t * pool, stat_file_t * file) +{ + + rspamd_mempool_unlock_mutex (file->lock); +} + +double +statfile_pool_get_block (statfile_pool_t * pool, + stat_file_t * file, + guint32 h1, + guint32 h2, + time_t now) +{ + struct stat_file_block *block; + guint i, blocknum; + u_char *c; + + + file->access_time = now; + if (!file->map) { + return 0; + } + + blocknum = h1 % file->cur_section.length; + c = (u_char *) file->map + file->seek_pos + blocknum * + sizeof (struct stat_file_block); + block = (struct stat_file_block *)c; + + for (i = 0; i < CHAIN_LENGTH; i++) { + if (i + blocknum >= file->cur_section.length) { + break; + } + if (block->hash1 == h1 && block->hash2 == h2) { + return block->value; + } + c += sizeof (struct stat_file_block); + block = (struct stat_file_block *)c; + } + + + return 0; +} + +static void +statfile_pool_set_block_common (statfile_pool_t * pool, + stat_file_t * file, + guint32 h1, + guint32 h2, + time_t t, + double value, + gboolean from_now) +{ + struct stat_file_block *block, *to_expire = NULL; + struct stat_file_header *header; + guint i, blocknum; + u_char *c; + double min = 
G_MAXDOUBLE; + + if (from_now) { + file->access_time = t; + } + if (!file->map) { + return; + } + + blocknum = h1 % file->cur_section.length; + header = (struct stat_file_header *)file->map; + c = (u_char *) file->map + file->seek_pos + blocknum * + sizeof (struct stat_file_block); + block = (struct stat_file_block *)c; + + for (i = 0; i < CHAIN_LENGTH; i++) { + if (i + blocknum >= file->cur_section.length) { + /* Need to expire some block in chain */ + msg_info ("chain %ud is full in statfile %s, starting expire", + blocknum, + file->filename); + break; + } + /* First try to find block in chain */ + if (block->hash1 == h1 && block->hash2 == h2) { + block->value = value; + return; + } + /* Check whether we have a free block in chain */ + if (block->hash1 == 0 && block->hash2 == 0) { + /* Write new block here */ + msg_debug ("found free block %ud in chain %ud, set h1=%ud, h2=%ud", + i, + blocknum, + h1, + h2); + block->hash1 = h1; + block->hash2 = h2; + block->value = value; + header->used_blocks++; + + return; + } + + /* Expire block with minimum value otherwise */ + if (block->value < min) { + to_expire = block; + min = block->value; + } + c += sizeof (struct stat_file_block); + block = (struct stat_file_block *)c; + } + + /* Try expire some block */ + if (to_expire) { + block = to_expire; + } + else { + /* Expire first block in chain */ + c = (u_char *) file->map + file->seek_pos + blocknum * + sizeof (struct stat_file_block); + block = (struct stat_file_block *)c; + } + + block->hash1 = h1; + block->hash2 = h2; + block->value = value; +} + +void +statfile_pool_set_block (statfile_pool_t * pool, + stat_file_t * file, + guint32 h1, + guint32 h2, + time_t now, + double value) +{ + statfile_pool_set_block_common (pool, file, h1, h2, now, value, TRUE); +} + +stat_file_t * +statfile_pool_is_open (statfile_pool_t * pool, gchar *filename) +{ + static stat_file_t f, *ret; + rspamd_strlcpy (f.filename, filename, sizeof (f.filename)); + ret = lfind (&f, + pool->files, + 
(size_t *)&pool->opened, + sizeof (stat_file_t), + cmpstatfile); + return ret; +} + +guint32 +statfile_pool_get_section (statfile_pool_t * pool, stat_file_t * file) +{ + + return file->cur_section.code; +} + +gboolean +statfile_pool_set_section (statfile_pool_t * pool, + stat_file_t * file, + guint32 code, + gboolean from_begin) +{ + struct stat_file_section *sec; + off_t cur_offset; + + + /* Try to find section */ + if (from_begin) { + cur_offset = sizeof (struct stat_file_header); + } + else { + cur_offset = file->seek_pos - sizeof (struct stat_file_section); + } + while (cur_offset < (off_t)file->len) { + sec = (struct stat_file_section *)((gchar *)file->map + cur_offset); + if (sec->code == code) { + file->cur_section.code = code; + file->cur_section.length = sec->length; + file->seek_pos = cur_offset + sizeof (struct stat_file_section); + return TRUE; + } + cur_offset += sec->length; + } + + return FALSE; +} + +gboolean +statfile_pool_add_section (statfile_pool_t * pool, + stat_file_t * file, + guint32 code, + guint64 length) +{ + struct stat_file_section sect; + struct stat_file_block block = { 0, 0, 0 }; + + if (lseek (file->fd, 0, SEEK_END) == -1) { + msg_info ("cannot lseek file %s, error %d, %s", + file->filename, + errno, + strerror (errno)); + return FALSE; + } + + sect.code = code; + sect.length = length; + + if (write (file->fd, &sect, sizeof (sect)) == -1) { + msg_info ("cannot write block to file %s, error %d, %s", + file->filename, + errno, + strerror (errno)); + return FALSE; + } + + while (length--) { + if (write (file->fd, &block, sizeof (block)) == -1) { + msg_info ("cannot write block to file %s, error %d, %s", + file->filename, + errno, + strerror (errno)); + return FALSE; + } + } + + /* Lock statfile to remap memory */ + statfile_pool_lock_file (pool, file); + munmap (file->map, file->len); + fsync (file->fd); + file->len += length; + + if ((file->map = + mmap (NULL, file->len, PROT_READ | PROT_WRITE, MAP_SHARED, file->fd, + 0)) == NULL) { + 
msg_info ("cannot mmap file %s, error %d, %s", + file->filename, + errno, + strerror (errno)); + return FALSE; + } + statfile_pool_unlock_file (pool, file); + + return TRUE; + +} + +guint32 +statfile_get_section_by_name (const gchar *name) +{ + if (g_ascii_strcasecmp (name, "common") == 0) { + return STATFILE_SECTION_COMMON; + } + else if (g_ascii_strcasecmp (name, "header") == 0) { + return STATFILE_SECTION_HEADERS; + } + else if (g_ascii_strcasecmp (name, "url") == 0) { + return STATFILE_SECTION_URLS; + } + else if (g_ascii_strcasecmp (name, "regexp") == 0) { + return STATFILE_SECTION_REGEXP; + } + + return 0; +} + +gboolean +statfile_set_revision (stat_file_t *file, guint64 rev, time_t time) +{ + struct stat_file_header *header; + + if (file == NULL || file->map == NULL) { + return FALSE; + } + + header = (struct stat_file_header *)file->map; + + header->revision = rev; + header->rev_time = time; + + return TRUE; +} + +gboolean +statfile_inc_revision (stat_file_t *file) +{ + struct stat_file_header *header; + + if (file == NULL || file->map == NULL) { + return FALSE; + } + + header = (struct stat_file_header *)file->map; + + header->revision++; + + return TRUE; +} + +gboolean +statfile_get_revision (stat_file_t *file, guint64 *rev, time_t *time) +{ + struct stat_file_header *header; + + if (file == NULL || file->map == NULL) { + return FALSE; + } + + header = (struct stat_file_header *)file->map; + + if (rev != NULL) { + *rev = header->revision; + } + if (time != NULL) { + *time = header->rev_time; + } + + return TRUE; +} + +guint64 +statfile_get_used_blocks (stat_file_t *file) +{ + struct stat_file_header *header; + + if (file == NULL || file->map == NULL) { + return (guint64) - 1; + } + + header = (struct stat_file_header *)file->map; + + return header->used_blocks; +} + +guint64 +statfile_get_total_blocks (stat_file_t *file) +{ + struct stat_file_header *header; + + if (file == NULL || file->map == NULL) { + return (guint64) - 1; + } + + header = (struct 
stat_file_header *)file->map; + + /* If total blocks is 0 we have old version of header, so set total blocks correctly */ + if (header->total_blocks == 0) { + header->total_blocks = file->cur_section.length; + } + + return header->total_blocks; +} + +static void +statfile_pool_invalidate_callback (gint fd, short what, void *ud) +{ + statfile_pool_t *pool = ud; + stat_file_t *file; + gint i; + + msg_info ("invalidating %d statfiles", pool->opened); + + for (i = 0; i < pool->opened; i++) { + file = &pool->files[i]; + msync (file->map, file->len, MS_ASYNC); + } + +} + + +void +statfile_pool_plan_invalidate (statfile_pool_t *pool, + time_t seconds, + time_t jitter) +{ + gboolean pending; + + + if (pool->invalidate_event != NULL) { + pending = evtimer_pending (pool->invalidate_event, NULL); + if (pending) { + /* Replan event */ + pool->invalidate_tv.tv_sec = seconds + + g_random_int_range (0, jitter); + pool->invalidate_tv.tv_usec = 0; + evtimer_add (pool->invalidate_event, &pool->invalidate_tv); + } + } + else { + pool->invalidate_event = + rspamd_mempool_alloc (pool->pool, sizeof (struct event)); + pool->invalidate_tv.tv_sec = seconds + g_random_int_range (0, jitter); + pool->invalidate_tv.tv_usec = 0; + evtimer_set (pool->invalidate_event, + statfile_pool_invalidate_callback, + pool); + evtimer_add (pool->invalidate_event, &pool->invalidate_tv); + msg_info ("invalidate of statfile pool is planned in %d seconds", + (gint)pool->invalidate_tv.tv_sec); + } +} + + +stat_file_t * +get_statfile_by_symbol (statfile_pool_t *pool, + struct rspamd_classifier_config *ccf, + const gchar *symbol, + struct rspamd_statfile_config **st, + gboolean try_create) +{ + stat_file_t *res = NULL; + GList *cur; + + if (pool == NULL || ccf == NULL || symbol == NULL) { + msg_err ("invalid input arguments"); + return NULL; + } + + cur = g_list_first (ccf->statfiles); + while (cur) { + *st = cur->data; + if (strcmp (symbol, (*st)->symbol) == 0) { + break; + } + *st = NULL; + cur = g_list_next 
(cur); + } + if (*st == NULL) { + msg_info ("cannot find statfile with symbol %s", symbol); + return NULL; + } + + if ((res = statfile_pool_is_open (pool, (*st)->path)) == NULL) { + if ((res = + statfile_pool_open (pool, (*st)->path, (*st)->size, + FALSE)) == NULL) { + msg_warn ("cannot open %s", (*st)->path); + if (try_create) { + if (statfile_pool_create (pool, (*st)->path, + (*st)->size) == -1) { + msg_err ("cannot create statfile %s", (*st)->path); + return NULL; + } + res = + statfile_pool_open (pool, (*st)->path, (*st)->size, FALSE); + if (res == NULL) { + msg_err ("cannot open statfile %s after creation", + (*st)->path); + } + } + } + } + + return res; +} + +void +statfile_pool_lockall (statfile_pool_t *pool) +{ + stat_file_t *file; + gint i; + + if (pool->mlock_ok) { + for (i = 0; i < pool->opened; i++) { + file = &pool->files[i]; + if (mlock (file->map, file->len) == -1) { + msg_warn ( + "mlock of statfile failed, maybe you need to increase RLIMIT_MEMLOCK limit for a process: %s", + strerror (errno)); + pool->mlock_ok = FALSE; + return; + } + } + } + /* Do not try to lock if mlock failed */ +} + diff --git a/src/libstat/backends/mmaped_file.h b/src/libstat/backends/mmaped_file.h new file mode 100644 index 000000000..f7f632703 --- /dev/null +++ b/src/libstat/backends/mmaped_file.h @@ -0,0 +1,310 @@ +/** + * @file statfile.h + * Describes common methods for accessing statistics files and caching them in memory + */ + +#ifndef RSPAMD_STATFILE_H +#define RSPAMD_STATFILE_H + +#include "config.h" +#include "mem_pool.h" +#include "hash.h" + +#define CHAIN_LENGTH 128 + +/* Section types */ +#define STATFILE_SECTION_COMMON 1 +#define STATFILE_SECTION_HEADERS 2 +#define STATFILE_SECTION_URLS 3 +#define STATFILE_SECTION_REGEXP 4 + +#define DEFAULT_STATFILE_INVALIDATE_TIME 30 +#define DEFAULT_STATFILE_INVALIDATE_JITTER 30 + +/** + * Common statfile header + */ +struct stat_file_header { + u_char magic[3]; /**< magic signature ('r' 's' 'd') */ + u_char version[2]; /**< 
version of statfile */ + u_char padding[3]; /**< padding */ + guint64 create_time; /**< create time (time_t->guint64) */ + guint64 revision; /**< revision number */ + guint64 rev_time; /**< revision time */ + guint64 used_blocks; /**< used blocks number */ + guint64 total_blocks; /**< total number of blocks */ + u_char unused[239]; /**< some bytes that can be used in future */ +}; + +/** + * Section header + */ +struct stat_file_section { + guint64 code; /**< section's code */ + guint64 length; /**< section's length in blocks */ +}; + +/** + * Block of data in statfile + */ +struct stat_file_block { + guint32 hash1; /**< hash1 (also acts as index) */ + guint32 hash2; /**< hash2 */ + double value; /**< double value */ +}; + +/** + * Statistic file + */ +struct stat_file { + struct stat_file_header header; /**< header */ + struct stat_file_section section; /**< first section */ + struct stat_file_block blocks[1]; /**< first block of data */ +}; + +/** + * Common view of statfile object + */ +typedef struct stat_file_s { +#ifdef HAVE_PATH_MAX + gchar filename[PATH_MAX]; /**< name of file */ +#else + gchar filename[MAXPATHLEN]; /**< name of file */ +#endif + gint fd; /**< descriptor */ + void *map; /**< mmaped area */ + off_t seek_pos; /**< current seek position */ + struct stat_file_section cur_section; /**< current section */ + time_t open_time; /**< time when file was opened */ + time_t access_time; /**< last access time */ + size_t len; /**< length of file (in bytes) */ + rspamd_mempool_mutex_t *lock; /**< mutex */ +} stat_file_t; + +/** + * Statfiles pool + */ +typedef struct statfile_pool_s { + stat_file_t *files; /**< array of opened files */ + void **maps; /**< shared hash table of mmaped areas indexed by name */ + gint opened; /**< number of opened files */ + rspamd_mempool_t *pool; /**< memory pool object */ + rspamd_mempool_mutex_t *lock; /**< mutex */ + struct event *invalidate_event; /**< event for pool invalidation */ + struct timeval 
invalidate_tv; + gboolean mlock_ok; /**< whether it is possible to use mlock (2) to avoid statfiles unloading */ +} statfile_pool_t; + +/* Forward declarations */ +struct rspamd_classifier_config; +struct rspamd_statfile_config; + +/** + * Create new statfile pool + * @param pool memory pool object + * @return statfile pool object + */ +statfile_pool_t * statfile_pool_new (rspamd_mempool_t *pool, + gboolean use_mlock); + +/** + * Open statfile and attach it to pool + * @param pool statfile pool object + * @param filename name of statfile to open + * @return opened statfile object or NULL in case of error + */ +stat_file_t * statfile_pool_open (statfile_pool_t *pool, + gchar *filename, + size_t len, + gboolean forced); + +/** + * Create new statfile but DOES NOT attach it to pool, use @see statfile_pool_open for attaching + * @param pool statfile pool object + * @param filename name of statfile to create + * @param len length of new statfile + * @return 0 if file was created and -1 in case of error + */ +gint statfile_pool_create (statfile_pool_t *pool, gchar *filename, size_t len); + +/** + * Close specified statfile + * @param pool statfile pool object + * @param file statfile to close + * @param keep_sorted presumably whether to keep the opened files array sorted after removal (verify in mmaped_file.c) + * @return 0 if file was closed and -1 if statfile was not opened + */ +gint statfile_pool_close (statfile_pool_t *pool, + stat_file_t *file, + gboolean keep_sorted); + +/** + * Delete statfile pool and close all attached statfiles + * @param pool statfile pool object + */ +void statfile_pool_delete (statfile_pool_t *pool); + +/** + * Try to lock all statfiles in memory + * @param pool statfile pool object + */ +void statfile_pool_lockall (statfile_pool_t *pool); + +/** + * Lock specified file for exclusive use (e.g. 
learning) + * @param pool statfile pool object + * @param file statfile object + */ +void statfile_pool_lock_file (statfile_pool_t *pool, stat_file_t *file); + +/** + * Unlock specified file + * @param pool statfile pool object + * @param file statfile object + */ +void statfile_pool_unlock_file (statfile_pool_t *pool, stat_file_t *file); + +/** + * Get block from statfile with h1 and h2 values, use time argument for current time + * @param pool statfile pool object + * @param file statfile object + * @param h1 h1 in file + * @param h2 h2 in file + * @param now current time + * @return block value or 0 if block is not found + */ +double statfile_pool_get_block (statfile_pool_t *pool, + stat_file_t *file, + guint32 h1, + guint32 h2, + time_t now); + +/** + * Set specified block in statfile + * @param pool statfile pool object + * @param file statfile object + * @param h1 h1 in file + * @param h2 h2 in file + * @param now current time + * @param value value of block + */ +void statfile_pool_set_block (statfile_pool_t *pool, + stat_file_t *file, + guint32 h1, + guint32 h2, + time_t now, + double value); + +/** + * Check whether statfile is opened + * @param pool statfile pool object + * @param filename name of statfile + * @return statfile object if specified statfile is opened and NULL otherwise + */ +stat_file_t * statfile_pool_is_open (statfile_pool_t *pool, gchar *filename); + +/** + * Returns current statfile section + * @param pool statfile pool object + * @param file statfile object + * @return code of section or 0 if file is not opened + */ +guint32 statfile_pool_get_section (statfile_pool_t *pool, stat_file_t *file); + +/** + * Go to other section of statfile + * @param pool statfile pool object + * @param file statfile object + * @param code code of section to seek to + * @param from_begin search for section from the beginning of the file if true + * @return TRUE if section was set and FALSE otherwise + */ +gboolean statfile_pool_set_section 
(statfile_pool_t *pool, + stat_file_t *file, + guint32 code, + gboolean from_begin); + +/** + * Add new section to statfile + * @param pool statfile pool object + * @param file statfile object + * @param code code of section to add + * @param length length in blocks of new section + * @return TRUE if section was successfully added and FALSE in case of error + */ +gboolean statfile_pool_add_section (statfile_pool_t *pool, + stat_file_t *file, + guint32 code, + guint64 length); + + +/** + * Return code of section identified by name + * @param name name of section + * @return code of section or 0 if name of section is unknown + */ +guint32 statfile_get_section_by_name (const gchar *name); + +/** + * Set statfile revision and revision time + * @param file statfile object + * @param rev number of revision + * @param time time of revision + * @return TRUE if revision was set + */ +gboolean statfile_set_revision (stat_file_t *file, guint64 rev, time_t time); + +/** + * Increment statfile revision and revision time + * @param file statfile object + * @note no time argument is taken; presumably the current time is recorded (verify in mmaped_file.c) + * @return TRUE if revision was set + */ +gboolean statfile_inc_revision (stat_file_t *file); + +/** + * Get statfile revision and revision time + * @param file statfile object + * @param rev saved number of revision + * @param time saved time of revision + * @return TRUE if revision was saved in rev and time + */ +gboolean statfile_get_revision (stat_file_t *file, guint64 *rev, time_t *time); + +/** + * Get statfile used blocks + * @param file file to get number of used blocks + * @return number of used blocks or (guint64)-1 in case of error + */ +guint64 statfile_get_used_blocks (stat_file_t *file); + +/** + * Get statfile total blocks + * @param file file to get number of total blocks + * @return number of total blocks or (guint64)-1 in case of error + */ +guint64 statfile_get_total_blocks (stat_file_t *file); + + +/** + * Plan statfile pool invalidation + */ 
+void statfile_pool_plan_invalidate (statfile_pool_t *pool, + time_t seconds, + time_t jitter); + +/** + * Get a statfile by symbol + * @param pool pool object + * @param ccf ccf classifier config + * @param symbol symbol to search + * @param st statfile to get + * @param try_create whether we need to create statfile if it is absent + */ +stat_file_t * get_statfile_by_symbol (statfile_pool_t *pool, + struct rspamd_classifier_config *ccf, + const gchar *symbol, + struct rspamd_statfile_config **st, + gboolean try_create); + +#endif diff --git a/src/libstat/classifiers.h b/src/libstat/classifiers.h index d13178486..2c2f33449 100644 --- a/src/libstat/classifiers.h +++ b/src/libstat/classifiers.h @@ -45,7 +45,7 @@ struct classifier { }; /* Get classifier structure by name or return NULL if this name is not found */ -struct classifier * get_classifier (const char *name); +struct classifier * rspamd_stat_get_classifier (const char *name); /* Bayes algorithm */ struct classifier_ctx * bayes_init (rspamd_mempool_t *pool, diff --git a/src/libstat/classifiers/classifiers.c b/src/libstat/classifiers/classifiers.c index 6af7d2dc8..a3efb53c1 100644 --- a/src/libstat/classifiers/classifiers.c +++ b/src/libstat/classifiers/classifiers.c @@ -40,7 +40,7 @@ struct classifier classifiers[] = { }; struct classifier * -get_classifier (const char *name) +rspamd_stat_get_classifier (const char *name) { guint i; diff --git a/src/libstat/stat_config.c b/src/libstat/stat_config.c new file mode 100644 index 000000000..fd2c0f165 --- /dev/null +++ b/src/libstat/stat_config.c @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2015, Vsevolod Stakhov + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "stat_api.h" +#include "main.h" +#include "cfg_rcl.h" + diff --git a/src/libstat/tokenizers.h b/src/libstat/tokenizers.h index ed47e0add..c0d2e8934 100644 --- a/src/libstat/tokenizers.h +++ b/src/libstat/tokenizers.h @@ -33,7 +33,7 @@ struct tokenizer { int token_node_compare_func (gconstpointer a, gconstpointer b); /* Get tokenizer structure by name or return NULL if this name is not found */ -struct tokenizer * get_tokenizer (const char *name); +struct tokenizer * rspamd_stat_get_tokenizer (const char *name); /* Get next word from specified f_str_t buf */ gchar * rspamd_tokenizer_get_word (rspamd_fstring_t *buf, diff --git a/src/libstat/tokenizers/tokenizers.c b/src/libstat/tokenizers/tokenizers.c index 3e6c745ec..ce221397d 100644 --- a/src/libstat/tokenizers/tokenizers.c +++ b/src/libstat/tokenizers/tokenizers.c @@ -26,7 +26,6 @@ * Common tokenization functions */ -#include <sys/types.h> #include "main.h" #include "tokenizers.h" @@ -77,7 +76,7 @@ const gchar t_delimiters[255] = { }; 
struct tokenizer * -get_tokenizer (const char *name) +rspamd_stat_get_tokenizer (const char *name) { guint i; @@ -230,7 +229,7 @@ tokenize_subject (struct rspamd_task *task, GTree ** tree) (rspamd_mempool_destruct_t) g_tree_destroy, *tree); } - osb_tokenizer = get_tokenizer ("osb-text"); + osb_tokenizer = rspamd_stat_get_tokenizer ("osb-text"); /* Try to use pre-defined subject */ if (task->subject != NULL) { |