From 115313228194cacf60a3e09c9b8117078ab37350 Mon Sep 17 00:00:00 2001
From: Vsevolod Stakhov
Date: Fri, 16 Oct 2009 20:46:49 +0400
Subject: [PATCH] * Add ability to change statfile size limit in config and allow reindexing of statfiles

statfile_pool_create() now takes the desired statfile size in bytes and
derives the number of blocks from it.  statfile_pool_open() takes the
configured size and a "forced" flag; when the flag is not set and the
on-disk size differs from the configured size by more than one block,
the statfile is reindexed: the old file is renamed to "<name>.old", a
new file of the requested size is created, every block with a non-zero
hash1 and value is copied into it, and the backup is removed.

---
Review notes (not part of the commit message):

 * The line structure of this patch was restored from the hunk headers.
   Leading whitespace of the context lines is reconstructed (tabs), so
   `git apply --ignore-whitespace` may be needed.
 * statfile_pool_reindex: the copy loop advanced by sizeof (block), i.e.
   the size of a pointer, instead of sizeof (struct stat_file_block); it
   now uses the block size and stops before a trailing partial block.
 * statfile_pool_create: the bulk buffer is always 256 blocks, because
   the write loop subtracts 256 from nblocks after every buffer write.
   The previous MIN() expression produced a shorter buffer for
   256 < nblocks < 65536 and therefore a truncated statfile.
 * Restored the mis-encoded "&section" argument in a context line.
 * Still open (would change hunk sizes): statfile_pool_reindex does not
   close fd / the newly opened statfile on its error paths.
 * Hunk sizes and the diffstat are unchanged; the post-image blob id in
   the src/statfile.c "index" line refers to the original patch.

 src/classifiers/winnow.c    |   8 +-
 src/filter.c                |   4 +-
 src/statfile.c              | 166 ++++++++++++++++++++++++++++++++----
 src/statfile.h              |   2 +-
 test/rspamd_statfile_test.c |   2 +-
 5 files changed, 156 insertions(+), 26 deletions(-)

diff --git a/src/classifiers/winnow.c b/src/classifiers/winnow.c
index 9404644ac..acf12a462 100644
--- a/src/classifiers/winnow.c
+++ b/src/classifiers/winnow.c
@@ -120,7 +120,7 @@ winnow_classify (struct classifier_ctx *ctx, statfile_pool_t * pool, GTree * inp
 	while (cur) {
 		st = cur->data;
 		if ((data.file = statfile_pool_is_open (pool, st->path)) == NULL) {
-			if ((data.file = statfile_pool_open (pool, st->path)) == NULL) {
+			if ((data.file = statfile_pool_open (pool, st->path, st->size, FALSE)) == NULL) {
 				msg_warn ("winnow_classify: cannot open %s, skip it", st->path);
 				cur = g_list_next (cur);
 				continue;
@@ -174,13 +174,13 @@ winnow_learn (struct classifier_ctx *ctx, statfile_pool_t * pool, char *symbol,
 	while (cur) {
 		st = cur->data;
 		if (strcmp (symbol, st->symbol) == 0) {
-			if ((data.file = statfile_pool_open (pool, st->path)) == NULL) {
+			if ((data.file = statfile_pool_open (pool, st->path, st->size, FALSE)) == NULL) {
 				/* Try to create statfile */
-				if (statfile_pool_create (pool, st->path, st->size / sizeof (struct stat_file_block)) == -1) {
+				if (statfile_pool_create (pool, st->path, st->size) == -1) {
 					msg_err ("winnow_learn: cannot create statfile %s", st->path);
 					return;
 				}
-				if ((data.file = statfile_pool_open (pool, st->path)) == NULL) {
+				if ((data.file = statfile_pool_open (pool, st->path, st->size, FALSE)) == NULL) {
 					msg_err ("winnow_learn: cannot create statfile %s", st->path);
 					return;
 				}
diff --git a/src/filter.c b/src/filter.c
index 4269e29d5..fefdd968c 100644
--- a/src/filter.c
+++ b/src/filter.c
@@ -472,9 +472,9 @@ process_autolearn (struct statfile *st, struct worker_task *task, GTree * tokens
 			/* Check opened */
 			if (!statfile_pool_is_open (task->worker->srv->statfile_pool, filename)) {
 				/* Try open */
-				if (statfile_pool_open (task->worker->srv->statfile_pool, filename) == NULL) {
+				if (statfile_pool_open (task->worker->srv->statfile_pool, filename, st->size, FALSE) == NULL) {
 					/* Try create */
-					if (statfile_pool_create (task->worker->srv->statfile_pool, filename, st->size / sizeof (struct stat_file_block)) == -1) {
+					if (statfile_pool_create (task->worker->srv->statfile_pool, filename, st->size) == -1) {
 						msg_info ("process_autolearn: error while creating statfile %s", filename);
 						return;
 					}
diff --git a/src/statfile.c b/src/statfile.c
index 697f54e4c..e44af6c2b 100644
--- a/src/statfile.c
+++ b/src/statfile.c
@@ -29,6 +29,11 @@
 
 /* Maximum number of statistics files */
 #define STATFILES_MAX 255
+static void statfile_pool_set_block_common (
+				statfile_pool_t * pool, stat_file_t * file,
+				uint32_t h1, uint32_t h2,
+				time_t t, float value,
+				gboolean from_now);
 
 static int
 cmpstatfile (const void *a, const void *b)
@@ -128,8 +133,72 @@ statfile_pool_new (size_t max_size)
 	return new;
 }
 
+static stat_file_t *
+statfile_pool_reindex (statfile_pool_t * pool, char *filename, size_t old_size, size_t size)
+{
+	char *backup;
+	int fd;
+	stat_file_t *new;
+	u_char *map, *pos;
+	struct stat_file_block *block;
+
+	/* First of all rename old file */
+	memory_pool_lock_mutex (pool->lock);
+
+	backup = g_strconcat (filename, ".old", NULL);
+	if (rename (filename, backup) == -1) {
+		msg_err ("statfile_pool_reindex: cannot rename %s to %s: %s", filename, backup, strerror (errno));
+		g_free (backup);
+		memory_pool_unlock_mutex (pool->lock);
+		return NULL;
+	}
+
+	memory_pool_unlock_mutex (pool->lock);
+
+	/* Now create new file with required size */
+	if (statfile_pool_create (pool, filename, size) != 0) {
+		msg_err ("statfile_pool_reindex: cannot create new file");
+		g_free (backup);
+		return NULL;
+	}
+	/* Now open new file and start copying */
+	fd = open (backup, O_RDONLY);
+	new = statfile_pool_open (pool, filename, size, TRUE);
+
+	if (fd == -1 || new == NULL) {
+		msg_err ("statfile_pool_reindex: cannot open file: %s", strerror (errno));
+		g_free (backup);
+		return NULL;
+	}
+
+	/* Now start reading blocks from old statfile */
+	if ((map = mmap (NULL, old_size, PROT_READ, MAP_SHARED, fd, 0)) == MAP_FAILED) {
+		msg_err ("statfile_pool_reindex: cannot mmap file: %s", strerror (errno));
+		close (fd);
+		g_free (backup);
+		return NULL;
+	}
+
+	pos = map + (sizeof (struct stat_file) - sizeof (struct stat_file_block));
+	while ((size_t) (pos - map) + sizeof (struct stat_file_block) <= old_size) {
+		block = (struct stat_file_block *)pos;
+		if (block->hash1 != 0 && block->value != 0) {
+			statfile_pool_set_block_common (pool, new, block->hash1, block->hash2, block->last_access, block->value, FALSE);
+		}
+		pos += sizeof (struct stat_file_block);
+	}
+
+	munmap (map, old_size);
+	close (fd);
+	unlink (backup);
+	g_free (backup);
+
+	return new;
+
+}
+
 stat_file_t *
-statfile_pool_open (statfile_pool_t * pool, char *filename)
+statfile_pool_open (statfile_pool_t * pool, char *filename, size_t size, gboolean forced)
 {
 	struct stat st;
 	stat_file_t *new_file;
@@ -148,12 +217,20 @@ statfile_pool_open (statfile_pool_t * pool, char *filename)
 		return NULL;
 	}
 
-	if (st.st_size > pool->max) {
+	if (!forced && st.st_size > pool->max) {
 		msg_info ("statfile_pool_open: cannot attach file to pool, too large: %zd", (size_t) st.st_size);
 		return NULL;
 	}
 
-	while (pool->max + pool->opened * sizeof (struct stat_file) < pool->occupied + st.st_size) {
+	memory_pool_lock_mutex (pool->lock);
+	if (!forced && abs (st.st_size - size) > sizeof (struct stat_file_block)) {
+		memory_pool_unlock_mutex (pool->lock);
+		msg_warn ("statfile_pool_open: need to reindex statfile old size: %zd, new size: %zd", st.st_size, size);
+		return statfile_pool_reindex (pool, filename, st.st_size, size);
+	}
+	memory_pool_unlock_mutex (pool->lock);
+
+	while (!forced && (pool->max + pool->opened * sizeof (struct stat_file) * 2 < pool->occupied + st.st_size)) {
 		if (statfile_pool_expire (pool) == -1) {
 			/* Failed to find any more free space in pool */
 			msg_info ("statfile_pool_open: expiration for pool failed, opening file %s failed", filename);
@@ -235,7 +312,7 @@ statfile_pool_close (statfile_pool_t * pool, stat_file_t * file, gboolean keep_s
 }
 
 int
-statfile_pool_create (statfile_pool_t * pool, char *filename, size_t blocks)
+statfile_pool_create (statfile_pool_t * pool, char *filename, size_t size)
 {
 	struct stat_file_header header = {
 		.magic = {'r', 's', 'd'},
@@ -247,6 +324,8 @@ statfile_pool_create (statfile_pool_t * pool, char *filename, size_t blocks)
 	};
 	struct stat_file_block block = { 0, 0, 0, 0 };
 	int fd;
+	unsigned int buflen, nblocks;
+	char *buf = NULL;
 
 	if (statfile_pool_is_open (pool, filename) != NULL) {
 		msg_info ("statfile_pool_open: file %s is already opened", filename);
@@ -254,6 +333,7 @@ statfile_pool_create (statfile_pool_t * pool, char *filename, size_t blocks)
 	}
 
 	memory_pool_lock_mutex (pool->lock);
+	nblocks = (size - sizeof (struct stat_file_header) - sizeof (struct stat_file_section)) / sizeof (struct stat_file_block);
 
 	if ((fd = open (filename, O_RDWR | O_TRUNC | O_CREAT, S_IWUSR | S_IRUSR)) == -1) {
 		msg_info ("statfile_pool_create: cannot create file %s, error %d, %s", filename, errno, strerror (errno));
@@ -269,26 +349,53 @@ statfile_pool_create (statfile_pool_t * pool, char *filename, size_t blocks)
 		return -1;
 	}
 
-	section.length = (uint64_t) blocks;
+	section.length = (uint64_t) nblocks;
 	if (write (fd, &section, sizeof (section)) == -1) {
 		msg_info ("statfile_pool_create: cannot write section header to file %s, error %d, %s", filename, errno, strerror (errno));
 		close (fd);
 		memory_pool_unlock_mutex (pool->lock);
 		return -1;
 	}
-	
-	while (blocks--) {
-		if (write (fd, &block, sizeof (block)) == -1) {
-			msg_info ("statfile_pool_create: cannot write block to file %s, error %d, %s", filename, errno, strerror (errno));
-			close (fd);
-			memory_pool_unlock_mutex (pool->lock);
-			return -1;
+
+	/* Buffer for writing 256 blocks at once */
+	if (nblocks > 256) {
+		buflen = sizeof (block) * 256;
+		buf = g_malloc0 (buflen);
+	}
+
+	while (nblocks) {
+		if (nblocks > 256) {
+			/* Just write buffer */
+			if (write (fd, buf, buflen) == -1) {
+				msg_info ("statfile_pool_create: cannot write blocks buffer to file %s, error %d, %s", filename, errno, strerror (errno));
+				close (fd);
+				memory_pool_unlock_mutex (pool->lock);
+				g_free (buf);
+				return -1;
+			}
+			nblocks -= 256;
+		}
+		else {
+			if (write (fd, &block, sizeof (block)) == -1) {
+				msg_info ("statfile_pool_create: cannot write block to file %s, error %d, %s", filename, errno, strerror (errno));
+				close (fd);
+				if (buf) {
+					g_free (buf);
+				}
+				memory_pool_unlock_mutex (pool->lock);
+				return -1;
+			}
+			nblocks --;
 		}
 	}
 
 	close (fd);
 	memory_pool_unlock_mutex (pool->lock);
 
+	if (buf) {
+		g_free (buf);
+	}
+
 	return 0;
 }
 
@@ -353,8 +460,8 @@ statfile_pool_get_block (statfile_pool_t * pool, stat_file_t * file, uint32_t h1
 	return 0;
 }
 
-void
-statfile_pool_set_block (statfile_pool_t * pool, stat_file_t * file, uint32_t h1, uint32_t h2, time_t now, float value)
+static void
+statfile_pool_set_block_common (statfile_pool_t * pool, stat_file_t * file, uint32_t h1, uint32_t h2, time_t t, float value, gboolean from_now)
 {
 	struct stat_file_block *block, *to_expire = NULL;
 	struct stat_file_header *header;
@@ -362,7 +469,9 @@ statfile_pool_set_block (statfile_pool_t * pool, stat_file_t * file, uint32_t h1
 	u_char *c;
 
 
-	file->access_time = now;
+	if (from_now) {
+		file->access_time = t;
+	}
 	if (!file->map) {
 		return;
 	}
@@ -380,7 +489,12 @@ statfile_pool_set_block (statfile_pool_t * pool, stat_file_t * file, uint32_t h1
 		}
 		/* First try to find block in chain */
 		if (block->hash1 == h1 && block->hash2 == h2) {
-			block->last_access = now - (time_t) header->create_time;
+			if (from_now) {
+				block->last_access = t - (time_t) header->create_time;
+			}
+			else {
+				block->last_access = t;
+			}
 			block->value = value;
 			return;
 		}
@@ -391,7 +505,12 @@ statfile_pool_set_block (statfile_pool_t * pool, stat_file_t * file, uint32_t h1
 			block->hash1 = h1;
 			block->hash2 = h2;
 			block->value = value;
-			block->last_access = now - (time_t) header->create_time;
+			if (from_now) {
+				block->last_access = t - (time_t) header->create_time;
+			}
+			else {
+				block->last_access = t;
+			}
 			return;
 		}
 		if (block->last_access > oldest) {
@@ -410,12 +529,23 @@ statfile_pool_set_block (statfile_pool_t * pool, stat_file_t * file, uint32_t h1
 		c = (u_char *) file->map + file->seek_pos + blocknum * sizeof (struct stat_file_block);
 		block = (struct stat_file_block *)c;
 	}
-	block->last_access = now - (time_t) header->create_time;
+	if (from_now) {
+		block->last_access = t - (time_t) header->create_time;
+	}
+	else {
+		block->last_access = t;
+	}
 	block->hash1 = h1;
 	block->hash2 = h2;
 	block->value = value;
 }
 
+void
+statfile_pool_set_block (statfile_pool_t * pool, stat_file_t * file, uint32_t h1, uint32_t h2, time_t now, float value)
+{
+	statfile_pool_set_block_common (pool, file, h1, h2, now, value, TRUE);
+}
+
 stat_file_t *
 statfile_pool_is_open (statfile_pool_t * pool, char *filename)
 {
diff --git a/src/statfile.h b/src/statfile.h
index 4417f4af7..76efe264f 100644
--- a/src/statfile.h
+++ b/src/statfile.h
@@ -100,7 +100,7 @@ statfile_pool_t* statfile_pool_new (size_t max_size);
  * @param filename name of statfile to open
  * @return 0 if specified statfile is attached and -1 in case of error
  */
-stat_file_t* statfile_pool_open (statfile_pool_t *pool, char *filename);
+stat_file_t* statfile_pool_open (statfile_pool_t *pool, char *filename, size_t len, gboolean forced);
 
 /**
  * Create new statfile but DOES NOT attach it to pool, use @see statfile_pool_open for attaching
diff --git a/test/rspamd_statfile_test.c b/test/rspamd_statfile_test.c
index 282d4dc1c..9618874ba 100644
--- a/test/rspamd_statfile_test.c
+++ b/test/rspamd_statfile_test.c
@@ -41,7 +41,7 @@ rspamd_statfile_test_func ()
 
 	/* Create new file */
 	g_assert (statfile_pool_create (pool, TEST_FILENAME, 65535) != -1);
-	g_assert ((st = statfile_pool_open (pool, TEST_FILENAME)) != NULL);
+	g_assert ((st = statfile_pool_open (pool, TEST_FILENAME, 65535, FALSE)) != NULL);
 
 	/* Get and set random blocks */
 	statfile_pool_lock_file (pool, st);
-- 
2.39.5