From 9f300615e8fca8076266de1a220c74a226d09979 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Mon, 14 Dec 2009 19:03:43 +0300 Subject: [PATCH] * Fix symbols cache (init lua filters before symbols cache initialization) * Remove LRU expiration logic from statfiles and replace it with random/lowest value expiration logic: expire random block or block with lowest value ! statfiles are incompatible again --- src/classifiers/winnow.c | 24 ++++++++++++++++++++--- src/main.c | 20 +++++++++---------- src/statfile.c | 42 ++++++++++++++++------------------------ src/statfile.h | 1 - src/util.c | 4 ++-- 5 files changed, 50 insertions(+), 41 deletions(-) diff --git a/src/classifiers/winnow.c b/src/classifiers/winnow.c index 7e1144ae7..4b1bd5549 100644 --- a/src/classifiers/winnow.c +++ b/src/classifiers/winnow.c @@ -54,7 +54,12 @@ classify_callback (gpointer key, gpointer value, gpointer data) /* Consider that not found blocks have value 1 */ v = statfile_pool_get_block (cd->pool, cd->file, node->h1, node->h2, cd->now); if (fabs (v) > 0.00001) { - cd->sum += v; + if (cd->sum + v > G_MAXDOUBLE / 2.) { + cd->sum = G_MAXDOUBLE / 2.; + } + else { + cd->sum += v; + } cd->in_class++; } @@ -80,10 +85,23 @@ learn_callback (gpointer key, gpointer value, gpointer data) } else { statfile_pool_set_block (cd->pool, cd->file, node->h1, node->h2, cd->now, v * c); - node->value = v * c; + /* Set some limit on growing */ + if (v > G_MAXDOUBLE / 2.) { + node->value = v; + } + else { + node->value = v * c; + } } - cd->sum += node->value; + + if (cd->sum + node->value > G_MAXDOUBLE / 2.) 
{ + cd->sum = G_MAXDOUBLE / 2.; + } + else { + cd->sum += node->value; + } + cd->count++; return FALSE; diff --git a/src/main.c b/src/main.c index 5512e30f1..49ef09b59 100644 --- a/src/main.c +++ b/src/main.c @@ -756,16 +756,6 @@ main (int argc, char **argv, char **env) l = g_list_next (l); } - /* Init symbols cache for each metric */ - l = g_list_first (cfg->metrics_list); - while (l) { - metric = l->data; - if (metric->cache && !init_symbols_cache (cfg->cfg_pool, metric->cache, metric->cache_filename)) { - exit (EXIT_FAILURE); - } - l = g_list_next (l); - } - #ifndef WITHOUT_PERL /* Init perl interpreter */ dTHXa (perl_interpreter); @@ -784,6 +774,16 @@ main (int argc, char **argv, char **env) init_lua_filters (cfg); #endif + /* Init symbols cache for each metric */ + l = g_list_first (cfg->metrics_list); + while (l) { + metric = l->data; + if (metric->cache && !init_symbols_cache (cfg->cfg_pool, metric->cache, metric->cache_filename)) { + exit (EXIT_FAILURE); + } + l = g_list_next (l); + } + rspamd->workers = g_hash_table_new (g_direct_hash, g_direct_equal); spawn_workers (rspamd, TRUE); diff --git a/src/statfile.c b/src/statfile.c index a58730144..92a29f46d 100644 --- a/src/statfile.c +++ b/src/statfile.c @@ -154,7 +154,7 @@ statfile_pool_check (stat_file_t * file) file->cur_section.code = f->section.code; file->cur_section.length = f->section.length; if (file->cur_section.length * sizeof (struct stat_file_block) > file->len) { - msg_info ("statfile_pool_check: file %s is truncated: %zd, must be %zd", file->filename, file->len, file->cur_section.length * sizeof (struct stat_file_block)); + msg_info ("statfile_pool_check: file %s is truncated: %z, must be %z", file->filename, file->len, file->cur_section.length * sizeof (struct stat_file_block)); return -1; } file->seek_pos = sizeof (struct stat_file) - sizeof (struct stat_file_block); @@ -265,7 +265,7 @@ statfile_pool_reindex (statfile_pool_t * pool, char *filename, size_t old_size, while (pos - map < old_size) 
{ block = (struct stat_file_block *)pos; if (block->hash1 != 0 && block->value != 0) { - statfile_pool_set_block_common (pool, new, block->hash1, block->hash2, block->last_access, block->value, FALSE); + statfile_pool_set_block_common (pool, new, block->hash1, block->hash2, 0, block->value, FALSE); } pos += sizeof (block); } @@ -347,6 +347,7 @@ statfile_pool_open (statfile_pool_t * pool, char *filename, size_t size, gboolea pool->opened--; memory_pool_unlock_mutex (pool->lock); unlock_file (new_file->fd, FALSE); + munmap (new_file->map, st.st_size); return NULL; } unlock_file (new_file->fd, FALSE); @@ -411,7 +412,7 @@ statfile_pool_create (statfile_pool_t * pool, char *filename, size_t size) struct stat_file_section section = { .code = STATFILE_SECTION_COMMON, }; - struct stat_file_block block = { 0, 0, 0, 0 }; + struct stat_file_block block = { 0, 0, 0 }; int fd; unsigned int buflen, nblocks; char *buf = NULL; @@ -539,7 +540,6 @@ statfile_pool_get_block (statfile_pool_t * pool, stat_file_t * file, uint32_t h1 break; } if (block->hash1 == h1 && block->hash2 == h2) { - block->last_access = now - (time_t) header->create_time; return block->value; } c += sizeof (struct stat_file_block); @@ -550,13 +550,15 @@ statfile_pool_get_block (statfile_pool_t * pool, stat_file_t * file, uint32_t h1 return 0; } +#define RANDOM_EXPIRE G_MAXINT / CHAIN_LENGTH static void statfile_pool_set_block_common (statfile_pool_t * pool, stat_file_t * file, uint32_t h1, uint32_t h2, time_t t, double value, gboolean from_now) { struct stat_file_block *block, *to_expire = NULL; struct stat_file_header *header; - unsigned int i, blocknum, oldest = 0; + unsigned int i, blocknum; u_char *c; + double min = G_MAXDOUBLE; if (from_now) { @@ -579,12 +581,6 @@ statfile_pool_set_block_common (statfile_pool_t * pool, stat_file_t * file, uint } /* First try to find block in chain */ if (block->hash1 == h1 && block->hash2 == h2) { - if (from_now) { - block->last_access = t - (time_t) header->create_time; - } 
- else { - block->last_access = t; - } block->value = value; return; } @@ -595,17 +591,18 @@ statfile_pool_set_block_common (statfile_pool_t * pool, stat_file_t * file, uint block->hash1 = h1; block->hash2 = h2; block->value = value; - if (from_now) { - block->last_access = t - (time_t) header->create_time; - } - else { - block->last_access = t; - } header->used_blocks ++; return; } - if (block->last_access > oldest) { + + /* Expire block if we have some random value that is lower than RANDOM_EXPIRE value */ + if (g_random_int () < RANDOM_EXPIRE) { + to_expire = block; + break; + } + /* Expire block with minimum value otherwise */ + if (block->value < min) { to_expire = block; } c += sizeof (struct stat_file_block); @@ -621,12 +618,7 @@ statfile_pool_set_block_common (statfile_pool_t * pool, stat_file_t * file, uint c = (u_char *) file->map + file->seek_pos + blocknum * sizeof (struct stat_file_block); block = (struct stat_file_block *)c; } - if (from_now) { - block->last_access = t - (time_t) header->create_time; - } - else { - block->last_access = t; - } + block->hash1 = h1; block->hash2 = h2; block->value = value; @@ -686,7 +678,7 @@ gboolean statfile_pool_add_section (statfile_pool_t * pool, stat_file_t * file, uint32_t code, uint64_t length) { struct stat_file_section sect; - struct stat_file_block block = { 0, 0, 0, 0 }; + struct stat_file_block block = { 0, 0, 0 }; if (lseek (file->fd, 0, SEEK_END) == -1) { msg_info ("statfile_pool_add_section: cannot lseek file %s, error %d, %s", file->filename, errno, strerror (errno)); diff --git a/src/statfile.h b/src/statfile.h index 43b84bdfe..a43000534 100644 --- a/src/statfile.h +++ b/src/statfile.h @@ -47,7 +47,6 @@ struct stat_file_section { struct stat_file_block { uint32_t hash1; /**< hash1 (also acts as index) */ uint32_t hash2; /**< hash2 */ - uint32_t last_access; /**< last access to block since create time of file */ double value; /**< double value */ }; diff --git a/src/util.c b/src/util.c index 
000aeb43e..b3e4f7e68 100644 --- a/src/util.c +++ b/src/util.c @@ -954,8 +954,8 @@ calculate_check_time (struct timespec *begin, int resolution) diff = (ts.tv_sec - begin->tv_sec) * 1000. + /* Seconds */ (ts.tv_nsec - begin->tv_nsec) / 1000000.; /* Nanoseconds */ - rspamd_sprintf (fmt, "%%.%df", resolution); - rspamd_snprintf (res, sizeof (res), fmt, diff); + sprintf (fmt, "%%.%df", resolution); + snprintf (res, sizeof (res), fmt, diff); return (const char *)res; } -- 2.39.5