source.dussan.org Git - rspamd.git/commitdiff
* Fix symbols cache (init lua filters before symbols cache initialization)
author: Vsevolod Stakhov <vsevolod@rambler-co.ru>
Mon, 14 Dec 2009 16:03:43 +0000 (19:03 +0300)
committer: Vsevolod Stakhov <vsevolod@rambler-co.ru>
Mon, 14 Dec 2009 16:03:43 +0000 (19:03 +0300)
* Remove LRU expiration logic from statfiles and replace it with random/lowest-value expiration logic:
  expire either a random block or the block with the lowest value

! statfiles are incompatible again (the last_access field was removed from struct stat_file_block, changing the on-disk block layout)

src/classifiers/winnow.c
src/main.c
src/statfile.c
src/statfile.h
src/util.c

index 7e1144ae7b5ff264f56e3bd1efe5ab14605c7b01..4b1bd5549b05323ca6e7076e450ad984408f879b 100644 (file)
@@ -54,7 +54,12 @@ classify_callback (gpointer key, gpointer value, gpointer data)
        /* Consider that not found blocks have value 1 */
        v = statfile_pool_get_block (cd->pool, cd->file, node->h1, node->h2, cd->now);
        if (fabs (v) > 0.00001) {
-               cd->sum += v;
+        if (cd->sum + v > G_MAXDOUBLE / 2.) {
+            cd->sum = G_MAXDOUBLE / 2.;
+        }
+        else {
+                   cd->sum += v;
+        }
                cd->in_class++;
        }
 
@@ -80,10 +85,23 @@ learn_callback (gpointer key, gpointer value, gpointer data)
        }
        else {
                statfile_pool_set_block (cd->pool, cd->file, node->h1, node->h2, cd->now, v * c);
-               node->value = v * c;
+        /* Set some limit on growing */
+        if (v > G_MAXDOUBLE / 2.) {
+            node->value = v;
+        }
+        else {
+                   node->value = v * c;
+        }
        }
 
-       cd->sum += node->value;
+
+    if (cd->sum + node->value > G_MAXDOUBLE / 2.) {
+        cd->sum = G_MAXDOUBLE / 2.;
+    }
+    else {
+           cd->sum += node->value;
+    }
+
        cd->count++;
 
        return FALSE;
index 5512e30f1cb37986bd0450f2928e04c550a25678..49ef09b5996a9580220c456701235cb62a7e3d38 100644 (file)
@@ -756,16 +756,6 @@ main (int argc, char **argv, char **env)
                l = g_list_next (l);
        }
 
-       /* Init symbols cache for each metric */
-       l = g_list_first (cfg->metrics_list);
-       while (l) {
-               metric = l->data;
-               if (metric->cache && !init_symbols_cache (cfg->cfg_pool, metric->cache, metric->cache_filename)) {
-                       exit (EXIT_FAILURE);
-               }
-               l = g_list_next (l);
-       }
-
 #ifndef WITHOUT_PERL
        /* Init perl interpreter */
        dTHXa (perl_interpreter);
@@ -784,6 +774,16 @@ main (int argc, char **argv, char **env)
        init_lua_filters (cfg);
 #endif
 
+       /* Init symbols cache for each metric */
+       l = g_list_first (cfg->metrics_list);
+       while (l) {
+               metric = l->data;
+               if (metric->cache && !init_symbols_cache (cfg->cfg_pool, metric->cache, metric->cache_filename)) {
+                       exit (EXIT_FAILURE);
+               }
+               l = g_list_next (l);
+       }
+
        rspamd->workers = g_hash_table_new (g_direct_hash, g_direct_equal);
        spawn_workers (rspamd, TRUE);
 
index a58730144c7425378ae88ebbc4bfed2f82c93c71..92a29f46d2ad0a26240d80387c29da55a98fb05a 100644 (file)
@@ -154,7 +154,7 @@ statfile_pool_check (stat_file_t * file)
        file->cur_section.code = f->section.code;
        file->cur_section.length = f->section.length;
        if (file->cur_section.length * sizeof (struct stat_file_block) > file->len) {
-               msg_info ("statfile_pool_check: file %s is truncated: %zd, must be %zd", file->filename, file->len, file->cur_section.length * sizeof (struct stat_file_block));
+               msg_info ("statfile_pool_check: file %s is truncated: %z, must be %z", file->filename, file->len, file->cur_section.length * sizeof (struct stat_file_block));
                return -1;
        }
        file->seek_pos = sizeof (struct stat_file) - sizeof (struct stat_file_block);
@@ -265,7 +265,7 @@ statfile_pool_reindex (statfile_pool_t * pool, char *filename, size_t old_size,
        while (pos - map < old_size) {
                block = (struct stat_file_block *)pos;
                if (block->hash1 != 0 && block->value != 0) {
-                       statfile_pool_set_block_common (pool, new, block->hash1, block->hash2, block->last_access, block->value, FALSE);
+                       statfile_pool_set_block_common (pool, new, block->hash1, block->hash2, 0, block->value, FALSE);
                }
                pos += sizeof (block);
        }
@@ -347,6 +347,7 @@ statfile_pool_open (statfile_pool_t * pool, char *filename, size_t size, gboolea
                pool->opened--;
                memory_pool_unlock_mutex (pool->lock);
                unlock_file (new_file->fd, FALSE);
+        munmap (new_file->map, st.st_size);
                return NULL;
        }
        unlock_file (new_file->fd, FALSE);
@@ -411,7 +412,7 @@ statfile_pool_create (statfile_pool_t * pool, char *filename, size_t size)
        struct stat_file_section        section = {
                .code = STATFILE_SECTION_COMMON,
        };
-       struct stat_file_block          block = { 0, 0, 0, 0 };
+       struct stat_file_block          block = { 0, 0, 0 };
        int                             fd;
        unsigned int                    buflen, nblocks;
        char                           *buf = NULL;
@@ -539,7 +540,6 @@ statfile_pool_get_block (statfile_pool_t * pool, stat_file_t * file, uint32_t h1
                        break;
                }
                if (block->hash1 == h1 && block->hash2 == h2) {
-                       block->last_access = now - (time_t) header->create_time;
                        return block->value;
                }
                c += sizeof (struct stat_file_block);
@@ -550,13 +550,15 @@ statfile_pool_get_block (statfile_pool_t * pool, stat_file_t * file, uint32_t h1
        return 0;
 }
 
+#define RANDOM_EXPIRE G_MAXINT / CHAIN_LENGTH
 static void
 statfile_pool_set_block_common (statfile_pool_t * pool, stat_file_t * file, uint32_t h1, uint32_t h2, time_t t, double value, gboolean from_now)
 {
        struct stat_file_block         *block, *to_expire = NULL;
        struct stat_file_header        *header;
-       unsigned int                    i, blocknum, oldest = 0;
+       unsigned int                    i, blocknum;
        u_char                         *c;
+    double                          min = G_MAXDOUBLE;
 
 
        if (from_now) {
@@ -579,12 +581,6 @@ statfile_pool_set_block_common (statfile_pool_t * pool, stat_file_t * file, uint
                }
                /* First try to find block in chain */
                if (block->hash1 == h1 && block->hash2 == h2) {
-                       if (from_now) {
-                               block->last_access = t - (time_t) header->create_time;
-                       }
-                       else {
-                               block->last_access = t;
-                       }
                        block->value = value;
                        return;
                }
@@ -595,17 +591,18 @@ statfile_pool_set_block_common (statfile_pool_t * pool, stat_file_t * file, uint
                        block->hash1 = h1;
                        block->hash2 = h2;
                        block->value = value;
-                       if (from_now) {
-                               block->last_access = t - (time_t) header->create_time;
-                       }
-                       else {
-                               block->last_access = t;
-                       }
                        header->used_blocks ++;
 
                        return;
                }
-               if (block->last_access > oldest) {
+               
+               /* Expire block if we have some random value that is lower than RANDOM_EXPIRE value */
+               if (g_random_int () < RANDOM_EXPIRE) {
+                       to_expire = block;
+                       break;
+               }
+               /* Expire block with minimum value otherwise */
+               if (block->value < min) {
                        to_expire = block;
                }
                c += sizeof (struct stat_file_block);
@@ -621,12 +618,7 @@ statfile_pool_set_block_common (statfile_pool_t * pool, stat_file_t * file, uint
                c = (u_char *) file->map + file->seek_pos + blocknum * sizeof (struct stat_file_block);
                block = (struct stat_file_block *)c;
        }
-       if (from_now) {
-               block->last_access = t - (time_t) header->create_time;
-       }
-       else {
-               block->last_access = t;
-       }
+
        block->hash1 = h1;
        block->hash2 = h2;
        block->value = value;
@@ -686,7 +678,7 @@ gboolean
 statfile_pool_add_section (statfile_pool_t * pool, stat_file_t * file, uint32_t code, uint64_t length)
 {
        struct stat_file_section        sect;
-       struct stat_file_block          block = { 0, 0, 0, 0 };
+       struct stat_file_block          block = { 0, 0, 0 };
 
        if (lseek (file->fd, 0, SEEK_END) == -1) {
                msg_info ("statfile_pool_add_section: cannot lseek file %s, error %d, %s", file->filename, errno, strerror (errno));
index 43b84bdfed647737f69d3d6b23e460c03587af0d..a43000534ff5c3c1a8575904226ea6df5cec410f 100644 (file)
@@ -47,7 +47,6 @@ struct stat_file_section {
 struct stat_file_block {
        uint32_t hash1;                                                 /**< hash1 (also acts as index)                 */                              
        uint32_t hash2;                                                 /**< hash2                                                              */
-       uint32_t last_access;                                   /**< last access to block since create time of file     */
        double value;                                                   /**< double value                                               */
 };
 
index 000aeb43e844d91e41047d8fc23a4fa619767017..b3e4f7e685d63d74c3bfd3cc7074b805020d6151 100644 (file)
@@ -954,8 +954,8 @@ calculate_check_time (struct timespec *begin, int resolution)
 
        diff = (ts.tv_sec - begin->tv_sec) * 1000. +    /* Seconds */
                (ts.tv_nsec - begin->tv_nsec) / 1000000.;       /* Nanoseconds */
-       rspamd_sprintf (fmt, "%%.%df", resolution);
-       rspamd_snprintf (res, sizeof (res), fmt, diff);
+       sprintf (fmt, "%%.%df", resolution);
+       snprintf (res, sizeof (res), fmt, diff);
 
        return (const char *)res;
 }