summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/classifiers/winnow.c 24
-rw-r--r-- src/main.c 20
-rw-r--r-- src/statfile.c 42
-rw-r--r-- src/statfile.h 1
-rw-r--r-- src/util.c 4
5 files changed, 50 insertions, 41 deletions
diff --git a/src/classifiers/winnow.c b/src/classifiers/winnow.c
index 7e1144ae7..4b1bd5549 100644
--- a/src/classifiers/winnow.c
+++ b/src/classifiers/winnow.c
@@ -54,7 +54,12 @@ classify_callback (gpointer key, gpointer value, gpointer data)
/* Consider that not found blocks have value 1 */
v = statfile_pool_get_block (cd->pool, cd->file, node->h1, node->h2, cd->now);
if (fabs (v) > 0.00001) {
- cd->sum += v;
+ if (cd->sum + v > G_MAXDOUBLE / 2.) {
+ cd->sum = G_MAXDOUBLE / 2.;
+ }
+ else {
+ cd->sum += v;
+ }
cd->in_class++;
}
@@ -80,10 +85,23 @@ learn_callback (gpointer key, gpointer value, gpointer data)
}
else {
statfile_pool_set_block (cd->pool, cd->file, node->h1, node->h2, cd->now, v * c);
- node->value = v * c;
+ /* Set some limit on growing */
+ if (v > G_MAXDOUBLE / 2.) {
+ node->value = v;
+ }
+ else {
+ node->value = v * c;
+ }
}
- cd->sum += node->value;
+
+ if (cd->sum + node->value > G_MAXDOUBLE / 2.) {
+ cd->sum = G_MAXDOUBLE / 2.;
+ }
+ else {
+ cd->sum += node->value;
+ }
+
cd->count++;
return FALSE;
diff --git a/src/main.c b/src/main.c
index 5512e30f1..49ef09b59 100644
--- a/src/main.c
+++ b/src/main.c
@@ -756,16 +756,6 @@ main (int argc, char **argv, char **env)
l = g_list_next (l);
}
- /* Init symbols cache for each metric */
- l = g_list_first (cfg->metrics_list);
- while (l) {
- metric = l->data;
- if (metric->cache && !init_symbols_cache (cfg->cfg_pool, metric->cache, metric->cache_filename)) {
- exit (EXIT_FAILURE);
- }
- l = g_list_next (l);
- }
-
#ifndef WITHOUT_PERL
/* Init perl interpreter */
dTHXa (perl_interpreter);
@@ -784,6 +774,16 @@ main (int argc, char **argv, char **env)
init_lua_filters (cfg);
#endif
+ /* Init symbols cache for each metric */
+ l = g_list_first (cfg->metrics_list);
+ while (l) {
+ metric = l->data;
+ if (metric->cache && !init_symbols_cache (cfg->cfg_pool, metric->cache, metric->cache_filename)) {
+ exit (EXIT_FAILURE);
+ }
+ l = g_list_next (l);
+ }
+
rspamd->workers = g_hash_table_new (g_direct_hash, g_direct_equal);
spawn_workers (rspamd, TRUE);
diff --git a/src/statfile.c b/src/statfile.c
index a58730144..92a29f46d 100644
--- a/src/statfile.c
+++ b/src/statfile.c
@@ -154,7 +154,7 @@ statfile_pool_check (stat_file_t * file)
file->cur_section.code = f->section.code;
file->cur_section.length = f->section.length;
if (file->cur_section.length * sizeof (struct stat_file_block) > file->len) {
- msg_info ("statfile_pool_check: file %s is truncated: %zd, must be %zd", file->filename, file->len, file->cur_section.length * sizeof (struct stat_file_block));
+ msg_info ("statfile_pool_check: file %s is truncated: %z, must be %z", file->filename, file->len, file->cur_section.length * sizeof (struct stat_file_block));
return -1;
}
file->seek_pos = sizeof (struct stat_file) - sizeof (struct stat_file_block);
@@ -265,7 +265,7 @@ statfile_pool_reindex (statfile_pool_t * pool, char *filename, size_t old_size,
while (pos - map < old_size) {
block = (struct stat_file_block *)pos;
if (block->hash1 != 0 && block->value != 0) {
- statfile_pool_set_block_common (pool, new, block->hash1, block->hash2, block->last_access, block->value, FALSE);
+ statfile_pool_set_block_common (pool, new, block->hash1, block->hash2, 0, block->value, FALSE);
}
pos += sizeof (block);
}
@@ -347,6 +347,7 @@ statfile_pool_open (statfile_pool_t * pool, char *filename, size_t size, gboolea
pool->opened--;
memory_pool_unlock_mutex (pool->lock);
unlock_file (new_file->fd, FALSE);
+ munmap (new_file->map, st.st_size);
return NULL;
}
unlock_file (new_file->fd, FALSE);
@@ -411,7 +412,7 @@ statfile_pool_create (statfile_pool_t * pool, char *filename, size_t size)
struct stat_file_section section = {
.code = STATFILE_SECTION_COMMON,
};
- struct stat_file_block block = { 0, 0, 0, 0 };
+ struct stat_file_block block = { 0, 0, 0 };
int fd;
unsigned int buflen, nblocks;
char *buf = NULL;
@@ -539,7 +540,6 @@ statfile_pool_get_block (statfile_pool_t * pool, stat_file_t * file, uint32_t h1
break;
}
if (block->hash1 == h1 && block->hash2 == h2) {
- block->last_access = now - (time_t) header->create_time;
return block->value;
}
c += sizeof (struct stat_file_block);
@@ -550,13 +550,15 @@ statfile_pool_get_block (statfile_pool_t * pool, stat_file_t * file, uint32_t h1
return 0;
}
+#define RANDOM_EXPIRE G_MAXINT / CHAIN_LENGTH
static void
statfile_pool_set_block_common (statfile_pool_t * pool, stat_file_t * file, uint32_t h1, uint32_t h2, time_t t, double value, gboolean from_now)
{
struct stat_file_block *block, *to_expire = NULL;
struct stat_file_header *header;
- unsigned int i, blocknum, oldest = 0;
+ unsigned int i, blocknum;
u_char *c;
+ double min = G_MAXDOUBLE;
if (from_now) {
@@ -579,12 +581,6 @@ statfile_pool_set_block_common (statfile_pool_t * pool, stat_file_t * file, uint
}
/* First try to find block in chain */
if (block->hash1 == h1 && block->hash2 == h2) {
- if (from_now) {
- block->last_access = t - (time_t) header->create_time;
- }
- else {
- block->last_access = t;
- }
block->value = value;
return;
}
@@ -595,17 +591,18 @@ statfile_pool_set_block_common (statfile_pool_t * pool, stat_file_t * file, uint
block->hash1 = h1;
block->hash2 = h2;
block->value = value;
- if (from_now) {
- block->last_access = t - (time_t) header->create_time;
- }
- else {
- block->last_access = t;
- }
header->used_blocks ++;
return;
}
- if (block->last_access > oldest) {
+
+ /* Expire block if we have some random value that is lower than RANDOM_EXPIRE value */
+ if (g_random_int () < RANDOM_EXPIRE) {
+ to_expire = block;
+ break;
+ }
+ /* Expire block with minimum value otherwise */
+ if (block->value < min) {
to_expire = block;
}
c += sizeof (struct stat_file_block);
@@ -621,12 +618,7 @@ statfile_pool_set_block_common (statfile_pool_t * pool, stat_file_t * file, uint
c = (u_char *) file->map + file->seek_pos + blocknum * sizeof (struct stat_file_block);
block = (struct stat_file_block *)c;
}
- if (from_now) {
- block->last_access = t - (time_t) header->create_time;
- }
- else {
- block->last_access = t;
- }
+
block->hash1 = h1;
block->hash2 = h2;
block->value = value;
@@ -686,7 +678,7 @@ gboolean
statfile_pool_add_section (statfile_pool_t * pool, stat_file_t * file, uint32_t code, uint64_t length)
{
struct stat_file_section sect;
- struct stat_file_block block = { 0, 0, 0, 0 };
+ struct stat_file_block block = { 0, 0, 0 };
if (lseek (file->fd, 0, SEEK_END) == -1) {
msg_info ("statfile_pool_add_section: cannot lseek file %s, error %d, %s", file->filename, errno, strerror (errno));
diff --git a/src/statfile.h b/src/statfile.h
index 43b84bdfe..a43000534 100644
--- a/src/statfile.h
+++ b/src/statfile.h
@@ -47,7 +47,6 @@ struct stat_file_section {
struct stat_file_block {
uint32_t hash1; /**< hash1 (also acts as index) */
uint32_t hash2; /**< hash2 */
- uint32_t last_access; /**< last access to block since create time of file */
double value; /**< double value */
};
diff --git a/src/util.c b/src/util.c
index 000aeb43e..b3e4f7e68 100644
--- a/src/util.c
+++ b/src/util.c
@@ -954,8 +954,8 @@ calculate_check_time (struct timespec *begin, int resolution)
diff = (ts.tv_sec - begin->tv_sec) * 1000. + /* Seconds */
(ts.tv_nsec - begin->tv_nsec) / 1000000.; /* Nanoseconds */
- rspamd_sprintf (fmt, "%%.%df", resolution);
- rspamd_snprintf (res, sizeof (res), fmt, diff);
+ sprintf (fmt, "%%.%df", resolution);
+ snprintf (res, sizeof (res), fmt, diff);
return (const char *)res;
}