summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@rambler-co.ru> 2009-10-16 20:46:49 +0400
committer Vsevolod Stakhov <vsevolod@rambler-co.ru> 2009-10-16 20:46:49 +0400
commit 115313228194cacf60a3e09c9b8117078ab37350 (patch)
tree 046941a374876a6ab66fd92f9dafde4df17371df /src
parent d7a22c22ccc8aa3cd95c461d13f538b6d321ddc1 (diff)
download rspamd-115313228194cacf60a3e09c9b8117078ab37350.tar.gz
rspamd-115313228194cacf60a3e09c9b8117078ab37350.zip
* Add ability to change statfile size limit in config and allow reindexing of statfiles
Diffstat (limited to 'src')
-rw-r--r-- src/classifiers/winnow.c 8
-rw-r--r-- src/filter.c 4
-rw-r--r-- src/statfile.c 166
-rw-r--r-- src/statfile.h 2
4 files changed, 155 insertions, 25 deletions
diff --git a/src/classifiers/winnow.c b/src/classifiers/winnow.c
index 9404644ac..acf12a462 100644
--- a/src/classifiers/winnow.c
+++ b/src/classifiers/winnow.c
@@ -120,7 +120,7 @@ winnow_classify (struct classifier_ctx *ctx, statfile_pool_t * pool, GTree * inp
while (cur) {
st = cur->data;
if ((data.file = statfile_pool_is_open (pool, st->path)) == NULL) {
- if ((data.file = statfile_pool_open (pool, st->path)) == NULL) {
+ if ((data.file = statfile_pool_open (pool, st->path, st->size, FALSE)) == NULL) {
msg_warn ("winnow_classify: cannot open %s, skip it", st->path);
cur = g_list_next (cur);
continue;
@@ -174,13 +174,13 @@ winnow_learn (struct classifier_ctx *ctx, statfile_pool_t * pool, char *symbol,
while (cur) {
st = cur->data;
if (strcmp (symbol, st->symbol) == 0) {
- if ((data.file = statfile_pool_open (pool, st->path)) == NULL) {
+ if ((data.file = statfile_pool_open (pool, st->path, st->size, FALSE)) == NULL) {
/* Try to create statfile */
- if (statfile_pool_create (pool, st->path, st->size / sizeof (struct stat_file_block)) == -1) {
+ if (statfile_pool_create (pool, st->path, st->size) == -1) {
msg_err ("winnow_learn: cannot create statfile %s", st->path);
return;
}
- if ((data.file = statfile_pool_open (pool, st->path)) == NULL) {
+ if ((data.file = statfile_pool_open (pool, st->path, st->size, FALSE)) == NULL) {
msg_err ("winnow_learn: cannot create statfile %s", st->path);
return;
}
diff --git a/src/filter.c b/src/filter.c
index 4269e29d5..fefdd968c 100644
--- a/src/filter.c
+++ b/src/filter.c
@@ -472,9 +472,9 @@ process_autolearn (struct statfile *st, struct worker_task *task, GTree * tokens
/* Check opened */
if (!statfile_pool_is_open (task->worker->srv->statfile_pool, filename)) {
/* Try open */
- if (statfile_pool_open (task->worker->srv->statfile_pool, filename) == NULL) {
+ if (statfile_pool_open (task->worker->srv->statfile_pool, filename, st->size, FALSE) == NULL) {
/* Try create */
- if (statfile_pool_create (task->worker->srv->statfile_pool, filename, st->size / sizeof (struct stat_file_block)) == -1) {
+ if (statfile_pool_create (task->worker->srv->statfile_pool, filename, st->size) == -1) {
msg_info ("process_autolearn: error while creating statfile %s", filename);
return;
}
diff --git a/src/statfile.c b/src/statfile.c
index 697f54e4c..e44af6c2b 100644
--- a/src/statfile.c
+++ b/src/statfile.c
@@ -29,6 +29,11 @@
/* Maximum number of statistics files */
#define STATFILES_MAX 255
+static void statfile_pool_set_block_common ( /* forward declaration: statfile_pool_reindex calls this before its definition appears */
+ statfile_pool_t * pool, stat_file_t * file,
+ uint32_t h1, uint32_t h2,
+ time_t t, float value, /* t: timestamp to record for the block */
+ gboolean from_now); /* TRUE: t is an absolute time, stored relative to header->create_time; FALSE: t is stored as is */
static int
cmpstatfile (const void *a, const void *b)
@@ -128,8 +133,72 @@ statfile_pool_new (size_t max_size)
return new;
}
+/**
+ * Rebuild a statfile with a new size.
+ *
+ * The existing file is renamed to "<filename>.old", a fresh file of `size`
+ * bytes is created and opened, and every non-empty block of the old file is
+ * re-inserted into the new one. The backup is removed only on success; on any
+ * failure after the rename the old data is left in "<filename>.old".
+ *
+ * @param pool statfile pool the new file is opened in
+ * @param filename path of the statfile to rebuild
+ * @param old_size size in bytes of the existing file (length of the mapping)
+ * @param size size in bytes passed to statfile_pool_create for the new file
+ * @return the newly opened statfile, or NULL on failure
+ */
+static stat_file_t *
+statfile_pool_reindex (statfile_pool_t * pool, char *filename, size_t old_size, size_t size)
+{
+    char *backup;
+    int fd;
+    stat_file_t *new;
+    u_char *map;
+    size_t offset;
+    struct stat_file_block *block;
+
+    /* First of all rename old file */
+    memory_pool_lock_mutex (pool->lock);
+
+    backup = g_strconcat (filename, ".old", NULL);
+    if (rename (filename, backup) == -1) {
+        msg_err ("statfile_pool_reindex: cannot rename %s to %s: %s", filename, backup, strerror (errno));
+        g_free (backup);
+        memory_pool_unlock_mutex (pool->lock);
+        return NULL;
+    }
+
+    memory_pool_unlock_mutex (pool->lock);
+
+    /* Now create new file with required size */
+    if (statfile_pool_create (pool, filename, size) != 0) {
+        msg_err ("statfile_pool_reindex: cannot create new file");
+        g_free (backup);
+        return NULL;
+    }
+
+    /* Open the backup first, so the new file is not attached to the pool when there is nothing to copy from */
+    fd = open (backup, O_RDONLY);
+    if (fd == -1) {
+        msg_err ("statfile_pool_reindex: cannot open file: %s", strerror (errno));
+        g_free (backup);
+        return NULL;
+    }
+
+    /* forced = TRUE: skip the size checks in statfile_pool_open, otherwise it would recurse into reindexing */
+    new = statfile_pool_open (pool, filename, size, TRUE);
+    if (new == NULL) {
+        /* errno is not known to be meaningful here, so it is not reported */
+        msg_err ("statfile_pool_reindex: cannot open new statfile %s", filename);
+        close (fd);
+        g_free (backup);
+        return NULL;
+    }
+
+    /* Now start reading blocks from old statfile */
+    if ((map = mmap (NULL, old_size, PROT_READ, MAP_SHARED, fd, 0)) == MAP_FAILED) {
+        msg_err ("statfile_pool_reindex: cannot mmap file: %s", strerror (errno));
+        close (fd);
+        g_free (backup);
+        return NULL;
+    }
+
+    /*
+     * Offset of the first block, computed as in the original code.
+     * NOTE(review): assumes struct stat_file ends with exactly one embedded
+     * block - confirm against statfile.h.
+     */
+    offset = sizeof (struct stat_file) - sizeof (struct stat_file_block);
+
+    /* Visit only blocks that lie completely inside the mapping */
+    while (offset + sizeof (struct stat_file_block) <= old_size) {
+        block = (struct stat_file_block *)(map + offset);
+        if (block->hash1 != 0 && block->value != 0) {
+            /* from_now = FALSE: last_access is copied as stored, it is already relative */
+            statfile_pool_set_block_common (pool, new, block->hash1, block->hash2, block->last_access, block->value, FALSE);
+        }
+        /* Step by the block size; sizeof (block) would be the size of a pointer */
+        offset += sizeof (struct stat_file_block);
+    }
+
+    munmap (map, old_size);
+    close (fd);
+    unlink (backup);
+    g_free (backup);
+
+    return new;
+
+}
+
stat_file_t *
-statfile_pool_open (statfile_pool_t * pool, char *filename)
+statfile_pool_open (statfile_pool_t * pool, char *filename, size_t size, gboolean forced)
{
struct stat st;
stat_file_t *new_file;
@@ -148,12 +217,20 @@ statfile_pool_open (statfile_pool_t * pool, char *filename)
return NULL;
}
- if (st.st_size > pool->max) {
+ if (!forced && st.st_size > pool->max) {
msg_info ("statfile_pool_open: cannot attach file to pool, too large: %zd", (size_t) st.st_size);
return NULL;
}
- while (pool->max + pool->opened * sizeof (struct stat_file) < pool->occupied + st.st_size) {
+ memory_pool_lock_mutex (pool->lock);
+ if (!forced && abs (st.st_size - size) > sizeof (struct stat_file_block)) {
+ memory_pool_unlock_mutex (pool->lock);
+ msg_warn ("statfile_pool_open: need to reindex statfile old size: %zd, new size: %zd", st.st_size, size);
+ return statfile_pool_reindex (pool, filename, st.st_size, size);
+ }
+ memory_pool_unlock_mutex (pool->lock);
+
+ while (!forced && (pool->max + pool->opened * sizeof (struct stat_file) * 2 < pool->occupied + st.st_size)) {
if (statfile_pool_expire (pool) == -1) {
/* Failed to find any more free space in pool */
msg_info ("statfile_pool_open: expiration for pool failed, opening file %s failed", filename);
@@ -235,7 +312,7 @@ statfile_pool_close (statfile_pool_t * pool, stat_file_t * file, gboolean keep_s
}
int
-statfile_pool_create (statfile_pool_t * pool, char *filename, size_t blocks)
+statfile_pool_create (statfile_pool_t * pool, char *filename, size_t size)
{
struct stat_file_header header = {
.magic = {'r', 's', 'd'},
@@ -247,6 +324,8 @@ statfile_pool_create (statfile_pool_t * pool, char *filename, size_t blocks)
};
struct stat_file_block block = { 0, 0, 0, 0 };
int fd;
+ unsigned int buflen, nblocks;
+ char *buf = NULL;
if (statfile_pool_is_open (pool, filename) != NULL) {
msg_info ("statfile_pool_open: file %s is already opened", filename);
@@ -254,6 +333,7 @@ statfile_pool_create (statfile_pool_t * pool, char *filename, size_t blocks)
}
memory_pool_lock_mutex (pool->lock);
+ nblocks = (size - sizeof (struct stat_file_header) - sizeof (struct stat_file_section)) / sizeof (struct stat_file_block);
if ((fd = open (filename, O_RDWR | O_TRUNC | O_CREAT, S_IWUSR | S_IRUSR)) == -1) {
msg_info ("statfile_pool_create: cannot create file %s, error %d, %s", filename, errno, strerror (errno));
@@ -269,26 +349,53 @@ statfile_pool_create (statfile_pool_t * pool, char *filename, size_t blocks)
return -1;
}
- section.length = (uint64_t) blocks;
+ section.length = (uint64_t) nblocks;
if (write (fd, &section, sizeof (section)) == -1) {
msg_info ("statfile_pool_create: cannot write section header to file %s, error %d, %s", filename, errno, strerror (errno));
close (fd);
memory_pool_unlock_mutex (pool->lock);
return -1;
}
-
- while (blocks--) {
- if (write (fd, &block, sizeof (block)) == -1) {
- msg_info ("statfile_pool_create: cannot write block to file %s, error %d, %s", filename, errno, strerror (errno));
- close (fd);
- memory_pool_unlock_mutex (pool->lock);
- return -1;
+
+ /* Buffer for write 256 blocks at once */
+ if (nblocks > 256) {
+ buflen = MIN (nblocks / 256 * sizeof (block), sizeof (block) * 256);
+ buf = g_malloc0 (buflen);
+ }
+
+ while (nblocks) {
+ if (nblocks > 256) {
+ /* Just write buffer */
+ if (write (fd, buf, buflen) == -1) {
+ msg_info ("statfile_pool_create: cannot write blocks buffer to file %s, error %d, %s", filename, errno, strerror (errno));
+ close (fd);
+ memory_pool_unlock_mutex (pool->lock);
+ g_free (buf);
+ return -1;
+ }
+ nblocks -= 256;
+ }
+ else {
+ if (write (fd, &block, sizeof (block)) == -1) {
+ msg_info ("statfile_pool_create: cannot write block to file %s, error %d, %s", filename, errno, strerror (errno));
+ close (fd);
+ if (buf) {
+ g_free (buf);
+ }
+ memory_pool_unlock_mutex (pool->lock);
+ return -1;
+ }
+ nblocks --;
}
}
close (fd);
memory_pool_unlock_mutex (pool->lock);
+ if (buf) {
+ g_free (buf);
+ }
+
return 0;
}
@@ -353,8 +460,8 @@ statfile_pool_get_block (statfile_pool_t * pool, stat_file_t * file, uint32_t h1
return 0;
}
-void
-statfile_pool_set_block (statfile_pool_t * pool, stat_file_t * file, uint32_t h1, uint32_t h2, time_t now, float value)
+static void
+statfile_pool_set_block_common (statfile_pool_t * pool, stat_file_t * file, uint32_t h1, uint32_t h2, time_t t, float value, gboolean from_now)
{
struct stat_file_block *block, *to_expire = NULL;
struct stat_file_header *header;
@@ -362,7 +469,9 @@ statfile_pool_set_block (statfile_pool_t * pool, stat_file_t * file, uint32_t h1
u_char *c;
- file->access_time = now;
+ if (from_now) {
+ file->access_time = t;
+ }
if (!file->map) {
return;
}
@@ -380,7 +489,12 @@ statfile_pool_set_block (statfile_pool_t * pool, stat_file_t * file, uint32_t h1
}
/* First try to find block in chain */
if (block->hash1 == h1 && block->hash2 == h2) {
- block->last_access = now - (time_t) header->create_time;
+ if (from_now) {
+ block->last_access = t - (time_t) header->create_time;
+ }
+ else {
+ block->last_access = t;
+ }
block->value = value;
return;
}
@@ -391,7 +505,12 @@ statfile_pool_set_block (statfile_pool_t * pool, stat_file_t * file, uint32_t h1
block->hash1 = h1;
block->hash2 = h2;
block->value = value;
- block->last_access = now - (time_t) header->create_time;
+ if (from_now) {
+ block->last_access = t - (time_t) header->create_time;
+ }
+ else {
+ block->last_access = t;
+ }
return;
}
if (block->last_access > oldest) {
@@ -410,12 +529,23 @@ statfile_pool_set_block (statfile_pool_t * pool, stat_file_t * file, uint32_t h1
c = (u_char *) file->map + file->seek_pos + blocknum * sizeof (struct stat_file_block);
block = (struct stat_file_block *)c;
}
- block->last_access = now - (time_t) header->create_time;
+ if (from_now) {
+ block->last_access = t - (time_t) header->create_time;
+ }
+ else {
+ block->last_access = t;
+ }
block->hash1 = h1;
block->hash2 = h2;
block->value = value;
}
+void /* public setter: thin wrapper over statfile_pool_set_block_common */
+statfile_pool_set_block (statfile_pool_t * pool, stat_file_t * file, uint32_t h1, uint32_t h2, time_t now, float value)
+{
+ statfile_pool_set_block_common (pool, file, h1, h2, now, value, TRUE); /* from_now = TRUE: `now` also updates file->access_time and is stored relative to header->create_time */
+}
+
stat_file_t *
statfile_pool_is_open (statfile_pool_t * pool, char *filename)
{
diff --git a/src/statfile.h b/src/statfile.h
index 4417f4af7..76efe264f 100644
--- a/src/statfile.h
+++ b/src/statfile.h
@@ -100,7 +100,7 @@ statfile_pool_t* statfile_pool_new (size_t max_size);
* @param filename name of statfile to open
* @return pointer to the attached statfile, or NULL in case of error
*/
-stat_file_t* statfile_pool_open (statfile_pool_t *pool, char *filename);
+stat_file_t* statfile_pool_open (statfile_pool_t *pool, char *filename, size_t len, gboolean forced);
/**
* Create new statfile but DOES NOT attach it to pool, use @see statfile_pool_open for attaching