about | summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author: Vsevolod Stakhov <vsevolod@rambler-co.ru> 2012-10-04 22:14:10 +0400
committer: Vsevolod Stakhov <vsevolod@rambler-co.ru> 2012-10-04 22:14:10 +0400
commit: 3789849b7b2e617d0a287fe77490b6643f3a6b74 (patch)
tree: 8415305aee39a3aad6adbccbc1941a62f3a41cf8 /src
parent: 14e1129068d55bc8de0618832d4f7d33bb1b0f06 (diff)
download: rspamd-3789849b7b2e617d0a287fe77490b6643f3a6b74.tar.gz
download: rspamd-3789849b7b2e617d0a287fe77490b6643f3a6b74.zip
* Add meta-classification example.
Many changes to advanced statistic and meta-classification logic. Add example of complex meta-classification.
Diffstat (limited to 'src')
-rw-r--r-- src/classifiers/bayes.c 11
-rw-r--r-- src/controller.c 21
-rw-r--r-- src/lua/lua_classifier.c 9
-rw-r--r-- src/statfile.c 20
4 files changed, 39 insertions, 22 deletions
diff --git a/src/classifiers/bayes.c b/src/classifiers/bayes.c
index cad963c4b..a80bbe0ba 100644
--- a/src/classifiers/bayes.c
+++ b/src/classifiers/bayes.c
@@ -391,6 +391,7 @@ bayes_learn_spam (struct classifier_ctx* ctx, statfile_pool_t *pool,
struct statfile *st;
stat_file_t *file;
GList *cur;
+ gboolean skip_labels;
g_assert (pool != NULL);
g_assert (ctx != NULL);
@@ -411,11 +412,14 @@ bayes_learn_spam (struct classifier_ctx* ctx, statfile_pool_t *pool,
}
}
- cur = call_classifier_pre_callbacks (ctx->cfg, task, FALSE, FALSE, L);
+ cur = call_classifier_pre_callbacks (ctx->cfg, task, TRUE, is_spam, L);
if (cur) {
+ skip_labels = FALSE;
memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_list_free, cur);
}
else {
+ /* Do not try to learn specific statfiles if pre callback returned nil */
+ skip_labels = TRUE;
cur = ctx->cfg->statfiles;
}
@@ -435,7 +439,7 @@ bayes_learn_spam (struct classifier_ctx* ctx, statfile_pool_t *pool,
while (cur) {
/* Select statfiles to learn */
st = cur->data;
- if (st->is_spam != is_spam) {
+ if (st->is_spam != is_spam || (skip_labels && st->label)) {
cur = g_list_next (cur);
continue;
}
@@ -460,8 +464,6 @@ bayes_learn_spam (struct classifier_ctx* ctx, statfile_pool_t *pool,
msg_err ("cannot open statfile %s after creation", st->path);
return FALSE;
}
- cur = g_list_next (cur);
- continue;
}
}
data.file = file;
@@ -470,6 +472,7 @@ bayes_learn_spam (struct classifier_ctx* ctx, statfile_pool_t *pool,
statfile_inc_revision (file);
statfile_pool_unlock_file (pool, data.file);
maybe_write_binlog (ctx->cfg, st, file, input);
+ msg_info ("increase revision for %s", st->path);
cur = g_list_next (cur);
}
diff --git a/src/controller.c b/src/controller.c
index c987bc15f..47d444317 100644
--- a/src/controller.c
+++ b/src/controller.c
@@ -488,11 +488,20 @@ process_stat_command (struct controller_session *session)
total = statfile_get_total_blocks (statfile);
statfile_get_revision (statfile, &rev, &ti);
if (total != (guint64)-1 && used != (guint64)-1) {
- r += rspamd_snprintf (out_buf + r, sizeof (out_buf) - r,
- "Statfile: %s (version %uL); length: %Hz; free blocks: %uL; total blocks: %uL; free: %.2f%%" CRLF,
- st->symbol, rev, st->size,
- (total - used), total,
- (double)((double)(total - used) / (double)total) * 100.);
+ if (st->label) {
+ r += rspamd_snprintf (out_buf + r, sizeof (out_buf) - r,
+ "Statfile: %s <%s> (version %uL); length: %Hz; free blocks: %uL; total blocks: %uL; free: %.2f%%" CRLF,
+ st->symbol, st->label, rev, st->size,
+ (total - used), total,
+ (double)((double)(total - used) / (double)total) * 100.);
+ }
+ else {
+ r += rspamd_snprintf (out_buf + r, sizeof (out_buf) - r,
+ "Statfile: %s (version %uL); length: %Hz; free blocks: %uL; total blocks: %uL; free: %.2f%%" CRLF,
+ st->symbol, rev, st->size,
+ (total - used), total,
+ (double)((double)(total - used) / (double)total) * 100.);
+ }
}
}
cur_st = g_list_next (cur_st);
@@ -1173,8 +1182,6 @@ fin_learn_task (void *arg)
if (task->state != WRITING_REPLY) {
task->state = WRITE_REPLY;
- /* Process all statfiles */
- process_statfiles (task);
}
/* Check if we have all events finished */
diff --git a/src/lua/lua_classifier.c b/src/lua/lua_classifier.c
index be18cda0d..202d29af3 100644
--- a/src/lua/lua_classifier.c
+++ b/src/lua/lua_classifier.c
@@ -247,19 +247,18 @@ lua_classifier_get_statfiles (lua_State *L)
struct classifier_config *ccf = lua_check_classifier (L);
GList *cur;
struct statfile *st, **pst;
+ gint i;
if (ccf) {
lua_newtable (L);
cur = g_list_first (ccf->statfiles);
+ i = 1;
while (cur) {
st = cur->data;
- /* t['statfile_name'] = statfile */
- lua_pushstring (L, st->symbol);
pst = lua_newuserdata (L, sizeof (struct statfile *));
lua_setclass (L, "rspamd{statfile}", -1);
*pst = st;
-
- lua_settable (L, -3);
+ lua_rawseti (L, -2, i++);
cur = g_list_next (cur);
}
@@ -388,7 +387,7 @@ lua_statfile_get_param (lua_State *L)
if (st != NULL && param != NULL) {
value = g_hash_table_lookup (st->opts, param);
- if (param != NULL) {
+ if (value != NULL) {
lua_pushstring (L, value);
return 1;
}
diff --git a/src/statfile.c b/src/statfile.c
index 3c4674fc9..15c41550a 100644
--- a/src/statfile.c
+++ b/src/statfile.c
@@ -415,7 +415,7 @@ statfile_pool_close (statfile_pool_t * pool, stat_file_t * file, gboolean keep_s
if (file->map) {
msg_info ("syncing statfile %s", file->filename);
- msync (file->map, file->len, MS_INVALIDATE | MS_SYNC);
+ msync (file->map, file->len, MS_ASYNC);
munmap (file->map, file->len);
}
if (file->fd != -1) {
@@ -606,7 +606,7 @@ statfile_pool_set_block_common (statfile_pool_t * pool, stat_file_t * file, guin
for (i = 0; i < CHAIN_LENGTH; i++) {
if (i + blocknum >= file->cur_section.length) {
/* Need to expire some block in chain */
- msg_debug ("chain %u is full, starting expire", blocknum);
+ msg_info ("chain %ud is full in statfile %s, starting expire", blocknum, file->filename);
break;
}
/* First try to find block in chain */
@@ -617,7 +617,7 @@ statfile_pool_set_block_common (statfile_pool_t * pool, stat_file_t * file, guin
/* Check whether we have a free block in chain */
if (block->hash1 == 0 && block->hash2 == 0) {
/* Write new block here */
- msg_debug ("found free block %u in chain %u, set h1=%u, h2=%u", i, blocknum, h1, h2);
+ msg_debug ("found free block %ud in chain %ud, set h1=%ud, h2=%ud", i, blocknum, h1, h2);
block->hash1 = h1;
block->hash2 = h2;
block->value = value;
@@ -880,12 +880,20 @@ statfile_pool_invalidate_callback (gint fd, short what, void *ud)
void
statfile_pool_plan_invalidate (statfile_pool_t *pool, time_t seconds, time_t jitter)
{
+ gboolean pending;
- if (pool->invalidate_event == NULL || ! evtimer_pending (pool->invalidate_event, NULL)) {
- if (pool->invalidate_event == NULL) {
- pool->invalidate_event = memory_pool_alloc (pool->pool, sizeof (struct event));
+ if (pool->invalidate_event != NULL) {
+ pending = evtimer_pending (pool->invalidate_event, NULL);
+ if (pending) {
+ /* Replan event */
+ pool->invalidate_tv.tv_sec = seconds + g_random_int_range (0, jitter);
+ pool->invalidate_tv.tv_usec = 0;
+ evtimer_add (pool->invalidate_event, &pool->invalidate_tv);
}
+ }
+ else {
+ pool->invalidate_event = memory_pool_alloc (pool->pool, sizeof (struct event));
pool->invalidate_tv.tv_sec = seconds + g_random_int_range (0, jitter);
pool->invalidate_tv.tv_usec = 0;
evtimer_set (pool->invalidate_event, statfile_pool_invalidate_callback, pool);