diff options
author | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2012-10-04 22:14:10 +0400 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2012-10-04 22:14:10 +0400 |
commit | 3789849b7b2e617d0a287fe77490b6643f3a6b74 (patch) | |
tree | 8415305aee39a3aad6adbccbc1941a62f3a41cf8 /src | |
parent | 14e1129068d55bc8de0618832d4f7d33bb1b0f06 (diff) | |
download | rspamd-3789849b7b2e617d0a287fe77490b6643f3a6b74.tar.gz rspamd-3789849b7b2e617d0a287fe77490b6643f3a6b74.zip |
* Add meta-classification example.
Many changes to advanced statistics and meta-classification logic.
Add example of complex meta-classification.
Diffstat (limited to 'src')
-rw-r--r-- | src/classifiers/bayes.c | 11 | ||||
-rw-r--r-- | src/controller.c | 21 | ||||
-rw-r--r-- | src/lua/lua_classifier.c | 9 | ||||
-rw-r--r-- | src/statfile.c | 20 |
4 files changed, 39 insertions, 22 deletions
diff --git a/src/classifiers/bayes.c b/src/classifiers/bayes.c index cad963c4b..a80bbe0ba 100644 --- a/src/classifiers/bayes.c +++ b/src/classifiers/bayes.c @@ -391,6 +391,7 @@ bayes_learn_spam (struct classifier_ctx* ctx, statfile_pool_t *pool, struct statfile *st; stat_file_t *file; GList *cur; + gboolean skip_labels; g_assert (pool != NULL); g_assert (ctx != NULL); @@ -411,11 +412,14 @@ bayes_learn_spam (struct classifier_ctx* ctx, statfile_pool_t *pool, } } - cur = call_classifier_pre_callbacks (ctx->cfg, task, FALSE, FALSE, L); + cur = call_classifier_pre_callbacks (ctx->cfg, task, TRUE, is_spam, L); if (cur) { + skip_labels = FALSE; memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_list_free, cur); } else { + /* Do not try to learn specific statfiles if pre callback returned nil */ + skip_labels = TRUE; cur = ctx->cfg->statfiles; } @@ -435,7 +439,7 @@ bayes_learn_spam (struct classifier_ctx* ctx, statfile_pool_t *pool, while (cur) { /* Select statfiles to learn */ st = cur->data; - if (st->is_spam != is_spam) { + if (st->is_spam != is_spam || (skip_labels && st->label)) { cur = g_list_next (cur); continue; } @@ -460,8 +464,6 @@ bayes_learn_spam (struct classifier_ctx* ctx, statfile_pool_t *pool, msg_err ("cannot open statfile %s after creation", st->path); return FALSE; } - cur = g_list_next (cur); - continue; } } data.file = file; @@ -470,6 +472,7 @@ bayes_learn_spam (struct classifier_ctx* ctx, statfile_pool_t *pool, statfile_inc_revision (file); statfile_pool_unlock_file (pool, data.file); maybe_write_binlog (ctx->cfg, st, file, input); + msg_info ("increase revision for %s", st->path); cur = g_list_next (cur); } diff --git a/src/controller.c b/src/controller.c index c987bc15f..47d444317 100644 --- a/src/controller.c +++ b/src/controller.c @@ -488,11 +488,20 @@ process_stat_command (struct controller_session *session) total = statfile_get_total_blocks (statfile); statfile_get_revision (statfile, &rev, &ti); if (total != (guint64)-1 && used 
!= (guint64)-1) { - r += rspamd_snprintf (out_buf + r, sizeof (out_buf) - r, - "Statfile: %s (version %uL); length: %Hz; free blocks: %uL; total blocks: %uL; free: %.2f%%" CRLF, - st->symbol, rev, st->size, - (total - used), total, - (double)((double)(total - used) / (double)total) * 100.); + if (st->label) { + r += rspamd_snprintf (out_buf + r, sizeof (out_buf) - r, + "Statfile: %s <%s> (version %uL); length: %Hz; free blocks: %uL; total blocks: %uL; free: %.2f%%" CRLF, + st->symbol, st->label, rev, st->size, + (total - used), total, + (double)((double)(total - used) / (double)total) * 100.); + } + else { + r += rspamd_snprintf (out_buf + r, sizeof (out_buf) - r, + "Statfile: %s (version %uL); length: %Hz; free blocks: %uL; total blocks: %uL; free: %.2f%%" CRLF, + st->symbol, rev, st->size, + (total - used), total, + (double)((double)(total - used) / (double)total) * 100.); + } } } cur_st = g_list_next (cur_st); @@ -1173,8 +1182,6 @@ fin_learn_task (void *arg) if (task->state != WRITING_REPLY) { task->state = WRITE_REPLY; - /* Process all statfiles */ - process_statfiles (task); } /* Check if we have all events finished */ diff --git a/src/lua/lua_classifier.c b/src/lua/lua_classifier.c index be18cda0d..202d29af3 100644 --- a/src/lua/lua_classifier.c +++ b/src/lua/lua_classifier.c @@ -247,19 +247,18 @@ lua_classifier_get_statfiles (lua_State *L) struct classifier_config *ccf = lua_check_classifier (L); GList *cur; struct statfile *st, **pst; + gint i; if (ccf) { lua_newtable (L); cur = g_list_first (ccf->statfiles); + i = 1; while (cur) { st = cur->data; - /* t['statfile_name'] = statfile */ - lua_pushstring (L, st->symbol); pst = lua_newuserdata (L, sizeof (struct statfile *)); lua_setclass (L, "rspamd{statfile}", -1); *pst = st; - - lua_settable (L, -3); + lua_rawseti (L, -2, i++); cur = g_list_next (cur); } @@ -388,7 +387,7 @@ lua_statfile_get_param (lua_State *L) if (st != NULL && param != NULL) { value = g_hash_table_lookup (st->opts, param); - if (param != 
NULL) { + if (value != NULL) { lua_pushstring (L, value); return 1; } diff --git a/src/statfile.c b/src/statfile.c index 3c4674fc9..15c41550a 100644 --- a/src/statfile.c +++ b/src/statfile.c @@ -415,7 +415,7 @@ statfile_pool_close (statfile_pool_t * pool, stat_file_t * file, gboolean keep_s if (file->map) { msg_info ("syncing statfile %s", file->filename); - msync (file->map, file->len, MS_INVALIDATE | MS_SYNC); + msync (file->map, file->len, MS_ASYNC); munmap (file->map, file->len); } if (file->fd != -1) { @@ -606,7 +606,7 @@ statfile_pool_set_block_common (statfile_pool_t * pool, stat_file_t * file, guin for (i = 0; i < CHAIN_LENGTH; i++) { if (i + blocknum >= file->cur_section.length) { /* Need to expire some block in chain */ - msg_debug ("chain %u is full, starting expire", blocknum); + msg_info ("chain %ud is full in statfile %s, starting expire", blocknum, file->filename); break; } /* First try to find block in chain */ @@ -617,7 +617,7 @@ statfile_pool_set_block_common (statfile_pool_t * pool, stat_file_t * file, guin /* Check whether we have a free block in chain */ if (block->hash1 == 0 && block->hash2 == 0) { /* Write new block here */ - msg_debug ("found free block %u in chain %u, set h1=%u, h2=%u", i, blocknum, h1, h2); + msg_debug ("found free block %ud in chain %ud, set h1=%ud, h2=%ud", i, blocknum, h1, h2); block->hash1 = h1; block->hash2 = h2; block->value = value; @@ -880,12 +880,20 @@ statfile_pool_invalidate_callback (gint fd, short what, void *ud) void statfile_pool_plan_invalidate (statfile_pool_t *pool, time_t seconds, time_t jitter) { + gboolean pending; - if (pool->invalidate_event == NULL || ! 
evtimer_pending (pool->invalidate_event, NULL)) { - if (pool->invalidate_event == NULL) { - pool->invalidate_event = memory_pool_alloc (pool->pool, sizeof (struct event)); + if (pool->invalidate_event != NULL) { + pending = evtimer_pending (pool->invalidate_event, NULL); + if (pending) { + /* Replan event */ + pool->invalidate_tv.tv_sec = seconds + g_random_int_range (0, jitter); + pool->invalidate_tv.tv_usec = 0; + evtimer_add (pool->invalidate_event, &pool->invalidate_tv); } + } + else { + pool->invalidate_event = memory_pool_alloc (pool->pool, sizeof (struct event)); pool->invalidate_tv.tv_sec = seconds + g_random_int_range (0, jitter); pool->invalidate_tv.tv_usec = 0; evtimer_set (pool->invalidate_event, statfile_pool_invalidate_callback, pool); |