From 092a40dcf813accb11a0b6bb600dccea0b35fb1d Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Thu, 12 Nov 2009 20:38:20 +0300 Subject: [PATCH] * Write revision and revision time to statfile * Make some improvements to API (trying to make it more clear) --- src/binlog.c | 36 +++++++++++++-------------- src/binlog.h | 3 ++- src/classifiers/classifiers.h | 4 +-- src/classifiers/winnow.c | 24 ++---------------- src/controller.c | 22 ++++++++++++++-- src/filter.c | 24 +++++++++--------- src/statfile.c | 12 ++++----- src/statfile.h | 5 ++-- src/util.c | 47 +++++++++++++++++++++++++++++++++-- src/util.h | 7 ++++++ 10 files changed, 116 insertions(+), 68 deletions(-) diff --git a/src/binlog.c b/src/binlog.c index c42e6b2af..9a96d4f7b 100644 --- a/src/binlog.c +++ b/src/binlog.c @@ -120,6 +120,9 @@ binlog_check_file (struct rspamd_binlog *log) return FALSE; } + log->cur_seq = log->cur_idx->last_index; + log->cur_time = log->cur_idx->indexes[log->cur_idx->last_index].time; + return TRUE; } @@ -246,6 +249,7 @@ write_binlog_tree (struct rspamd_binlog *log, GTree *nodes) idx = &log->cur_idx->indexes[log->cur_idx->last_index]; idx->seek = seek; idx->time = (uint64_t)time (NULL); + log->cur_time = idx->time; idx->len = g_tree_nnodes (nodes) * sizeof (struct rspamd_binlog_element); if (lseek (log->fd, log->metaindex->indexes[log->metaindex->last_index], SEEK_SET) == -1) { unlock_file (log->fd, FALSE); @@ -498,33 +502,22 @@ maybe_init_static () return TRUE; } -void -maybe_write_binlog (struct classifier_config *ccf, const char *symbol, GTree *nodes) +gboolean +maybe_write_binlog (struct classifier_config *ccf, struct statfile *st, stat_file_t *file, GTree *nodes) { struct rspamd_binlog *log; - struct statfile *st = NULL; - GList *cur; if (ccf == NULL) { - return; - } - - cur = g_list_first (ccf->statfiles); - while (cur) { - st = cur->data; - if (strcmp (symbol, st->symbol) == 0) { - break; - } - st = NULL; - cur = g_list_next (cur); + return FALSE; } + if (st == NULL || nodes == NULL || st->binlog == NULL || st->binlog->affinity != AFFINITY_MASTER) { - return; + return FALSE; } if (!maybe_init_static ()) { - return; + return FALSE; } if ((log = g_hash_table_lookup (binlog_opened, st)) == NULL) { @@ -532,11 +525,16 @@ maybe_write_binlog (struct classifier_config *ccf, const char *symbol, GTree *no g_hash_table_insert (binlog_opened, st, log); } else { - return; + return FALSE; } } - (void)binlog_insert (log, nodes); + if (binlog_insert (log, nodes)) { + (void)statfile_set_revision (file, log->cur_seq, log->cur_time); + return TRUE; + } + + return FALSE; } diff --git a/src/binlog.h b/src/binlog.h index 84fba3db6..dbf22ee43 100644 --- a/src/binlog.h +++ b/src/binlog.h @@ -44,6 +44,7 @@ struct rspamd_binlog { time_t rotate_time; int rotate_jitter; uint64_t cur_seq; + uint64_t cur_time; int fd; memory_pool_t *pool; @@ -58,6 +59,6 @@ struct rspamd_binlog* binlog_open (memory_pool_t *pool, const char *path, time_t void binlog_close (struct rspamd_binlog *log); gboolean binlog_insert (struct rspamd_binlog *log, GTree *nodes); gboolean binlog_sync (struct rspamd_binlog *log, uint64_t from_rev, uint64_t from_time, GByteArray **rep); -void maybe_write_binlog (struct classifier_config *ccf, const char *symbol, GTree *nodes); +gboolean maybe_write_binlog (struct classifier_config *ccf, struct statfile *st, stat_file_t *file, GTree *nodes); #endif diff --git a/src/classifiers/classifiers.h b/src/classifiers/classifiers.h index fcb251da1..cab6eff21 100644 --- a/src/classifiers/classifiers.h +++ b/src/classifiers/classifiers.h @@ -20,7 +20,7 @@ struct classifier { struct classifier_ctx* (*init_func)(memory_pool_t *pool, struct classifier_config *cf); void (*classify_func)(struct classifier_ctx* ctx, statfile_pool_t *pool, GTree *input, struct worker_task *task); void (*learn_func)(struct classifier_ctx* ctx, statfile_pool_t *pool, - char *symbol, GTree *input, gboolean in_class); + stat_file_t *file, GTree *input, gboolean in_class); }; /* Get classifier structure by name or return NULL if this name is not found */ @@ -29,7 +29,7 @@ struct classifier* get_classifier (char *name); /* Winnow algorithm */ struct classifier_ctx* winnow_init (memory_pool_t *pool, struct classifier_config *cf); void winnow_classify (struct classifier_ctx* ctx, statfile_pool_t *pool, GTree *input, struct worker_task *task); -void winnow_learn (struct classifier_ctx* ctx, statfile_pool_t *pool, char *symbol, GTree *input, gboolean in_class); +void winnow_learn (struct classifier_ctx* ctx, statfile_pool_t *pool, stat_file_t *file, GTree *input, gboolean in_class); /* Array of all defined classifiers */ extern struct classifier classifiers[]; diff --git a/src/classifiers/winnow.c b/src/classifiers/winnow.c index cc2a0cc23..8f2b6a6fb 100644 --- a/src/classifiers/winnow.c +++ b/src/classifiers/winnow.c @@ -154,15 +154,13 @@ winnow_classify (struct classifier_ctx *ctx, statfile_pool_t * pool, GTree * inp } void -winnow_learn (struct classifier_ctx *ctx, statfile_pool_t * pool, char *symbol, GTree * input, int in_class) +winnow_learn (struct classifier_ctx *ctx, statfile_pool_t *pool, stat_file_t *file, GTree * input, int in_class) { struct winnow_callback_data data = { .file = NULL, .sum = 0, .count = 0, }; - GList *cur; - struct statfile *st; g_assert (pool != NULL); g_assert (ctx != NULL); @@ -172,25 +170,7 @@ winnow_learn (struct classifier_ctx *ctx, statfile_pool_t * pool, char *symbol, data.now = time (NULL); data.ctx = ctx; - cur = g_list_first (ctx->cfg->statfiles); - while (cur) { - st = cur->data; - if (strcmp (symbol, st->symbol) == 0) { - if ((data.file = statfile_pool_open (pool, st->path, st->size, FALSE)) == NULL) { - /* Try to create statfile */ - if (statfile_pool_create (pool, st->path, st->size) == -1) { - msg_err ("winnow_learn: cannot create statfile %s", st->path); - return; - } - if ((data.file = statfile_pool_open (pool, st->path, st->size, FALSE)) == NULL) { - msg_err ("winnow_learn: cannot create statfile %s", st->path); - return; - } - } - break; - } - cur = g_list_next (cur); - } + data.file = file; if (data.file != NULL) { statfile_pool_lock_file (pool, data.file); diff --git a/src/controller.c b/src/controller.c index 3b0179734..0b79813b3 100644 --- a/src/controller.c +++ b/src/controller.c @@ -385,6 +385,8 @@ controller_read_socket (f_str_t * in, void *arg) { struct controller_session *session = (struct controller_session *)arg; struct classifier_ctx *cls_ctx; + stat_file_t *statfile; + struct statfile *st; int len, i, r; char *s, **params, *cmd, out_buf[128]; struct worker_task *task; @@ -474,11 +476,27 @@ controller_read_socket (f_str_t * in, void *arg) } cur = g_list_next (cur); } + + /* Get or create statfile */ + statfile = get_statfile_by_symbol (session->worker->srv->statfile_pool, session->learn_classifier, + session->learn_symbol, &st, TRUE); + if (statfile == NULL) { + free_task (task, FALSE); + i = snprintf (out_buf, sizeof (out_buf), "learn failed" CRLF); + if (!rspamd_dispatcher_write (session->dispatcher, out_buf, i, FALSE, FALSE)) { + return FALSE; + } + return TRUE; + } + + /* Init classifier */ cls_ctx = session->learn_classifier->classifier->init_func (session->session_pool, session->learn_classifier); - session->learn_classifier->classifier->learn_func (cls_ctx, session->worker->srv->statfile_pool, session->learn_symbol, tokens, session->in_class); + + /* XXX: remove this awful legacy */ + session->learn_classifier->classifier->learn_func (cls_ctx, session->worker->srv->statfile_pool, statfile, tokens, session->in_class); session->worker->srv->stat->messages_learned++; - maybe_write_binlog (session->learn_classifier, session->learn_symbol, tokens); + maybe_write_binlog (session->learn_classifier, st, statfile, tokens); free_task (task, FALSE); i = snprintf (out_buf, sizeof (out_buf), "learn ok" CRLF); diff --git a/src/filter.c b/src/filter.c index ca3270f47..66a256708 100644 --- a/src/filter.c +++ b/src/filter.c @@ -473,23 +473,23 @@ check_autolearn (struct statfile_autolearn_params *params, struct worker_task *t void process_autolearn (struct statfile *st, struct worker_task *task, GTree * tokens, struct classifier *classifier, char *filename, struct classifier_ctx *ctx) { + stat_file_t *statfile; + struct statfile *unused; + if (check_autolearn (st->autolearn, task)) { if (tokens) { msg_info ("process_autolearn: message with id <%s> autolearned statfile '%s'", task->message_id, filename); - /* Check opened */ - if (!statfile_pool_is_open (task->worker->srv->statfile_pool, filename)) { - /* Try open */ - if (statfile_pool_open (task->worker->srv->statfile_pool, filename, st->size, FALSE) == NULL) { - /* Try create */ - if (statfile_pool_create (task->worker->srv->statfile_pool, filename, st->size) == -1) { - msg_info ("process_autolearn: error while creating statfile %s", filename); - return; - } - } + + /* Get or create statfile */ + statfile = get_statfile_by_symbol (task->worker->srv->statfile_pool, ctx->cfg, + st->symbol, &unused, TRUE); + + if (statfile == NULL) { + return; } - classifier->learn_func (ctx, task->worker->srv->statfile_pool, st->symbol, tokens, TRUE); - maybe_write_binlog (ctx->cfg, st->symbol, tokens); + classifier->learn_func (ctx, task->worker->srv->statfile_pool, statfile, tokens, TRUE); + maybe_write_binlog (ctx->cfg, st, statfile, tokens); } } } diff --git a/src/statfile.c b/src/statfile.c index 4b8fe59b9..67bc677bb 100644 --- a/src/statfile.c +++ b/src/statfile.c @@ -752,11 +752,11 @@ statfile_get_section_by_name (const char *name) } gboolean -statfile_set_revision (statfile_pool_t *pool, stat_file_t *file, uint64_t rev, time_t time) +statfile_set_revision (stat_file_t *file, uint64_t rev, time_t time) { struct stat_file_header *header; - if (pool == NULL || file == NULL || file->map == NULL) { + if (file == NULL || file->map == NULL) { return FALSE; } @@ -765,15 +765,15 @@ statfile_set_revision (statfile_pool_t *pool, stat_file_t *file, uint64_t rev, t header->revision = rev; header->rev_time = time; - return FALSE; + return TRUE; } gboolean -statfile_get_revision (statfile_pool_t *pool, stat_file_t *file, uint64_t *rev, time_t *time) +statfile_get_revision (stat_file_t *file, uint64_t *rev, time_t *time) { struct stat_file_header *header; - if (pool == NULL || file == NULL || file->map == NULL) { + if (file == NULL || file->map == NULL) { return FALSE; } @@ -782,5 +782,5 @@ statfile_get_revision (statfile_pool_t *pool, stat_file_t *file, uint64_t *rev, *rev = header->revision; *time = header->rev_time; - return FALSE; + return TRUE; } diff --git a/src/statfile.h b/src/statfile.h index 2269d7ced..02b6dbcc8 100644 --- a/src/statfile.h +++ b/src/statfile.h @@ -217,7 +217,7 @@ uint32_t statfile_get_section_by_name (const char *name); * @param time time of revision * @return TRUE if revision was set */ -gboolean statfile_set_revision (statfile_pool_t *pool, stat_file_t *file, uint64_t rev, time_t time); +gboolean statfile_set_revision (stat_file_t *file, uint64_t rev, time_t time); /** * Set statfile revision and revision time @@ -227,6 +227,7 @@ gboolean statfile_set_revision (statfile_pool_t *pool, stat_file_t *file, uint64 * @param time saved time of revision * @return TRUE if revision was saved in rev and time */ -gboolean statfile_get_revision (statfile_pool_t *pool, stat_file_t *file, uint64_t *rev, time_t *time); +gboolean statfile_get_revision (stat_file_t *file, uint64_t *rev, time_t *time); + #endif diff --git a/src/util.c b/src/util.c index b431a9116..4eb0a4341 100644 --- a/src/util.c +++ b/src/util.c @@ -27,6 +27,7 @@ #include "util.h" #include "cfg_file.h" #include "main.h" +#include "statfile.h" /* Check log messages intensity once per minute */ #define CHECK_TIME 60 @@ -1094,7 +1095,7 @@ unlock_file (int fd, gboolean async) return TRUE; } -#else +#else /* HAVE_FLOCK */ /* Fctnl version */ gboolean lock_file (int fd, gboolean async) @@ -1138,7 +1139,49 @@ unlock_file (int fd, gboolean async) return TRUE; } -#endif +#endif /* HAVE_FLOCK */ + +#ifdef RSPAMD_MAIN +stat_file_t * +get_statfile_by_symbol (statfile_pool_t *pool, struct classifier_config *ccf, + const char *symbol, struct statfile **st, gboolean try_create) +{ + stat_file_t *res = NULL; + GList *cur; + + if (pool == NULL || ccf == NULL || symbol == NULL) { + return NULL; + } + + cur = g_list_first (ccf->statfiles); + while (cur) { + *st = cur->data; + if (strcmp (symbol, (*st)->symbol) == 0) { + break; + } + *st = NULL; + cur = g_list_next (cur); + } + if (*st == NULL) { + return NULL; + } + + if ((res = statfile_pool_is_open (pool, (*st)->path)) == NULL) { + if ((res = statfile_pool_open (pool, (*st)->path, (*st)->size, FALSE)) == NULL) { + msg_warn ("get_statfile_by_symbol: cannot open %s", (*st)->path); + if (try_create) { + if (statfile_pool_create (pool, (*st)->path, (*st)->size) == -1) { + msg_err ("get_statfile_by_symbol: cannot create statfile %s", (*st)->path); + return NULL; + } + res = statfile_pool_open (pool, (*st)->path, (*st)->size, FALSE); + } + } + } + + return res; +} +#endif /* RSPAMD_MAIN */ /* * vi:ts=4 diff --git a/src/util.h b/src/util.h index 3def85b8a..4d8955351 100644 --- a/src/util.h +++ b/src/util.h @@ -4,10 +4,13 @@ #include "config.h" #include "mem_pool.h" #include "radix.h" +#include "statfile.h" struct config_file; struct rspamd_main; struct workq; +struct statfile; +struct classifier_config; /* Create socket and bind or connect it to specified address and port */ int make_tcp_socket (struct in_addr *, u_short, gboolean is_server, gboolean async); @@ -81,5 +84,9 @@ gboolean rspamd_strcase_equal (gconstpointer v, gconstpointer v2); void gperf_profiler_init (struct config_file *cfg, const char *descr); +#ifdef RSPAMD_MAIN +stat_file_t* get_statfile_by_symbol (statfile_pool_t *pool, struct classifier_config *ccf, + const char *symbol, struct statfile **st, gboolean try_create); +#endif #endif -- 2.39.5