source.dussan.org Git - rspamd.git/commitdiff
* Add max_tokens option to avoid classifying and learning with too many tokens from...
author Vsevolod Stakhov <vsevolod@rambler-co.ru>
Tue, 26 Jul 2011 12:57:36 +0000 (16:57 +0400)
committer Vsevolod Stakhov <vsevolod@rambler-co.ru>
Tue, 26 Jul 2011 12:57:36 +0000 (16:57 +0400)
Fix stupid memory leak on client timeout.

src/buffer.c
src/classifiers/bayes.c
src/main.c
src/worker.c

index 8048dc13bddc89babc19900a143dd1f9392b23f9..4f9ef304ebb7bac2cea7d53a61dd7d06ea3c6865 100644 (file)
@@ -446,14 +446,16 @@ dispatcher_cb (gint fd, short what, void *arg)
 
        debug_ip("in dispatcher callback, what: %d, fd: %d", (gint)what, fd);
 
-       switch (what) {
-       case EV_TIMEOUT:
+       if ((what & EV_TIMEOUT) != 0) {
                if (d->err_callback) {
                        err = g_error_new (G_DISPATCHER_ERROR, ETIMEDOUT, "IO timeout");
                        d->err_callback (err, d->user_data);
                }
-               break;
-       case EV_WRITE:
+       }
+       else if ((what & EV_READ) != 0) {
+               read_buffers (fd, d, FALSE);
+       }
+       else if ((what & EV_WRITE) != 0) {
                /* No data to write, disable further EV_WRITE to this fd */
                if (d->in_sendfile) {
                        sendfile_callback (d);
@@ -475,10 +477,6 @@ dispatcher_cb (gint fd, short what, void *arg)
                                write_buffers (fd, d, TRUE);
                        }
                }
-               break;
-       case EV_READ:
-               read_buffers (fd, d, FALSE);
-               break;
        }
 }
 
index 5d00505be8ef374275fe397dfb26be18fbbf40fd..c006228b4a4c1b5cef26408d9e031facf39680ca 100644 (file)
@@ -60,7 +60,8 @@ struct bayes_callback_data {
        stat_file_t                    *file;
        struct bayes_statfile_data     *statfiles;
        guint32                         statfiles_num;
-       guint64                          learned_tokens;
+       guint64                         learned_tokens;
+       gsize                           max_tokens;
 };
 
 static                          gboolean
@@ -92,6 +93,10 @@ bayes_learn_callback (gpointer key, gpointer value, gpointer data)
                cd->learned_tokens ++;
        }
 
+       if (cd->max_tokens != 0 && cd->learned_tokens > cd->max_tokens) {
+               /* Stop learning on max tokens */
+               return TRUE;
+       }
        return FALSE;
 }
 
@@ -151,6 +156,12 @@ bayes_classify_callback (gpointer key, gpointer value, gpointer data)
                }
        }
 
+       cd->learned_tokens ++;
+       if (cd->max_tokens != 0 && cd->learned_tokens > cd->max_tokens) {
+               /* Stop classifying on max tokens */
+               return TRUE;
+       }
+
        return FALSE;
 }
 
@@ -171,7 +182,8 @@ bayes_classify (struct classifier_ctx* ctx, statfile_pool_t *pool, GTree *input,
 {
        struct bayes_callback_data      data;
        gchar                          *value;
-       gint                            nodes, minnodes, i = 0, cnt, best_num = 0;
+       gint                            nodes, i = 0, cnt, best_num = 0;
+       gsize                           minnodes;
        guint64                         rev, total_learns = 0;
        double                          best = 0;
        struct statfile                *st;
@@ -207,6 +219,15 @@ bayes_classify (struct classifier_ctx* ctx, statfile_pool_t *pool, GTree *input,
        data.now = time (NULL);
        data.ctx = ctx;
 
+       data.learned_tokens = 0;
+       if (ctx->cfg->opts && (value = g_hash_table_lookup (ctx->cfg->opts, "max_tokens")) != NULL) {
+               minnodes = parse_limit (value);
+               data.max_tokens = minnodes;
+       }
+       else {
+               data.max_tokens = 0;
+       }
+
        while (cur) {
                /* Select statfile to classify */
                st = cur->data;
@@ -264,8 +285,9 @@ bayes_learn (struct classifier_ctx* ctx, statfile_pool_t *pool, const char *symb
                                gboolean in_class, double *sum, double multiplier, GError **err)
 {
        struct bayes_callback_data      data;
-       char                           *value;
-       int                             nodes, minnodes;
+       gchar                          *value;
+       gint                            nodes;
+       gsize                           minnodes;
        struct statfile                *st, *sel_st = NULL;
        stat_file_t                    *to_learn;
        GList                          *cur;
@@ -286,7 +308,7 @@ bayes_learn (struct classifier_ctx* ctx, statfile_pool_t *pool, const char *symb
                           bayes_error_quark(),         /* error domain */
                           1,                                           /* error code */
                           "message contains too few tokens: %d, while min is %d",
-                          nodes, minnodes);
+                          nodes, (int)minnodes);
                        return FALSE;
                }
        }
@@ -296,6 +318,14 @@ bayes_learn (struct classifier_ctx* ctx, statfile_pool_t *pool, const char *symb
        data.now = time (NULL);
        data.ctx = ctx;
        data.learned_tokens = 0;
+       data.learned_tokens = 0;
+       if (ctx->cfg->opts && (value = g_hash_table_lookup (ctx->cfg->opts, "max_tokens")) != NULL) {
+               minnodes = parse_limit (value);
+               data.max_tokens = minnodes;
+       }
+       else {
+               data.max_tokens = 0;
+       }
        cur = ctx->cfg->statfiles;
        while (cur) {
                /* Select statfile to learn */
@@ -356,7 +386,8 @@ bayes_learn_spam (struct classifier_ctx* ctx, statfile_pool_t *pool,
 {
        struct bayes_callback_data      data;
        gchar                          *value;
-       gint                            nodes, minnodes;
+       gint                            nodes;
+       gsize                           minnodes;
        struct statfile                *st;
        stat_file_t                    *file;
        GList                          *cur;
@@ -375,7 +406,7 @@ bayes_learn_spam (struct classifier_ctx* ctx, statfile_pool_t *pool,
                                        bayes_error_quark(),            /* error domain */
                                        1,                                      /* error code */
                                        "message contains too few tokens: %d, while min is %d",
-                                       nodes, minnodes);
+                                       nodes, (int)minnodes);
                        return FALSE;
                }
        }
@@ -392,6 +423,15 @@ bayes_learn_spam (struct classifier_ctx* ctx, statfile_pool_t *pool,
        data.now = time (NULL);
        data.ctx = ctx;
 
+       data.learned_tokens = 0;
+       if (ctx->cfg->opts && (value = g_hash_table_lookup (ctx->cfg->opts, "max_tokens")) != NULL) {
+               minnodes = parse_limit (value);
+               data.max_tokens = minnodes;
+       }
+       else {
+               data.max_tokens = 0;
+       }
+
        while (cur) {
                /* Select statfiles to learn */
                st = cur->data;
index e37e830c0e931026e98dc25a123a44aa54afb7dc..f043023a2453e3bb2300660e769b3ca8e007a8e5 100644 (file)
@@ -873,6 +873,8 @@ main (gint argc, gchar **argv, gchar **env)
        /* Init classifiers options */
        register_classifier_opt ("bayes", "min_tokens");
        register_classifier_opt ("winnow", "min_tokens");
+       register_classifier_opt ("bayes", "max_tokens");
+       register_classifier_opt ("winnow", "max_tokens");
        register_classifier_opt ("winnow", "learn_threshold");
 
        /* Pre-init of cache */
index e4dfdce3f378eecaa9898398539d10ab82ead231..b919ad407ee9aa530169115896948be433a36611 100644 (file)
@@ -345,7 +345,6 @@ write_socket (void *arg)
        switch (task->state) {
        case WRITE_REPLY:
                if (!write_reply (task)) {
-                       destroy_session (task->s);
                        return FALSE;
                }
                if (ctx->is_custom) {
@@ -401,9 +400,7 @@ err_socket (GError * err, void *arg)
                fin_custom_filters (task);
        }
        g_error_free (err);
-       if (task->state != WRITE_REPLY) {
-               destroy_session (task->s);
-       }
+       destroy_session (task->s);
 }
 
 /*