about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Vsevolod Stakhov <vsevolod@rambler-co.ru> 2011-07-26 16:57:36 +0400
committer: Vsevolod Stakhov <vsevolod@rambler-co.ru> 2011-07-26 16:57:36 +0400
commit: 6eded20b2c15e524ed9a83c436a3b5f0bfbd253c (patch)
tree: b5a5d7bf336a7eaf965d72554c8b51028f9eb0d1
parent: 8e4282bb260132232c26f3f87f48181044ea6cc3 (diff)
download: rspamd-6eded20b2c15e524ed9a83c436a3b5f0bfbd253c.tar.gz
rspamd-6eded20b2c15e524ed9a83c436a3b5f0bfbd253c.zip
* Add max_tokens option to avoid classifying and learning with too many tokens from one message.
Fix stupid memory leak on client timeout.
-rw-r--r-- src/buffer.c 14
-rw-r--r-- src/classifiers/bayes.c 54
-rw-r--r-- src/main.c 2
-rw-r--r-- src/worker.c 5
4 files changed, 56 insertions, 19 deletions
diff --git a/src/buffer.c b/src/buffer.c
index 8048dc13b..4f9ef304e 100644
--- a/src/buffer.c
+++ b/src/buffer.c
@@ -446,14 +446,16 @@ dispatcher_cb (gint fd, short what, void *arg)
debug_ip("in dispatcher callback, what: %d, fd: %d", (gint)what, fd);
- switch (what) {
- case EV_TIMEOUT:
+ if ((what & EV_TIMEOUT) != 0) {
if (d->err_callback) {
err = g_error_new (G_DISPATCHER_ERROR, ETIMEDOUT, "IO timeout");
d->err_callback (err, d->user_data);
}
- break;
- case EV_WRITE:
+ }
+ else if ((what & EV_READ) != 0) {
+ read_buffers (fd, d, FALSE);
+ }
+ else if ((what & EV_WRITE) != 0) {
/* No data to write, disable further EV_WRITE to this fd */
if (d->in_sendfile) {
sendfile_callback (d);
@@ -475,10 +477,6 @@ dispatcher_cb (gint fd, short what, void *arg)
write_buffers (fd, d, TRUE);
}
}
- break;
- case EV_READ:
- read_buffers (fd, d, FALSE);
- break;
}
}
diff --git a/src/classifiers/bayes.c b/src/classifiers/bayes.c
index 5d00505be..c006228b4 100644
--- a/src/classifiers/bayes.c
+++ b/src/classifiers/bayes.c
@@ -60,7 +60,8 @@ struct bayes_callback_data {
stat_file_t *file;
struct bayes_statfile_data *statfiles;
guint32 statfiles_num;
- guint64 learned_tokens;
+ guint64 learned_tokens;
+ gsize max_tokens;
};
static gboolean
@@ -92,6 +93,10 @@ bayes_learn_callback (gpointer key, gpointer value, gpointer data)
cd->learned_tokens ++;
}
+ if (cd->max_tokens != 0 && cd->learned_tokens > cd->max_tokens) {
+ /* Stop learning on max tokens */
+ return TRUE;
+ }
return FALSE;
}
@@ -151,6 +156,12 @@ bayes_classify_callback (gpointer key, gpointer value, gpointer data)
}
}
+ cd->learned_tokens ++;
+ if (cd->max_tokens != 0 && cd->learned_tokens > cd->max_tokens) {
+ /* Stop classifying on max tokens */
+ return TRUE;
+ }
+
return FALSE;
}
@@ -171,7 +182,8 @@ bayes_classify (struct classifier_ctx* ctx, statfile_pool_t *pool, GTree *input,
{
struct bayes_callback_data data;
gchar *value;
- gint nodes, minnodes, i = 0, cnt, best_num = 0;
+ gint nodes, i = 0, cnt, best_num = 0;
+ gsize minnodes;
guint64 rev, total_learns = 0;
double best = 0;
struct statfile *st;
@@ -207,6 +219,15 @@ bayes_classify (struct classifier_ctx* ctx, statfile_pool_t *pool, GTree *input,
data.now = time (NULL);
data.ctx = ctx;
+ data.learned_tokens = 0;
+ if (ctx->cfg->opts && (value = g_hash_table_lookup (ctx->cfg->opts, "max_tokens")) != NULL) {
+ minnodes = parse_limit (value);
+ data.max_tokens = minnodes;
+ }
+ else {
+ data.max_tokens = 0;
+ }
+
while (cur) {
/* Select statfile to classify */
st = cur->data;
@@ -264,8 +285,9 @@ bayes_learn (struct classifier_ctx* ctx, statfile_pool_t *pool, const char *symb
gboolean in_class, double *sum, double multiplier, GError **err)
{
struct bayes_callback_data data;
- char *value;
- int nodes, minnodes;
+ gchar *value;
+ gint nodes;
+ gsize minnodes;
struct statfile *st, *sel_st = NULL;
stat_file_t *to_learn;
GList *cur;
@@ -286,7 +308,7 @@ bayes_learn (struct classifier_ctx* ctx, statfile_pool_t *pool, const char *symb
bayes_error_quark(), /* error domain */
1, /* error code */
"message contains too few tokens: %d, while min is %d",
- nodes, minnodes);
+ nodes, (int)minnodes);
return FALSE;
}
}
@@ -296,6 +318,14 @@ bayes_learn (struct classifier_ctx* ctx, statfile_pool_t *pool, const char *symb
data.now = time (NULL);
data.ctx = ctx;
data.learned_tokens = 0;
+ data.learned_tokens = 0;
+ if (ctx->cfg->opts && (value = g_hash_table_lookup (ctx->cfg->opts, "max_tokens")) != NULL) {
+ minnodes = parse_limit (value);
+ data.max_tokens = minnodes;
+ }
+ else {
+ data.max_tokens = 0;
+ }
cur = ctx->cfg->statfiles;
while (cur) {
/* Select statfile to learn */
@@ -356,7 +386,8 @@ bayes_learn_spam (struct classifier_ctx* ctx, statfile_pool_t *pool,
{
struct bayes_callback_data data;
gchar *value;
- gint nodes, minnodes;
+ gint nodes;
+ gsize minnodes;
struct statfile *st;
stat_file_t *file;
GList *cur;
@@ -375,7 +406,7 @@ bayes_learn_spam (struct classifier_ctx* ctx, statfile_pool_t *pool,
bayes_error_quark(), /* error domain */
1, /* error code */
"message contains too few tokens: %d, while min is %d",
- nodes, minnodes);
+ nodes, (int)minnodes);
return FALSE;
}
}
@@ -392,6 +423,15 @@ bayes_learn_spam (struct classifier_ctx* ctx, statfile_pool_t *pool,
data.now = time (NULL);
data.ctx = ctx;
+ data.learned_tokens = 0;
+ if (ctx->cfg->opts && (value = g_hash_table_lookup (ctx->cfg->opts, "max_tokens")) != NULL) {
+ minnodes = parse_limit (value);
+ data.max_tokens = minnodes;
+ }
+ else {
+ data.max_tokens = 0;
+ }
+
while (cur) {
/* Select statfiles to learn */
st = cur->data;
diff --git a/src/main.c b/src/main.c
index e37e830c0..f043023a2 100644
--- a/src/main.c
+++ b/src/main.c
@@ -873,6 +873,8 @@ main (gint argc, gchar **argv, gchar **env)
/* Init classifiers options */
register_classifier_opt ("bayes", "min_tokens");
register_classifier_opt ("winnow", "min_tokens");
+ register_classifier_opt ("bayes", "max_tokens");
+ register_classifier_opt ("winnow", "max_tokens");
register_classifier_opt ("winnow", "learn_threshold");
/* Pre-init of cache */
diff --git a/src/worker.c b/src/worker.c
index e4dfdce3f..b919ad407 100644
--- a/src/worker.c
+++ b/src/worker.c
@@ -345,7 +345,6 @@ write_socket (void *arg)
switch (task->state) {
case WRITE_REPLY:
if (!write_reply (task)) {
- destroy_session (task->s);
return FALSE;
}
if (ctx->is_custom) {
@@ -401,9 +400,7 @@ err_socket (GError * err, void *arg)
fin_custom_filters (task);
}
g_error_free (err);
- if (task->state != WRITE_REPLY) {
- destroy_session (task->s);
- }
+ destroy_session (task->s);
}
/*