diff options
author | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2011-01-25 21:31:52 +0300 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2011-01-25 21:31:52 +0300 |
commit | a28536ff4d1bc30392e185f48e61d3cf858ef7b2 (patch) | |
tree | 1543ef4c0c6f6790859132d8d316b3b83f2d01f8 /src/classifiers | |
parent | 76b69f300d8372969b6143e3e269376229d03edf (diff) | |
download | rspamd-a28536ff4d1bc30392e185f48e61d3cf858ef7b2.tar.gz rspamd-a28536ff4d1bc30392e185f48e61d3cf858ef7b2.zip |
Fixes in classifying for small messages.
Diffstat (limited to 'src/classifiers')
-rw-r--r-- | src/classifiers/bayes.c | 15 | ||||
-rw-r--r-- | src/classifiers/winnow.c | 15 |
2 files changed, 24 insertions, 6 deletions
```diff
diff --git a/src/classifiers/bayes.c b/src/classifiers/bayes.c
index 64783e0b4..9ef2544b0 100644
--- a/src/classifiers/bayes.c
+++ b/src/classifiers/bayes.c
@@ -178,7 +178,10 @@ bayes_classify (struct classifier_ctx* ctx, statfile_pool_t *pool, GTree *input,
 if (ctx->cfg->opts && (value = g_hash_table_lookup (ctx->cfg->opts, "min_tokens")) != NULL) {
 minnodes = strtol (value, NULL, 10);
- nodes = g_tree_nnodes (input) / FEATURE_WINDOW_SIZE;
+ nodes = g_tree_nnodes (input);
+ if (nodes > FEATURE_WINDOW_SIZE) {
+ nodes = nodes / FEATURE_WINDOW_SIZE + FEATURE_WINDOW_SIZE;
+ }
 if (nodes < minnodes) {
 return FALSE;
 }
@@ -250,7 +253,10 @@ bayes_learn (struct classifier_ctx* ctx, statfile_pool_t *pool, const char *symb
 if (ctx->cfg->opts && (value = g_hash_table_lookup (ctx->cfg->opts, "min_tokens")) != NULL) {
 minnodes = strtol (value, NULL, 10);
- nodes = g_tree_nnodes (input) / FEATURE_WINDOW_SIZE;
+ nodes = g_tree_nnodes (input);
+ if (nodes > FEATURE_WINDOW_SIZE) {
+ nodes = nodes / FEATURE_WINDOW_SIZE + FEATURE_WINDOW_SIZE;
+ }
 if (nodes < minnodes) {
 msg_info ("do not learn message as it has too few tokens: %d, while %d min", nodes, minnodes);
 *sum = 0;
@@ -332,7 +338,10 @@ bayes_weights (struct classifier_ctx* ctx, statfile_pool_t *pool, GTree *input,
 if (ctx->cfg->opts && (value = g_hash_table_lookup (ctx->cfg->opts, "min_tokens")) != NULL) {
 minnodes = strtol (value, NULL, 10);
- nodes = g_tree_nnodes (input) / FEATURE_WINDOW_SIZE;
+ nodes = g_tree_nnodes (input);
+ if (nodes > FEATURE_WINDOW_SIZE) {
+ nodes = nodes / FEATURE_WINDOW_SIZE + FEATURE_WINDOW_SIZE;
+ }
 if (nodes < minnodes) {
 return NULL;
 }
diff --git a/src/classifiers/winnow.c b/src/classifiers/winnow.c
index 24ee7821c..2e8b98423 100644
--- a/src/classifiers/winnow.c
+++ b/src/classifiers/winnow.c
@@ -213,7 +213,10 @@ winnow_classify (struct classifier_ctx *ctx, statfile_pool_t * pool, GTree * inp
 if (ctx->cfg->opts && (value = g_hash_table_lookup (ctx->cfg->opts, "min_tokens")) != NULL) {
 minnodes = strtol (value, NULL, 10);
- nodes = g_tree_nnodes (input) / FEATURE_WINDOW_SIZE;
+ nodes = g_tree_nnodes (input);
+ if (nodes > FEATURE_WINDOW_SIZE) {
+ nodes = nodes / FEATURE_WINDOW_SIZE + FEATURE_WINDOW_SIZE;
+ }
 if (nodes < minnodes) {
 msg_info ("do not classify message as it has too few tokens: %d, while %d min", nodes, minnodes);
 return FALSE;
@@ -305,7 +308,10 @@ winnow_weights (struct classifier_ctx *ctx, statfile_pool_t * pool, GTree * inpu
 if (ctx->cfg->opts && (value = g_hash_table_lookup (ctx->cfg->opts, "min_tokens")) != NULL) {
 minnodes = strtol (value, NULL, 10);
- nodes = g_tree_nnodes (input) / FEATURE_WINDOW_SIZE;
+ nodes = g_tree_nnodes (input);
+ if (nodes > FEATURE_WINDOW_SIZE) {
+ nodes = nodes / FEATURE_WINDOW_SIZE + FEATURE_WINDOW_SIZE;
+ }
 if (nodes < minnodes) {
 msg_info ("do not classify message as it has too few tokens: %d, while %d min", nodes, minnodes);
 return NULL;
@@ -379,7 +385,10 @@ winnow_learn (struct classifier_ctx *ctx, statfile_pool_t *pool, const char *sym
 if (ctx->cfg->opts && (value = g_hash_table_lookup (ctx->cfg->opts, "min_tokens")) != NULL) {
 minnodes = strtol (value, NULL, 10);
- nodes = g_tree_nnodes (input) / FEATURE_WINDOW_SIZE;
+ nodes = g_tree_nnodes (input);
+ if (nodes > FEATURE_WINDOW_SIZE) {
+ nodes = nodes / FEATURE_WINDOW_SIZE + FEATURE_WINDOW_SIZE;
+ }
 if (nodes < minnodes) {
 msg_info ("do not learn message as it has too few tokens: %d, while %d min", nodes, minnodes);
 if (sum != NULL) {
```