aboutsummaryrefslogtreecommitdiffstats
path: root/src/classifiers
diff options
context:
space:
mode:
author: Vsevolod Stakhov <vsevolod@rambler-co.ru> 2010-05-27 18:59:02 +0400
committer: Vsevolod Stakhov <vsevolod@rambler-co.ru> 2010-05-27 18:59:02 +0400
commit: 0dc48ea239965d05b760cb9d8e570e0d91aedb77 (patch)
tree: db2d4c9b80a3408d12cb8bf4cfad57d45238abb9 /src/classifiers
parent: ac8249b6ee746f022b0753789e6e2b46ab842abc (diff)
download: rspamd-0dc48ea239965d05b760cb9d8e570e0d91aedb77.tar.gz
download: rspamd-0dc48ea239965d05b760cb9d8e570e0d91aedb77.zip
* Convert statistic sums to use long double for counters
* Use hyperbolic tangent for internal normalizer
Diffstat (limited to 'src/classifiers')
-rw-r--r-- src/classifiers/classifiers.h 2
-rw-r--r-- src/classifiers/winnow.c 40
2 files changed, 18 insertions, 24 deletions
diff --git a/src/classifiers/classifiers.h b/src/classifiers/classifiers.h
index de937bc3f..02192d795 100644
--- a/src/classifiers/classifiers.h
+++ b/src/classifiers/classifiers.h
@@ -17,7 +17,7 @@ struct classifier_ctx {
struct classify_weight {
const char *name;
- double weight;
+ long double weight;
};
/* Common classifier structure */
diff --git a/src/classifiers/winnow.c b/src/classifiers/winnow.c
index 637be759d..1d48cc2ba 100644
--- a/src/classifiers/winnow.c
+++ b/src/classifiers/winnow.c
@@ -51,7 +51,7 @@ struct winnow_callback_data {
struct classifier_ctx *ctx;
stat_file_t *file;
stat_file_t *learn_file;
- double sum;
+ long double sum;
double multiplier;
int count;
gboolean in_class;
@@ -71,12 +71,7 @@ classify_callback (gpointer key, gpointer value, gpointer data)
/* Consider that not found blocks have value 1 */
v = statfile_pool_get_block (cd->pool, cd->file, node->h1, node->h2, cd->now);
if (fabs (v) > ALPHA) {
- if (cd->sum + v > MAX_WEIGHT) {
- cd->sum = MAX_WEIGHT;
- }
- else {
- cd->sum += v;
- }
+ cd->sum += v;
cd->in_class++;
}
@@ -160,12 +155,7 @@ learn_callback (gpointer key, gpointer value, gpointer data)
}
- if (cd->sum + node->value > MAX_WEIGHT) {
- cd->sum = MAX_WEIGHT;
- }
- else {
- cd->sum += node->value;
- }
+ cd->sum += node->value;
cd->count++;
@@ -188,7 +178,7 @@ winnow_classify (struct classifier_ctx *ctx, statfile_pool_t * pool, GTree * inp
{
struct winnow_callback_data data;
char *sumbuf, *value;
- double res = 0., max = 0.;
+ long double res = 0., max = 0.;
GList *cur;
struct statfile *st, *sel = NULL;
int nodes, minnodes;
@@ -258,7 +248,7 @@ winnow_classify (struct classifier_ctx *ctx, statfile_pool_t * pool, GTree * inp
if (sel != NULL) {
sumbuf = memory_pool_alloc (task->task_pool, 32);
- snprintf (sumbuf, 32, "%.2f", max);
+ snprintf (sumbuf, 32, "%.2Lg", max);
cur = g_list_prepend (NULL, sumbuf);
#ifdef WITH_LUA
max = call_classifier_post_callbacks (ctx->cfg, task, max);
@@ -271,7 +261,7 @@ GList *
winnow_weights (struct classifier_ctx *ctx, statfile_pool_t * pool, GTree * input, struct worker_task *task)
{
struct winnow_callback_data data;
- double res = 0.;
+ long double res = 0.;
GList *cur, *resl = NULL;
struct statfile *st;
struct classify_weight *w;
@@ -346,7 +336,7 @@ winnow_learn (struct classifier_ctx *ctx, statfile_pool_t *pool, stat_file_t *fi
int nodes, minnodes, iterations = 0;
struct statfile *st;
stat_file_t *sel;
- double res = 0., max = 0.;
+ long double res = 0., max = 0.;
GList *cur;
g_assert (pool != NULL);
@@ -407,12 +397,16 @@ winnow_learn (struct classifier_ctx *ctx, statfile_pool_t *pool, stat_file_t *fi
}
} while ((in_class ? sel != file : sel == file) && iterations ++ < MAX_LEARN_ITERATIONS);
+ if (iterations >= MAX_LEARN_ITERATIONS) {
+ msg_warn ("learning statfile %s was not fully successfull: iterations count is limited to %d, final sum is %G",
+ file->filename, MAX_LEARN_ITERATIONS, max);
+ }
+ else {
+ msg_info ("learned statfile %s successfully with %d iterations and sum %G", file->filename, iterations, max);
+ }
+
+
if (sum) {
- if (data.count != 0) {
- *sum = data.sum / data.count;
- }
- else {
- *sum = 0;
- }
+ *sum = max;
}
}