summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author: Vsevolod Stakhov <vsevolod@rambler-co.ru> 2010-05-27 18:59:02 +0400
committer: Vsevolod Stakhov <vsevolod@rambler-co.ru> 2010-05-27 18:59:02 +0400
commit: 0dc48ea239965d05b760cb9d8e570e0d91aedb77 (patch)
tree: db2d4c9b80a3408d12cb8bf4cfad57d45238abb9 /src
parent: ac8249b6ee746f022b0753789e6e2b46ab842abc (diff)
download: rspamd-0dc48ea239965d05b760cb9d8e570e0d91aedb77.tar.gz
rspamd-0dc48ea239965d05b760cb9d8e570e0d91aedb77.zip
* Convert statistic sums to use long double for counters
* Use hyperbolic tangent for internal normalizer
Diffstat (limited to 'src')
-rw-r--r-- src/cfg_file.h 2
-rw-r--r-- src/cfg_utils.c 19
-rw-r--r-- src/classifiers/classifiers.h 2
-rw-r--r-- src/classifiers/winnow.c 40
-rw-r--r-- src/controller.c 2
-rw-r--r-- src/lua/lua_common.c 4
-rw-r--r-- src/lua/lua_common.h 2
-rw-r--r-- src/util.c 78
8 files changed, 99 insertions, 50 deletions
diff --git a/src/cfg_file.h b/src/cfg_file.h
index baf65c377..dec843359 100644
--- a/src/cfg_file.h
+++ b/src/cfg_file.h
@@ -161,7 +161,7 @@ struct statfile_binlog_params {
uint16_t master_port;
};
-typedef double (*statfile_normalize_func)(struct config_file *cfg, double score, void *params);
+typedef double (*statfile_normalize_func)(struct config_file *cfg, long double score, void *params);
/**
* Statfile config definition
diff --git a/src/cfg_utils.c b/src/cfg_utils.c
index f72bf51a3..d63e6bc93 100644
--- a/src/cfg_utils.c
+++ b/src/cfg_utils.c
@@ -719,25 +719,16 @@ check_worker_conf (struct config_file *cfg, struct worker_conf *c)
}
static double
-internal_normalizer_func (struct config_file *cfg, double score, void *data)
+internal_normalizer_func (struct config_file *cfg, long double score, void *data)
{
- double max = *(double *)data;
+ long double max = *(double *)data;
if (score < 0) {
return score;
}
- else if (score > 0.001 && score < 1) {
- return 1;
- }
- else if (score > 1 && score < max / 2.) {
- return MIN(max, score * score);
- }
- else if (score < max) {
- return score;
- }
- else if (score > max) {
- return max;
- }
+ else {
+ return max * tanhl (score / max);
+ }
return score;
}
diff --git a/src/classifiers/classifiers.h b/src/classifiers/classifiers.h
index de937bc3f..02192d795 100644
--- a/src/classifiers/classifiers.h
+++ b/src/classifiers/classifiers.h
@@ -17,7 +17,7 @@ struct classifier_ctx {
struct classify_weight {
const char *name;
- double weight;
+ long double weight;
};
/* Common classifier structure */
diff --git a/src/classifiers/winnow.c b/src/classifiers/winnow.c
index 637be759d..1d48cc2ba 100644
--- a/src/classifiers/winnow.c
+++ b/src/classifiers/winnow.c
@@ -51,7 +51,7 @@ struct winnow_callback_data {
struct classifier_ctx *ctx;
stat_file_t *file;
stat_file_t *learn_file;
- double sum;
+ long double sum;
double multiplier;
int count;
gboolean in_class;
@@ -71,12 +71,7 @@ classify_callback (gpointer key, gpointer value, gpointer data)
/* Consider that not found blocks have value 1 */
v = statfile_pool_get_block (cd->pool, cd->file, node->h1, node->h2, cd->now);
if (fabs (v) > ALPHA) {
- if (cd->sum + v > MAX_WEIGHT) {
- cd->sum = MAX_WEIGHT;
- }
- else {
- cd->sum += v;
- }
+ cd->sum += v;
cd->in_class++;
}
@@ -160,12 +155,7 @@ learn_callback (gpointer key, gpointer value, gpointer data)
}
- if (cd->sum + node->value > MAX_WEIGHT) {
- cd->sum = MAX_WEIGHT;
- }
- else {
- cd->sum += node->value;
- }
+ cd->sum += node->value;
cd->count++;
@@ -188,7 +178,7 @@ winnow_classify (struct classifier_ctx *ctx, statfile_pool_t * pool, GTree * inp
{
struct winnow_callback_data data;
char *sumbuf, *value;
- double res = 0., max = 0.;
+ long double res = 0., max = 0.;
GList *cur;
struct statfile *st, *sel = NULL;
int nodes, minnodes;
@@ -258,7 +248,7 @@ winnow_classify (struct classifier_ctx *ctx, statfile_pool_t * pool, GTree * inp
if (sel != NULL) {
sumbuf = memory_pool_alloc (task->task_pool, 32);
- snprintf (sumbuf, 32, "%.2f", max);
+ snprintf (sumbuf, 32, "%.2Lg", max);
cur = g_list_prepend (NULL, sumbuf);
#ifdef WITH_LUA
max = call_classifier_post_callbacks (ctx->cfg, task, max);
@@ -271,7 +261,7 @@ GList *
winnow_weights (struct classifier_ctx *ctx, statfile_pool_t * pool, GTree * input, struct worker_task *task)
{
struct winnow_callback_data data;
- double res = 0.;
+ long double res = 0.;
GList *cur, *resl = NULL;
struct statfile *st;
struct classify_weight *w;
@@ -346,7 +336,7 @@ winnow_learn (struct classifier_ctx *ctx, statfile_pool_t *pool, stat_file_t *fi
int nodes, minnodes, iterations = 0;
struct statfile *st;
stat_file_t *sel;
- double res = 0., max = 0.;
+ long double res = 0., max = 0.;
GList *cur;
g_assert (pool != NULL);
@@ -407,12 +397,16 @@ winnow_learn (struct classifier_ctx *ctx, statfile_pool_t *pool, stat_file_t *fi
}
} while ((in_class ? sel != file : sel == file) && iterations ++ < MAX_LEARN_ITERATIONS);
+ if (iterations >= MAX_LEARN_ITERATIONS) {
+ msg_warn ("learning statfile %s was not fully successfull: iterations count is limited to %d, final sum is %G",
+ file->filename, MAX_LEARN_ITERATIONS, max);
+ }
+ else {
+ msg_info ("learned statfile %s successfully with %d iterations and sum %G", file->filename, iterations, max);
+ }
+
+
if (sum) {
- if (data.count != 0) {
- *sum = data.sum / data.count;
- }
- else {
- *sum = 0;
- }
+ *sum = max;
}
}
diff --git a/src/controller.c b/src/controller.c
index d3c5e9e70..236c3eca9 100644
--- a/src/controller.c
+++ b/src/controller.c
@@ -849,7 +849,7 @@ controller_read_socket (f_str_t * in, void *arg)
while (cur) {
w = cur->data;
- i += snprintf (out_buf + i, sizeof (out_buf) - i, "%s: %.2f" CRLF, w->name, w->weight);
+ i += snprintf (out_buf + i, sizeof (out_buf) - i, "%s: %.2Lg" CRLF, w->name, w->weight);
cur = g_list_next (cur);
}
if (!rspamd_dispatcher_write (session->dispatcher, out_buf, i, FALSE, FALSE)) {
diff --git a/src/lua/lua_common.c b/src/lua/lua_common.c
index fc5fe0772..d1a2b614b 100644
--- a/src/lua/lua_common.c
+++ b/src/lua/lua_common.c
@@ -415,10 +415,10 @@ lua_consolidation_func (struct worker_task *task, const char *metric_name, const
}
double
-lua_normalizer_func (struct config_file *cfg, double score, void *params)
+lua_normalizer_func (struct config_file *cfg, long double score, void *params)
{
GList *p = params;
- double res = score;
+ long double res = score;
lua_State *L = cfg->lua_state;
/* Call specified function and put input score on stack */
diff --git a/src/lua/lua_common.h b/src/lua/lua_common.h
index f89ccaa30..ffed03e58 100644
--- a/src/lua/lua_common.h
+++ b/src/lua/lua_common.h
@@ -44,7 +44,7 @@ void add_luabuf (const char *line);
GList *call_classifier_pre_callbacks (struct classifier_config *ccf, struct worker_task *task);
double call_classifier_post_callbacks (struct classifier_config *ccf, struct worker_task *task, double in);
-double lua_normalizer_func (struct config_file *cfg, double score, void *params);
+double lua_normalizer_func (struct config_file *cfg, long double score, void *params);
/* Config file functions */
void lua_post_load_config (struct config_file *cfg);
diff --git a/src/util.c b/src/util.c
index c093ccc36..bf0a491f3 100644
--- a/src/util.c
+++ b/src/util.c
@@ -1034,7 +1034,10 @@ get_statfile_by_symbol (statfile_pool_t *pool, struct classifier_config *ccf,
* %[0][width|m][u][x|X]i int/ngx_int_t
* %[0][width][u][x|X]D int32_t/uint32_t
* %[0][width][u][x|X]L int64_t/uint64_t
- * %[0][width][.width]f float
+ * %[0][width][.width]f double
+ * %[0][width][.width]F long double
+ * %[0][width][.width]g double
+ * %[0][width][.width]G long double
* %P pid_t
* %r rlim_t
* %p void *
@@ -1082,7 +1085,7 @@ rspamd_vsnprintf (u_char *buf, size_t max, const char *fmt, va_list args)
{
u_char *p, zero, *last;
int d;
- float f, scale;
+ long double f, scale;
size_t len, slen;
int64_t i64;
uint64_t ui64;
@@ -1144,7 +1147,6 @@ rspamd_vsnprintf (u_char *buf, size_t max, const char *fmt, va_list args)
sign = 0;
fmt++;
continue;
-
case '.':
fmt++;
@@ -1258,7 +1260,43 @@ rspamd_vsnprintf (u_char *buf, size_t max, const char *fmt, va_list args)
case 'f':
- f = (float) va_arg (args, double);
+ f = (double) va_arg (args, double);
+ if (f < 0) {
+ *buf++ = '-';
+ f = -f;
+ }
+
+ ui64 = (int64_t) f;
+
+ buf = rspamd_sprintf_num (buf, last, ui64, zero, 0, width);
+
+ if (frac_width) {
+
+ if (buf < last) {
+ *buf++ = '.';
+ }
+
+ scale = 1.0;
+
+ for (i = 0; i < frac_width; i++) {
+ scale *= 10.0;
+ }
+
+ /*
+ * (int64_t) cast is required for msvc6:
+ * it can not convert uint64_t to double
+ */
+ ui64 = (uint64_t) ((f - (int64_t) ui64) * scale);
+
+ buf = rspamd_sprintf_num (buf, last, ui64, '0', 0, frac_width);
+ }
+
+ fmt++;
+
+ continue;
+
+ case 'F':
+ f = (long double) va_arg (args, long double);
if (f < 0) {
*buf++ = '-';
@@ -1282,9 +1320,9 @@ rspamd_vsnprintf (u_char *buf, size_t max, const char *fmt, va_list args)
}
/*
- * (int64_t) cast is required for msvc6:
- * it can not convert uint64_t to double
- */
+ * (int64_t) cast is required for msvc6:
+ * it can not convert uint64_t to double
+ */
ui64 = (uint64_t) ((f - (int64_t) ui64) * scale);
buf = rspamd_sprintf_num (buf, last, ui64, '0', 0, frac_width);
@@ -1294,6 +1332,32 @@ rspamd_vsnprintf (u_char *buf, size_t max, const char *fmt, va_list args)
continue;
+ case 'g':
+ f = (long double) va_arg (args, double);
+
+ if (f < 0) {
+ *buf++ = '-';
+ f = -f;
+ }
+ g_ascii_formatd (buf, last - buf, "%g", (double)f);
+ buf += strlen (buf);
+ fmt++;
+
+ continue;
+
+ case 'G':
+ f = (long double) va_arg (args, long double);
+
+ if (f < 0) {
+ *buf++ = '-';
+ f = -f;
+ }
+ g_ascii_formatd (buf, last - buf, "%g", (double)f);
+ buf += strlen (buf);
+ fmt++;
+
+ continue;
+
case 'p':
ui64 = (uintptr_t) va_arg (args, void *);
hex = 2;