source.dussan.org Git - rspamd.git/commitdiff
* Convert statistic sums to use long double for counters
author Vsevolod Stakhov <vsevolod@rambler-co.ru>
Thu, 27 May 2010 14:59:02 +0000 (18:59 +0400)
committer Vsevolod Stakhov <vsevolod@rambler-co.ru>
Thu, 27 May 2010 14:59:02 +0000 (18:59 +0400)
* Use hyperbolic tangent for internal normalizer

CMakeLists.txt
src/cfg_file.h
src/cfg_utils.c
src/classifiers/classifiers.h
src/classifiers/winnow.c
src/controller.c
src/lua/lua_common.c
src/lua/lua_common.h
src/util.c

index e8896ac0090804d261a3f2244fd3eb6a479ff20f..ef6e02b577ce8540a91030d26dd7361d9dc06eb2 100644 (file)
@@ -18,7 +18,6 @@ CMAKE_MINIMUM_REQUIRED(VERSION 2.6.0 FATAL_ERROR)
 
 OPTION(DEBUG_MODE          "Enable debug output [default: ON]"                  ON)
 OPTION(ENABLE_OPTIMIZATION "Enable optimization [default: OFF]"                 OFF)
-OPTION(ENABLE_PERL         "Enable perl support [default: OFF]"                 OFF)
 OPTION(SKIP_RELINK_RPATH   "Skip relinking and full RPATH for the install tree" OFF)
 OPTION(ENABLE_REDIRECTOR   "Enable redirector install [default: OFF]"           OFF)
 OPTION(ENABLE_PROFILING    "Enable profiling [default: OFF]"                    OFF)
index baf65c3773a86a556cf108f0beae8ecb232e1da4..dec843359ec086d8ffa4cb5865f4ae832c74edb0 100644 (file)
@@ -161,7 +161,7 @@ struct statfile_binlog_params {
        uint16_t master_port;
 };
 
-typedef double (*statfile_normalize_func)(struct config_file *cfg, double score, void *params);
+typedef double (*statfile_normalize_func)(struct config_file *cfg, long double score, void *params);
 
 /**
  * Statfile config definition
index f72bf51a36a170ed6226d1fd8d7f1af1087bcfac..d63e6bc93e48bf893f963298fed33bacdfaa65c5 100644 (file)
@@ -719,25 +719,16 @@ check_worker_conf (struct config_file *cfg, struct worker_conf *c)
 }
 
 static double
-internal_normalizer_func (struct config_file *cfg, double score, void *data)
+internal_normalizer_func (struct config_file *cfg, long double score, void *data)
 {
-    double max = *(double *)data;
+    long double max = *(double *)data;
 
     if (score < 0) {
         return score;
     }
-    else if (score > 0.001 && score < 1) {
-        return 1;
-    }
-    else if (score > 1 && score < max / 2.) {
-        return MIN(max, score * score);
-    }
-    else if (score < max) {
-        return score;
-    }
-    else if (score > max) {
-        return max;
-    }
+       else {
+               return max * tanhl (score / max);
+       }
 
     return score;
 }
index de937bc3f36fbc648ca26482a0e619287afdcb74..02192d79550e5756ac01554d1ea215564e0a98c7 100644 (file)
@@ -17,7 +17,7 @@ struct classifier_ctx {
 
 struct classify_weight {
        const char *name;
-       double weight;
+       long double weight;
 };
 
 /* Common classifier structure */
index 637be759d01e96f1ddb0f84d0bb3f4beb5ba118f..1d48cc2ba0a282171c20b557738364e8f2aff3c8 100644 (file)
@@ -51,7 +51,7 @@ struct winnow_callback_data {
        struct classifier_ctx          *ctx;
        stat_file_t                    *file;
        stat_file_t                    *learn_file;
-       double                          sum;
+       long double                     sum;
        double                          multiplier;
        int                             count;
        gboolean                        in_class;
@@ -71,12 +71,7 @@ classify_callback (gpointer key, gpointer value, gpointer data)
        /* Consider that not found blocks have value 1 */
        v = statfile_pool_get_block (cd->pool, cd->file, node->h1, node->h2, cd->now);
        if (fabs (v) > ALPHA) {
-        if (cd->sum + v > MAX_WEIGHT) {
-            cd->sum = MAX_WEIGHT;
-        }
-        else {
-                   cd->sum += v;
-        }
+               cd->sum += v;
                cd->in_class++;
        }
 
@@ -160,12 +155,7 @@ learn_callback (gpointer key, gpointer value, gpointer data)
        }
 
 
-    if (cd->sum + node->value > MAX_WEIGHT) {
-        cd->sum = MAX_WEIGHT;
-    }
-    else {
-           cd->sum += node->value;
-    }
+       cd->sum += node->value;
 
        cd->count++;
 
@@ -188,7 +178,7 @@ winnow_classify (struct classifier_ctx *ctx, statfile_pool_t * pool, GTree * inp
 {
        struct winnow_callback_data     data;
        char                           *sumbuf, *value;
-       double                          res = 0., max = 0.;
+       long double                     res = 0., max = 0.;
        GList                          *cur;
        struct statfile                *st, *sel = NULL;
        int                             nodes, minnodes;
@@ -258,7 +248,7 @@ winnow_classify (struct classifier_ctx *ctx, statfile_pool_t * pool, GTree * inp
 
        if (sel != NULL) {
                sumbuf = memory_pool_alloc (task->task_pool, 32);
-               snprintf (sumbuf, 32, "%.2f", max);
+               snprintf (sumbuf, 32, "%.2Lg", max);
                cur = g_list_prepend (NULL, sumbuf);
 #ifdef WITH_LUA
         max = call_classifier_post_callbacks (ctx->cfg, task, max);
@@ -271,7 +261,7 @@ GList *
 winnow_weights (struct classifier_ctx *ctx, statfile_pool_t * pool, GTree * input, struct worker_task *task)
 {
        struct winnow_callback_data     data;
-       double                          res = 0.;
+       long double                     res = 0.;
        GList                          *cur, *resl = NULL;
        struct statfile                *st;
        struct classify_weight         *w;
@@ -346,7 +336,7 @@ winnow_learn (struct classifier_ctx *ctx, statfile_pool_t *pool, stat_file_t *fi
        int                             nodes, minnodes, iterations = 0;
        struct statfile                *st;
        stat_file_t                    *sel;
-       double                          res = 0., max = 0.;
+       long double                     res = 0., max = 0.;
        GList                          *cur;
 
        g_assert (pool != NULL);
@@ -407,12 +397,16 @@ winnow_learn (struct classifier_ctx *ctx, statfile_pool_t *pool, stat_file_t *fi
                }
        } while ((in_class ? sel != file : sel == file)  && iterations ++ < MAX_LEARN_ITERATIONS);
        
+       if (iterations >= MAX_LEARN_ITERATIONS) {
+               msg_warn ("learning statfile %s  was not fully successfull: iterations count is limited to %d, final sum is %G", 
+                               file->filename, MAX_LEARN_ITERATIONS, max);
+       }
+       else {
+               msg_info ("learned statfile %s successfully with %d iterations and sum %G", file->filename, iterations, max);
+       }
+
+
        if (sum) {
-               if (data.count != 0) {
-                       *sum = data.sum / data.count;
-               }
-               else {
-                       *sum = 0;
-               }
+               *sum = max;
        }
 }
index d3c5e9e70c25c714cace4505af26a8397fce81a1..236c3eca9dbefe34e69ccbe66c1cf48b51ac2734 100644 (file)
@@ -849,7 +849,7 @@ controller_read_socket (f_str_t * in, void *arg)
 
                while (cur) {
                        w = cur->data;
-                       i += snprintf (out_buf + i, sizeof (out_buf) - i, "%s: %.2f" CRLF, w->name, w->weight);
+                       i += snprintf (out_buf + i, sizeof (out_buf) - i, "%s: %.2Lg" CRLF, w->name, w->weight);
                        cur = g_list_next (cur);
                }
                if (!rspamd_dispatcher_write (session->dispatcher, out_buf, i, FALSE, FALSE)) {
index fc5fe077269fb1648af415574be15c6735259cbd..d1a2b614bf64a58c2b8de7a3c71a5c01d5eaf146 100644 (file)
@@ -415,10 +415,10 @@ lua_consolidation_func (struct worker_task *task, const char *metric_name, const
 }
 
 double 
-lua_normalizer_func (struct config_file *cfg, double score, void *params)
+lua_normalizer_func (struct config_file *cfg, long double score, void *params)
 {
     GList                          *p = params;
-    double                          res = score;
+    long double                          res = score;
        lua_State                      *L = cfg->lua_state;
 
     /* Call specified function and put input score on stack */
index f89ccaa30ff0b67842e9e962fd8d4dd1ba11a9fa..ffed03e58c9ecdbc409018ee915b9ab387fd4fa4 100644 (file)
@@ -44,7 +44,7 @@ void add_luabuf (const char *line);
 GList *call_classifier_pre_callbacks (struct classifier_config *ccf, struct worker_task *task);
 double call_classifier_post_callbacks (struct classifier_config *ccf, struct worker_task *task, double in);
 
-double lua_normalizer_func (struct config_file *cfg, double score, void *params);
+double lua_normalizer_func (struct config_file *cfg, long double score, void *params);
 
 /* Config file functions */
 void lua_post_load_config (struct config_file *cfg);
index c093ccc362a4bd629664c444ac4863ff50920655..bf0a491f3028c6865b99df61db288155d1138521 100644 (file)
@@ -1034,7 +1034,10 @@ get_statfile_by_symbol (statfile_pool_t *pool, struct classifier_config *ccf,
  *     %[0][width|m][u][x|X]i    int/ngx_int_t
  *     %[0][width][u][x|X]D      int32_t/uint32_t
  *     %[0][width][u][x|X]L      int64_t/uint64_t
- *     %[0][width][.width]f      float
+ *     %[0][width][.width]f      double
+ *     %[0][width][.width]F      long double
+ *     %[0][width][.width]g      double
+ *     %[0][width][.width]G      long double
  *     %P                                              pid_t
  *     %r                                              rlim_t
  *     %p                                              void *
@@ -1082,7 +1085,7 @@ rspamd_vsnprintf (u_char *buf, size_t max, const char *fmt, va_list args)
 {
        u_char             *p, zero, *last;
        int                 d;
-       float               f, scale;
+       long double         f, scale;
        size_t              len, slen;
        int64_t                         i64;
        uint64_t                        ui64;
@@ -1144,7 +1147,6 @@ rspamd_vsnprintf (u_char *buf, size_t max, const char *fmt, va_list args)
                                        sign = 0;
                                        fmt++;
                                        continue;
-
                                case '.':
                                        fmt++;
 
@@ -1258,7 +1260,43 @@ rspamd_vsnprintf (u_char *buf, size_t max, const char *fmt, va_list args)
 
 
                        case 'f':
-                               f = (float) va_arg (args, double);
+                               f = (double) va_arg (args, double);
+                               if (f < 0) {
+                                       *buf++ = '-';
+                                       f = -f;
+                               }
+                               
+                               ui64 = (int64_t) f;
+
+                               buf = rspamd_sprintf_num (buf, last, ui64, zero, 0, width);
+
+                               if (frac_width) {
+
+                                       if (buf < last) {
+                                               *buf++ = '.';
+                                       }
+
+                                       scale = 1.0;
+
+                                       for (i = 0; i < frac_width; i++) {
+                                               scale *= 10.0;
+                                       }
+
+                                       /*
+                                       * (int64_t) cast is required for msvc6:
+                                       * it can not convert uint64_t to double
+                                       */
+                                       ui64 = (uint64_t) ((f - (int64_t) ui64) * scale);
+
+                                       buf = rspamd_sprintf_num (buf, last, ui64, '0', 0, frac_width);
+                               }
+
+                               fmt++;
+
+                               continue;
+
+                       case 'F':
+                               f = (long double) va_arg (args, long double);
 
                                if (f < 0) {
                                        *buf++ = '-';
@@ -1282,9 +1320,9 @@ rspamd_vsnprintf (u_char *buf, size_t max, const char *fmt, va_list args)
                                        }
 
                                        /*
-                                        * (int64_t) cast is required for msvc6:
-                                        * it can not convert uint64_t to double
-                                        */
+                                       * (int64_t) cast is required for msvc6:
+                                       * it can not convert uint64_t to double
+                                       */
                                        ui64 = (uint64_t) ((f - (int64_t) ui64) * scale);
 
                                        buf = rspamd_sprintf_num (buf, last, ui64, '0', 0, frac_width);
@@ -1294,6 +1332,32 @@ rspamd_vsnprintf (u_char *buf, size_t max, const char *fmt, va_list args)
 
                                continue;
 
+                       case 'g':
+                               f = (long double) va_arg (args, double);
+
+                               if (f < 0) {
+                                       *buf++ = '-';
+                                       f = -f;
+                               }
+                               g_ascii_formatd (buf, last - buf, "%g", (double)f);
+                               buf += strlen (buf);
+                               fmt++;
+
+                               continue;
+
+                       case 'G':
+                               f = (long double) va_arg (args, long double);
+
+                               if (f < 0) {
+                                       *buf++ = '-';
+                                       f = -f;
+                               }
+                               g_ascii_formatd (buf, last - buf, "%g", (double)f);
+                               buf += strlen (buf);
+                               fmt++;
+
+                               continue;
+
                        case 'p':
                                ui64 = (uintptr_t) va_arg (args, void *);
                                hex = 2;