From c60bb11945a7144681c7760163479e9fc047e2eb Mon Sep 17 00:00:00 2001
From: Vsevolod Stakhov <vsevolod@rambler-co.ru>
Date: Fri, 13 Aug 2010 19:38:55 +0400
Subject: [PATCH] * Remove normalizer as it is a winnow-specific thing, so all
 statistic algorithms now return a value from 0 to 1

---
 src/cfg_xml.c            |  8 +-------
 src/classifiers/winnow.c | 22 ++++++++++++++++++----
 src/controller.c         |  3 ---
 3 files changed, 19 insertions(+), 14 deletions(-)

diff --git a/src/cfg_xml.c b/src/cfg_xml.c
index 43c4091d1..9e86997e0 100644
--- a/src/cfg_xml.c
+++ b/src/cfg_xml.c
@@ -1089,13 +1089,7 @@ handle_classifier_opt (struct config_file *cfg, struct rspamd_xml_userdata *ctx,
 gboolean 
 handle_statfile_normalizer (struct config_file *cfg, struct rspamd_xml_userdata *ctx, GHashTable *attrs, gchar *data, gpointer user_data, gpointer dest_struct, int offset)
 {
-	struct statfile             *st = ctx->section_pointer;
-	
-	if (!parse_normalizer (cfg, st, data)) {
-		msg_err ("cannot parse normalizer string: %s", data);
-		return FALSE;
-	}
-	
+	msg_info ("normalizer option is now not available as rspamd always use internal normalizer for winnow (hyperbolic tanhent)");
 	return TRUE;
 }
 
diff --git a/src/classifiers/winnow.c b/src/classifiers/winnow.c
index f8c104a52..c86782d85 100644
--- a/src/classifiers/winnow.c
+++ b/src/classifiers/winnow.c
@@ -267,9 +267,15 @@ winnow_classify (struct classifier_ctx *ctx, statfile_pool_t * pool, GTree * inp
 #ifdef WITH_LUA
         max = call_classifier_post_callbacks (ctx->cfg, task, max);
 #endif
-		if (st->normalizer != NULL) {
-			max = st->normalizer (task->cfg, max, st->normalizer_data);
-		}
+#ifdef HAVE_TANHL
+        max = tanhl (max);
+#else
+        /*
+         * As some implementations of libm do not support tanhl, fall back
+         * to tanh
+         */
+        max = tanh ((double) score);
+#endif
 		sumbuf = memory_pool_alloc (task->task_pool, 32);
 		rspamd_snprintf (sumbuf, 32, "%.2F", max);
 		cur = g_list_prepend (NULL, sumbuf);
@@ -557,7 +563,15 @@ winnow_learn (struct classifier_ctx *ctx, statfile_pool_t *pool, const char *sym
 
 end:
 	if (sum) {
-		*sum = (double)max;
+#ifdef HAVE_TANHL
+        *sum = (double)tanhl (max);
+#else
+        /*
+         * As some implementations of libm do not support tanhl, fall back
+         * to tanh
+         */
+        *sum = tanh ((double) score);
+#endif
 	}
 	return TRUE;
 }
diff --git a/src/controller.c b/src/controller.c
index a79159c69..f3cdc74de 100644
--- a/src/controller.c
+++ b/src/controller.c
@@ -849,9 +849,6 @@ controller_read_socket (f_str_t * in, void *arg)
 		session->worker->srv->stat->messages_learned++;
 
 		maybe_write_binlog (session->learn_classifier, st, statfile, tokens);
-		if (st->normalizer != NULL) {
-			sum = st->normalizer (session->cfg, sum, st->normalizer_data);
-		}
 		msg_info ("learn success for message <%s>, for statfile: %s, sum weight: %.2f",
 				task->message_id, session->learn_symbol, sum);
 		free_task (task, FALSE);
-- 
2.39.5