From c60bb11945a7144681c7760163479e9fc047e2eb Mon Sep 17 00:00:00 2001
From: Vsevolod Stakhov
Date: Fri, 13 Aug 2010 19:38:55 +0400
Subject: [PATCH] * Remove normalizer as it is winnow specific thing, so all
 statistic algorithms now returns value from 0 to 1

---
 src/cfg_xml.c            |  8 +-------
 src/classifiers/winnow.c | 22 ++++++++++++++++++----
 src/controller.c         |  3 ---
 3 files changed, 19 insertions(+), 14 deletions(-)

diff --git a/src/cfg_xml.c b/src/cfg_xml.c
index 43c4091d1..9e86997e0 100644
--- a/src/cfg_xml.c
+++ b/src/cfg_xml.c
@@ -1089,13 +1089,7 @@ handle_classifier_opt (struct config_file *cfg, struct rspamd_xml_userdata *ctx,
 gboolean
 handle_statfile_normalizer (struct config_file *cfg, struct rspamd_xml_userdata *ctx, GHashTable *attrs, gchar *data, gpointer user_data, gpointer dest_struct, int offset)
 {
-	struct statfile *st = ctx->section_pointer;
-
-	if (!parse_normalizer (cfg, st, data)) {
-		msg_err ("cannot parse normalizer string: %s", data);
-		return FALSE;
-	}
-
+	msg_info ("normalizer option is now not available as rspamd always use internal normalizer for winnow (hyperbolic tanhent)");
 	return TRUE;
 }
 
diff --git a/src/classifiers/winnow.c b/src/classifiers/winnow.c
index f8c104a52..c86782d85 100644
--- a/src/classifiers/winnow.c
+++ b/src/classifiers/winnow.c
@@ -267,9 +267,15 @@ winnow_classify (struct classifier_ctx *ctx, statfile_pool_t * pool, GTree * inp
 #ifdef WITH_LUA
 		max = call_classifier_post_callbacks (ctx->cfg, task, max);
 #endif
-		if (st->normalizer != NULL) {
-			max = st->normalizer (task->cfg, max, st->normalizer_data);
-		}
+#ifdef HAVE_TANHL
+		max = tanhl (max);
+#else
+		/*
+		 * Some libm implementations do not provide tanhl, so fall back to
+		 * tanh on the same value
+		 */
+		max = tanh ((double) max);
+#endif
 		sumbuf = memory_pool_alloc (task->task_pool, 32);
 		rspamd_snprintf (sumbuf, 32, "%.2F", max);
 		cur = g_list_prepend (NULL, sumbuf);
@@ -557,7 +563,15 @@ winnow_learn (struct classifier_ctx *ctx, statfile_pool_t *pool, const char *sym
 
 end:
 	if (sum) {
-		*sum = (double)max;
+#ifdef HAVE_TANHL
+		*sum = (double)tanhl (max);
+#else
+		/*
+		 * Some libm implementations do not provide tanhl, so fall back to
+		 * tanh on the same value
+		 */
+		*sum = tanh ((double) max);
+#endif
 	}
 	return TRUE;
 }
diff --git a/src/controller.c b/src/controller.c
index a79159c69..f3cdc74de 100644
--- a/src/controller.c
+++ b/src/controller.c
@@ -849,9 +849,6 @@ controller_read_socket (f_str_t * in, void *arg)
 			session->worker->srv->stat->messages_learned++;
 
 			maybe_write_binlog (session->learn_classifier, st, statfile, tokens);
-			if (st->normalizer != NULL) {
-				sum = st->normalizer (session->cfg, sum, st->normalizer_data);
-			}
 			msg_info ("learn success for message <%s>, for statfile: %s, sum weight: %.2f",
 					task->message_id, session->learn_symbol, sum);
 			free_task (task, FALSE);
-- 
2.39.5