diff options
author | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2010-08-13 19:38:55 +0400 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2010-08-13 19:38:55 +0400 |
commit | c60bb11945a7144681c7760163479e9fc047e2eb (patch) | |
tree | 9eaecbe8fd4ec30374ddd4ec36eb8c95e4948ccf /src | |
parent | 868842d9c133e8823365386200f5c48442a068b5 (diff) | |
download | rspamd-c60bb11945a7144681c7760163479e9fc047e2eb.tar.gz rspamd-c60bb11945a7144681c7760163479e9fc047e2eb.zip |
* Remove the normalizer, as it is a winnow-specific thing, so all statistic algorithms now return a value from 0 to 1
Diffstat (limited to 'src')
-rw-r--r-- | src/cfg_xml.c | 8 | ||||
-rw-r--r-- | src/classifiers/winnow.c | 22 | ||||
-rw-r--r-- | src/controller.c | 3 |
3 files changed, 19 insertions, 14 deletions
diff --git a/src/cfg_xml.c b/src/cfg_xml.c index 43c4091d1..9e86997e0 100644 --- a/src/cfg_xml.c +++ b/src/cfg_xml.c @@ -1089,13 +1089,7 @@ handle_classifier_opt (struct config_file *cfg, struct rspamd_xml_userdata *ctx, gboolean handle_statfile_normalizer (struct config_file *cfg, struct rspamd_xml_userdata *ctx, GHashTable *attrs, gchar *data, gpointer user_data, gpointer dest_struct, int offset) { - struct statfile *st = ctx->section_pointer; - - if (!parse_normalizer (cfg, st, data)) { - msg_err ("cannot parse normalizer string: %s", data); - return FALSE; - } - + msg_info ("normalizer option is now not available as rspamd always use internal normalizer for winnow (hyperbolic tanhent)"); return TRUE; } diff --git a/src/classifiers/winnow.c b/src/classifiers/winnow.c index f8c104a52..c86782d85 100644 --- a/src/classifiers/winnow.c +++ b/src/classifiers/winnow.c @@ -267,9 +267,15 @@ winnow_classify (struct classifier_ctx *ctx, statfile_pool_t * pool, GTree * inp #ifdef WITH_LUA max = call_classifier_post_callbacks (ctx->cfg, task, max); #endif - if (st->normalizer != NULL) { - max = st->normalizer (task->cfg, max, st->normalizer_data); - } +#ifdef HAVE_TANHL + max = tanhl (max); +#else + /* + * As some implementations of libm does not support tanhl, try to use + * tanh + */ + max = tanh ((double) score); +#endif sumbuf = memory_pool_alloc (task->task_pool, 32); rspamd_snprintf (sumbuf, 32, "%.2F", max); cur = g_list_prepend (NULL, sumbuf); @@ -557,7 +563,15 @@ winnow_learn (struct classifier_ctx *ctx, statfile_pool_t *pool, const char *sym end: if (sum) { - *sum = (double)max; +#ifdef HAVE_TANHL + *sum = (double)tanhl (max); +#else + /* + * As some implementations of libm does not support tanhl, try to use + * tanh + */ + *sum = tanh ((double) score); +#endif } return TRUE; } diff --git a/src/controller.c b/src/controller.c index a79159c69..f3cdc74de 100644 --- a/src/controller.c +++ b/src/controller.c @@ -849,9 +849,6 @@ controller_read_socket (f_str_t * in, 
void *arg) session->worker->srv->stat->messages_learned++; maybe_write_binlog (session->learn_classifier, st, statfile, tokens); - if (st->normalizer != NULL) { - sum = st->normalizer (session->cfg, sum, st->normalizer_data); - } msg_info ("learn success for message <%s>, for statfile: %s, sum weight: %.2f", task->message_id, session->learn_symbol, sum); free_task (task, FALSE); |