aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@rambler-co.ru> 2010-08-13 19:38:55 +0400
committer Vsevolod Stakhov <vsevolod@rambler-co.ru> 2010-08-13 19:38:55 +0400
commit c60bb11945a7144681c7760163479e9fc047e2eb (patch)
tree 9eaecbe8fd4ec30374ddd4ec36eb8c95e4948ccf /src
parent 868842d9c133e8823365386200f5c48442a068b5 (diff)
download rspamd-c60bb11945a7144681c7760163479e9fc047e2eb.tar.gz
rspamd-c60bb11945a7144681c7760163479e9fc047e2eb.zip
* Remove the normalizer, as it is a winnow-specific thing, so all statistical algorithms now return a value from 0 to 1
Diffstat (limited to 'src')
-rw-r--r-- src/cfg_xml.c 8
-rw-r--r-- src/classifiers/winnow.c 22
-rw-r--r-- src/controller.c 3
3 files changed, 19 insertions, 14 deletions
diff --git a/src/cfg_xml.c b/src/cfg_xml.c
index 43c4091d1..9e86997e0 100644
--- a/src/cfg_xml.c
+++ b/src/cfg_xml.c
@@ -1089,13 +1089,7 @@ handle_classifier_opt (struct config_file *cfg, struct rspamd_xml_userdata *ctx,
gboolean
handle_statfile_normalizer (struct config_file *cfg, struct rspamd_xml_userdata *ctx, GHashTable *attrs, gchar *data, gpointer user_data, gpointer dest_struct, int offset)
{
- struct statfile *st = ctx->section_pointer;
-
- if (!parse_normalizer (cfg, st, data)) {
- msg_err ("cannot parse normalizer string: %s", data);
- return FALSE;
- }
-
+ msg_info ("normalizer option is now not available as rspamd always use internal normalizer for winnow (hyperbolic tanhent)");
return TRUE;
}
diff --git a/src/classifiers/winnow.c b/src/classifiers/winnow.c
index f8c104a52..c86782d85 100644
--- a/src/classifiers/winnow.c
+++ b/src/classifiers/winnow.c
@@ -267,9 +267,15 @@ winnow_classify (struct classifier_ctx *ctx, statfile_pool_t * pool, GTree * inp
#ifdef WITH_LUA
max = call_classifier_post_callbacks (ctx->cfg, task, max);
#endif
- if (st->normalizer != NULL) {
- max = st->normalizer (task->cfg, max, st->normalizer_data);
- }
+#ifdef HAVE_TANHL
+ max = tanhl (max);
+#else
+ /*
+ * As some implementations of libm do not support tanhl, fall back to
+ * tanh
+ */
+ max = tanh ((double) score);
+#endif
sumbuf = memory_pool_alloc (task->task_pool, 32);
rspamd_snprintf (sumbuf, 32, "%.2F", max);
cur = g_list_prepend (NULL, sumbuf);
@@ -557,7 +563,15 @@ winnow_learn (struct classifier_ctx *ctx, statfile_pool_t *pool, const char *sym
end:
if (sum) {
- *sum = (double)max;
+#ifdef HAVE_TANHL
+ *sum = (double)tanhl (max);
+#else
+ /*
+ * As some implementations of libm do not support tanhl, fall back to
+ * tanh
+ */
+ *sum = tanh ((double) score);
+#endif
}
return TRUE;
}
diff --git a/src/controller.c b/src/controller.c
index a79159c69..f3cdc74de 100644
--- a/src/controller.c
+++ b/src/controller.c
@@ -849,9 +849,6 @@ controller_read_socket (f_str_t * in, void *arg)
session->worker->srv->stat->messages_learned++;
maybe_write_binlog (session->learn_classifier, st, statfile, tokens);
- if (st->normalizer != NULL) {
- sum = st->normalizer (session->cfg, sum, st->normalizer_data);
- }
msg_info ("learn success for message <%s>, for statfile: %s, sum weight: %.2f",
task->message_id, session->learn_symbol, sum);
free_task (task, FALSE);