From 091e84951a2b032bb2930b300ffe43eaf01a304e Mon Sep 17 00:00:00 2001
From: Vsevolod Stakhov
Date: Wed, 13 Jul 2011 13:07:45 +0400
Subject: [PATCH] Add validity detector for statfiles inside classifier. Add
 heuristic to detect spam/ham classes based on statfile symbol.

---
Note: this copy of the patch was re-wrapped from a whitespace-collapsed
original. Indentation of context lines is reconstructed, and the blob ids in
the "index" lines refer to the original commit.

 src/cfg_file.h  |  1 +
 src/cfg_utils.c | 75 +++++++++++++++++++++++++++++++++++++++++++++++++
 src/cfg_xml.c   |  4 ++--
 3 files changed, 78 insertions(+), 2 deletions(-)

diff --git a/src/cfg_file.h b/src/cfg_file.h
index d15923639..64a46d149 100644
--- a/src/cfg_file.h
+++ b/src/cfg_file.h
@@ -430,6 +430,7 @@ gboolean parse_normalizer (struct config_file *cfg, struct statfile *st, const g
 gboolean read_xml_config (struct config_file *cfg, const gchar *filename);
 gboolean check_modules_config (struct config_file *cfg);
 void insert_classifier_symbols (struct config_file *cfg);
+gboolean check_classifier_statfiles (struct classifier_config *cf);
 struct classifier_config* find_classifier_conf (struct config_file *cfg, const gchar *name);
 
 
diff --git a/src/cfg_utils.c b/src/cfg_utils.c
index 6bd16d620..56202d4f1 100644
--- a/src/cfg_utils.c
+++ b/src/cfg_utils.c
@@ -1044,6 +1044,81 @@ find_classifier_conf (struct config_file *cfg, const gchar *name)
 	return NULL;
 }
 
+/*
+ * Check that a classifier contains statfiles of both classes.
+ *
+ * The configured is_spam flags are compared first. If every statfile has the
+ * same class, a heuristic is applied: a statfile whose symbol matches "spam"
+ * (via rspamd_strncasestr) is marked as spam, one whose symbol matches "ham"
+ * is marked as ham, and the classes are compared again. The heuristic
+ * overwrites st->is_spam in place.
+ *
+ * @param cf classifier configuration whose statfiles list is checked
+ * @return TRUE if statfiles of different classes are present, FALSE if the
+ * list is empty or all statfiles end up in the same class
+ */
+gboolean
+check_classifier_statfiles (struct classifier_config *cf)
+{
+	struct statfile *st;
+	gboolean has_other = FALSE, res = FALSE, cur_class = FALSE;
+	GList *cur;
+
+	/* First check classes directly */
+	cur = cf->statfiles;
+	while (cur) {
+		st = cur->data;
+		if (!has_other) {
+			cur_class = st->is_spam;
+			has_other = TRUE;
+		}
+		else if (cur_class != st->is_spam) {
+			return TRUE;
+		}
+
+		cur = g_list_next (cur);
+	}
+
+	if (!has_other) {
+		/* The statfiles list is empty */
+		return FALSE;
+	}
+
+	/*
+	 * We have not detected any statfile that has different class, so turn on
+	 * heuristic based on symbol's name
+	 */
+	has_other = FALSE;
+	cur = cf->statfiles;
+	while (cur) {
+		st = cur->data;
+		/* A statfile without a symbol keeps its configured class */
+		if (st->symbol != NULL) {
+			if (rspamd_strncasestr (st->symbol, "spam", -1) != NULL) {
+				st->is_spam = TRUE;
+			}
+			else if (rspamd_strncasestr (st->symbol, "ham", -1) != NULL) {
+				st->is_spam = FALSE;
+			}
+		}
+
+		if (!has_other) {
+			cur_class = st->is_spam;
+			has_other = TRUE;
+		}
+		else if (cur_class != st->is_spam) {
+			/*
+			 * Keep iterating instead of returning: the remaining statfiles
+			 * still need their class assigned from the symbol name
+			 */
+			res = TRUE;
+		}
+
+		cur = g_list_next (cur);
+	}
+
+	return res;
+}
+
 /*
  * vi:ts=4
  */
diff --git a/src/cfg_xml.c b/src/cfg_xml.c
index 6953edb3a..bb23dd641 100644
--- a/src/cfg_xml.c
+++ b/src/cfg_xml.c
@@ -1790,8 +1790,8 @@ rspamd_xml_end_element (GMarkupParseContext *context, const gchar *element_name,
 			CHECK_TAG ("classifier", FALSE);
 			if (res) {
 				ccf = ud->section_pointer;
-				if (ccf->statfiles == NULL) {
-					*error = g_error_new (xml_error_quark (), XML_PARAM_MISSING, "classifier cannot contains no statfiles");
+				if (ccf->statfiles == NULL || !check_classifier_statfiles (ccf)) {
+					*error = g_error_new (xml_error_quark (), XML_PARAM_MISSING, "classifier cannot contains no statfiles or statfiles of the same class");
 					ud->state = XML_ERROR;
 					return;
 				}
-- 
2.39.5