diff options
author | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2011-07-13 13:07:45 +0400 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2011-07-13 13:07:45 +0400 |
commit | 091e84951a2b032bb2930b300ffe43eaf01a304e (patch) | |
tree | 5d5992bb7de97fdb20c04bda9fff013fbe134c82 | |
parent | c6d62c095bc27aebd6c5f8dc9716467ae147fb68 (diff) | |
download | rspamd-091e84951a2b032bb2930b300ffe43eaf01a304e.tar.gz rspamd-091e84951a2b032bb2930b300ffe43eaf01a304e.zip |
Add validity detector for statfiles inside classifier.
Add heuristic to detect spam/ham classes based on statfile symbol.
-rw-r--r-- | src/cfg_file.h | 1 | ||||
-rw-r--r-- | src/cfg_utils.c | 56 | ||||
-rw-r--r-- | src/cfg_xml.c | 4 |
3 files changed, 59 insertions, 2 deletions
diff --git a/src/cfg_file.h b/src/cfg_file.h index d15923639..64a46d149 100644 --- a/src/cfg_file.h +++ b/src/cfg_file.h @@ -430,6 +430,7 @@ gboolean parse_normalizer (struct config_file *cfg, struct statfile *st, const g gboolean read_xml_config (struct config_file *cfg, const gchar *filename); gboolean check_modules_config (struct config_file *cfg); void insert_classifier_symbols (struct config_file *cfg); +gboolean check_classifier_statfiles (struct classifier_config *cf); struct classifier_config* find_classifier_conf (struct config_file *cfg, const gchar *name); diff --git a/src/cfg_utils.c b/src/cfg_utils.c index 6bd16d620..56202d4f1 100644 --- a/src/cfg_utils.c +++ b/src/cfg_utils.c @@ -1044,6 +1044,62 @@ find_classifier_conf (struct config_file *cfg, const gchar *name) return NULL; } +gboolean +check_classifier_statfiles (struct classifier_config *cf) +{ + struct statfile *st; + gboolean has_other = FALSE, cur_class; + GList *cur; + + /* First check classes directly */ + cur = cf->statfiles; + while (cur) { + st = cur->data; + if (!has_other) { + cur_class = st->is_spam; + has_other = TRUE; + } + else { + if (cur_class != st->is_spam) { + return TRUE; + } + } + + cur = g_list_next (cur); + } + + if (!has_other) { + /* We have only one statfile */ + return FALSE; + } + /* We have not detected any statfile that has different class, so turn on euristic based on symbol's name */ + has_other = FALSE; + cur = cf->statfiles; + while (cur) { + st = cur->data; + if (rspamd_strncasestr (st->symbol, "spam", -1) != NULL) { + st->is_spam = TRUE; + } + else if (rspamd_strncasestr (st->symbol, "ham", -1) != NULL) { + st->is_spam = FALSE; + } + + if (!has_other) { + cur_class = st->is_spam; + has_other = TRUE; + } + else { + if (cur_class != st->is_spam) { + return TRUE; + } + } + + cur = g_list_next (cur); + } + + return FALSE; +} + /* * vi:ts=4 */ diff --git a/src/cfg_xml.c b/src/cfg_xml.c index 6953edb3a..bb23dd641 100644 --- a/src/cfg_xml.c +++ b/src/cfg_xml.c @@ 
-1790,8 +1790,8 @@ rspamd_xml_end_element (GMarkupParseContext *context, const gchar *element_name, CHECK_TAG ("classifier", FALSE); if (res) { ccf = ud->section_pointer; - if (ccf->statfiles == NULL) { - *error = g_error_new (xml_error_quark (), XML_PARAM_MISSING, "classifier cannot contains no statfiles"); + if (ccf->statfiles == NULL || !check_classifier_statfiles (ccf)) { + *error = g_error_new (xml_error_quark (), XML_PARAM_MISSING, "classifier cannot contains no statfiles or statfiles of the same class"); ud->state = XML_ERROR; return; } |