aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@rambler-co.ru>2011-07-13 13:07:45 +0400
committerVsevolod Stakhov <vsevolod@rambler-co.ru>2011-07-13 13:07:45 +0400
commit091e84951a2b032bb2930b300ffe43eaf01a304e (patch)
tree5d5992bb7de97fdb20c04bda9fff013fbe134c82
parentc6d62c095bc27aebd6c5f8dc9716467ae147fb68 (diff)
downloadrspamd-091e84951a2b032bb2930b300ffe43eaf01a304e.tar.gz
rspamd-091e84951a2b032bb2930b300ffe43eaf01a304e.zip
Add validity detector for statfiles inside classifier.
Add heuristic to detect spam/ham classes based on statfile symbol.
-rw-r--r--src/cfg_file.h1
-rw-r--r--src/cfg_utils.c56
-rw-r--r--src/cfg_xml.c4
3 files changed, 59 insertions, 2 deletions
diff --git a/src/cfg_file.h b/src/cfg_file.h
index d15923639..64a46d149 100644
--- a/src/cfg_file.h
+++ b/src/cfg_file.h
@@ -430,6 +430,7 @@ gboolean parse_normalizer (struct config_file *cfg, struct statfile *st, const g
gboolean read_xml_config (struct config_file *cfg, const gchar *filename);
gboolean check_modules_config (struct config_file *cfg);
void insert_classifier_symbols (struct config_file *cfg);
+gboolean check_classifier_statfiles (struct classifier_config *cf);
struct classifier_config* find_classifier_conf (struct config_file *cfg, const gchar *name);
diff --git a/src/cfg_utils.c b/src/cfg_utils.c
index 6bd16d620..56202d4f1 100644
--- a/src/cfg_utils.c
+++ b/src/cfg_utils.c
@@ -1044,6 +1044,62 @@ find_classifier_conf (struct config_file *cfg, const gchar *name)
return NULL;
}
+/**
+ * Check that the statfiles of a classifier cover two different classes.
+ *
+ * The explicit is_spam flags are compared first. If every statfile carries
+ * the same flag, the class of each statfile is guessed from its symbol
+ * (substring match for "spam", then "ham", via rspamd_strncasestr) and the
+ * guess is written back to st->is_spam. Statfiles whose symbol matches
+ * neither word keep their current flag.
+ *
+ * @param cf classifier configuration whose statfiles list is examined
+ * @return TRUE if at least two statfiles end up with different classes,
+ *         FALSE if the list is empty or all statfiles share a single class
+ */
+gboolean
+check_classifier_statfiles (struct classifier_config *cf)
+{
+	struct statfile *st;
+	gboolean seen_first = FALSE, first_class = FALSE, res = FALSE;
+	GList *cur;
+
+	/* First check classes directly */
+	for (cur = cf->statfiles; cur != NULL; cur = g_list_next (cur)) {
+		st = cur->data;
+		if (!seen_first) {
+			first_class = st->is_spam;
+			seen_first = TRUE;
+		}
+		else if (first_class != st->is_spam) {
+			return TRUE;
+		}
+	}
+
+	if (!seen_first) {
+		/* The list is empty: there is nothing to classify */
+		return FALSE;
+	}
+
+	/*
+	 * No statfile has a different class, so turn on the heuristic based on
+	 * the symbol's name. The loop intentionally does not stop at the first
+	 * mismatch: every statfile must get its class assigned, otherwise the
+	 * statfiles after the first mismatch would keep a stale is_spam value.
+	 */
+	seen_first = FALSE;
+	for (cur = cf->statfiles; cur != NULL; cur = g_list_next (cur)) {
+		st = cur->data;
+		/* A statfile without a symbol has no name to guess from */
+		if (st->symbol != NULL) {
+			if (rspamd_strncasestr (st->symbol, "spam", -1) != NULL) {
+				st->is_spam = TRUE;
+			}
+			else if (rspamd_strncasestr (st->symbol, "ham", -1) != NULL) {
+				st->is_spam = FALSE;
+			}
+		}
+
+		if (!seen_first) {
+			first_class = st->is_spam;
+			seen_first = TRUE;
+		}
+		else if (first_class != st->is_spam) {
+			res = TRUE;
+		}
+	}
+
+	return res;
+}
+
+
/*
* vi:ts=4
*/
diff --git a/src/cfg_xml.c b/src/cfg_xml.c
index 6953edb3a..bb23dd641 100644
--- a/src/cfg_xml.c
+++ b/src/cfg_xml.c
@@ -1790,8 +1790,8 @@ rspamd_xml_end_element (GMarkupParseContext *context, const gchar *element_name,
CHECK_TAG ("classifier", FALSE);
if (res) {
ccf = ud->section_pointer;
- if (ccf->statfiles == NULL) {
- *error = g_error_new (xml_error_quark (), XML_PARAM_MISSING, "classifier cannot contains no statfiles");
+ if (ccf->statfiles == NULL || !check_classifier_statfiles (ccf)) {
+ *error = g_error_new (xml_error_quark (), XML_PARAM_MISSING, "classifier cannot contains no statfiles or statfiles of the same class");
ud->state = XML_ERROR;
return;
}