]> source.dussan.org Git - rspamd.git/commitdiff
Add validity detector for statfiles inside classifier.
authorVsevolod Stakhov <vsevolod@rambler-co.ru>
Wed, 13 Jul 2011 09:07:45 +0000 (13:07 +0400)
committerVsevolod Stakhov <vsevolod@rambler-co.ru>
Wed, 13 Jul 2011 09:07:45 +0000 (13:07 +0400)
Add heuristic to detect spam/ham classes based on statfile symbol.

src/cfg_file.h
src/cfg_utils.c
src/cfg_xml.c

index d15923639a29d35b373a66cb8d33e6cec06abae7..64a46d1499d990a56093131b0db5fc5e109b2689 100644 (file)
@@ -430,6 +430,7 @@ gboolean parse_normalizer (struct config_file *cfg, struct statfile *st, const g
 gboolean read_xml_config (struct config_file *cfg, const gchar *filename);
 gboolean check_modules_config (struct config_file *cfg);
 void insert_classifier_symbols (struct config_file *cfg);
+gboolean check_classifier_statfiles (struct classifier_config *cf);
 
 struct classifier_config* find_classifier_conf (struct config_file *cfg, const gchar *name);
 
index 6bd16d620fb0290eb0f659234ac3170f5844e350..56202d4f11aa6afaab93ba1654b49a392679d1c3 100644 (file)
@@ -1044,6 +1044,62 @@ find_classifier_conf (struct config_file *cfg, const gchar *name)
        return NULL;
 }
 
+gboolean /* TRUE if two statfiles of cf differ in is_spam (class), FALSE otherwise */
+check_classifier_statfiles (struct classifier_config *cf)
+{
+       struct statfile                *st;
+       gboolean                        has_other = FALSE, cur_class; /* cur_class is only read once has_other is TRUE, i.e. after it was assigned */
+       GList                          *cur;
+
+       /* Pass 1: compare the is_spam flags as they are currently set; the first statfile seeds cur_class */
+       cur = cf->statfiles;
+       while (cur) {
+               st = cur->data;
+               if (!has_other) {
+                       cur_class = st->is_spam;
+                       has_other = TRUE;
+               }
+               else {
+                       if (cur_class != st->is_spam) {
+                               return TRUE;
+                       }
+               }
+
+               cur = g_list_next (cur);
+       }
+
+       if (!has_other) {
+               /* The loop never ran, so the list is empty; a single statfile sets has_other and falls through to pass 2 */
+               return FALSE;
+       }
+       /* Pass 2: all flags were equal, so apply a heuristic on the symbol name; note that it overwrites st->is_spam even if FALSE is returned */
+       has_other = FALSE;
+       cur = cf->statfiles;
+       while (cur) {
+               st = cur->data;
+               if (rspamd_strncasestr (st->symbol, "spam", -1) != NULL) { /* presumably a case-insensitive substring search - confirm in helper */
+                       st->is_spam = TRUE;
+               }
+               else if (rspamd_strncasestr (st->symbol, "ham", -1) != NULL) { /* "spam" is tested first; symbols matching neither keep their flag */
+                       st->is_spam = FALSE;
+               }
+
+               if (!has_other) {
+                       cur_class = st->is_spam;
+                       has_other = TRUE;
+               }
+               else {
+                       if (cur_class != st->is_spam) {
+                               return TRUE;
+                       }
+               }
+
+               cur = g_list_next (cur);
+       }
+
+       return FALSE;
+}
+
 /*
  * vi:ts=4
  */
index 6953edb3a603faec9f4186d889014c0c0523c270..bb23dd64137503c50aca2850a2fd5736fd795252 100644 (file)
@@ -1790,8 +1790,8 @@ rspamd_xml_end_element (GMarkupParseContext       *context, const gchar *element_name,
                        CHECK_TAG ("classifier", FALSE);
                        if (res) {
                                ccf = ud->section_pointer;
-                               if (ccf->statfiles == NULL) {
-                                       *error = g_error_new (xml_error_quark (), XML_PARAM_MISSING, "classifier cannot contains no statfiles");
+                               if (ccf->statfiles == NULL || !check_classifier_statfiles (ccf)) {
+                                       *error = g_error_new (xml_error_quark (), XML_PARAM_MISSING, "classifier cannot contains no statfiles or statfiles of the same class");
                                        ud->state = XML_ERROR;
                                        return;
                                }