Add heuristic to detect spam/ham classes based on statfile symbol. tags/0.4.0
@@ -430,6 +430,7 @@ gboolean parse_normalizer (struct config_file *cfg, struct statfile *st, const g | |||
gboolean read_xml_config (struct config_file *cfg, const gchar *filename); | |||
gboolean check_modules_config (struct config_file *cfg); | |||
void insert_classifier_symbols (struct config_file *cfg); | |||
gboolean check_classifier_statfiles (struct classifier_config *cf); | |||
struct classifier_config* find_classifier_conf (struct config_file *cfg, const gchar *name); | |||
@@ -1044,6 +1044,62 @@ find_classifier_conf (struct config_file *cfg, const gchar *name) | |||
return NULL; | |||
} | |||
gboolean | |||
check_classifier_statfiles (struct classifier_config *cf) | |||
{ | |||
struct statfile *st; | |||
gboolean has_other = FALSE, cur_class; | |||
GList *cur; | |||
/* First check classes directly */ | |||
cur = cf->statfiles; | |||
while (cur) { | |||
st = cur->data; | |||
if (!has_other) { | |||
cur_class = st->is_spam; | |||
has_other = TRUE; | |||
} | |||
else { | |||
if (cur_class != st->is_spam) { | |||
return TRUE; | |||
} | |||
} | |||
cur = g_list_next (cur); | |||
} | |||
if (!has_other) { | |||
/* We have only one statfile */ | |||
return FALSE; | |||
} | |||
/* We have not detected any statfile that has different class, so turn on euristic based on symbol's name */ | |||
has_other = FALSE; | |||
cur = cf->statfiles; | |||
while (cur) { | |||
st = cur->data; | |||
if (rspamd_strncasestr (st->symbol, "spam", -1) != NULL) { | |||
st->is_spam = TRUE; | |||
} | |||
else if (rspamd_strncasestr (st->symbol, "ham", -1) != NULL) { | |||
st->is_spam = FALSE; | |||
} | |||
if (!has_other) { | |||
cur_class = st->is_spam; | |||
has_other = TRUE; | |||
} | |||
else { | |||
if (cur_class != st->is_spam) { | |||
return TRUE; | |||
} | |||
} | |||
cur = g_list_next (cur); | |||
} | |||
return FALSE; | |||
} | |||
/* | |||
* vi:ts=4 | |||
*/ |
@@ -1790,8 +1790,8 @@ rspamd_xml_end_element (GMarkupParseContext *context, const gchar *element_name, | |||
CHECK_TAG ("classifier", FALSE); | |||
if (res) { | |||
ccf = ud->section_pointer; | |||
if (ccf->statfiles == NULL) { | |||
*error = g_error_new (xml_error_quark (), XML_PARAM_MISSING, "classifier cannot contains no statfiles"); | |||
if (ccf->statfiles == NULL || !check_classifier_statfiles (ccf)) { | |||
*error = g_error_new (xml_error_quark (), XML_PARAM_MISSING, "classifier cannot contains no statfiles or statfiles of the same class"); | |||
ud->state = XML_ERROR; | |||
return; | |||
} |