author    cebka@lenovo-laptop <cebka@lenovo-laptop>    2010-02-01 19:07:33 +0300
committer cebka@lenovo-laptop <cebka@lenovo-laptop>    2010-02-01 19:07:33 +0300
commit    18b4a627676f71b37c98f566218fad6a249025cb (patch)
tree      389153db1066311040184f2b908e4f3b7b5e8536
parent    56f520e21f7f164bcd2d99bb46b5875b0a398e75 (diff)
* Add ability to classify only specific headers (for example Subject)
-rw-r--r--   src/cfg_file.y     15
-rw-r--r--   src/controller.c   29
-rw-r--r--   src/filter.c       45
3 files changed, 55 insertions, 34 deletions
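
As the commit message says, a classifier can now be pointed at a single header (e.g. Subject) instead of the message body. Both the controller and filter hunks below implement the same selection: if the classifier's opts table contains a "header" entry, the values of that header are tokenized; otherwise the MIME text parts are used as before. Here is a minimal, self-contained sketch of that selection logic using plain GLib with simplified stand-in types (collect_token_sources, sketch_fstr and sketch_text_part are illustrative names, not rspamd APIs):

#include <glib.h>
#include <string.h>

/* Simplified stand-ins for rspamd's f_str_t and struct mime_text_part. */
struct sketch_fstr {
	char   *begin;
	gsize   len;
};

struct sketch_text_part {
	gboolean  is_empty;
	GString  *content;
};

/*
 * Collect the chunks that should be fed to the tokenizer.  If the
 * classifier options carry a "header" entry, every element of `headers'
 * is a NUL-terminated header value; otherwise the decoded MIME text
 * parts are used, skipping empty ones.
 */
static GList *
collect_token_sources (GHashTable *classifier_opts, GList *headers, GList *text_parts)
{
	GList *res = NULL, *cur;

	if (g_hash_table_lookup (classifier_opts, "header") != NULL) {
		/* Header mode: tokenize the raw header values. */
		for (cur = headers; cur != NULL; cur = g_list_next (cur)) {
			struct sketch_fstr *c = g_new0 (struct sketch_fstr, 1);
			c->begin = cur->data;
			c->len = strlen (cur->data);
			res = g_list_prepend (res, c);
		}
	}
	else {
		/* Default mode: tokenize the non-empty MIME text parts. */
		for (cur = text_parts; cur != NULL; cur = g_list_next (cur)) {
			struct sketch_text_part *part = cur->data;
			if (part->is_empty) {
				continue;
			}
			struct sketch_fstr *c = g_new0 (struct sketch_fstr, 1);
			c->begin = part->content->str;
			c->len = part->content->len;
			res = g_list_prepend (res, c);
		}
	}

	return g_list_reverse (res);
}

In the real code the chunks are passed straight to cl->tokenizer->tokenize_func rather than collected into a list; the sketch only shows where the text comes from in each mode.
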
diff --git a/src/cfg_file.y b/src/cfg_file.y
index d5a008587..21509f2d6 100644
--- a/src/cfg_file.y
+++ b/src/cfg_file.y
@@ -60,7 +60,7 @@ struct rspamd_view *cur_view = NULL;
%token VIEW IP FROM SYMBOLS CLIENT_IP
%token AUTOLEARN MIN_MARK MAX_MARK MAXFILES MAXCORE
%token SETTINGS USER_SETTINGS DOMAIN_SETTINGS SYMBOL PATH SKIP_CHECK GROW_FACTOR
-%token LOG_BUFFER DEBUG_IP NORMALIZER
+%token LOG_BUFFER DEBUG_IP NORMALIZER HEADER_ONLY
%type <string> STRING
%type <string> VARIABLE
@@ -353,7 +353,6 @@ metriccmd:
| metricfunction
| metricscore
| metricrjscore
- | metricclassifier
| metriccache
;
@@ -412,18 +411,6 @@ metricrjscore:
}
;
-metricclassifier:
- CLASSIFIER EQSIGN QUOTEDSTRING {
- if (cur_metric == NULL) {
- cur_metric = memory_pool_alloc0 (cfg->cfg_pool, sizeof (struct metric));
- }
- if ((cur_metric->classifier = get_classifier ($3)) == NULL) {
- yyerror ("yyparse: unknown classifier %s", $3);
- YYERROR;
- }
- }
- ;
-
metriccache:
CACHE_FILE EQSIGN QUOTEDSTRING {
if (cur_metric == NULL) {
diff --git a/src/controller.c b/src/controller.c
index b0aeca65a..4e4b44cb3 100644
--- a/src/controller.c
+++ b/src/controller.c
@@ -651,16 +651,29 @@ controller_read_socket (f_str_t * in, void *arg)
rspamd_dispatcher_write (session->dispatcher, out_buf, r, FALSE, FALSE);
return FALSE;
}
- cur = g_list_first (task->text_parts);
+ if ((s = g_hash_table_lookup (session->learn_classifier->opts, "header")) != NULL) {
+ cur = message_get_header (task->task_pool, task->message, s);
+ if (cur) {
+ memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_list_free, cur);
+ }
+ }
+ else {
+ cur = g_list_first (task->text_parts);
+ }
while (cur) {
- part = cur->data;
- if (part->is_empty) {
- cur = g_list_next (cur);
- continue;
+ if (s != NULL) {
+ c.len = strlen (cur->data);
+ c.begin = cur->data;
+ }
+ else {
+ part = cur->data;
+ if (part->is_empty) {
+ cur = g_list_next (cur);
+ continue;
+ }
+ c.begin = part->content->data;
+ c.len = part->content->len;
}
- c.begin = part->content->data;
- c.len = part->content->len;
-
if (!session->learn_classifier->tokenizer->tokenize_func (session->learn_classifier->tokenizer, session->session_pool, &c, &tokens)) {
i = snprintf (out_buf, sizeof (out_buf), "learn fail, tokenizer error" CRLF);
free_task (task, FALSE);
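
One detail worth noting in the controller hunk above (the filter.c hunk below does the same): the list returned by message_get_header is newly allocated, so g_list_free is registered as a destructor on the task's memory pool rather than freed inline, and only the list cells are released, since the header strings belong to the parsed message. A rough GLib-only analogy of that pool-owned cleanup, with sketch_pool and friends as invented illustrative names rather than rspamd's memory_pool API:

#include <glib.h>

/* Invented stand-in for a task-scoped memory pool: a list of cleanup
 * callbacks that run when the task is torn down. */
struct sketch_pool {
	GSList *cleanups;
};

struct sketch_cleanup {
	GDestroyNotify  func;
	gpointer        data;
};

static void
sketch_pool_add_destructor (struct sketch_pool *pool, GDestroyNotify func, gpointer data)
{
	struct sketch_cleanup *c = g_new0 (struct sketch_cleanup, 1);

	c->func = func;
	c->data = data;
	pool->cleanups = g_slist_prepend (pool->cleanups, c);
}

static void
sketch_pool_destroy (struct sketch_pool *pool)
{
	GSList *cur;

	/* Run the registered destructors, then drop the bookkeeping. */
	for (cur = pool->cleanups; cur != NULL; cur = cur->next) {
		struct sketch_cleanup *c = cur->data;

		c->func (c->data);
	}
	g_slist_free_full (pool->cleanups, g_free);
	pool->cleanups = NULL;
}

With such a pool, the hunk's memory_pool_add_destructor (task->task_pool, (pool_destruct_func) g_list_free, cur) corresponds to sketch_pool_add_destructor (pool, (GDestroyNotify) g_list_free, headers): the header list lives exactly as long as the task.
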
diff --git a/src/filter.c b/src/filter.c
index 0b1ecf583..9ad1362f0 100644
--- a/src/filter.c
+++ b/src/filter.c
@@ -534,23 +534,44 @@ classifiers_callback (gpointer value, void *arg)
GTree *tokens = NULL;
GList *cur;
f_str_t c;
-
- cur = g_list_first (task->text_parts);
+ char *header = NULL;
+
+ if ((header = g_hash_table_lookup (cl->opts, "header")) != NULL) {
+ cur = message_get_header (task->task_pool, task->message, header);
+ if (cur) {
+ memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_list_free, cur);
+ }
+ }
+ else {
+ cur = g_list_first (task->text_parts);
+ }
ctx = cl->classifier->init_func (task->task_pool, cl);
if ((tokens = g_hash_table_lookup (data->tokens, cl->tokenizer)) == NULL) {
while (cur != NULL) {
- text_part = (struct mime_text_part *)cur->data;
- if (text_part->is_empty) {
- cur = g_list_next (cur);
- continue;
+ if (header) {
+ c.len = strlen (cur->data);
+ if (c.len > 0) {
+ c.begin = cur->data;
+ if (!cl->tokenizer->tokenize_func (cl->tokenizer, task->task_pool, &c, &tokens)) {
+ msg_info ("cannot tokenize input");
+ return;
+ }
+ }
}
- c.begin = text_part->content->data;
- c.len = text_part->content->len;
- /* Tree would be freed at task pool freeing */
- if (!cl->tokenizer->tokenize_func (cl->tokenizer, task->task_pool, &c, &tokens)) {
- msg_info ("cannot tokenize input");
- return;
+ else {
+ text_part = (struct mime_text_part *)cur->data;
+ if (text_part->is_empty) {
+ cur = g_list_next (cur);
+ continue;
+ }
+ c.begin = text_part->content->data;
+ c.len = text_part->content->len;
+ /* Tree would be freed at task pool freeing */
+ if (!cl->tokenizer->tokenize_func (cl->tokenizer, task->task_pool, &c, &tokens)) {
+ msg_info ("cannot tokenize input");
+ return;
+ }
}
cur = g_list_next (cur);
}