author    cebka@lenovo-laptop <cebka@lenovo-laptop>    2010-02-01 19:07:33 +0300
committer cebka@lenovo-laptop <cebka@lenovo-laptop>    2010-02-01 19:07:33 +0300
commit    18b4a627676f71b37c98f566218fad6a249025cb (patch)
tree      389153db1066311040184f2b908e4f3b7b5e8536
parent    56f520e21f7f164bcd2d99bb46b5875b0a398e75 (diff)
* Add ability to classify only specific headers (for example Subject)
-rw-r--r--   src/cfg_file.y     15
-rw-r--r--   src/controller.c   29
-rw-r--r--   src/filter.c       45
3 files changed, 55 insertions, 34 deletions
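
As the commit message says, a classifier can now be pointed at a single header (e.g. Subject) instead of the message body. Both the controller and filter hunks below implement the same selection: if the classifier's opts table contains a "header" entry, the values of that header are tokenized; otherwise the MIME text parts are used as before. Here is a minimal, self-contained sketch of that selection logic using plain GLib with simplified stand-in types (collect_token_sources, sketch_fstr and sketch_text_part are illustrative names, not rspamd APIs):

#include <glib.h>
#include <string.h>

/* Simplified stand-ins for rspamd's f_str_t and struct mime_text_part. */
struct sketch_fstr {
	char   *begin;
	gsize   len;
};

struct sketch_text_part {
	gboolean  is_empty;
	GString  *content;
};

/*
 * Collect the chunks that should be fed to the tokenizer.  If the
 * classifier options carry a "header" entry, every element of `headers'
 * is a NUL-terminated header value; otherwise the decoded MIME text
 * parts are used, skipping empty ones.
 */
static GList *
collect_token_sources (GHashTable *classifier_opts, GList *headers, GList *text_parts)
{
	GList *res = NULL, *cur;

	if (g_hash_table_lookup (classifier_opts, "header") != NULL) {
		/* Header mode: tokenize the raw header values. */
		for (cur = headers; cur != NULL; cur = g_list_next (cur)) {
			struct sketch_fstr *c = g_new0 (struct sketch_fstr, 1);
			c->begin = cur->data;
			c->len = strlen (cur->data);
			res = g_list_prepend (res, c);
		}
	}
	else {
		/* Default mode: tokenize the non-empty MIME text parts. */
		for (cur = text_parts; cur != NULL; cur = g_list_next (cur)) {
			struct sketch_text_part *part = cur->data;
			if (part->is_empty) {
				continue;
			}
			struct sketch_fstr *c = g_new0 (struct sketch_fstr, 1);
			c->begin = part->content->str;
			c->len = part->content->len;
			res = g_list_prepend (res, c);
		}
	}

	return g_list_reverse (res);
}

In the real code the chunks are passed straight to cl->tokenizer->tokenize_func rather than collected into a list; the sketch only shows where the text comes from in each mode.
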
diff --git a/src/cfg_file.y b/src/cfg_file.y
index d5a008587..21509f2d6 100644
--- a/src/cfg_file.y
+++ b/src/cfg_file.y
@@ -60,7 +60,7 @@ struct rspamd_view *cur_view = NULL;
%token VIEW IP FROM SYMBOLS CLIENT_IP
%token AUTOLEARN MIN_MARK MAX_MARK MAXFILES MAXCORE
%token SETTINGS USER_SETTINGS DOMAIN_SETTINGS SYMBOL PATH SKIP_CHECK GROW_FACTOR
-%token LOG_BUFFER DEBUG_IP NORMALIZER
+%token LOG_BUFFER DEBUG_IP NORMALIZER HEADER_ONLY
%type <string> STRING
%type <string> VARIABLE
@@ -353,7 +353,6 @@ metriccmd:
| metricfunction
| metricscore
| metricrjscore
- | metricclassifier
| metriccache
;
@@ -412,18 +411,6 @@ metricrjscore:
}
;
-metricclassifier:
- CLASSIFIER EQSIGN QUOTEDSTRING {
- if (cur_metric == NULL) {
- cur_metric = memory_pool_alloc0 (cfg->cfg_pool, sizeof (struct metric));
- }
- if ((cur_metric->classifier = get_classifier ($3)) == NULL) {
- yyerror ("yyparse: unknown classifier %s", $3);
- YYERROR;
- }
- }
- ;
-
metriccache:
CACHE_FILE EQSIGN QUOTEDSTRING {
if (cur_metric == NULL) {
diff --git a/src/controller.c b/src/controller.c
index b0aeca65a..4e4b44cb3 100644
--- a/src/controller.c
+++ b/src/controller.c
@@ -651,16 +651,29 @@ controller_read_socket (f_str_t * in, void *arg)
rspamd_dispatcher_write (session->dispatcher, out_buf, r, FALSE, FALSE);
return FALSE;
}
- cur = g_list_first (task->text_parts);
+ if ((s = g_hash_table_lookup (session->learn_classifier->opts, "header")) != NULL) {
+ cur = message_get_header (task->task_pool, task->message, s);
+ if (cur) {
+ memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_list_free, cur);
+ }
+ }
+ else {
+ cur = g_list_first (task->text_parts);
+ }
while (cur) {
- part = cur->data;
- if (part->is_empty) {
- cur = g_list_next (cur);
- continue;
+ if (s != NULL) {
+ c.len = strlen (cur->data);
+ c.begin = cur->data;
+ }
+ else {
+ part = cur->data;
+ if (part->is_empty) {
+ cur = g_list_next (cur);
+ continue;
+ }
+ c.begin = part->content->data;
+ c.len = part->content->len;
}
- c.begin = part->content->data;
- c.len = part->content->len;
-
if (!session->learn_classifier->tokenizer->tokenize_func (session->learn_classifier->tokenizer, session->session_pool, &c, &tokens)) {
i = snprintf (out_buf, sizeof (out_buf), "learn fail, tokenizer error" CRLF);
free_task (task, FALSE);
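
One detail worth noting in the controller hunk above (the filter.c hunk below does the same): the list returned by message_get_header is newly allocated, so g_list_free is registered as a destructor on the task's memory pool rather than freed inline, and only the list cells are released, since the header strings belong to the parsed message. A rough GLib-only analogy of that pool-owned cleanup, with sketch_pool and friends as invented illustrative names rather than rspamd's memory_pool API:

#include <glib.h>

/* Invented stand-in for a task-scoped memory pool: a list of cleanup
 * callbacks that run when the task is torn down. */
struct sketch_pool {
	GSList *cleanups;
};

struct sketch_cleanup {
	GDestroyNotify  func;
	gpointer        data;
};

static void
sketch_pool_add_destructor (struct sketch_pool *pool, GDestroyNotify func, gpointer data)
{
	struct sketch_cleanup *c = g_new0 (struct sketch_cleanup, 1);

	c->func = func;
	c->data = data;
	pool->cleanups = g_slist_prepend (pool->cleanups, c);
}

static void
sketch_pool_destroy (struct sketch_pool *pool)
{
	GSList *cur;

	/* Run the registered destructors, then drop the bookkeeping. */
	for (cur = pool->cleanups; cur != NULL; cur = cur->next) {
		struct sketch_cleanup *c = cur->data;

		c->func (c->data);
	}
	g_slist_free_full (pool->cleanups, g_free);
	pool->cleanups = NULL;
}

With such a pool, the hunk's memory_pool_add_destructor (task->task_pool, (pool_destruct_func) g_list_free, cur) corresponds to sketch_pool_add_destructor (pool, (GDestroyNotify) g_list_free, headers): the header list lives exactly as long as the task.
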
diff --git a/src/filter.c b/src/filter.c
index 0b1ecf583..9ad1362f0 100644
--- a/src/filter.c
+++ b/src/filter.c
@@ -534,23 +534,44 @@ classifiers_callback (gpointer value, void *arg)
GTree *tokens = NULL;
GList *cur;
f_str_t c;
-
- cur = g_list_first (task->text_parts);
+ char *header = NULL;
+
+ if ((header = g_hash_table_lookup (cl->opts, "header")) != NULL) {
+ cur = message_get_header (task->task_pool, task->message, header);
+ if (cur) {
+ memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_list_free, cur);
+ }
+ }
+ else {
+ cur = g_list_first (task->text_parts);
+ }
ctx = cl->classifier->init_func (task->task_pool, cl);
if ((tokens = g_hash_table_lookup (data->tokens, cl->tokenizer)) == NULL) {
while (cur != NULL) {
- text_part = (struct mime_text_part *)cur->data;
- if (text_part->is_empty) {
- cur = g_list_next (cur);
- continue;
+ if (header) {
+ c.len = strlen (cur->data);
+ if (c.len > 0) {
+ c.begin = cur->data;
+ if (!cl->tokenizer->tokenize_func (cl->tokenizer, task->task_pool, &c, &tokens)) {
+ msg_info ("cannot tokenize input");
+ return;
+ }
+ }
}
- c.begin = text_part->content->data;
- c.len = text_part->content->len;
- /* Tree would be freed at task pool freeing */
- if (!cl->tokenizer->tokenize_func (cl->tokenizer, task->task_pool, &c, &tokens)) {
- msg_info ("cannot tokenize input");
- return;
+ else {
+ text_part = (struct mime_text_part *)cur->data;
+ if (text_part->is_empty) {
+ cur = g_list_next (cur);
+ continue;
+ }
+ c.begin = text_part->content->data;
+ c.len = text_part->content->len;
+ /* Tree would be freed at task pool freeing */
+ if (!cl->tokenizer->tokenize_func (cl->tokenizer, task->task_pool, &c, &tokens)) {
+ msg_info ("cannot tokenize input");
+ return;
+ }
}
cur = g_list_next (cur);
}