source.dussan.org Git - rspamd.git/commitdiff
* Add ability to classify only specific headers (for example Subject)
author: cebka@lenovo-laptop <cebka@lenovo-laptop>
Mon, 1 Feb 2010 16:07:33 +0000 (19:07 +0300)
committer: cebka@lenovo-laptop <cebka@lenovo-laptop>
Mon, 1 Feb 2010 16:07:33 +0000 (19:07 +0300)
src/cfg_file.y
src/controller.c
src/filter.c

index d5a008587e6974afb1947baf3049a1ad05906d44..21509f2d69a8f718a6701828b66551a51d46d271 100644 (file)
@@ -60,7 +60,7 @@ struct rspamd_view *cur_view = NULL;
 %token  VIEW IP FROM SYMBOLS CLIENT_IP
 %token  AUTOLEARN MIN_MARK MAX_MARK MAXFILES MAXCORE
 %token  SETTINGS USER_SETTINGS DOMAIN_SETTINGS SYMBOL PATH SKIP_CHECK GROW_FACTOR
-%token  LOG_BUFFER DEBUG_IP NORMALIZER
+%token  LOG_BUFFER DEBUG_IP NORMALIZER HEADER_ONLY
 
 %type  <string>        STRING
 %type  <string>        VARIABLE
@@ -353,7 +353,6 @@ metriccmd:
        | metricfunction
        | metricscore
        | metricrjscore
-       | metricclassifier
        | metriccache
        ;
        
@@ -412,18 +411,6 @@ metricrjscore:
        }
        ;
 
-metricclassifier:
-       CLASSIFIER EQSIGN QUOTEDSTRING {
-               if (cur_metric == NULL) {
-                       cur_metric = memory_pool_alloc0 (cfg->cfg_pool, sizeof (struct metric));
-               }
-               if ((cur_metric->classifier = get_classifier ($3)) == NULL) {
-                       yyerror ("yyparse: unknown classifier %s", $3);
-                       YYERROR;
-               }
-       }
-       ;
-
 metriccache:
        CACHE_FILE EQSIGN QUOTEDSTRING {
                if (cur_metric == NULL) {
index b0aeca65ac32f240519c57dd9af649c3bf504e12..4e4b44cb340cfb02ba0d1a8a6f349de05172792d 100644 (file)
@@ -651,16 +651,29 @@ controller_read_socket (f_str_t * in, void *arg)
                        rspamd_dispatcher_write (session->dispatcher, out_buf, r, FALSE, FALSE);
                        return FALSE;
                }
-               cur = g_list_first (task->text_parts);
+               if ((s = g_hash_table_lookup (session->learn_classifier->opts, "header")) != NULL) {
+                       cur = message_get_header (task->task_pool, task->message, s);
+                       if (cur) {
+                               memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_list_free, cur);
+                       }
+               }
+               else {
+                       cur = g_list_first (task->text_parts);
+               }
                while (cur) {
-                       part = cur->data;
-                       if (part->is_empty) {
-                               cur = g_list_next (cur);
-                               continue;
+                       if (s != NULL) {
+                               c.len = strlen (cur->data);
+                               c.begin = cur->data;
+                       }
+                       else {
+                               part = cur->data;
+                               if (part->is_empty) {
+                                       cur = g_list_next (cur);
+                                       continue;
+                               }
+                               c.begin = part->content->data;
+                               c.len = part->content->len;
                        }
-                       c.begin = part->content->data;
-                       c.len = part->content->len;
-
                        if (!session->learn_classifier->tokenizer->tokenize_func (session->learn_classifier->tokenizer, session->session_pool, &c, &tokens)) {
                                i = snprintf (out_buf, sizeof (out_buf), "learn fail, tokenizer error" CRLF);
                                free_task (task, FALSE);
index 0b1ecf583959d5b5525ef50c316ba97f04db0060..9ad1362f07075fba9309a37f0c42dd49a310b54c 100644 (file)
@@ -534,23 +534,44 @@ classifiers_callback (gpointer value, void *arg)
        GTree                          *tokens = NULL;
        GList                          *cur;
        f_str_t                         c;
-
-       cur = g_list_first (task->text_parts);
+       char                           *header = NULL;
+       
+       if ((header = g_hash_table_lookup (cl->opts, "header")) != NULL) {
+               cur = message_get_header (task->task_pool, task->message, header);
+               if (cur) {
+                       memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_list_free, cur);
+               }
+       }
+       else {
+               cur = g_list_first (task->text_parts);
+       }
        ctx = cl->classifier->init_func (task->task_pool, cl);
 
        if ((tokens = g_hash_table_lookup (data->tokens, cl->tokenizer)) == NULL) {
                while (cur != NULL) {
-                       text_part = (struct mime_text_part *)cur->data;
-                       if (text_part->is_empty) {
-                               cur = g_list_next (cur);
-                               continue;
+                       if (header) {
+                               c.len = strlen (cur->data);
+                               if (c.len > 0) {
+                                       c.begin = cur->data;
+                                       if (!cl->tokenizer->tokenize_func (cl->tokenizer, task->task_pool, &c, &tokens)) {
+                                               msg_info ("cannot tokenize input");
+                                               return;
+                                       }
+                               }
                        }
-                       c.begin = text_part->content->data;
-                       c.len = text_part->content->len;
-                       /* Tree would be freed at task pool freeing */
-                       if (!cl->tokenizer->tokenize_func (cl->tokenizer, task->task_pool, &c, &tokens)) {
-                               msg_info ("cannot tokenize input");
-                               return;
+                       else {
+                               text_part = (struct mime_text_part *)cur->data;
+                               if (text_part->is_empty) {
+                                       cur = g_list_next (cur);
+                                       continue;
+                               }
+                               c.begin = text_part->content->data;
+                               c.len = text_part->content->len;
+                               /* Tree would be freed at task pool freeing */
+                               if (!cl->tokenizer->tokenize_func (cl->tokenizer, task->task_pool, &c, &tokens)) {
+                                       msg_info ("cannot tokenize input");
+                                       return;
+                               }
                        }
                        cur = g_list_next (cur);
                }