source.dussan.org Git - rspamd.git/commitdiff
* Add ability to classify only specific headers (for example Subject)
author: cebka@lenovo-laptop <cebka@lenovo-laptop>
Mon, 1 Feb 2010 16:07:33 +0000 (19:07 +0300)
committer: cebka@lenovo-laptop <cebka@lenovo-laptop>
Mon, 1 Feb 2010 16:07:33 +0000 (19:07 +0300)
src/cfg_file.y
src/controller.c
src/filter.c

index d5a008587e6974afb1947baf3049a1ad05906d44..21509f2d69a8f718a6701828b66551a51d46d271 100644 (file)
@@ -60,7 +60,7 @@ struct rspamd_view *cur_view = NULL;
 %token  VIEW IP FROM SYMBOLS CLIENT_IP
 %token  AUTOLEARN MIN_MARK MAX_MARK MAXFILES MAXCORE
 %token  SETTINGS USER_SETTINGS DOMAIN_SETTINGS SYMBOL PATH SKIP_CHECK GROW_FACTOR
-%token  LOG_BUFFER DEBUG_IP NORMALIZER
+%token  LOG_BUFFER DEBUG_IP NORMALIZER HEADER_ONLY
 
 %type  <string>        STRING
 %type  <string>        VARIABLE
@@ -353,7 +353,6 @@ metriccmd:
        | metricfunction
        | metricscore
        | metricrjscore
-       | metricclassifier
        | metriccache
        ;
        
@@ -412,18 +411,6 @@ metricrjscore:
        }
        ;
 
-metricclassifier:
-       CLASSIFIER EQSIGN QUOTEDSTRING {
-               if (cur_metric == NULL) {
-                       cur_metric = memory_pool_alloc0 (cfg->cfg_pool, sizeof (struct metric));
-               }
-               if ((cur_metric->classifier = get_classifier ($3)) == NULL) {
-                       yyerror ("yyparse: unknown classifier %s", $3);
-                       YYERROR;
-               }
-       }
-       ;
-
 metriccache:
        CACHE_FILE EQSIGN QUOTEDSTRING {
                if (cur_metric == NULL) {
index b0aeca65ac32f240519c57dd9af649c3bf504e12..4e4b44cb340cfb02ba0d1a8a6f349de05172792d 100644 (file)
@@ -651,16 +651,29 @@ controller_read_socket (f_str_t * in, void *arg)
                        rspamd_dispatcher_write (session->dispatcher, out_buf, r, FALSE, FALSE);
                        return FALSE;
                }
-               cur = g_list_first (task->text_parts);
+               if ((s = g_hash_table_lookup (session->learn_classifier->opts, "header")) != NULL) {
+                       cur = message_get_header (task->task_pool, task->message, s);
+                       if (cur) {
+                               memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_list_free, cur);
+                       }
+               }
+               else {
+                       cur = g_list_first (task->text_parts);
+               }
                while (cur) {
-                       part = cur->data;
-                       if (part->is_empty) {
-                               cur = g_list_next (cur);
-                               continue;
+                       if (s != NULL) {
+                               c.len = strlen (cur->data);
+                               c.begin = cur->data;
+                       }
+                       else {
+                               part = cur->data;
+                               if (part->is_empty) {
+                                       cur = g_list_next (cur);
+                                       continue;
+                               }
+                               c.begin = part->content->data;
+                               c.len = part->content->len;
                        }
-                       c.begin = part->content->data;
-                       c.len = part->content->len;
-
                        if (!session->learn_classifier->tokenizer->tokenize_func (session->learn_classifier->tokenizer, session->session_pool, &c, &tokens)) {
                                i = snprintf (out_buf, sizeof (out_buf), "learn fail, tokenizer error" CRLF);
                                free_task (task, FALSE);
index 0b1ecf583959d5b5525ef50c316ba97f04db0060..9ad1362f07075fba9309a37f0c42dd49a310b54c 100644 (file)
@@ -534,23 +534,44 @@ classifiers_callback (gpointer value, void *arg)
        GTree                          *tokens = NULL;
        GList                          *cur;
        f_str_t                         c;
-
-       cur = g_list_first (task->text_parts);
+       char                           *header = NULL;
+       
+       if ((header = g_hash_table_lookup (cl->opts, "header")) != NULL) {
+               cur = message_get_header (task->task_pool, task->message, header);
+               if (cur) {
+                       memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_list_free, cur);
+               }
+       }
+       else {
+               cur = g_list_first (task->text_parts);
+       }
        ctx = cl->classifier->init_func (task->task_pool, cl);
 
        if ((tokens = g_hash_table_lookup (data->tokens, cl->tokenizer)) == NULL) {
                while (cur != NULL) {
-                       text_part = (struct mime_text_part *)cur->data;
-                       if (text_part->is_empty) {
-                               cur = g_list_next (cur);
-                               continue;
+                       if (header) {
+                               c.len = strlen (cur->data);
+                               if (c.len > 0) {
+                                       c.begin = cur->data;
+                                       if (!cl->tokenizer->tokenize_func (cl->tokenizer, task->task_pool, &c, &tokens)) {
+                                               msg_info ("cannot tokenize input");
+                                               return;
+                                       }
+                               }
                        }
-                       c.begin = text_part->content->data;
-                       c.len = text_part->content->len;
-                       /* Tree would be freed at task pool freeing */
-                       if (!cl->tokenizer->tokenize_func (cl->tokenizer, task->task_pool, &c, &tokens)) {
-                               msg_info ("cannot tokenize input");
-                               return;
+                       else {
+                               text_part = (struct mime_text_part *)cur->data;
+                               if (text_part->is_empty) {
+                                       cur = g_list_next (cur);
+                                       continue;
+                               }
+                               c.begin = text_part->content->data;
+                               c.len = text_part->content->len;
+                               /* Tree would be freed at task pool freeing */
+                               if (!cl->tokenizer->tokenize_func (cl->tokenizer, task->task_pool, &c, &tokens)) {
+                                       msg_info ("cannot tokenize input");
+                                       return;
+                               }
                        }
                        cur = g_list_next (cur);
                }