From: cebka@lenovo-laptop Date: Mon, 1 Feb 2010 16:07:33 +0000 (+0300) Subject: * Add ability to classify only specific headers (for example Subject) X-Git-Tag: 0.3.0~92 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=18b4a627676f71b37c98f566218fad6a249025cb;p=rspamd.git * Add ability to classify only specific headers (for example Subject) --- diff --git a/src/cfg_file.y b/src/cfg_file.y index d5a008587..21509f2d6 100644 --- a/src/cfg_file.y +++ b/src/cfg_file.y @@ -60,7 +60,7 @@ struct rspamd_view *cur_view = NULL; %token VIEW IP FROM SYMBOLS CLIENT_IP %token AUTOLEARN MIN_MARK MAX_MARK MAXFILES MAXCORE %token SETTINGS USER_SETTINGS DOMAIN_SETTINGS SYMBOL PATH SKIP_CHECK GROW_FACTOR -%token LOG_BUFFER DEBUG_IP NORMALIZER +%token LOG_BUFFER DEBUG_IP NORMALIZER HEADER_ONLY %type STRING %type VARIABLE @@ -353,7 +353,6 @@ metriccmd: | metricfunction | metricscore | metricrjscore - | metricclassifier | metriccache ; @@ -412,18 +411,6 @@ metricrjscore: } ; -metricclassifier: - CLASSIFIER EQSIGN QUOTEDSTRING { - if (cur_metric == NULL) { - cur_metric = memory_pool_alloc0 (cfg->cfg_pool, sizeof (struct metric)); - } - if ((cur_metric->classifier = get_classifier ($3)) == NULL) { - yyerror ("yyparse: unknown classifier %s", $3); - YYERROR; - } - } - ; - metriccache: CACHE_FILE EQSIGN QUOTEDSTRING { if (cur_metric == NULL) { diff --git a/src/controller.c b/src/controller.c index b0aeca65a..4e4b44cb3 100644 --- a/src/controller.c +++ b/src/controller.c @@ -651,16 +651,29 @@ controller_read_socket (f_str_t * in, void *arg) rspamd_dispatcher_write (session->dispatcher, out_buf, r, FALSE, FALSE); return FALSE; } - cur = g_list_first (task->text_parts); + if ((s = g_hash_table_lookup (session->learn_classifier->opts, "header")) != NULL) { + cur = message_get_header (task->task_pool, task->message, s); + if (cur) { + memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_list_free, cur); + } + } + else { + cur = g_list_first (task->text_parts); + } while (cur) { - part = cur->data; - if (part->is_empty) { - cur = g_list_next (cur); - continue; + if (s != NULL) { + c.len = strlen (cur->data); + c.begin = cur->data; + } + else { + part = cur->data; + if (part->is_empty) { + cur = g_list_next (cur); + continue; + } + c.begin = part->content->data; + c.len = part->content->len; } - c.begin = part->content->data; - c.len = part->content->len; - if (!session->learn_classifier->tokenizer->tokenize_func (session->learn_classifier->tokenizer, session->session_pool, &c, &tokens)) { i = snprintf (out_buf, sizeof (out_buf), "learn fail, tokenizer error" CRLF); free_task (task, FALSE); diff --git a/src/filter.c b/src/filter.c index 0b1ecf583..9ad1362f0 100644 --- a/src/filter.c +++ b/src/filter.c @@ -534,23 +534,44 @@ classifiers_callback (gpointer value, void *arg) GTree *tokens = NULL; GList *cur; f_str_t c; - - cur = g_list_first (task->text_parts); + char *header = NULL; + + if ((header = g_hash_table_lookup (cl->opts, "header")) != NULL) { + cur = message_get_header (task->task_pool, task->message, header); + if (cur) { + memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_list_free, cur); + } + } + else { + cur = g_list_first (task->text_parts); + } ctx = cl->classifier->init_func (task->task_pool, cl); if ((tokens = g_hash_table_lookup (data->tokens, cl->tokenizer)) == NULL) { while (cur != NULL) { - text_part = (struct mime_text_part *)cur->data; - if (text_part->is_empty) { - cur = g_list_next (cur); - continue; + if (header) { + c.len = strlen (cur->data); + if (c.len > 0) { + c.begin = cur->data; + if (!cl->tokenizer->tokenize_func (cl->tokenizer, task->task_pool, &c, &tokens)) { + msg_info ("cannot tokenize input"); + return; + } + } } - c.begin = text_part->content->data; - c.len = text_part->content->len; - /* Tree would be freed at task pool freeing */ - if (!cl->tokenizer->tokenize_func (cl->tokenizer, task->task_pool, &c, &tokens)) { - msg_info ("cannot tokenize input"); - return; + else { + text_part = (struct mime_text_part *)cur->data; + if (text_part->is_empty) { + cur = g_list_next (cur); + continue; + } + c.begin = text_part->content->data; + c.len = text_part->content->len; + /* Tree would be freed at task pool freeing */ + if (!cl->tokenizer->tokenize_func (cl->tokenizer, task->task_pool, &c, &tokens)) { + msg_info ("cannot tokenize input"); + return; + } } cur = g_list_next (cur); }