From 58e6cb1a4fce75ef521233bd734a7706b2f5e26f Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Mon, 3 Apr 2017 18:00:01 +0100 Subject: [PATCH] [Feature] Store headers order --- src/libmime/message.h | 1 + src/libmime/mime_headers.c | 19 ++++++++++++------- src/libmime/mime_headers.h | 4 +++- src/libmime/mime_parser.c | 6 ++++++ src/libserver/task.c | 22 ++++++++++++++-------- src/libserver/task.h | 1 + 6 files changed, 37 insertions(+), 16 deletions(-) diff --git a/src/libmime/message.h b/src/libmime/message.h index 03e0e8345..22f4fd24d 100644 --- a/src/libmime/message.h +++ b/src/libmime/message.h @@ -49,6 +49,7 @@ struct rspamd_mime_part { rspamd_ftok_t parsed_data; struct rspamd_mime_part *parent_part; GHashTable *raw_headers; + GQueue *headers_order; gchar *raw_headers_str; gsize raw_headers_len; enum rspamd_cte cte; diff --git a/src/libmime/mime_headers.c b/src/libmime/mime_headers.c index 2ea4c0dcb..5044fc013 100644 --- a/src/libmime/mime_headers.c +++ b/src/libmime/mime_headers.c @@ -121,7 +121,8 @@ rspamd_mime_header_check_special (struct rspamd_task *task, static void rspamd_mime_header_add (struct rspamd_task *task, - GHashTable *target, struct rspamd_mime_header *rh, + GHashTable *target, GQueue *order, + struct rspamd_mime_header *rh, gboolean check_special) { GPtrArray *ar; @@ -137,6 +138,8 @@ rspamd_mime_header_add (struct rspamd_task *task, msg_debug_task ("add new raw header %s: %s", rh->name, rh->value); } + g_queue_push_tail (order, rh); + if (check_special) { rspamd_mime_header_check_special (task, rh); } @@ -145,7 +148,9 @@ rspamd_mime_header_add (struct rspamd_task *task, /* Convert raw headers to a list of struct raw_header * */ void rspamd_mime_headers_process (struct rspamd_task *task, GHashTable *target, - const gchar *in, gsize len, gboolean check_newlines) + GQueue *order, + const gchar *in, gsize len, + gboolean check_newlines) { struct rspamd_mime_header *nh = NULL; const gchar *p, *c, *end; @@ -153,7 +158,7 @@ rspamd_mime_headers_process (struct rspamd_task *task, GHashTable *target, gint state = 0, l, next_state = 100, err_state = 100, t_state; gboolean valid_folding = FALSE; guint nlines_count[RSPAMD_TASK_NEWLINES_MAX]; - guint order = 0; + guint norder = 0; p = in; end = p + len; @@ -334,16 +339,16 @@ rspamd_mime_headers_process (struct rspamd_task *task, GHashTable *target, /* We also validate utf8 and replace all non-valid utf8 chars */ rspamd_mime_charset_utf_enforce (nh->decoded, strlen (nh->decoded)); - rspamd_mime_header_add (task, target, nh, check_newlines); - nh->order = order ++; + rspamd_mime_header_add (task, target, order, nh, check_newlines); + nh->order = norder ++; state = 0; break; case 5: /* Header has only name, no value */ nh->value = ""; nh->decoded = ""; - rspamd_mime_header_add (task, target, nh, check_newlines); - nh->order = order ++; + rspamd_mime_header_add (task, target, order, nh, check_newlines); + nh->order = norder ++; state = 0; break; case 99: diff --git a/src/libmime/mime_headers.h b/src/libmime/mime_headers.h index 9e77aa14b..299e84dea 100644 --- a/src/libmime/mime_headers.h +++ b/src/libmime/mime_headers.h @@ -47,7 +47,9 @@ struct rspamd_mime_header { * @param check_newlines */ void rspamd_mime_headers_process (struct rspamd_task *task, GHashTable *target, - const gchar *in, gsize len, gboolean check_newlines); + GQueue *order, + const gchar *in, gsize len, + gboolean check_newlines); /** * Perform rfc2047 decoding of a header diff --git a/src/libmime/mime_parser.c b/src/libmime/mime_parser.c index 1626cecbe..9b245e35a 100644 --- a/src/libmime/mime_parser.c +++ b/src/libmime/mime_parser.c @@ -517,6 +517,7 @@ rspamd_mime_process_multipart_node (struct rspamd_task *task, npart->parent_part = multipart; npart->raw_headers = g_hash_table_new_full (rspamd_strcase_hash, rspamd_strcase_equal, NULL, rspamd_ptr_array_free_hard); + npart->headers_order = g_queue_new (); g_ptr_array_add (multipart->specific.mp.children, npart); if (hdr_pos > 0 && hdr_pos < str.len) { @@ -527,6 +528,7 @@ rspamd_mime_process_multipart_node (struct rspamd_task *task, if (task->raw_headers_content.len > 0) { rspamd_mime_headers_process (task, npart->raw_headers, + npart->headers_order, npart->raw_headers_str, npart->raw_headers_len, FALSE); @@ -1032,6 +1034,7 @@ rspamd_mime_parse_message (struct rspamd_task *task, if (task->raw_headers_content.len > 0) { rspamd_mime_headers_process (task, task->raw_headers, + task->headers_order, task->raw_headers_content.begin, task->raw_headers_content.len, TRUE); @@ -1052,6 +1055,7 @@ rspamd_mime_parse_message (struct rspamd_task *task, if (task->raw_headers_content.len > 0) { rspamd_mime_headers_process (task, task->raw_headers, + task->headers_order, task->raw_headers_content.begin, task->raw_headers_content.len, TRUE); @@ -1078,6 +1082,7 @@ rspamd_mime_parse_message (struct rspamd_task *task, hdr_pos = rspamd_string_find_eoh (&str, &body_pos); npart->raw_headers = g_hash_table_new_full (rspamd_strcase_hash, rspamd_strcase_equal, NULL, rspamd_ptr_array_free_hard); + npart->headers_order = g_queue_new (); if (hdr_pos > 0 && hdr_pos < str.len) { npart->raw_headers_str = str.str; @@ -1086,6 +1091,7 @@ rspamd_mime_parse_message (struct rspamd_task *task, if (npart->raw_headers_len > 0) { rspamd_mime_headers_process (task, npart->raw_headers, + npart->headers_order, npart->raw_headers_str, npart->raw_headers_len, FALSE); diff --git a/src/libserver/task.c b/src/libserver/task.c index cbd9a7a31..d2313afd7 100644 --- a/src/libserver/task.c +++ b/src/libserver/task.c @@ -90,6 +90,7 @@ rspamd_task_new (struct rspamd_worker *worker, struct rspamd_config *cfg) new_task->raw_headers = g_hash_table_new_full (rspamd_strcase_hash, rspamd_strcase_equal, NULL, rspamd_ptr_array_free_hard); + new_task->headers_order = g_queue_new (); new_task->request_headers = g_hash_table_new_full (rspamd_ftok_icase_hash, rspamd_ftok_icase_equal, rspamd_fstring_mapped_ftok_free, rspamd_request_header_dtor); @@ -100,19 +101,22 @@ rspamd_task_new (struct rspamd_worker *worker, struct rspamd_config *cfg) rspamd_ftok_icase_equal, rspamd_fstring_mapped_ftok_free, rspamd_fstring_mapped_ftok_free); rspamd_mempool_add_destructor (new_task->task_pool, - (rspamd_mempool_destruct_t) g_hash_table_unref, - new_task->reply_headers); + (rspamd_mempool_destruct_t) g_hash_table_unref, + new_task->reply_headers); rspamd_mempool_add_destructor (new_task->task_pool, - (rspamd_mempool_destruct_t) g_hash_table_unref, - new_task->raw_headers); + (rspamd_mempool_destruct_t) g_hash_table_unref, + new_task->raw_headers); + rspamd_mempool_add_destructor (new_task->task_pool, + (rspamd_mempool_destruct_t) g_queue_free, + new_task->headers_order); new_task->emails = g_hash_table_new (rspamd_email_hash, rspamd_emails_cmp); rspamd_mempool_add_destructor (new_task->task_pool, - (rspamd_mempool_destruct_t) g_hash_table_unref, - new_task->emails); + (rspamd_mempool_destruct_t) g_hash_table_unref, + new_task->emails); new_task->urls = g_hash_table_new (rspamd_url_hash, rspamd_urls_cmp); rspamd_mempool_add_destructor (new_task->task_pool, - (rspamd_mempool_destruct_t) g_hash_table_unref, - new_task->urls); + (rspamd_mempool_destruct_t) g_hash_table_unref, + new_task->urls); new_task->parts = g_ptr_array_sized_new (4); rspamd_mempool_add_destructor (new_task->task_pool, rspamd_ptr_array_free_hard, new_task->parts); @@ -208,6 +212,8 @@ rspamd_task_free (struct rspamd_task *task) g_hash_table_unref (p->raw_headers); } + g_queue_free (p->headers_order); + if (IS_CT_MULTIPART (p->ct)) { if (p->specific.mp.children) { g_ptr_array_free (p->specific.mp.children, TRUE); diff --git a/src/libserver/task.h b/src/libserver/task.h index db86a9ac9..e892e776f 100644 --- a/src/libserver/task.h +++ b/src/libserver/task.h @@ -157,6 +157,7 @@ struct rspamd_task { GHashTable *urls; /**< list of parsed urls */ GHashTable *emails; /**< list of parsed emails */ GHashTable *raw_headers; /**< list of raw headers */ + GQueue *headers_order; /**< order of raw headers */ GHashTable *results; /**< hash table of metric_result indexed by * metric's name */ GHashTable *lua_cache; /**< cache of lua objects */ -- 2.39.5