diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2019-07-10 15:01:41 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2019-07-12 15:18:17 +0100 |
commit | 9aa104f6a709198527538c6553a0a96db1ecc5e9 (patch) | |
tree | b5973624d406f88ea4c179d2bc6e7b17965572d8 /src | |
parent | 4c1013fbdd26d37a31ac5895e0ecefb248fa4c6d (diff) | |
download | rspamd-9aa104f6a709198527538c6553a0a96db1ecc5e9.tar.gz rspamd-9aa104f6a709198527538c6553a0a96db1ecc5e9.zip |
[Project] Start mime structures refactoring
Diffstat (limited to 'src')
-rw-r--r-- | src/libmime/message.c | 27 | ||||
-rw-r--r-- | src/libmime/message.h | 38 | ||||
-rw-r--r-- | src/libmime/mime_headers.c | 22 | ||||
-rw-r--r-- | src/libmime/mime_headers.h | 72 | ||||
-rw-r--r-- | src/libserver/dkim.c | 4 | ||||
-rw-r--r-- | src/libserver/protocol.c | 2 | ||||
-rw-r--r-- | src/libserver/task.c | 8 | ||||
-rw-r--r-- | src/libserver/task.h | 23 |
8 files changed, 122 insertions, 74 deletions
diff --git a/src/libmime/message.c b/src/libmime/message.c index 482287769..1d9da26f2 100644 --- a/src/libmime/message.c +++ b/src/libmime/message.c @@ -1091,6 +1091,22 @@ rspamd_message_from_data (struct rspamd_task *task, const guchar *start, task->queue_id = mid; } +static void +rspamd_message_dtor (struct rspamd_message *msg) +{ + +} + +struct rspamd_message* +rspamd_message_new (struct rspamd_task *task) +{ + struct rspamd_message *msg; + + msg = rspamd_mempool_alloc0 (sizeof (*msg)); + + +} + gboolean rspamd_message_parse (struct rspamd_task *task) { @@ -1593,3 +1609,14 @@ rspamd_message_get_mime_header_array (struct rspamd_task *task, return ret; } + +struct rspamd_message * +rspamd_message_ref (struct rspamd_message *msg) +{ + REF_RETAIN (msg); +} + +void rspamd_message_unref (struct rspamd_message *msg) +{ + REF_RELEASE (msg);s +} diff --git a/src/libmime/message.h b/src/libmime/message.h index 17c4ec5b9..7d58fa88f 100644 --- a/src/libmime/message.h +++ b/src/libmime/message.h @@ -12,6 +12,8 @@ #include "cryptobox.h" #include "mime_headers.h" #include "content_type.h" +#include "libutil/ref.h" +#include "libutil/str_util.h" #include <unicode/uchar.h> #include <unicode/utext.h> @@ -129,6 +131,36 @@ struct rspamd_mime_text_part { guint unicode_scripts; }; +struct rspamd_message { + const gchar *message_id; + gchar *subject; + + GPtrArray *parts; /**< list of parsed parts */ + GPtrArray *text_parts; /**< list of text parts */ + struct { + const gchar *begin; + gsize len; + const gchar *body_start; + } raw_headers_content; /**< list of raw headers */ + GPtrArray *received; /**< list of received headers */ + GHashTable *urls; /**< list of parsed urls */ + GHashTable *emails; /**< list of parsed emails */ + GHashTable *raw_headers; /**< list of raw headers */ + GQueue *headers_order; /**< order of raw headers */ + GPtrArray *rcpt_mime; + GPtrArray *from_mime; + enum rspamd_newlines_type nlines_type; /**< type of newlines (detected on most of headers */ + ref_entry_t ref; +}; + +#ifndef FULL_DEBUG +#define MESSAGE_FIELD(task, field) ((task)->message->(field)) +#else +#define MESSAGE_FIELD(task, field) do { \ + if (!task->message) {msg_err_task("no message when getting field %s", #field); g_assert(0);} \ + } while(0), ((task)->message->(field)) +#endif + /** * Parse and pre-process mime message * @param task worker_task object @@ -191,6 +223,12 @@ enum rspamd_cte rspamd_cte_from_string (const gchar *str); */ const gchar *rspamd_cte_to_string (enum rspamd_cte ct); +struct rspamd_message* rspamd_message_new (struct rspamd_task *task); + +struct rspamd_message *rspamd_message_ref (struct rspamd_message *msg); + +void rspamd_message_unref (struct rspamd_message *msg); + #ifdef __cplusplus } #endif diff --git a/src/libmime/mime_headers.c b/src/libmime/mime_headers.c index cf6d0f763..952a163b9 100644 --- a/src/libmime/mime_headers.c +++ b/src/libmime/mime_headers.c @@ -44,31 +44,31 @@ rspamd_mime_header_check_special (struct rspamd_task *task, g_ptr_array_add (task->received, recv); } - rh->type = RSPAMD_HEADER_RECEIVED; + rh->flags = RSPAMD_HEADER_RECEIVED; break; case 0x76F31A09F4352521ULL: /* to */ task->rcpt_mime = rspamd_email_address_from_mime (task->task_pool, rh->decoded, strlen (rh->decoded), task->rcpt_mime); - rh->type = RSPAMD_HEADER_TO|RSPAMD_HEADER_RCPT|RSPAMD_HEADER_UNIQUE; + rh->flags = RSPAMD_HEADER_TO|RSPAMD_HEADER_RCPT|RSPAMD_HEADER_UNIQUE; break; case 0x7EB117C1480B76ULL: /* cc */ task->rcpt_mime = rspamd_email_address_from_mime (task->task_pool, rh->decoded, strlen (rh->decoded), task->rcpt_mime); - rh->type = RSPAMD_HEADER_CC|RSPAMD_HEADER_RCPT|RSPAMD_HEADER_UNIQUE; + rh->flags = RSPAMD_HEADER_CC|RSPAMD_HEADER_RCPT|RSPAMD_HEADER_UNIQUE; break; case 0xE4923E11C4989C8DULL: /* bcc */ task->rcpt_mime = rspamd_email_address_from_mime (task->task_pool, rh->decoded, strlen (rh->decoded), task->rcpt_mime); - rh->type = RSPAMD_HEADER_BCC|RSPAMD_HEADER_RCPT|RSPAMD_HEADER_UNIQUE; + rh->flags = RSPAMD_HEADER_BCC|RSPAMD_HEADER_RCPT|RSPAMD_HEADER_UNIQUE; break; case 0x41E1985EDC1CBDE4ULL: /* from */ task->from_mime = rspamd_email_address_from_mime (task->task_pool, rh->decoded, strlen (rh->decoded), task->from_mime); - rh->type = RSPAMD_HEADER_FROM|RSPAMD_HEADER_SENDER|RSPAMD_HEADER_UNIQUE; + rh->flags = RSPAMD_HEADER_FROM|RSPAMD_HEADER_SENDER|RSPAMD_HEADER_UNIQUE; break; case 0x43A558FC7C240226ULL: /* message-id */ { - rh->type = RSPAMD_HEADER_MESSAGE_ID|RSPAMD_HEADER_UNIQUE; + rh->flags = RSPAMD_HEADER_MESSAGE_ID|RSPAMD_HEADER_UNIQUE; p = rh->decoded; end = p + strlen (p); @@ -107,20 +107,20 @@ rspamd_mime_header_check_special (struct rspamd_task *task, if (task->subject == NULL) { task->subject = rh->decoded; } - rh->type = RSPAMD_HEADER_SUBJECT|RSPAMD_HEADER_UNIQUE; + rh->flags = RSPAMD_HEADER_SUBJECT|RSPAMD_HEADER_UNIQUE; break; case 0xEE4AA2EAAC61D6F4ULL: /* return-path */ if (task->from_envelope == NULL) { task->from_envelope = rspamd_email_address_from_smtp (rh->decoded, strlen (rh->decoded)); } - rh->type = RSPAMD_HEADER_RETURN_PATH|RSPAMD_HEADER_UNIQUE; + rh->flags = RSPAMD_HEADER_RETURN_PATH|RSPAMD_HEADER_UNIQUE; break; case 0xB9EEFAD2E93C2161ULL: /* delivered-to */ if (task->deliver_to == NULL) { task->deliver_to = rh->decoded; } - rh->type = RSPAMD_HEADER_DELIVERED_TO; + rh->flags = RSPAMD_HEADER_DELIVERED_TO; break; case 0x2EC3BFF3C393FC10ULL: /* date */ case 0xAC0DDB1A1D214CAULL: /* sender */ @@ -128,7 +128,7 @@ rspamd_mime_header_check_special (struct rspamd_task *task, case 0x81CD9E9131AB6A9AULL: /* content-type */ case 0xC39BD9A75AA25B60ULL: /* content-transfer-encoding */ case 0xB3F6704CB3AD6589ULL: /* references */ - rh->type = RSPAMD_HEADER_UNIQUE; + rh->flags = RSPAMD_HEADER_UNIQUE; break; } } @@ -472,7 +472,7 @@ rspamd_mime_headers_process (struct rspamd_task *task, GHashTable *target, while (cur) { nh = cur->data; - if (nh->name && nh->type != RSPAMD_HEADER_RECEIVED) { + if (nh->name && nh->flags != RSPAMD_HEADER_RECEIVED) { rspamd_cryptobox_hash_update (&hs, nh->name, strlen (nh->name)); } diff --git a/src/libmime/mime_headers.h b/src/libmime/mime_headers.h index 5cb300978..60fd7b697 100644 --- a/src/libmime/mime_headers.h +++ b/src/libmime/mime_headers.h @@ -31,20 +31,22 @@ enum rspamd_rfc2047_encoding { RSPAMD_RFC2047_BASE64, }; -enum rspamd_mime_header_special_type { - RSPAMD_HEADER_GENERIC = 0, - RSPAMD_HEADER_RECEIVED = 1 << 0, - RSPAMD_HEADER_TO = 1 << 2, - RSPAMD_HEADER_CC = 1 << 3, - RSPAMD_HEADER_BCC = 1 << 4, - RSPAMD_HEADER_FROM = 1 << 5, - RSPAMD_HEADER_MESSAGE_ID = 1 << 6, - RSPAMD_HEADER_SUBJECT = 1 << 7, - RSPAMD_HEADER_RETURN_PATH = 1 << 8, - RSPAMD_HEADER_DELIVERED_TO = 1 << 9, - RSPAMD_HEADER_SENDER = 1 << 10, - RSPAMD_HEADER_RCPT = 1 << 11, - RSPAMD_HEADER_UNIQUE = 1 << 12 +enum rspamd_mime_header_flags { + RSPAMD_HEADER_GENERIC = 0u, + RSPAMD_HEADER_RECEIVED = 1u << 0u, + RSPAMD_HEADER_TO = 1u << 2u, + RSPAMD_HEADER_CC = 1u << 3u, + RSPAMD_HEADER_BCC = 1u << 4u, + RSPAMD_HEADER_FROM = 1u << 5u, + RSPAMD_HEADER_MESSAGE_ID = 1u << 6u, + RSPAMD_HEADER_SUBJECT = 1u << 7u, + RSPAMD_HEADER_RETURN_PATH = 1u << 8u, + RSPAMD_HEADER_DELIVERED_TO = 1u << 9u, + RSPAMD_HEADER_SENDER = 1u << 10u, + RSPAMD_HEADER_RCPT = 1u << 11u, + RSPAMD_HEADER_UNIQUE = 1u << 12u, + RSPAMD_HEADER_EMPTY_SEPARATOR = 1u << 13u, + RSPAMD_HEADER_TAB_SEPARATED = 1u << 14u, }; struct rspamd_mime_header { @@ -52,32 +54,31 @@ struct rspamd_mime_header { gchar *value; const gchar *raw_value; /* As it is in the message (unfolded and unparsed) */ gsize raw_len; - gboolean tab_separated; - gboolean empty_separator; guint order; - enum rspamd_mime_header_special_type type; + int flags; /* see enum rspamd_mime_header_flags */ gchar *separator; gchar *decoded; + struct rspamd_mime_header *prev, *next; /* Headers with the same name */ + struct rspamd_mime_header *ord_prev, *ord_next; /* Overall order of headers */ }; enum rspamd_received_type { RSPAMD_RECEIVED_SMTP = 0, - RSPAMD_RECEIVED_ESMTP, - RSPAMD_RECEIVED_ESMTPA, - RSPAMD_RECEIVED_ESMTPS, - RSPAMD_RECEIVED_ESMTPSA, - RSPAMD_RECEIVED_LMTP, - RSPAMD_RECEIVED_IMAP, - RSPAMD_RECEIVED_LOCAL, - RSPAMD_RECEIVED_HTTP, - RSPAMD_RECEIVED_MAPI, - RSPAMD_RECEIVED_UNKNOWN + RSPAMD_RECEIVED_ESMTP = 1u << 0u, + RSPAMD_RECEIVED_ESMTPA = 1u << 1u, + RSPAMD_RECEIVED_ESMTPS = 1u << 2u, + RSPAMD_RECEIVED_ESMTPSA = 1u << 3u, + RSPAMD_RECEIVED_LMTP = 1u << 4u, + RSPAMD_RECEIVED_IMAP = 1u << 5u, + RSPAMD_RECEIVED_LOCAL = 1u << 6u, + RSPAMD_RECEIVED_HTTP = 1u << 7u, + RSPAMD_RECEIVED_MAPI = 1u << 8u, + RSPAMD_RECEIVED_UNKNOWN = 1u << 9u, + RSPAMD_RECEIVED_FLAG_ARTIFICIAL = (1u << 10u), + RSPAMD_RECEIVED_FLAG_SSL = (1u << 11u), + RSPAMD_RECEIVED_FLAG_AUTHENTICATED = (1u << 12u), }; -#define RSPAMD_RECEIVED_FLAG_ARTIFICIAL (1 << 0) -#define RSPAMD_RECEIVED_FLAG_SSL (1 << 1) -#define RSPAMD_RECEIVED_FLAG_AUTHENTICATED (1 << 2) - struct received_header { const gchar *from_hostname; const gchar *from_ip; @@ -88,8 +89,8 @@ struct received_header { rspamd_inet_addr_t *addr; struct rspamd_mime_header *hdr; time_t timestamp; - enum rspamd_received_type type; - gint flags; + gint flags; /* See enum rspamd_received_type */ + struct received_header *prev, *next; }; /** @@ -100,8 +101,9 @@ struct received_header { * @param len * @param check_newlines */ -void rspamd_mime_headers_process (struct rspamd_task *task, GHashTable *target, - GQueue *order, +void rspamd_mime_headers_process (struct rspamd_task *task, + GHashTable *target, + struct rspamd_mime_header **order_ptr, const gchar *in, gsize len, gboolean check_newlines); diff --git a/src/libserver/dkim.c b/src/libserver/dkim.c index 9386c5cdc..c0ea29f08 100644 --- a/src/libserver/dkim.c +++ b/src/libserver/dkim.c @@ -2175,7 +2175,7 @@ rspamd_dkim_canonize_header (struct rspamd_dkim_common_ctx *ctx, if (ar) { /* Check uniqueness of the header */ rh = g_ptr_array_index (ar, 0); - if ((rh->type & RSPAMD_HEADER_UNIQUE) && ar->len > 1) { + if ((rh->flags & RSPAMD_HEADER_UNIQUE) && ar->len > 1) { guint64 random_cookie = ottery_rand_uint64 (); msg_warn_dkim ("header %s is intended to be unique by" @@ -2210,7 +2210,7 @@ rspamd_dkim_canonize_header (struct rspamd_dkim_common_ctx *ctx, (gint)rh->raw_len, rh->raw_value); } else { - if (ctx->is_sign && (rh->type & RSPAMD_HEADER_FROM)) { + if (ctx->is_sign && (rh->flags & RSPAMD_HEADER_FROM)) { /* Special handling of the From handling when rewrite is done */ gboolean has_rewrite = FALSE; guint i; diff --git a/src/libserver/protocol.c b/src/libserver/protocol.c index bef7a0452..ddd072882 100644 --- a/src/libserver/protocol.c +++ b/src/libserver/protocol.c @@ -1409,6 +1409,7 @@ rspamd_protocol_http_reply (struct rspamd_http_message *msg, gint flags = RSPAMD_PROTOCOL_DEFAULT; struct rspamd_action *action; +#if 0 /* Write custom headers */ g_hash_table_iter_init (&hiter, task->reply_headers); while (g_hash_table_iter_next (&hiter, &h, &v)) { @@ -1416,6 +1417,7 @@ rspamd_protocol_http_reply (struct rspamd_http_message *msg, rspamd_http_message_add_header (msg, hn->begin, hv->begin); } +#endif flags |= RSPAMD_PROTOCOL_URLS; diff --git a/src/libserver/task.c b/src/libserver/task.c index 04be61744..88ee730a3 100644 --- a/src/libserver/task.c +++ b/src/libserver/task.c @@ -130,14 +130,8 @@ rspamd_task_new (struct rspamd_worker *worker, struct rspamd_config *cfg, rspamd_ftok_icase_equal, rspamd_fstring_mapped_ftok_free, rspamd_request_header_dtor); rspamd_mempool_add_destructor (new_task->task_pool, - (rspamd_mempool_destruct_t) g_hash_table_unref, - new_task->request_headers); - new_task->reply_headers = g_hash_table_new_full (rspamd_ftok_icase_hash, - rspamd_ftok_icase_equal, rspamd_fstring_mapped_ftok_free, - rspamd_fstring_mapped_ftok_free); - rspamd_mempool_add_destructor (new_task->task_pool, (rspamd_mempool_destruct_t) g_hash_table_unref, - new_task->reply_headers); + new_task->request_headers); rspamd_mempool_add_destructor (new_task->task_pool, (rspamd_mempool_destruct_t) g_hash_table_unref, new_task->raw_headers); diff --git a/src/libserver/task.h b/src/libserver/task.h index ac55dd910..00caf3ab6 100644 --- a/src/libserver/task.h +++ b/src/libserver/task.h @@ -134,6 +134,7 @@ enum rspamd_task_stage { struct rspamd_email_address; struct rspamd_lang_detector; enum rspamd_newlines_type; +struct rspamd_message; /** * Worker task structure @@ -147,15 +148,12 @@ struct rspamd_task { gulong message_len; /**< Message length */ gchar *helo; /**< helo header value */ gchar *queue_id; /**< queue id if specified */ - const gchar *message_id; /**< message id */ rspamd_inet_addr_t *from_addr; /**< from addr for a task */ rspamd_inet_addr_t *client_addr; /**< address of connected socket */ gchar *deliver_to; /**< address to deliver */ gchar *user; /**< user to deliver */ - gchar *subject; /**< subject (for non-mime) */ const gchar *hostname; /**< hostname reported by MTA */ GHashTable *request_headers; /**< HTTP headers in a request */ - GHashTable *reply_headers; /**< Custom reply headers */ struct { const gchar *begin; gsize len; @@ -163,29 +161,14 @@ struct rspamd_task { } msg; /**< message buffer */ struct rspamd_http_connection *http_conn; /**< HTTP server connection */ struct rspamd_async_session *s; /**< async session object */ - GPtrArray *parts; /**< list of parsed parts */ - GPtrArray *text_parts; /**< list of text parts */ - struct { - const gchar *begin; - gsize len; - const gchar *body_start; - } raw_headers_content; /**< list of raw headers */ - GPtrArray *received; /**< list of received headers */ - GHashTable *urls; /**< list of parsed urls */ - GHashTable *emails; /**< list of parsed emails */ - GHashTable *raw_headers; /**< list of raw headers */ - GQueue *headers_order; /**< order of raw headers */ struct rspamd_metric_result *result; /**< Metric result */ GHashTable *lua_cache; /**< cache of lua objects */ GPtrArray *tokens; /**< statistics tokens */ GArray *meta_words; /**< rspamd_stat_token_t produced from meta headers (e.g. Subject) */ - GPtrArray *rcpt_mime; GPtrArray *rcpt_envelope; /**< array of rspamd_email_address */ - GPtrArray *from_mime; struct rspamd_email_address *from_envelope; - enum rspamd_newlines_type nlines_type; /**< type of newlines (detected on most of headers */ ucl_object_t *messages; /**< list of messages that would be reported */ struct rspamd_re_runtime *re_rt; /**< regexp runtime */ @@ -215,6 +198,7 @@ struct rspamd_task { const gchar *classifier; /**< Classifier to learn (if needed) */ struct rspamd_lang_detector *lang_det; /**< Languages detector */ + struct rspamd_message *message; guchar digest[16]; }; @@ -252,7 +236,8 @@ gboolean rspamd_task_fin (void *arg); * @return */ gboolean rspamd_task_load_message (struct rspamd_task *task, - struct rspamd_http_message *msg, const gchar *start, gsize len); + struct rspamd_http_message *msg, + const gchar *start, gsize len); /** * Process task |