From 58eb1a0b68572f86be8c861d1f38db9ef34b712b Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Sun, 14 Jul 2019 12:38:32 +0100 Subject: [PATCH] [Rework] Rework request headers processing --- src/libserver/protocol.c | 94 ++++++++++++++++++++------------------- src/libserver/task.c | 96 ++++++++++++++++++++-------------------- src/libserver/task.h | 35 ++++++++++----- src/lua/lua_task.c | 8 ++++ 4 files changed, 126 insertions(+), 107 deletions(-) diff --git a/src/libserver/protocol.c b/src/libserver/protocol.c index 8834529ee..6d436d56d 100644 --- a/src/libserver/protocol.c +++ b/src/libserver/protocol.c @@ -58,7 +58,7 @@ rspamd_protocol_quark (void) * Remove <> from the fixed string and copy it to the pool */ static gchar * -rspamd_protocol_escape_braces (struct rspamd_task *task, rspamd_fstring_t *in) +rspamd_protocol_escape_braces (struct rspamd_task *task, rspamd_ftok_t *in) { guint nchars = 0; const gchar *p; @@ -68,7 +68,7 @@ rspamd_protocol_escape_braces (struct rspamd_task *task, rspamd_fstring_t *in) g_assert (in != NULL); g_assert (in->len > 0); - p = in->str; + p = in->begin; while ((g_ascii_isspace (*p) || *p == '<') && nchars < in->len) { if (*p == '<') { @@ -81,7 +81,7 @@ rspamd_protocol_escape_braces (struct rspamd_task *task, rspamd_fstring_t *in) tok.begin = p; - p = in->str + in->len - 1; + p = in->begin + in->len - 1; tok.len = in->len - nchars; while (g_ascii_isspace (*p) && tok.len > 0) { @@ -344,28 +344,34 @@ gboolean rspamd_protocol_handle_headers (struct rspamd_task *task, struct rspamd_http_message *msg) { - rspamd_fstring_t *hn, *hv; rspamd_ftok_t *hn_tok, *hv_tok, srch; gboolean fl, has_ip = FALSE; struct rspamd_http_header *header, *h, *htmp; + gchar *ntok; HASH_ITER (hh, msg->headers, header, htmp) { DL_FOREACH (header, h) { - hn = rspamd_fstring_new_init (h->name.begin, h->name.len); - hv = rspamd_fstring_new_init (h->value.begin, h->value.len); - hn_tok = rspamd_ftok_map (hn); - hv_tok = rspamd_ftok_map (hv); + ntok = 
rspamd_mempool_ftokdup (task->task_pool, &h->name); + hn_tok = rspamd_mempool_alloc (task->task_pool, sizeof (*hn_tok)); + hn_tok->begin = ntok; + hn_tok->len = h->name.len; + + + ntok = rspamd_mempool_ftokdup (task->task_pool, &h->value); + hv_tok = rspamd_mempool_alloc (task->task_pool, sizeof (*hv_tok)); + hv_tok->begin = ntok; + hv_tok->len = h->value.len; switch (*hn_tok->begin) { case 'd': case 'D': IF_HEADER (DELIVER_TO_HEADER) { - task->deliver_to = rspamd_protocol_escape_braces (task, hv); + task->deliver_to = rspamd_protocol_escape_braces (task, hv_tok); msg_debug_protocol ("read deliver-to header, value: %s", task->deliver_to); } else { - msg_debug_protocol ("wrong header: %V", hn); + msg_debug_protocol ("wrong header: %T", hn_tok); } break; case 'h': @@ -383,12 +389,13 @@ rspamd_protocol_handle_headers (struct rspamd_task *task, case 'f': case 'F': IF_HEADER (FROM_HEADER) { - task->from_envelope = rspamd_email_address_from_smtp (hv->str, - hv->len); - msg_debug_protocol ("read from header, value: %V", hv); + task->from_envelope = rspamd_email_address_from_smtp ( + hv_tok->begin, + hv_tok->len); + msg_debug_protocol ("read from header, value: %T", hv_tok); if (!task->from_envelope) { - msg_err_protocol ("bad from header: '%V'", hv); + msg_err_protocol ("bad from header: '%T'", hv_tok); task->flags |= RSPAMD_TASK_FLAG_BROKEN_HEADERS; } } @@ -401,8 +408,8 @@ rspamd_protocol_handle_headers (struct rspamd_task *task, case 'j': case 'J': IF_HEADER (JSON_HEADER) { - msg_debug_protocol ("read json header, value: %V", hv); - fl = rspamd_config_parse_flag (hv->str, hv->len); + msg_debug_protocol ("read json header, value: %T", hv_tok); + fl = rspamd_config_parse_flag (hv_tok->begin, hv_tok->len); if (fl) { task->flags |= RSPAMD_TASK_FLAG_JSON; } @@ -411,7 +418,7 @@ rspamd_protocol_handle_headers (struct rspamd_task *task, } } else { - msg_debug_protocol ("wrong header: %V", hn); + msg_debug_protocol ("wrong header: %T", hn_tok); } break; case 'q': @@ -422,20 
+429,20 @@ rspamd_protocol_handle_headers (struct rspamd_task *task, msg_debug_protocol ("read queue_id header, value: %s", task->queue_id); } else { - msg_debug_protocol ("wrong header: %V", hn); + msg_debug_protocol ("wrong header: %T", hn_tok); } break; case 'r': case 'R': IF_HEADER (RCPT_HEADER) { rspamd_protocol_process_recipients (task, hv_tok); - msg_debug_protocol ("read rcpt header, value: %V", hv); + msg_debug_protocol ("read rcpt header, value: %T", hv_tok); } IF_HEADER (RAW_DATA_HEADER) { srch.begin = "yes"; srch.len = 3; - msg_debug_protocol ("read raw data header, value: %V", hv); + msg_debug_protocol ("read raw data header, value: %T", hv_tok); if (rspamd_ftok_casecmp (hv_tok, &srch) == 0) { task->flags &= ~RSPAMD_TASK_FLAG_MIME; @@ -446,16 +453,17 @@ rspamd_protocol_handle_headers (struct rspamd_task *task, case 'i': case 'I': IF_HEADER (IP_ADDR_HEADER) { - if (!rspamd_parse_inet_address (&task->from_addr, hv->str, hv->len)) { - msg_err_protocol ("bad ip header: '%V'", hv); + if (!rspamd_parse_inet_address (&task->from_addr, + hv_tok->begin, hv_tok->len)) { + msg_err_protocol ("bad ip header: '%T'", hv_tok); } else { - msg_debug_protocol ("read IP header, value: %V", hv); + msg_debug_protocol ("read IP header, value: %T", hv_tok); has_ip = TRUE; } } else { - msg_debug_protocol ("wrong header: %V", hn); + msg_debug_protocol ("wrong header: %T", hn_tok); } break; case 'p': @@ -464,7 +472,7 @@ rspamd_protocol_handle_headers (struct rspamd_task *task, srch.begin = "all"; srch.len = 3; - msg_debug_protocol ("read pass header, value: %V", hv); + msg_debug_protocol ("read pass header, value: %T", hv_tok); if (rspamd_ftok_casecmp (hv_tok, &srch) == 0) { task->flags |= RSPAMD_TASK_FLAG_PASS_ALL; @@ -472,14 +480,14 @@ rspamd_protocol_handle_headers (struct rspamd_task *task, } } IF_HEADER (PROFILE_HEADER) { - msg_debug_protocol ("read profile header, value: %V", hv); + msg_debug_protocol ("read profile header, value: %T", hv_tok); task->flags |=
RSPAMD_TASK_FLAG_PROFILE; } break; case 's': case 'S': IF_HEADER (SETTINGS_ID_HEADER) { - msg_debug_protocol ("read settings-id header, value: %V", hv); + msg_debug_protocol ("read settings-id header, value: %T", hv_tok); task->settings_elt = rspamd_config_find_settings_name_ref ( task->cfg, hv_tok->begin, hv_tok->len); @@ -492,15 +500,15 @@ rspamd_protocol_handle_headers (struct rspamd_task *task, cur->name, cur->id); } - msg_warn_protocol ("unknown settings id: %V(%d); known_ids: %v", - hv, + msg_warn_protocol ("unknown settings id: %T(%d); known_ids: %v", + hv_tok, rspamd_config_name_to_id (hv_tok->begin, hv_tok->len), known_ids); g_string_free (known_ids, TRUE); } else { - msg_debug_protocol ("applied settings id %V -> %ud", hv, + msg_debug_protocol ("applied settings id %T -> %ud", hv_tok, task->settings_elt->id); } } @@ -512,7 +520,7 @@ rspamd_protocol_handle_headers (struct rspamd_task *task, * We must ignore User header in case of spamc, as SA has * different meaning of this header */ - msg_debug_protocol ("read user header, value: %V", hv); + msg_debug_protocol ("read user header, value: %T", hv_tok); if (!RSPAMD_TASK_IS_SPAMC (task)) { task->user = rspamd_mempool_ftokdup (task->task_pool, hv_tok); @@ -522,7 +530,7 @@ rspamd_protocol_handle_headers (struct rspamd_task *task, } } IF_HEADER (URLS_HEADER) { - msg_debug_protocol ("read urls header, value: %V", hv); + msg_debug_protocol ("read urls header, value: %T", hv_tok); srch.begin = "extended"; srch.len = 8; @@ -535,7 +543,7 @@ rspamd_protocol_handle_headers (struct rspamd_task *task, /* TODO: add more formats there */ } IF_HEADER (USER_AGENT_HEADER) { - msg_debug_protocol ("read user-agent header, value: %V", hv); + msg_debug_protocol ("read user-agent header, value: %T", hv_tok); if (hv_tok->len == 6 && rspamd_lc_cmp (hv_tok->begin, "rspamc", 6) == 0) { @@ -546,7 +554,7 @@ rspamd_protocol_handle_headers (struct rspamd_task *task, case 'l': case 'L': IF_HEADER (NO_LOG_HEADER) { - msg_debug_protocol 
("read log header, value: %V", hv); + msg_debug_protocol ("read log header, value: %T", hv_tok); srch.begin = "no"; srch.len = 2; @@ -558,15 +566,9 @@ rspamd_protocol_handle_headers (struct rspamd_task *task, case 'm': case 'M': IF_HEADER (MLEN_HEADER) { - msg_debug_protocol ("read message length header, value: %V", hv); - if (!rspamd_strtoul (hv_tok->begin, - hv_tok->len, - &task->message_len)) { - msg_err_protocol ("Invalid message length header: %V", hv); - } - else { - task->flags |= RSPAMD_TASK_FLAG_HAS_CONTROL; - } + msg_debug_protocol ("read message length header, value: %T", + hv_tok); + task->flags |= RSPAMD_TASK_FLAG_HAS_CONTROL; } IF_HEADER (MTA_TAG_HEADER) { gchar *mta_tag; @@ -586,18 +588,18 @@ rspamd_protocol_handle_headers (struct rspamd_task *task, } IF_HEADER (MILTER_HEADER) { task->flags |= RSPAMD_TASK_FLAG_MILTER; - msg_debug_protocol ("read Milter header, value: %V", hv); + msg_debug_protocol ("read Milter header, value: %T", hv_tok); } break; case 't': case 'T': IF_HEADER (TLS_CIPHER_HEADER) { task->flags |= RSPAMD_TASK_FLAG_SSL; - msg_debug_protocol ("read TLS cipher header, value: %V", hv); + msg_debug_protocol ("read TLS cipher header, value: %T", hv_tok); } break; default: - msg_debug_protocol ("generic header: %V", hn); + msg_debug_protocol ("generic header: %T", hn_tok); break; } diff --git a/src/libserver/task.c b/src/libserver/task.c index 950af5ec8..54f2510a2 100644 --- a/src/libserver/task.c +++ b/src/libserver/task.c @@ -16,7 +16,8 @@ #include "task.h" #include "rspamd.h" #include "filter.h" -#include "protocol.h" +#include "libserver/protocol.h" +#include "libserver/protocol_internal.h" #include "message.h" #include "lua/lua_common.h" #include "email_addr.h" @@ -40,6 +41,10 @@ #include +__KHASH_IMPL (rspamd_req_headers_hash, static inline, + rspamd_ftok_t *, struct rspamd_request_header_chain *, 1, + rspamd_ftok_icase_hash, rspamd_ftok_icase_equal) + /* * Do not print more than this amount of elts */ @@ -51,23 +56,6 @@ 
rspamd_task_quark (void) return g_quark_from_static_string ("task-error"); } -static void -rspamd_request_header_dtor (gpointer p) -{ - GPtrArray *ar = p; - guint i; - rspamd_ftok_t *tok; - - if (ar) { - for (i = 0; i < ar->len; i ++) { - tok = g_ptr_array_index (ar, i); - rspamd_fstring_mapped_ftok_free (tok); - } - - g_ptr_array_free (ar, TRUE); - } -} - /* * Create new task */ @@ -123,13 +111,7 @@ rspamd_task_new (struct rspamd_worker *worker, struct rspamd_config *cfg, new_task->task_pool = pool; } - new_task->request_headers = g_hash_table_new_full (rspamd_ftok_icase_hash, - rspamd_ftok_icase_equal, rspamd_fstring_mapped_ftok_free, - rspamd_request_header_dtor); - rspamd_mempool_add_destructor (new_task->task_pool, - (rspamd_mempool_destruct_t) g_hash_table_unref, - new_task->request_headers); - + new_task->request_headers = kh_init (rspamd_req_headers_hash); new_task->sock = -1; new_task->flags |= (RSPAMD_TASK_FLAG_MIME|RSPAMD_TASK_FLAG_JSON); new_task->result = rspamd_create_metric_result (new_task); @@ -314,6 +296,7 @@ rspamd_task_free (struct rspamd_task *task) REF_RELEASE (task->cfg); } + kh_destroy (rspamd_req_headers_hash, task->request_headers); rspamd_message_unref (task->message); if (task->flags & RSPAMD_TASK_FLAG_OWN_POOL) { @@ -647,15 +630,19 @@ rspamd_task_load_message (struct rspamd_task *task, } if (task->flags & RSPAMD_TASK_FLAG_HAS_CONTROL) { - /* We have control chunk, so we need to process it separately */ - if (task->msg.len < task->message_len) { + rspamd_ftok_t *hv = rspamd_task_get_request_header (task, MLEN_HEADER); + gulong message_len = 0; + + if (!hv || !rspamd_strtoul (hv->begin, hv->len, &message_len) || + task->msg.len < message_len) { msg_warn_task ("message has invalid message length: %ul and total len: %ul", - task->message_len, task->msg.len); + message_len, task->msg.len); g_set_error (&task->err, rspamd_task_quark(), RSPAMD_PROTOCOL_ERROR, "Invalid length"); return FALSE; } - control_len = task->msg.len - task->message_len; 
+ + control_len = task->msg.len - message_len; if (control_len > 0) { parser = ucl_parser_new (UCL_PARSER_KEY_LOWERCASE); @@ -1590,32 +1577,33 @@ rspamd_ftok_t * rspamd_task_get_request_header (struct rspamd_task *task, const gchar *name) { - GPtrArray *ret; - rspamd_ftok_t srch; - - srch.begin = (gchar *)name; - srch.len = strlen (name); - - ret = g_hash_table_lookup (task->request_headers, &srch); + struct rspamd_request_header_chain *ret = + rspamd_task_get_request_header_multiple (task, name); if (ret) { - return (rspamd_ftok_t *)g_ptr_array_index (ret, 0); + return ret->hdr; } return NULL; } -GPtrArray* +struct rspamd_request_header_chain * rspamd_task_get_request_header_multiple (struct rspamd_task *task, const gchar *name) { - GPtrArray *ret; + struct rspamd_request_header_chain *ret = NULL; rspamd_ftok_t srch; + khiter_t k; srch.begin = (gchar *)name; srch.len = strlen (name); - ret = g_hash_table_lookup (task->request_headers, &srch); + k = kh_get (rspamd_req_headers_hash, task->request_headers, + &srch); + + if (k != kh_end (task->request_headers)) { + ret = kh_value (task->request_headers, k); + } return ret; } @@ -1625,20 +1613,30 @@ void rspamd_task_add_request_header (struct rspamd_task *task, rspamd_ftok_t *name, rspamd_ftok_t *value) { - GPtrArray *ret; - ret = g_hash_table_lookup (task->request_headers, name); + khiter_t k; + gint res; + struct rspamd_request_header_chain *chain, *nchain; - if (ret) { - g_ptr_array_add (ret, value); + k = kh_put (rspamd_req_headers_hash, task->request_headers, + name, &res); + + if (res == 0) { + /* Existing name */ + nchain = rspamd_mempool_alloc (task->task_pool, sizeof (*nchain)); + nchain->hdr = value; + nchain->next = NULL; + chain = kh_value (task->request_headers, k); - /* We need to free name token */ - rspamd_fstring_mapped_ftok_free (name); + /* Slow but OK here */ + LL_APPEND (chain, nchain); } else { - ret = g_ptr_array_sized_new (2); - g_ptr_array_add (ret, value); - g_hash_table_replace 
(task->request_headers, name, ret); + nchain = rspamd_mempool_alloc (task->task_pool, sizeof (*nchain)); + nchain->hdr = value; + nchain->next = NULL; + + kh_value (task->request_headers, k) = nchain; } } diff --git a/src/libserver/task.h b/src/libserver/task.h index e0e1fc808..67f33488a 100644 --- a/src/libserver/task.h +++ b/src/libserver/task.h @@ -23,6 +23,7 @@ #include "mem_pool.h" #include "dns.h" #include "re_cache.h" +#include "khash.h" #ifdef __cplusplus extern "C" { @@ -121,6 +122,7 @@ enum rspamd_task_stage { #define RSPAMD_TASK_FLAG_MILTER (1u << 28u) #define RSPAMD_TASK_FLAG_SSL (1u << 29u) #define RSPAMD_TASK_FLAG_BAD_UNICODE (1u << 30u) +#define RSPAMD_TASK_FLAG_MESSAGE_REWRITE (1u << 31u) #define RSPAMD_TASK_IS_SKIPPED(task) (((task)->flags & RSPAMD_TASK_FLAG_SKIP)) #define RSPAMD_TASK_IS_JSON(task) (((task)->flags & RSPAMD_TASK_FLAG_JSON)) @@ -136,16 +138,28 @@ struct rspamd_lang_detector; enum rspamd_newlines_type; struct rspamd_message; +struct rspamd_task_data_storage { + const gchar *begin; + gsize len; + gchar *fpath; +}; + +struct rspamd_request_header_chain { + rspamd_ftok_t *hdr; + struct rspamd_request_header_chain *next; +}; + +__KHASH_TYPE (rspamd_req_headers_hash, rspamd_ftok_t *, struct rspamd_request_header_chain *) + /** * Worker task structure */ struct rspamd_task { struct rspamd_worker *worker; /**< pointer to worker object */ enum rspamd_command cmd; /**< command */ - gint sock; /**< socket descriptor */ - guint32 flags; /**< Bit flags */ - guint32 dns_requests; /**< number of DNS requests per this task */ - gulong message_len; /**< Message length */ + gint sock; /**< socket descriptor */ + guint32 dns_requests; /**< number of DNS requests per this task */ + guint32 flags; /**< Bit flags */ gchar *helo; /**< helo header value */ gchar *queue_id; /**< queue id if specified */ rspamd_inet_addr_t *from_addr; /**< from addr for a task */ @@ -153,12 +167,8 @@ struct rspamd_task { gchar *deliver_to; /**< address to deliver */ gchar 
*user; /**< user to deliver */ const gchar *hostname; /**< hostname reported by MTA */ - GHashTable *request_headers; /**< HTTP headers in a request */ - struct { - const gchar *begin; - gsize len; - gchar *fpath; - } msg; /**< message buffer */ + khash_t(rspamd_req_headers_hash) *request_headers; /**< HTTP headers in a request */ + struct rspamd_task_data_storage msg; /**< message buffer */ struct rspamd_http_connection *http_conn; /**< HTTP server connection */ struct rspamd_async_session *s; /**< async session object */ struct rspamd_metric_result *result; /**< Metric result */ @@ -308,8 +318,9 @@ rspamd_ftok_t *rspamd_task_get_request_header (struct rspamd_task *task, * @param name * @return */ -GPtrArray *rspamd_task_get_request_header_multiple (struct rspamd_task *task, - const gchar *name); +struct rspamd_request_header_chain *rspamd_task_get_request_header_multiple ( + struct rspamd_task *task, + const gchar *name); /** * Adds a new request header to task (name and value should be mapped to fstring) diff --git a/src/lua/lua_task.c b/src/lua/lua_task.c index 0b2dd4ab3..6182d64fb 100644 --- a/src/lua/lua_task.c +++ b/src/lua/lua_task.c @@ -1476,6 +1476,7 @@ lua_task_set_message (lua_State * L) lua_pop (L, 1); } + task->flags |= RSPAMD_TASK_FLAG_MESSAGE_REWRITE; task->msg.begin = buf; task->msg.len = final_len; } @@ -1502,6 +1503,7 @@ lua_task_set_message (lua_State * L) if (buf) { task->msg.begin = buf; task->msg.len = final_len; + task->flags |= RSPAMD_TASK_FLAG_MESSAGE_REWRITE; } } @@ -4778,6 +4780,8 @@ lua_task_has_flag (lua_State *L) RSPAMD_TASK_FLAG_BAD_UNICODE); LUA_TASK_GET_FLAG (flag, "mime", RSPAMD_TASK_FLAG_MIME); + LUA_TASK_GET_FLAG (flag, "message_rewrite", + RSPAMD_TASK_FLAG_MESSAGE_REWRITE); if (!found) { msg_warn_task ("unknown flag requested: %s", flag); @@ -4853,6 +4857,10 @@ lua_task_get_flags (lua_State *L) lua_pushstring (L, "milter"); lua_rawseti (L, -2, idx++); break; + case RSPAMD_TASK_FLAG_MESSAGE_REWRITE: + lua_pushstring (L, 
"message_rewrite"); + lua_rawseti (L, -2, idx++); + break; default: break; } -- 2.39.5