From 973c12585f0e73f3413163758f8fe960be373cb0 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Thu, 17 Oct 2024 14:33:45 +0100 Subject: [PATCH] [Rework] Allow `Content-Encoding` standard header for zstd compression --- src/client/rspamdclient.c | 1 + src/libserver/protocol.c | 481 +++++++++++++++--------------- src/libserver/protocol_internal.h | 2 + src/libserver/task.c | 6 +- src/libserver/worker_util.c | 3 +- src/lua/lua_http.c | 3 +- 6 files changed, 253 insertions(+), 243 deletions(-) diff --git a/src/client/rspamdclient.c b/src/client/rspamdclient.c index bcb3cf67c..d07b24332 100644 --- a/src/client/rspamdclient.c +++ b/src/client/rspamdclient.c @@ -441,6 +441,7 @@ rspamd_client_command(struct rspamd_client_connection *conn, if (compressed) { rspamd_http_message_add_header(req->msg, COMPRESSION_HEADER, "zstd"); + rspamd_http_message_add_header(req->msg, CONTENT_ENCODING_HEADER, "zstd"); if (dict_id != 0) { char dict_str[32]; diff --git a/src/libserver/protocol.c b/src/libserver/protocol.c index a86111ff2..7d007370b 100644 --- a/src/libserver/protocol.c +++ b/src/libserver/protocol.c @@ -490,271 +490,271 @@ rspamd_protocol_handle_headers(struct rspamd_task *task, hv_tok->len = h->value.len; switch (*hn_tok->begin) { - case 'd': - case 'D': - IF_HEADER(DELIVER_TO_HEADER) - { - task->deliver_to = rspamd_protocol_escape_braces(task, hv_tok); - msg_debug_protocol("read deliver-to header, value: %s", - task->deliver_to); - } - else - { - msg_debug_protocol("wrong header: %T", hn_tok); - } - break; - case 'h': - case 'H': - IF_HEADER(HELO_HEADER) - { - task->helo = rspamd_mempool_ftokdup(task->task_pool, hv_tok); - msg_debug_protocol("read helo header, value: %s", task->helo); - } - IF_HEADER(HOSTNAME_HEADER) - { - task->hostname = rspamd_mempool_ftokdup(task->task_pool, - hv_tok); - msg_debug_protocol("read hostname header, value: %s", task->hostname); - } - break; - case 'f': - case 'F': - IF_HEADER(FROM_HEADER) - { - if (hv_tok->len == 0) { - /* Replace '' 
with '<>' to fix parsing issue */ - RSPAMD_FTOK_ASSIGN(hv_tok, "<>"); + case 'd': + case 'D': + IF_HEADER(DELIVER_TO_HEADER) + { + task->deliver_to = rspamd_protocol_escape_braces(task, hv_tok); + msg_debug_protocol("read deliver-to header, value: %s", + task->deliver_to); } - task->from_envelope = rspamd_email_address_from_smtp( - hv_tok->begin, - hv_tok->len); - msg_debug_protocol("read from header, value: %T", hv_tok); - - if (!task->from_envelope) { - msg_err_protocol("bad from header: '%T'", hv_tok); - task->flags |= RSPAMD_TASK_FLAG_BROKEN_HEADERS; + else + { + msg_debug_protocol("wrong header: %T", hn_tok); } - } - IF_HEADER(FILENAME_HEADER) - { - task->msg.fpath = rspamd_mempool_ftokdup(task->task_pool, - hv_tok); - msg_debug_protocol("read filename header, value: %s", task->msg.fpath); - } - IF_HEADER(FLAGS_HEADER) - { - msg_debug_protocol("read flags header, value: %T", hv_tok); - rspamd_protocol_process_flags(task, hv_tok); - } - break; - case 'q': - case 'Q': - IF_HEADER(QUEUE_ID_HEADER) - { - task->queue_id = rspamd_mempool_ftokdup(task->task_pool, - hv_tok); - msg_debug_protocol("read queue_id header, value: %s", task->queue_id); - } - else - { - msg_debug_protocol("wrong header: %T", hn_tok); - } - break; - case 'r': - case 'R': - IF_HEADER(RCPT_HEADER) - { - rspamd_protocol_process_recipients(task, hv_tok); - msg_debug_protocol("read rcpt header, value: %T", hv_tok); - } - IF_HEADER(RAW_DATA_HEADER) - { - srch.begin = "yes"; - srch.len = 3; - - msg_debug_protocol("read raw data header, value: %T", hv_tok); + break; + case 'h': + case 'H': + IF_HEADER(HELO_HEADER) + { + task->helo = rspamd_mempool_ftokdup(task->task_pool, hv_tok); + msg_debug_protocol("read helo header, value: %s", task->helo); + } + IF_HEADER(HOSTNAME_HEADER) + { + task->hostname = rspamd_mempool_ftokdup(task->task_pool, + hv_tok); + msg_debug_protocol("read hostname header, value: %s", task->hostname); + } + break; + case 'f': + case 'F': + IF_HEADER(FROM_HEADER) + { + if 
(hv_tok->len == 0) { + /* Replace '' with '<>' to fix parsing issue */ + RSPAMD_FTOK_ASSIGN(hv_tok, "<>"); + } + task->from_envelope = rspamd_email_address_from_smtp( + hv_tok->begin, + hv_tok->len); + msg_debug_protocol("read from header, value: %T", hv_tok); - if (rspamd_ftok_casecmp(hv_tok, &srch) == 0) { - task->flags &= ~RSPAMD_TASK_FLAG_MIME; - msg_debug_protocol("disable mime parsing"); + if (!task->from_envelope) { + msg_err_protocol("bad from header: '%T'", hv_tok); + task->flags |= RSPAMD_TASK_FLAG_BROKEN_HEADERS; + } } - } - break; - case 'i': - case 'I': - IF_HEADER(IP_ADDR_HEADER) - { - if (!rspamd_parse_inet_address(&task->from_addr, - hv_tok->begin, hv_tok->len, - RSPAMD_INET_ADDRESS_PARSE_DEFAULT)) { - msg_err_protocol("bad ip header: '%T'", hv_tok); + IF_HEADER(FILENAME_HEADER) + { + task->msg.fpath = rspamd_mempool_ftokdup(task->task_pool, + hv_tok); + msg_debug_protocol("read filename header, value: %s", task->msg.fpath); } - else { - msg_debug_protocol("read IP header, value: %T", hv_tok); - has_ip = TRUE; + IF_HEADER(FLAGS_HEADER) + { + msg_debug_protocol("read flags header, value: %T", hv_tok); + rspamd_protocol_process_flags(task, hv_tok); } - } - else - { - msg_debug_protocol("wrong header: %T", hn_tok); - } - break; - case 'p': - case 'P': - IF_HEADER(PASS_HEADER) - { - srch.begin = "all"; - srch.len = 3; + break; + case 'q': + case 'Q': + IF_HEADER(QUEUE_ID_HEADER) + { + task->queue_id = rspamd_mempool_ftokdup(task->task_pool, + hv_tok); + msg_debug_protocol("read queue_id header, value: %s", task->queue_id); + } + else + { + msg_debug_protocol("wrong header: %T", hn_tok); + } + break; + case 'r': + case 'R': + IF_HEADER(RCPT_HEADER) + { + rspamd_protocol_process_recipients(task, hv_tok); + msg_debug_protocol("read rcpt header, value: %T", hv_tok); + } + IF_HEADER(RAW_DATA_HEADER) + { + srch.begin = "yes"; + srch.len = 3; - msg_debug_protocol("read pass header, value: %T", hv_tok); + msg_debug_protocol("read raw data header, value: %T", 
hv_tok); - if (rspamd_ftok_casecmp(hv_tok, &srch) == 0) { - task->flags |= RSPAMD_TASK_FLAG_PASS_ALL; - msg_debug_protocol("pass all filters"); + if (rspamd_ftok_casecmp(hv_tok, &srch) == 0) { + task->flags &= ~RSPAMD_TASK_FLAG_MIME; + msg_debug_protocol("disable mime parsing"); + } } - } - IF_HEADER(PROFILE_HEADER) - { - msg_debug_protocol("read profile header, value: %T", hv_tok); - task->flags |= RSPAMD_TASK_FLAG_PROFILE; - } - break; - case 's': - case 'S': - IF_HEADER(SETTINGS_ID_HEADER) - { - msg_debug_protocol("read settings-id header, value: %T", hv_tok); - task->settings_elt = rspamd_config_find_settings_name_ref( - task->cfg, hv_tok->begin, hv_tok->len); - - if (task->settings_elt == NULL) { - GString *known_ids = g_string_new(NULL); - struct rspamd_config_settings_elt *cur; - - DL_FOREACH(task->cfg->setting_ids, cur) - { - rspamd_printf_gstring(known_ids, "%s(%ud);", - cur->name, cur->id); + break; + case 'i': + case 'I': + IF_HEADER(IP_ADDR_HEADER) + { + if (!rspamd_parse_inet_address(&task->from_addr, + hv_tok->begin, hv_tok->len, + RSPAMD_INET_ADDRESS_PARSE_DEFAULT)) { + msg_err_protocol("bad ip header: '%T'", hv_tok); + } + else { + msg_debug_protocol("read IP header, value: %T", hv_tok); + has_ip = TRUE; } + } + else + { + msg_debug_protocol("wrong header: %T", hn_tok); + } + break; + case 'p': + case 'P': + IF_HEADER(PASS_HEADER) + { + srch.begin = "all"; + srch.len = 3; + + msg_debug_protocol("read pass header, value: %T", hv_tok); + + if (rspamd_ftok_casecmp(hv_tok, &srch) == 0) { + task->flags |= RSPAMD_TASK_FLAG_PASS_ALL; + msg_debug_protocol("pass all filters"); + } + } + IF_HEADER(PROFILE_HEADER) + { + msg_debug_protocol("read profile header, value: %T", hv_tok); + task->flags |= RSPAMD_TASK_FLAG_PROFILE; + } + break; + case 's': + case 'S': + IF_HEADER(SETTINGS_ID_HEADER) + { + msg_debug_protocol("read settings-id header, value: %T", hv_tok); + task->settings_elt = rspamd_config_find_settings_name_ref( + task->cfg, hv_tok->begin, 
hv_tok->len); + + if (task->settings_elt == NULL) { + GString *known_ids = g_string_new(NULL); + struct rspamd_config_settings_elt *cur; + + DL_FOREACH(task->cfg->setting_ids, cur) + { + rspamd_printf_gstring(known_ids, "%s(%ud);", + cur->name, cur->id); + } - msg_warn_protocol("unknown settings id: %T(%d); known_ids: %v", - hv_tok, - rspamd_config_name_to_id(hv_tok->begin, hv_tok->len), - known_ids); + msg_warn_protocol("unknown settings id: %T(%d); known_ids: %v", + hv_tok, + rspamd_config_name_to_id(hv_tok->begin, hv_tok->len), + known_ids); - g_string_free(known_ids, TRUE); + g_string_free(known_ids, TRUE); + } + else { + msg_debug_protocol("applied settings id %T -> %ud", hv_tok, + task->settings_elt->id); + } } - else { - msg_debug_protocol("applied settings id %T -> %ud", hv_tok, - task->settings_elt->id); + IF_HEADER(SETTINGS_HEADER) + { + msg_debug_protocol("read settings header, value: %T", hv_tok); + seen_settings_header = TRUE; } - } - IF_HEADER(SETTINGS_HEADER) - { - msg_debug_protocol("read settings header, value: %T", hv_tok); - seen_settings_header = TRUE; - } - break; - case 'u': - case 'U': - IF_HEADER(USER_HEADER) - { - /* + break; + case 'u': + case 'U': + IF_HEADER(USER_HEADER) + { + /* * We must ignore User header in case of spamc, as SA has * different meaning of this header */ - msg_debug_protocol("read user header, value: %T", hv_tok); - if (!RSPAMD_TASK_IS_SPAMC(task)) { - task->auth_user = rspamd_mempool_ftokdup(task->task_pool, - hv_tok); - } - else { - msg_info_protocol("ignore user header: legacy SA protocol"); + msg_debug_protocol("read user header, value: %T", hv_tok); + if (!RSPAMD_TASK_IS_SPAMC(task)) { + task->auth_user = rspamd_mempool_ftokdup(task->task_pool, + hv_tok); + } + else { + msg_info_protocol("ignore user header: legacy SA protocol"); + } } - } - IF_HEADER(URLS_HEADER) - { - msg_debug_protocol("read urls header, value: %T", hv_tok); + IF_HEADER(URLS_HEADER) + { + msg_debug_protocol("read urls header, value: %T", 
hv_tok); - srch.begin = "extended"; - srch.len = 8; + srch.begin = "extended"; + srch.len = 8; - if (rspamd_ftok_casecmp(hv_tok, &srch) == 0) { - task->protocol_flags |= RSPAMD_TASK_PROTOCOL_FLAG_EXT_URLS; - msg_debug_protocol("extended urls information"); - } - - /* TODO: add more formats there */ - } - IF_HEADER(USER_AGENT_HEADER) - { - msg_debug_protocol("read user-agent header, value: %T", hv_tok); + if (rspamd_ftok_casecmp(hv_tok, &srch) == 0) { + task->protocol_flags |= RSPAMD_TASK_PROTOCOL_FLAG_EXT_URLS; + msg_debug_protocol("extended urls information"); + } - if (hv_tok->len == 6 && - rspamd_lc_cmp(hv_tok->begin, "rspamc", 6) == 0) { - task->protocol_flags |= RSPAMD_TASK_PROTOCOL_FLAG_LOCAL_CLIENT; + /* TODO: add more formats there */ } - } - break; - case 'l': - case 'L': - IF_HEADER(NO_LOG_HEADER) - { - msg_debug_protocol("read log header, value: %T", hv_tok); - srch.begin = "no"; - srch.len = 2; + IF_HEADER(USER_AGENT_HEADER) + { + msg_debug_protocol("read user-agent header, value: %T", hv_tok); - if (rspamd_ftok_casecmp(hv_tok, &srch) == 0) { - task->flags |= RSPAMD_TASK_FLAG_NO_LOG; + if (hv_tok->len == 6 && + rspamd_lc_cmp(hv_tok->begin, "rspamc", 6) == 0) { + task->protocol_flags |= RSPAMD_TASK_PROTOCOL_FLAG_LOCAL_CLIENT; + } } - } - IF_HEADER(LOG_TAG_HEADER) - { - msg_debug_protocol("read log-tag header, value: %T", hv_tok); - /* Ensure that a tag is valid */ - if (rspamd_fast_utf8_validate(hv_tok->begin, hv_tok->len) == 0) { - memcpy(task->task_pool->tag.uid, hv_tok->begin, - MIN(hv_tok->len, sizeof(task->task_pool->tag.uid))); + break; + case 'l': + case 'L': + IF_HEADER(NO_LOG_HEADER) + { + msg_debug_protocol("read log header, value: %T", hv_tok); + srch.begin = "no"; + srch.len = 2; + + if (rspamd_ftok_casecmp(hv_tok, &srch) == 0) { + task->flags |= RSPAMD_TASK_FLAG_NO_LOG; + } } - } - break; - case 'm': - case 'M': - IF_HEADER(MTA_TAG_HEADER) - { - char *mta_tag; - mta_tag = rspamd_mempool_ftokdup(task->task_pool, hv_tok); - 
rspamd_mempool_set_variable(task->task_pool, - RSPAMD_MEMPOOL_MTA_TAG, - mta_tag, NULL); - msg_debug_protocol("read MTA-Tag header, value: %s", mta_tag); - } - IF_HEADER(MTA_NAME_HEADER) - { - char *mta_name; - mta_name = rspamd_mempool_ftokdup(task->task_pool, hv_tok); - rspamd_mempool_set_variable(task->task_pool, - RSPAMD_MEMPOOL_MTA_NAME, - mta_name, NULL); - msg_debug_protocol("read MTA-Name header, value: %s", mta_name); - } - IF_HEADER(MILTER_HEADER) - { - task->protocol_flags |= RSPAMD_TASK_PROTOCOL_FLAG_MILTER; - msg_debug_protocol("read Milter header, value: %T", hv_tok); - } - break; - case 't': - case 'T': - IF_HEADER(TLS_CIPHER_HEADER) - { - task->flags |= RSPAMD_TASK_FLAG_SSL; - msg_debug_protocol("read TLS cipher header, value: %T", hv_tok); - } - break; - default: - msg_debug_protocol("generic header: %T", hn_tok); - break; + IF_HEADER(LOG_TAG_HEADER) + { + msg_debug_protocol("read log-tag header, value: %T", hv_tok); + /* Ensure that a tag is valid */ + if (rspamd_fast_utf8_validate(hv_tok->begin, hv_tok->len) == 0) { + memcpy(task->task_pool->tag.uid, hv_tok->begin, + MIN(hv_tok->len, sizeof(task->task_pool->tag.uid))); + } + } + break; + case 'm': + case 'M': + IF_HEADER(MTA_TAG_HEADER) + { + char *mta_tag; + mta_tag = rspamd_mempool_ftokdup(task->task_pool, hv_tok); + rspamd_mempool_set_variable(task->task_pool, + RSPAMD_MEMPOOL_MTA_TAG, + mta_tag, NULL); + msg_debug_protocol("read MTA-Tag header, value: %s", mta_tag); + } + IF_HEADER(MTA_NAME_HEADER) + { + char *mta_name; + mta_name = rspamd_mempool_ftokdup(task->task_pool, hv_tok); + rspamd_mempool_set_variable(task->task_pool, + RSPAMD_MEMPOOL_MTA_NAME, + mta_name, NULL); + msg_debug_protocol("read MTA-Name header, value: %s", mta_name); + } + IF_HEADER(MILTER_HEADER) + { + task->protocol_flags |= RSPAMD_TASK_PROTOCOL_FLAG_MILTER; + msg_debug_protocol("read Milter header, value: %T", hv_tok); + } + break; + case 't': + case 'T': + IF_HEADER(TLS_CIPHER_HEADER) + { + task->flags |= 
RSPAMD_TASK_FLAG_SSL; + msg_debug_protocol("read TLS cipher header, value: %T", hv_tok); + } + break; + default: + msg_debug_protocol("generic header: %T", hn_tok); + break; } rspamd_task_add_request_header (task, hn_tok, hv_tok); @@ -1716,6 +1716,7 @@ void rspamd_protocol_http_reply(struct rspamd_http_message *msg, rspamd_fstring_free(reply); rspamd_http_message_set_body_from_fstring_steal(msg, compressed_reply); rspamd_http_message_add_header(msg, COMPRESSION_HEADER, "zstd"); + rspamd_http_message_add_header(msg, CONTENT_ENCODING_HEADER, "zstd"); if (task->cfg->libs_ctx->out_dict && task->cfg->libs_ctx->out_dict->id != 0) { diff --git a/src/libserver/protocol_internal.h b/src/libserver/protocol_internal.h index 11f21430e..5582908c2 100644 --- a/src/libserver/protocol_internal.h +++ b/src/libserver/protocol_internal.h @@ -93,6 +93,8 @@ extern "C" { #define RAW_DATA_HEADER "Raw" #define COMPRESSION_HEADER "Compression" #define MESSAGE_OFFSET_HEADER "Message-Offset" +#define CONTENT_ENCODING_HEADER "Content-Encoding" +#define ACCEPT_ENCODING_HEADER "Accept-Encoding" #ifdef __cplusplus } diff --git a/src/libserver/task.c b/src/libserver/task.c index 833046470..bd1e07549 100644 --- a/src/libserver/task.c +++ b/src/libserver/task.c @@ -519,7 +519,11 @@ rspamd_task_load_message(struct rspamd_task *task, debug_task("got input of length %z", task->msg.len); /* Check compression */ - tok = rspamd_task_get_request_header(task, "compression"); + tok = rspamd_task_get_request_header(task, COMPRESSION_HEADER); + + if (!tok) { + tok = rspamd_task_get_request_header(task, CONTENT_ENCODING_HEADER); + } if (tok) { /* Need to uncompress */ diff --git a/src/libserver/worker_util.c b/src/libserver/worker_util.c index 383d89c14..75836573f 100644 --- a/src/libserver/worker_util.c +++ b/src/libserver/worker_util.c @@ -57,6 +57,7 @@ #include "contrib/libev/ev.h" #include "libstat/stat_api.h" +#include "libserver/protocol_internal.h" struct rspamd_worker *rspamd_current_worker = NULL; @@
-600,7 +601,7 @@ rspamd_controller_maybe_compress(struct rspamd_http_connection_entry *entry, { if (entry->support_gzip) { if (rspamd_fstring_gzip(&buf)) { - rspamd_http_message_add_header(msg, "Content-Encoding", "gzip"); + rspamd_http_message_add_header(msg, CONTENT_ENCODING_HEADER, "gzip"); } } diff --git a/src/lua/lua_http.c b/src/lua/lua_http.c index 8ba612c1b..904f1cbbf 100644 --- a/src/lua/lua_http.c +++ b/src/lua/lua_http.c @@ -21,6 +21,7 @@ #include "unix-std.h" #include "zlib.h" #include "utlist.h" +#include "libserver/protocol_internal.h" /*** * @module rspamd_http @@ -1107,7 +1108,7 @@ lua_http_request(lua_State *L) if (body) { if (gzip) { if (rspamd_fstring_gzip(&body)) { - rspamd_http_message_add_header(msg, "Content-Encoding", "gzip"); + rspamd_http_message_add_header(msg, CONTENT_ENCODING_HEADER, "gzip"); } } -- 2.39.5