diff options
-rw-r--r-- | conf/options.inc | 3 | ||||
-rw-r--r-- | rules/regexp/headers.lua | 21 | ||||
-rw-r--r-- | src/client/rspamc.cxx | 2 | ||||
-rw-r--r-- | src/fuzzy_storage.c | 6 | ||||
-rw-r--r-- | src/libserver/cfg_file.h | 1 | ||||
-rw-r--r-- | src/libserver/cfg_rcl.cxx | 8 | ||||
-rw-r--r-- | src/libserver/cfg_utils.cxx | 1 | ||||
-rw-r--r-- | src/libserver/protocol.c | 120 | ||||
-rw-r--r-- | src/libserver/protocol_internal.h | 1 | ||||
-rw-r--r-- | src/libserver/task.c | 46 | ||||
-rw-r--r-- | src/libserver/task.h | 3 | ||||
-rw-r--r-- | src/lua/lua_config.c | 24 | ||||
-rw-r--r-- | src/plugins/lua/milter_headers.lua | 11 | ||||
-rw-r--r-- | src/ragel/content_disposition.rl | 2 | ||||
-rw-r--r-- | src/ragel/smtp_address.rl | 1 | ||||
-rw-r--r-- | src/ragel/smtp_base.rl | 25 | ||||
-rw-r--r-- | test/functional/lib/rspamd.robot | 2 |
17 files changed, 88 insertions, 189 deletions
diff --git a/conf/options.inc b/conf/options.inc index d5cf60d7b..fd9b662ab 100644 --- a/conf/options.inc +++ b/conf/options.inc @@ -70,3 +70,6 @@ task_timeout = 8s; # Emit soft reject when timeout takes place soft_reject_on_timeout = false; + +# Use utf8 mode for mime operations (use if your MTA announces SMTPUTF8 support) +enable_mime_utf = false; diff --git a/rules/regexp/headers.lua b/rules/regexp/headers.lua index ef4d532bf..44fa964e3 100644 --- a/rules/regexp/headers.lua +++ b/rules/regexp/headers.lua @@ -53,9 +53,22 @@ reconf['TO_NEEDS_ENCODING'] = { score = 1.0, mime_only = true, description = 'To header needs encoding', - group = 'headers' + group = 'headers', } +if rspamd_config:is_mime_utf8() then + -- Disable some of the rules preserving the underlying logic + reconf['FROM_NEEDS_ENCODING'].condition = function() + return false + end + reconf['TO_NEEDS_ENCODING'].condition = function() + return false + end + reconf['SUBJECT_NEEDS_ENCODING'].condition = function() + return false + end +end + -- Detects that there is no space in From header (e.g. Some Name<some@host>) reconf['R_NO_SPACE_IN_FROM'] = { re = 'From=/\\S<[-\\w\\.]+\\@[-\\w\\.]+>/X', @@ -713,12 +726,6 @@ reconf['HEADER_DATE_EMPTY_DELIMITER'] = { } -- Definitions of received headers regexp -reconf['RCVD_ILLEGAL_CHARS'] = { - re = 'Received=/[\\x80-\\xff]/X', - score = 4.0, - description = 'Received header has raw illegal character', - group = 'headers' -} local MAIL_RU_Return_Path = 'Return-path=/^\\s*<.+\\@mail\\.ru>$/iX' local MAIL_RU_X_Envelope_From = 'X-Envelope-From=/^\\s*<.+\\@mail\\.ru>$/iX' diff --git a/src/client/rspamc.cxx b/src/client/rspamc.cxx index 1c67e4167..31a4aaf24 100644 --- a/src/client/rspamc.cxx +++ b/src/client/rspamc.cxx @@ -31,6 +31,7 @@ #include <cstdint> #include <cstdio> #include <cmath> +#include <locale> #include "frozen/string.h" #include "frozen/unordered_map.h" @@ -2180,6 +2181,7 @@ int main(int argc, char **argv, char **env) { auto *kwattrs = g_queue_new(); + std::locale::global(std::locale("")); read_cmd_line(&argc, &argv); tty = isatty(STDOUT_FILENO); diff --git a/src/fuzzy_storage.c b/src/fuzzy_storage.c index 3064d45a0..f21992a94 100644 --- a/src/fuzzy_storage.c +++ b/src/fuzzy_storage.c @@ -2475,6 +2475,12 @@ rspamd_fuzzy_stat_to_ucl(struct rspamd_fuzzy_storage_ctx *ctx, gboolean ip_stat) rspamd_fuzzy_key_stat_iter(pk_iter, fuzzy_key, keys_obj, ip_stat); }); + if (ctx->dynamic_keys) { + kh_foreach(ctx->dynamic_keys, pk_iter, fuzzy_key, { + rspamd_fuzzy_key_stat_iter(pk_iter, fuzzy_key, keys_obj, ip_stat); + }); + } + ucl_object_insert_key(obj, keys_obj, "keys", 0, false); /* Now generic stats */ diff --git a/src/libserver/cfg_file.h b/src/libserver/cfg_file.h index fa784f2a2..f59c6ff89 100644 --- a/src/libserver/cfg_file.h +++ b/src/libserver/cfg_file.h @@ -367,6 +367,7 @@ struct rspamd_config { gboolean public_groups_only; /**< Output merely public groups everywhere */ enum rspamd_gtube_patterns_policy gtube_patterns_policy; /**< Enable test patterns */ gboolean enable_css_parser; /**< Enable css parsing in HTML */ + gboolean enable_mime_utf; /**< Enable utf8 mime parsing */ gsize max_cores_size; /**< maximum size occupied by rspamd core files */ gsize max_cores_count; /**< maximum number of core files */ diff --git a/src/libserver/cfg_rcl.cxx b/src/libserver/cfg_rcl.cxx index 270678491..79509e12e 100644 --- a/src/libserver/cfg_rcl.cxx +++ b/src/libserver/cfg_rcl.cxx @@ -1929,7 +1929,13 @@ rspamd_rcl_config_init(struct rspamd_config *cfg, GHashTable *skip_sections) rspamd_rcl_parse_struct_boolean, G_STRUCT_OFFSET(struct rspamd_config, enable_css_parser), 0, - "Enable CSS parser (experimental)"); + "Enable CSS parser"); + rspamd_rcl_add_default_handler(sub, + "enable_mime_utf", + rspamd_rcl_parse_struct_boolean, + G_STRUCT_OFFSET(struct rspamd_config, enable_mime_utf), + 0, + "Enable UTF8 mode for mime"); rspamd_rcl_add_default_handler(sub, "enable_experimental", rspamd_rcl_parse_struct_boolean, diff --git a/src/libserver/cfg_utils.cxx b/src/libserver/cfg_utils.cxx index d8696e72d..38adf8390 100644 --- a/src/libserver/cfg_utils.cxx +++ b/src/libserver/cfg_utils.cxx @@ -341,6 +341,7 @@ rspamd_config_new(enum rspamd_config_init_flags flags) cfg->heartbeat_interval = 10.0; cfg->enable_css_parser = true; + cfg->enable_mime_utf = false; cfg->script_modules = g_ptr_array_new(); REF_INIT_RETAIN(cfg, rspamd_config_free); diff --git a/src/libserver/protocol.c b/src/libserver/protocol.c index ee2192913..a86111ff2 100644 --- a/src/libserver/protocol.c +++ b/src/libserver/protocol.c @@ -720,12 +720,6 @@ rspamd_protocol_handle_headers(struct rspamd_task *task, break; case 'm': case 'M': - IF_HEADER(MLEN_HEADER) - { - msg_debug_protocol("read message length header, value: %T", - hv_tok); - task->protocol_flags |= RSPAMD_TASK_PROTOCOL_FLAG_HAS_CONTROL; - } IF_HEADER(MTA_TAG_HEADER) { char *mta_tag; @@ -782,120 +776,6 @@ if (!has_ip) { return TRUE; } -#define BOOL_TO_FLAG(val, flags, flag) \ - do { \ - if ((val)) (flags) |= (flag); \ - else \ - (flags) &= ~(flag); \ - } while (0) - -gboolean -rspamd_protocol_parse_task_flags(rspamd_mempool_t *pool, - const ucl_object_t *obj, - gpointer ud, - struct rspamd_rcl_section *section, - GError **err) -{ - struct rspamd_rcl_struct_parser *pd = ud; - int *target; - const char *key; - gboolean value; - - target = (int *) (((char *) pd->user_struct) + pd->offset); - key = ucl_object_key(obj); - value = ucl_object_toboolean(obj); - - if (key != NULL) { - if (g_ascii_strcasecmp(key, "pass_all") == 0) { - BOOL_TO_FLAG(value, *target, RSPAMD_TASK_FLAG_PASS_ALL); - } - else if (g_ascii_strcasecmp(key, "no_log") == 0) { - BOOL_TO_FLAG(value, *target, RSPAMD_TASK_FLAG_NO_LOG); - } - } - - return TRUE; -} - -static struct rspamd_rcl_sections_map *control_parser = NULL; - -RSPAMD_CONSTRUCTOR(rspamd_protocol_control_parser_ctor) -{ - - struct rspamd_rcl_section *sub = rspamd_rcl_add_section(&control_parser, NULL, - "*", - NULL, - NULL, - UCL_OBJECT, - FALSE, - TRUE); - /* Default handlers */ - rspamd_rcl_add_default_handler(sub, - "ip", - rspamd_rcl_parse_struct_addr, - G_STRUCT_OFFSET(struct rspamd_task, from_addr), - 0, - NULL); - rspamd_rcl_add_default_handler(sub, - "from", - rspamd_rcl_parse_struct_mime_addr, - G_STRUCT_OFFSET(struct rspamd_task, from_envelope), - 0, - NULL); - rspamd_rcl_add_default_handler(sub, - "rcpt", - rspamd_rcl_parse_struct_mime_addr, - G_STRUCT_OFFSET(struct rspamd_task, rcpt_envelope), - 0, - NULL); - rspamd_rcl_add_default_handler(sub, - "helo", - rspamd_rcl_parse_struct_string, - G_STRUCT_OFFSET(struct rspamd_task, helo), - 0, - NULL); - rspamd_rcl_add_default_handler(sub, - "user", - rspamd_rcl_parse_struct_string, - G_STRUCT_OFFSET(struct rspamd_task, auth_user), - 0, - NULL); - rspamd_rcl_add_default_handler(sub, - "pass_all", - rspamd_protocol_parse_task_flags, - G_STRUCT_OFFSET(struct rspamd_task, flags), - 0, - NULL); - rspamd_rcl_add_default_handler(sub, - "json", - rspamd_protocol_parse_task_flags, - G_STRUCT_OFFSET(struct rspamd_task, flags), - 0, - NULL); -} - -RSPAMD_DESTRUCTOR(rspamd_protocol_control_parser_dtor) -{ - rspamd_rcl_sections_free(control_parser); -} - -gboolean -rspamd_protocol_handle_control(struct rspamd_task *task, - const ucl_object_t *control) -{ - GError *err = NULL; - - if (!rspamd_rcl_parse(control_parser, task->cfg, task, task->task_pool, - control, &err)) { - msg_warn_protocol("cannot parse control block: %e", err); - g_error_free(err); - - return FALSE; - } - - return TRUE; -} - gboolean rspamd_protocol_handle_request(struct rspamd_task *task, struct rspamd_http_message *msg) diff --git a/src/libserver/protocol_internal.h b/src/libserver/protocol_internal.h index e55e54851..11f21430e 100644 --- a/src/libserver/protocol_internal.h +++ b/src/libserver/protocol_internal.h @@ -79,7 +79,6 @@ extern "C" { #define DELIVER_TO_HEADER "Deliver-To" #define NO_LOG_HEADER "Log" #define LOG_TAG_HEADER "Log-Tag" -#define MLEN_HEADER "Message-Length" #define USER_AGENT_HEADER "User-Agent" #define MTA_TAG_HEADER "MTA-Tag" #define PROFILE_HEADER "Profile" diff --git a/src/libserver/task.c b/src/libserver/task.c index 637f401a9..833046470 100644 --- a/src/libserver/task.c +++ b/src/libserver/task.c @@ -323,9 +323,6 @@ gboolean rspamd_task_load_message(struct rspamd_task *task, struct rspamd_http_message *msg, const char *start, gsize len) { - unsigned int control_len, r; - struct ucl_parser *parser; - ucl_object_t *control_obj; char filepath[PATH_MAX], *fp; int fd, flen; gulong offset = 0, shmem_size = 0; @@ -349,8 +346,8 @@ rspamd_task_load_message(struct rspamd_task *task, if (tok) { /* Shared memory part */ - r = rspamd_strlcpy(filepath, tok->begin, - MIN(sizeof(filepath), tok->len + 1)); + size_t r = rspamd_strlcpy(filepath, tok->begin, + MIN(sizeof(filepath), tok->len + 1)); rspamd_url_decode(filepath, filepath, r + 1); flen = strlen(filepath); @@ -448,8 +445,8 @@ rspamd_task_load_message(struct rspamd_task *task, if (tok) { debug_task("want to scan file %T", tok); - r = rspamd_strlcpy(filepath, tok->begin, - MIN(sizeof(filepath), tok->len + 1)); + size_t r = rspamd_strlcpy(filepath, tok->begin, + MIN(sizeof(filepath), tok->len + 1)); rspamd_url_decode(filepath, filepath, r + 1); flen = strlen(filepath); @@ -626,41 +623,6 @@ rspamd_task_load_message(struct rspamd_task *task, task->flags |= RSPAMD_TASK_FLAG_EMPTY; } - if (task->protocol_flags & RSPAMD_TASK_PROTOCOL_FLAG_HAS_CONTROL) { - rspamd_ftok_t *hv = rspamd_task_get_request_header(task, MLEN_HEADER); - gulong message_len = 0; - - if (!hv || !rspamd_strtoul(hv->begin, hv->len, &message_len) || - task->msg.len < message_len) { - msg_warn_task("message has invalid message length: %ul and total len: %ul", - message_len, task->msg.len); - g_set_error(&task->err, rspamd_task_quark(), RSPAMD_PROTOCOL_ERROR, - "Invalid length"); - return FALSE; - } - - control_len = task->msg.len - message_len; - - if (control_len > 0) { - parser = ucl_parser_new(UCL_PARSER_KEY_LOWERCASE); - - if (!ucl_parser_add_chunk(parser, task->msg.begin, control_len)) { - msg_warn_task("processing of control chunk failed: %s", - ucl_parser_get_error(parser)); - ucl_parser_free(parser); - } - else { - control_obj = ucl_parser_get_object(parser); - ucl_parser_free(parser); - rspamd_protocol_handle_control(task, control_obj); - ucl_object_unref(control_obj); - } - - task->msg.begin += control_len; - task->msg.len -= control_len; - } - } - return TRUE; } diff --git a/src/libserver/task.h b/src/libserver/task.h index 7e6341a84..6be350098 100644 --- a/src/libserver/task.h +++ b/src/libserver/task.h @@ -115,9 +115,6 @@ enum rspamd_task_stage { #define RSPAMD_TASK_FLAG_MESSAGE_REWRITE (1u << 24u) #define RSPAMD_TASK_FLAG_MAX_SHIFT (24u) - -/* Request has a JSON control block */ -#define RSPAMD_TASK_PROTOCOL_FLAG_HAS_CONTROL (1u << 0u) /* Request has been done by a local client */ #define RSPAMD_TASK_PROTOCOL_FLAG_LOCAL_CLIENT (1u << 1u) /* Request has been sent via milter */ diff --git a/src/lua/lua_config.c b/src/lua/lua_config.c index be4dd7081..0b4d208b4 100644 --- a/src/lua/lua_config.c +++ b/src/lua/lua_config.c @@ -795,6 +795,13 @@ LUA_FUNCTION_DEF(config, get_cpu_flags); LUA_FUNCTION_DEF(config, has_torch); /*** + * @method rspamd_config:is_mime_utf8() + * Returns true if Rspamd is configured to use UTF for mime processing + * @return {boolean} true if mime utf is enabled + */ +LUA_FUNCTION_DEF(config, is_mime_utf8); + +/*** * @method rspamd_config:experimental_enabled() * Returns true if experimental plugins are enabled * @return {boolean} true if experimental plugins are enabled @@ -921,6 +928,7 @@ static const struct luaL_reg configlib_m[] = { LUA_INTERFACE_DEF(config, set_peak_cb), LUA_INTERFACE_DEF(config, get_cpu_flags), LUA_INTERFACE_DEF(config, has_torch), + LUA_INTERFACE_DEF(config, is_mime_utf8), LUA_INTERFACE_DEF(config, experimental_enabled), LUA_INTERFACE_DEF(config, load_ucl), LUA_INTERFACE_DEF(config, parse_rcl), @@ -4229,6 +4237,22 @@ lua_config_has_torch(lua_State *L) } static int +lua_config_is_mime_utf8(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_config *cfg = lua_check_config(L, 1); + + if (cfg != NULL) { + lua_pushboolean(L, cfg->enable_mime_utf); + } + else { + return luaL_error(L, "invalid arguments"); + } + + return 1; +} + +static int lua_config_experimental_enabled(lua_State *L) { LUA_TRACE_POINT; diff --git a/src/plugins/lua/milter_headers.lua b/src/plugins/lua/milter_headers.lua index b53a45457..58a227fec 100644 --- a/src/plugins/lua/milter_headers.lua +++ b/src/plugins/lua/milter_headers.lua @@ -22,7 +22,7 @@ end -- A plugin that provides common header manipulations local logger = require "rspamd_logger" -local util = require "rspamd_util" +local rspamd_util = require "rspamd_util" local N = 'milter_headers' local lua_util = require "lua_util" local lua_maps = require "lua_maps" @@ -30,7 +30,7 @@ local lua_mime = require "lua_mime" local ts = require("tableshape").types local E = {} -local HOSTNAME = util.get_hostname() +local HOSTNAME = rspamd_util.get_hostname() local settings = { remove_upstream_spam_flag = true; @@ -213,6 +213,13 @@ local function milter_headers(task) if not add[hname] then add[hname] = {} end + if rspamd_config:is_mime_utf8() then + if not rspamd_util.is_valid_utf8(value) then + value = rspamd_util.mime_header_encode(value) + end + else + value = rspamd_util.mime_header_encode(value) + end table.insert(add[hname], { order = (order or settings.default_headers_order or -1), value = lua_util.fold_header(task, hname, value, stop_chars) diff --git a/src/ragel/content_disposition.rl b/src/ragel/content_disposition.rl index 862015ea1..93d3c9d3d 100644 --- a/src/ragel/content_disposition.rl +++ b/src/ragel/content_disposition.rl @@ -7,7 +7,7 @@ balanced_ccontent := ccontent* ')' @{ fret; }; comment = "(" (FWS? ccontent)* FWS? ")"; CFWS = ((FWS? comment)+ FWS?) | FWS; - qcontent = qtextSMTP | quoted_pairSMTP | textUTF8; + qcontent = qtextSMTP | quoted_pairSMTP; quoted_string = CFWS? (DQUOTE (((FWS? qcontent)* FWS?) >Quoted_Str_Start %Quoted_Str_End) diff --git a/src/ragel/smtp_address.rl b/src/ragel/smtp_address.rl index 0caf1a65e..eb0fc2d9d 100644 --- a/src/ragel/smtp_address.rl +++ b/src/ragel/smtp_address.rl @@ -24,6 +24,7 @@ # SMTP address spec # Obtained from: https://tools.ietf.org/html/rfc5321#section-4.1.2 + # Additions from rfc6532 (smtputf8): https://tools.ietf.org/html/rfc6532#section-3.2 QcontentSMTP = qtextSMTP | quoted_pairSMTP %User_has_backslash; Quoted_string = ( DQUOTE QcontentSMTP* >User_start %User_end DQUOTE ) %Quoted_addr; diff --git a/src/ragel/smtp_base.rl b/src/ragel/smtp_base.rl index cb4f066bc..eefc430d5 100644 --- a/src/ragel/smtp_base.rl +++ b/src/ragel/smtp_base.rl @@ -9,26 +9,27 @@ CRLF = "\r\n" | ("\r" [^\n]) | ([^\r] "\n"); DQUOTE = '"'; + utf8_cont = 0x80..0xbf; + utf8_2c = 0xc0..0xdf utf8_cont; + utf8_3c = 0xe0..0xef utf8_cont utf8_cont; + utf8_4c = 0xf0..0xf7 utf8_cont utf8_cont utf8_cont; + UTF8_non_ascii = utf8_2c | utf8_3c | utf8_4c; + # Printable US-ASCII characters not including specials atext = alpha | digit | "!" | "#" | "$" | "%" | "&" | "'" | "*" | "+" | "_" | "/" | "=" | "?" | "^" | - "-" | "`" | "{" | "|" | "}" | "~"; + "-" | "`" | "{" | "|" | "}" | "~" | UTF8_non_ascii; # Printable US-ASCII characters not including "[", "]", or "\" - dtext = 33..90 | 94..126; + dtext = 33..90 | 94..126 | UTF8_non_ascii; # Printable US-ASCII characters not including "(", ")", or "\" - ctext = 33..39 | 42..91 | 93..126; + ctext = 33..39 | 42..91 | 93..126 | UTF8_non_ascii; - dcontent = 33..90 | 94..126; - Let_dig = alpha | digit; - Ldh_str = ( alpha | digit | "_" | "-" )* Let_dig; + dcontent = 33..90 | 94..126 | UTF8_non_ascii; + Let_dig = alpha | digit | UTF8_non_ascii; + Ldh_str = ( Let_dig | "_" | "-" )* Let_dig; quoted_pairSMTP = "\\" 32..126; - qtextSMTP = 32..33 | 35..91 | 93..126; - utf8_cont = 0x80..0xbf; - utf8_2c = 0xc0..0xdf utf8_cont; - utf8_3c = 0xe0..0xef utf8_cont utf8_cont; - utf8_4c = 0xf0..0xf7 utf8_cont utf8_cont utf8_cont; - textUTF8 = qtextSMTP | utf8_2c | utf8_3c | utf8_4c; + qtextSMTP = 32..33 | 35..91 | 93..126 | UTF8_non_ascii; Atom = atext+; Dot_string = Atom ("." Atom)*; dot_atom_text = atext+ ("." atext+)*; diff --git a/test/functional/lib/rspamd.robot b/test/functional/lib/rspamd.robot index de4e5285f..68bcb66fb 100644 --- a/test/functional/lib/rspamd.robot +++ b/test/functional/lib/rspamd.robot @@ -106,6 +106,8 @@ Expect Added Header ... msg=add_headers block was not present in protocol response Dictionary Should Contain Key ${SCAN_RESULT}[milter][add_headers] ${header_name} ... msg=${header_name} was not added + Dictionary Should Contain Key ${SCAN_RESULT}[milter][add_headers][${header_name}] value + ... msg=no value field in ${header_name} index: ${SCAN_RESULT}[milter][add_headers][${header_name}] Should Be Equal ${SCAN_RESULT}[milter][add_headers][${header_name}][value] ${header_value} Should Be Equal as Numbers ${SCAN_RESULT}[milter][add_headers][${header_name}][order] ${pos} |