diff options
author | Vsevolod Stakhov <vsevolod@rspamd.com> | 2024-09-27 16:53:57 +0600 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-09-27 16:53:57 +0600 |
commit | 705f647b879a8c29219faa549862864fee8375c6 (patch) | |
tree | 4221c6d163d6bce5bc9066667c50fcfe9a88b363 | |
parent | 0b397e439ffdaa58ca709db91ac51d30c80cbd50 (diff) | |
parent | f8700e56ec5659f331f0aca0b28eec43a4e7cb33 (diff) | |
download | rspamd-705f647b879a8c29219faa549862864fee8375c6.tar.gz rspamd-705f647b879a8c29219faa549862864fee8375c6.zip |
Merge pull request #5056 from rspamd/vstakhov-utf8-mime
[Feature] MIME UTF8 support
-rw-r--r-- | conf/options.inc | 3 | ||||
-rw-r--r-- | rules/regexp/headers.lua | 21 | ||||
-rw-r--r-- | src/libserver/cfg_file.h | 1 | ||||
-rw-r--r-- | src/libserver/cfg_rcl.cxx | 8 | ||||
-rw-r--r-- | src/libserver/cfg_utils.cxx | 1 | ||||
-rw-r--r-- | src/lua/lua_config.c | 24 | ||||
-rw-r--r-- | src/plugins/lua/milter_headers.lua | 11 | ||||
-rw-r--r-- | src/ragel/content_disposition.rl | 2 | ||||
-rw-r--r-- | src/ragel/smtp_address.rl | 1 | ||||
-rw-r--r-- | src/ragel/smtp_base.rl | 25 |
10 files changed, 74 insertions, 23 deletions
diff --git a/conf/options.inc b/conf/options.inc index d5cf60d7b..fd9b662ab 100644 --- a/conf/options.inc +++ b/conf/options.inc @@ -70,3 +70,6 @@ task_timeout = 8s; # Emit soft reject when timeout takes place soft_reject_on_timeout = false; + +# Use utf8 mode for mime operations (use if your MTA announces SMTPUTF8 support) +enable_mime_utf = false; diff --git a/rules/regexp/headers.lua b/rules/regexp/headers.lua index ef4d532bf..daa1e378d 100644 --- a/rules/regexp/headers.lua +++ b/rules/regexp/headers.lua @@ -53,9 +53,22 @@ reconf['TO_NEEDS_ENCODING'] = { score = 1.0, mime_only = true, description = 'To header needs encoding', - group = 'headers' + group = 'headers', } +if rspamd_config:is_mime_utf() then + -- Disable some of the rules preserving the underlying logic + reconf['FROM_NEEDS_ENCODING'].condition = function() + return false + end + reconf['TO_NEEDS_ENCODING'].condition = function() + return false + end + reconf['SUBJECT_NEEDS_ENCODING'].condition = function() + return false + end +end + -- Detects that there is no space in From header (e.g. 
Some Name<some@host>) reconf['R_NO_SPACE_IN_FROM'] = { re = 'From=/\\S<[-\\w\\.]+\\@[-\\w\\.]+>/X', @@ -713,12 +726,6 @@ reconf['HEADER_DATE_EMPTY_DELIMITER'] = { } -- Definitions of received headers regexp -reconf['RCVD_ILLEGAL_CHARS'] = { - re = 'Received=/[\\x80-\\xff]/X', - score = 4.0, - description = 'Received header has raw illegal character', - group = 'headers' -} local MAIL_RU_Return_Path = 'Return-path=/^\\s*<.+\\@mail\\.ru>$/iX' local MAIL_RU_X_Envelope_From = 'X-Envelope-From=/^\\s*<.+\\@mail\\.ru>$/iX' diff --git a/src/libserver/cfg_file.h b/src/libserver/cfg_file.h index fa784f2a2..f59c6ff89 100644 --- a/src/libserver/cfg_file.h +++ b/src/libserver/cfg_file.h @@ -367,6 +367,7 @@ struct rspamd_config { gboolean public_groups_only; /**< Output merely public groups everywhere */ enum rspamd_gtube_patterns_policy gtube_patterns_policy; /**< Enable test patterns */ gboolean enable_css_parser; /**< Enable css parsing in HTML */ + gboolean enable_mime_utf; /**< Enable utf8 mime parsing */ gsize max_cores_size; /**< maximum size occupied by rspamd core files */ gsize max_cores_count; /**< maximum number of core files */ diff --git a/src/libserver/cfg_rcl.cxx b/src/libserver/cfg_rcl.cxx index 270678491..79509e12e 100644 --- a/src/libserver/cfg_rcl.cxx +++ b/src/libserver/cfg_rcl.cxx @@ -1929,7 +1929,13 @@ rspamd_rcl_config_init(struct rspamd_config *cfg, GHashTable *skip_sections) rspamd_rcl_parse_struct_boolean, G_STRUCT_OFFSET(struct rspamd_config, enable_css_parser), 0, - "Enable CSS parser (experimental)"); + "Enable CSS parser"); + rspamd_rcl_add_default_handler(sub, + "enable_mime_utf", + rspamd_rcl_parse_struct_boolean, + G_STRUCT_OFFSET(struct rspamd_config, enable_mime_utf), + 0, + "Enable UTF8 mode for mime"); rspamd_rcl_add_default_handler(sub, "enable_experimental", rspamd_rcl_parse_struct_boolean, diff --git a/src/libserver/cfg_utils.cxx b/src/libserver/cfg_utils.cxx index d8696e72d..38adf8390 100644 --- a/src/libserver/cfg_utils.cxx +++ 
b/src/libserver/cfg_utils.cxx @@ -341,6 +341,7 @@ rspamd_config_new(enum rspamd_config_init_flags flags) cfg->heartbeat_interval = 10.0; cfg->enable_css_parser = true; + cfg->enable_mime_utf = false; cfg->script_modules = g_ptr_array_new(); REF_INIT_RETAIN(cfg, rspamd_config_free); diff --git a/src/lua/lua_config.c b/src/lua/lua_config.c index be4dd7081..e3f8b2e57 100644 --- a/src/lua/lua_config.c +++ b/src/lua/lua_config.c @@ -795,6 +795,13 @@ LUA_FUNCTION_DEF(config, get_cpu_flags); LUA_FUNCTION_DEF(config, has_torch); /*** + * @method rspamd_config:is_mime_utf() + * Returns true if Rspamd is configured to use UTF for mime processing + * @return {boolean} true if mime utf is enabled + */ +LUA_FUNCTION_DEF(config, is_mime_utf); + +/*** * @method rspamd_config:experimental_enabled() * Returns true if experimental plugins are enabled * @return {boolean} true if experimental plugins are enabled @@ -921,6 +928,7 @@ static const struct luaL_reg configlib_m[] = { LUA_INTERFACE_DEF(config, set_peak_cb), LUA_INTERFACE_DEF(config, get_cpu_flags), LUA_INTERFACE_DEF(config, has_torch), + LUA_INTERFACE_DEF(config, is_mime_utf), LUA_INTERFACE_DEF(config, experimental_enabled), LUA_INTERFACE_DEF(config, load_ucl), LUA_INTERFACE_DEF(config, parse_rcl), @@ -4229,6 +4237,22 @@ lua_config_has_torch(lua_State *L) } static int +lua_config_is_mime_utf(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_config *cfg = lua_check_config(L, 1); + + if (cfg != NULL) { + lua_pushboolean(L, cfg->enable_mime_utf); + } + else { + return luaL_error(L, "invalid arguments"); + } + + return 1; +} + +static int lua_config_experimental_enabled(lua_State *L) { LUA_TRACE_POINT; diff --git a/src/plugins/lua/milter_headers.lua b/src/plugins/lua/milter_headers.lua index b53a45457..58a227fec 100644 --- a/src/plugins/lua/milter_headers.lua +++ b/src/plugins/lua/milter_headers.lua @@ -22,7 +22,7 @@ end -- A plugin that provides common header manipulations local logger = require "rspamd_logger" -local util = 
require "rspamd_util" +local rspamd_util = require "rspamd_util" local N = 'milter_headers' local lua_util = require "lua_util" local lua_maps = require "lua_maps" @@ -30,7 +30,7 @@ local lua_mime = require "lua_mime" local ts = require("tableshape").types local E = {} -local HOSTNAME = util.get_hostname() +local HOSTNAME = rspamd_util.get_hostname() local settings = { remove_upstream_spam_flag = true; @@ -213,6 +213,13 @@ local function milter_headers(task) if not add[hname] then add[hname] = {} end + if rspamd_config:is_mime_utf() then + if not rspamd_util.is_valid_utf8(value) then + value = rspamd_util.mime_header_encode(value) + end + else + value = rspamd_util.mime_header_encode(value) + end table.insert(add[hname], { order = (order or settings.default_headers_order or -1), value = lua_util.fold_header(task, hname, value, stop_chars) diff --git a/src/ragel/content_disposition.rl b/src/ragel/content_disposition.rl index 862015ea1..93d3c9d3d 100644 --- a/src/ragel/content_disposition.rl +++ b/src/ragel/content_disposition.rl @@ -7,7 +7,7 @@ balanced_ccontent := ccontent* ')' @{ fret; }; comment = "(" (FWS? ccontent)* FWS? ")"; CFWS = ((FWS? comment)+ FWS?) | FWS; - qcontent = qtextSMTP | quoted_pairSMTP | textUTF8; + qcontent = qtextSMTP | quoted_pairSMTP; quoted_string = CFWS? (DQUOTE (((FWS? qcontent)* FWS?) 
>Quoted_Str_Start %Quoted_Str_End) diff --git a/src/ragel/smtp_address.rl b/src/ragel/smtp_address.rl index 0caf1a65e..eb0fc2d9d 100644 --- a/src/ragel/smtp_address.rl +++ b/src/ragel/smtp_address.rl @@ -24,6 +24,7 @@ # SMTP address spec # Obtained from: https://tools.ietf.org/html/rfc5321#section-4.1.2 + # Additions from rfc6532 (smtputf8): https://tools.ietf.org/html/rfc6532#section-3.2 QcontentSMTP = qtextSMTP | quoted_pairSMTP %User_has_backslash; Quoted_string = ( DQUOTE QcontentSMTP* >User_start %User_end DQUOTE ) %Quoted_addr; diff --git a/src/ragel/smtp_base.rl b/src/ragel/smtp_base.rl index cb4f066bc..eefc430d5 100644 --- a/src/ragel/smtp_base.rl +++ b/src/ragel/smtp_base.rl @@ -9,26 +9,27 @@ CRLF = "\r\n" | ("\r" [^\n]) | ([^\r] "\n"); DQUOTE = '"'; + utf8_cont = 0x80..0xbf; + utf8_2c = 0xc0..0xdf utf8_cont; + utf8_3c = 0xe0..0xef utf8_cont utf8_cont; + utf8_4c = 0xf0..0xf7 utf8_cont utf8_cont utf8_cont; + UTF8_non_ascii = utf8_2c | utf8_3c | utf8_4c; + # Printable US-ASCII characters not including specials atext = alpha | digit | "!" | "#" | "$" | "%" | "&" | "'" | "*" | "+" | "_" | "/" | "=" | "?" 
| "^" | - "-" | "`" | "{" | "|" | "}" | "~"; + "-" | "`" | "{" | "|" | "}" | "~" | UTF8_non_ascii; # Printable US-ASCII characters not including "[", "]", or "\" - dtext = 33..90 | 94..126; + dtext = 33..90 | 94..126 | UTF8_non_ascii; # Printable US-ASCII characters not including "(", ")", or "\" - ctext = 33..39 | 42..91 | 93..126; + ctext = 33..39 | 42..91 | 93..126 | UTF8_non_ascii; - dcontent = 33..90 | 94..126; - Let_dig = alpha | digit; - Ldh_str = ( alpha | digit | "_" | "-" )* Let_dig; + dcontent = 33..90 | 94..126 | UTF8_non_ascii; + Let_dig = alpha | digit | UTF8_non_ascii; + Ldh_str = ( Let_dig | "_" | "-" )* Let_dig; quoted_pairSMTP = "\\" 32..126; - qtextSMTP = 32..33 | 35..91 | 93..126; - utf8_cont = 0x80..0xbf; - utf8_2c = 0xc0..0xdf utf8_cont; - utf8_3c = 0xe0..0xef utf8_cont utf8_cont; - utf8_4c = 0xf0..0xf7 utf8_cont utf8_cont utf8_cont; - textUTF8 = qtextSMTP | utf8_2c | utf8_3c | utf8_4c; + qtextSMTP = 32..33 | 35..91 | 93..126 | UTF8_non_ascii; Atom = atext+; Dot_string = Atom ("." Atom)*; dot_atom_text = atext+ ("." atext+)*; |