aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@rspamd.com>2024-09-27 16:53:57 +0600
committerGitHub <noreply@github.com>2024-09-27 16:53:57 +0600
commit705f647b879a8c29219faa549862864fee8375c6 (patch)
tree4221c6d163d6bce5bc9066667c50fcfe9a88b363
parent0b397e439ffdaa58ca709db91ac51d30c80cbd50 (diff)
parentf8700e56ec5659f331f0aca0b28eec43a4e7cb33 (diff)
downloadrspamd-705f647b879a8c29219faa549862864fee8375c6.tar.gz
rspamd-705f647b879a8c29219faa549862864fee8375c6.zip
Merge pull request #5056 from rspamd/vstakhov-utf8-mime
[Feature] MIME UTF8 support
-rw-r--r--conf/options.inc3
-rw-r--r--rules/regexp/headers.lua21
-rw-r--r--src/libserver/cfg_file.h1
-rw-r--r--src/libserver/cfg_rcl.cxx8
-rw-r--r--src/libserver/cfg_utils.cxx1
-rw-r--r--src/lua/lua_config.c24
-rw-r--r--src/plugins/lua/milter_headers.lua11
-rw-r--r--src/ragel/content_disposition.rl2
-rw-r--r--src/ragel/smtp_address.rl1
-rw-r--r--src/ragel/smtp_base.rl25
10 files changed, 74 insertions, 23 deletions
diff --git a/conf/options.inc b/conf/options.inc
index d5cf60d7b..fd9b662ab 100644
--- a/conf/options.inc
+++ b/conf/options.inc
@@ -70,3 +70,6 @@ task_timeout = 8s;
# Emit soft reject when timeout takes place
soft_reject_on_timeout = false;
+
+# Use UTF-8 mode for MIME operations (enable this if your MTA announces SMTPUTF8 support)
+enable_mime_utf = false;
diff --git a/rules/regexp/headers.lua b/rules/regexp/headers.lua
index ef4d532bf..daa1e378d 100644
--- a/rules/regexp/headers.lua
+++ b/rules/regexp/headers.lua
@@ -53,9 +53,22 @@ reconf['TO_NEEDS_ENCODING'] = {
score = 1.0,
mime_only = true,
description = 'To header needs encoding',
- group = 'headers'
+ group = 'headers',
}
+if rspamd_config:is_mime_utf() then
+ -- MIME UTF8 mode is enabled: keep the *_NEEDS_ENCODING rule definitions intact, but give each one a condition that always returns false
+ reconf['FROM_NEEDS_ENCODING'].condition = function()
+ return false -- NOTE(review): presumably a false condition keeps the rule from firing; confirm against the regexp loader
+ end
+ reconf['TO_NEEDS_ENCODING'].condition = function()
+ return false
+ end
+ reconf['SUBJECT_NEEDS_ENCODING'].condition = function()
+ return false
+ end
+end
+
-- Detects that there is no space in From header (e.g. Some Name<some@host>)
reconf['R_NO_SPACE_IN_FROM'] = {
re = 'From=/\\S<[-\\w\\.]+\\@[-\\w\\.]+>/X',
@@ -713,12 +726,6 @@ reconf['HEADER_DATE_EMPTY_DELIMITER'] = {
}
-- Definitions of received headers regexp
-reconf['RCVD_ILLEGAL_CHARS'] = {
- re = 'Received=/[\\x80-\\xff]/X',
- score = 4.0,
- description = 'Received header has raw illegal character',
- group = 'headers'
-}
local MAIL_RU_Return_Path = 'Return-path=/^\\s*<.+\\@mail\\.ru>$/iX'
local MAIL_RU_X_Envelope_From = 'X-Envelope-From=/^\\s*<.+\\@mail\\.ru>$/iX'
diff --git a/src/libserver/cfg_file.h b/src/libserver/cfg_file.h
index fa784f2a2..f59c6ff89 100644
--- a/src/libserver/cfg_file.h
+++ b/src/libserver/cfg_file.h
@@ -367,6 +367,7 @@ struct rspamd_config {
gboolean public_groups_only; /**< Output merely public groups everywhere */
enum rspamd_gtube_patterns_policy gtube_patterns_policy; /**< Enable test patterns */
gboolean enable_css_parser; /**< Enable css parsing in HTML */
+ gboolean enable_mime_utf; /**< Enable utf8 mime parsing */
gsize max_cores_size; /**< maximum size occupied by rspamd core files */
gsize max_cores_count; /**< maximum number of core files */
diff --git a/src/libserver/cfg_rcl.cxx b/src/libserver/cfg_rcl.cxx
index 270678491..79509e12e 100644
--- a/src/libserver/cfg_rcl.cxx
+++ b/src/libserver/cfg_rcl.cxx
@@ -1929,7 +1929,13 @@ rspamd_rcl_config_init(struct rspamd_config *cfg, GHashTable *skip_sections)
rspamd_rcl_parse_struct_boolean,
G_STRUCT_OFFSET(struct rspamd_config, enable_css_parser),
0,
- "Enable CSS parser (experimental)");
+ "Enable CSS parser");
+ rspamd_rcl_add_default_handler(sub,
+ "enable_mime_utf",
+ rspamd_rcl_parse_struct_boolean,
+ G_STRUCT_OFFSET(struct rspamd_config, enable_mime_utf),
+ 0,
+ "Enable UTF8 mode for mime");
rspamd_rcl_add_default_handler(sub,
"enable_experimental",
rspamd_rcl_parse_struct_boolean,
diff --git a/src/libserver/cfg_utils.cxx b/src/libserver/cfg_utils.cxx
index d8696e72d..38adf8390 100644
--- a/src/libserver/cfg_utils.cxx
+++ b/src/libserver/cfg_utils.cxx
@@ -341,6 +341,7 @@ rspamd_config_new(enum rspamd_config_init_flags flags)
cfg->heartbeat_interval = 10.0;
cfg->enable_css_parser = true;
+ cfg->enable_mime_utf = false;
cfg->script_modules = g_ptr_array_new();
REF_INIT_RETAIN(cfg, rspamd_config_free);
diff --git a/src/lua/lua_config.c b/src/lua/lua_config.c
index be4dd7081..e3f8b2e57 100644
--- a/src/lua/lua_config.c
+++ b/src/lua/lua_config.c
@@ -795,6 +795,13 @@ LUA_FUNCTION_DEF(config, get_cpu_flags);
LUA_FUNCTION_DEF(config, has_torch);
/***
+ * @method rspamd_config:is_mime_utf()
+ * Returns true if Rspamd is configured to use UTF-8 for MIME processing
+ * @return {boolean} true if mime utf is enabled
+ */
+LUA_FUNCTION_DEF(config, is_mime_utf);
+
+/***
* @method rspamd_config:experimental_enabled()
* Returns true if experimental plugins are enabled
* @return {boolean} true if experimental plugins are enabled
@@ -921,6 +928,7 @@ static const struct luaL_reg configlib_m[] = {
LUA_INTERFACE_DEF(config, set_peak_cb),
LUA_INTERFACE_DEF(config, get_cpu_flags),
LUA_INTERFACE_DEF(config, has_torch),
+ LUA_INTERFACE_DEF(config, is_mime_utf),
LUA_INTERFACE_DEF(config, experimental_enabled),
LUA_INTERFACE_DEF(config, load_ucl),
LUA_INTERFACE_DEF(config, parse_rcl),
@@ -4229,6 +4237,22 @@ lua_config_has_torch(lua_State *L)
}
static int
+lua_config_is_mime_utf(lua_State *L) /* Lua binding for rspamd_config:is_mime_utf() */
+{
+ LUA_TRACE_POINT;
+ struct rspamd_config *cfg = lua_check_config(L, 1); /* config object is taken from argument 1 */
+
+ if (cfg != NULL) {
+ lua_pushboolean(L, cfg->enable_mime_utf); /* expose the enable_mime_utf option as a Lua boolean */
+ }
+ else {
+ return luaL_error(L, "invalid arguments"); /* argument 1 did not yield a config object */
+ }
+
+ return 1; /* one Lua return value: the boolean pushed above */
+}
+
+static int
lua_config_experimental_enabled(lua_State *L)
{
LUA_TRACE_POINT;
diff --git a/src/plugins/lua/milter_headers.lua b/src/plugins/lua/milter_headers.lua
index b53a45457..58a227fec 100644
--- a/src/plugins/lua/milter_headers.lua
+++ b/src/plugins/lua/milter_headers.lua
@@ -22,7 +22,7 @@ end
-- A plugin that provides common header manipulations
local logger = require "rspamd_logger"
-local util = require "rspamd_util"
+local rspamd_util = require "rspamd_util"
local N = 'milter_headers'
local lua_util = require "lua_util"
local lua_maps = require "lua_maps"
@@ -30,7 +30,7 @@ local lua_mime = require "lua_mime"
local ts = require("tableshape").types
local E = {}
-local HOSTNAME = util.get_hostname()
+local HOSTNAME = rspamd_util.get_hostname()
local settings = {
remove_upstream_spam_flag = true;
@@ -213,6 +213,13 @@ local function milter_headers(task)
if not add[hname] then
add[hname] = {}
end
+ if rspamd_config:is_mime_utf() then -- config method is registered as `is_mime_utf`
+ if not rspamd_util.is_valid_utf8(value) then
+ value = rspamd_util.mime_header_encode(value) -- UTF8 mode: encode only values that are not valid UTF-8
+ end
+ else
+ value = rspamd_util.mime_header_encode(value) -- non-UTF8 mode: always pass the value through the MIME header encoder
+ end
table.insert(add[hname], {
order = (order or settings.default_headers_order or -1),
value = lua_util.fold_header(task, hname, value, stop_chars)
diff --git a/src/ragel/content_disposition.rl b/src/ragel/content_disposition.rl
index 862015ea1..93d3c9d3d 100644
--- a/src/ragel/content_disposition.rl
+++ b/src/ragel/content_disposition.rl
@@ -7,7 +7,7 @@
balanced_ccontent := ccontent* ')' @{ fret; };
comment = "(" (FWS? ccontent)* FWS? ")";
CFWS = ((FWS? comment)+ FWS?) | FWS;
- qcontent = qtextSMTP | quoted_pairSMTP | textUTF8;
+ qcontent = qtextSMTP | quoted_pairSMTP;
quoted_string = CFWS?
(DQUOTE
(((FWS? qcontent)* FWS?) >Quoted_Str_Start %Quoted_Str_End)
diff --git a/src/ragel/smtp_address.rl b/src/ragel/smtp_address.rl
index 0caf1a65e..eb0fc2d9d 100644
--- a/src/ragel/smtp_address.rl
+++ b/src/ragel/smtp_address.rl
@@ -24,6 +24,7 @@
# SMTP address spec
# Obtained from: https://tools.ietf.org/html/rfc5321#section-4.1.2
+ # Additions from rfc6532 (smtputf8): https://tools.ietf.org/html/rfc6532#section-3.2
QcontentSMTP = qtextSMTP | quoted_pairSMTP %User_has_backslash;
Quoted_string = ( DQUOTE QcontentSMTP* >User_start %User_end DQUOTE ) %Quoted_addr;
diff --git a/src/ragel/smtp_base.rl b/src/ragel/smtp_base.rl
index cb4f066bc..eefc430d5 100644
--- a/src/ragel/smtp_base.rl
+++ b/src/ragel/smtp_base.rl
@@ -9,26 +9,27 @@
CRLF = "\r\n" | ("\r" [^\n]) | ([^\r] "\n");
DQUOTE = '"';
+ utf8_cont = 0x80..0xbf; # continuation byte
+ utf8_2c = 0xc2..0xdf utf8_cont; # 0xc0 and 0xc1 can only start overlong forms, so they are excluded
+ utf8_3c = ((0xe0 0xa0..0xbf) | (0xe1..0xec utf8_cont) | (0xed 0x80..0x9f) | (0xee..0xef utf8_cont)) utf8_cont; # no overlong forms, no UTF-16 surrogates
+ utf8_4c = ((0xf0 0x90..0xbf) | (0xf1..0xf3 utf8_cont) | (0xf4 0x80..0x8f)) utf8_cont utf8_cont; # U+10000..U+10FFFF only
+ UTF8_non_ascii = utf8_2c | utf8_3c | utf8_4c; # UTF8-2 / UTF8-3 / UTF8-4 as in RFC 3629, referenced by RFC 6532
+
# Printable US-ASCII characters not including specials, plus UTF-8 non-ASCII sequences
atext = alpha | digit | "!" | "#" | "$" | "%" | "&" |
"'" | "*" | "+" | "_" | "/" | "=" | "?" | "^" |
- "-" | "`" | "{" | "|" | "}" | "~";
+ "-" | "`" | "{" | "|" | "}" | "~" | UTF8_non_ascii;
# Printable US-ASCII characters not including "[", "]", or "\", plus UTF-8 non-ASCII sequences
- dtext = 33..90 | 94..126;
+ dtext = 33..90 | 94..126 | UTF8_non_ascii;
# Printable US-ASCII characters not including "(", ")", or "\", plus UTF-8 non-ASCII sequences
- ctext = 33..39 | 42..91 | 93..126;
+ ctext = 33..39 | 42..91 | 93..126 | UTF8_non_ascii;
- dcontent = 33..90 | 94..126;
- Let_dig = alpha | digit;
- Ldh_str = ( alpha | digit | "_" | "-" )* Let_dig;
+ dcontent = 33..90 | 94..126 | UTF8_non_ascii;
+ Let_dig = alpha | digit | UTF8_non_ascii;
+ Ldh_str = ( Let_dig | "_" | "-" )* Let_dig;
quoted_pairSMTP = "\\" 32..126;
- qtextSMTP = 32..33 | 35..91 | 93..126;
- utf8_cont = 0x80..0xbf;
- utf8_2c = 0xc0..0xdf utf8_cont;
- utf8_3c = 0xe0..0xef utf8_cont utf8_cont;
- utf8_4c = 0xf0..0xf7 utf8_cont utf8_cont utf8_cont;
- textUTF8 = qtextSMTP | utf8_2c | utf8_3c | utf8_4c;
+ qtextSMTP = 32..33 | 35..91 | 93..126 | UTF8_non_ascii;
Atom = atext+;
Dot_string = Atom ("." Atom)*;
dot_atom_text = atext+ ("." atext+)*;