about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- conf/options.inc 3
-rw-r--r-- rules/regexp/headers.lua 21
-rw-r--r-- src/client/rspamc.cxx 2
-rw-r--r-- src/fuzzy_storage.c 6
-rw-r--r-- src/libserver/cfg_file.h 1
-rw-r--r-- src/libserver/cfg_rcl.cxx 8
-rw-r--r-- src/libserver/cfg_utils.cxx 1
-rw-r--r-- src/libserver/protocol.c 120
-rw-r--r-- src/libserver/protocol_internal.h 1
-rw-r--r-- src/libserver/task.c 46
-rw-r--r-- src/libserver/task.h 3
-rw-r--r-- src/lua/lua_config.c 24
-rw-r--r-- src/plugins/lua/milter_headers.lua 11
-rw-r--r-- src/ragel/content_disposition.rl 2
-rw-r--r-- src/ragel/smtp_address.rl 1
-rw-r--r-- src/ragel/smtp_base.rl 25
-rw-r--r-- test/functional/lib/rspamd.robot 2
17 files changed, 88 insertions, 189 deletions
diff --git a/conf/options.inc b/conf/options.inc
index d5cf60d7b..fd9b662ab 100644
--- a/conf/options.inc
+++ b/conf/options.inc
@@ -70,3 +70,6 @@ task_timeout = 8s;
# Emit soft reject when timeout takes place
soft_reject_on_timeout = false;
+
+# Use UTF-8 mode for MIME operations (enable if your MTA announces SMTPUTF8 support)
+enable_mime_utf = false;
diff --git a/rules/regexp/headers.lua b/rules/regexp/headers.lua
index ef4d532bf..44fa964e3 100644
--- a/rules/regexp/headers.lua
+++ b/rules/regexp/headers.lua
@@ -53,9 +53,22 @@ reconf['TO_NEEDS_ENCODING'] = {
score = 1.0,
mime_only = true,
description = 'To header needs encoding',
- group = 'headers'
+ group = 'headers',
}
+if rspamd_config:is_mime_utf8() then
+ -- UTF-8 mime mode: switch off the *_NEEDS_ENCODING rules via an always-false condition, leaving the rule definitions themselves intact
+ reconf['FROM_NEEDS_ENCODING'].condition = function()
+ return false
+ end
+ reconf['TO_NEEDS_ENCODING'].condition = function()
+ return false
+ end
+ reconf['SUBJECT_NEEDS_ENCODING'].condition = function()
+ return false
+ end
+end
+
-- Detects that there is no space in From header (e.g. Some Name<some@host>)
reconf['R_NO_SPACE_IN_FROM'] = {
re = 'From=/\\S<[-\\w\\.]+\\@[-\\w\\.]+>/X',
@@ -713,12 +726,6 @@ reconf['HEADER_DATE_EMPTY_DELIMITER'] = {
}
-- Definitions of received headers regexp
-reconf['RCVD_ILLEGAL_CHARS'] = {
- re = 'Received=/[\\x80-\\xff]/X',
- score = 4.0,
- description = 'Received header has raw illegal character',
- group = 'headers'
-}
local MAIL_RU_Return_Path = 'Return-path=/^\\s*<.+\\@mail\\.ru>$/iX'
local MAIL_RU_X_Envelope_From = 'X-Envelope-From=/^\\s*<.+\\@mail\\.ru>$/iX'
diff --git a/src/client/rspamc.cxx b/src/client/rspamc.cxx
index 1c67e4167..31a4aaf24 100644
--- a/src/client/rspamc.cxx
+++ b/src/client/rspamc.cxx
@@ -31,6 +31,7 @@
#include <cstdint>
#include <cstdio>
#include <cmath>
+#include <locale>
#include "frozen/string.h"
#include "frozen/unordered_map.h"
@@ -2180,6 +2181,7 @@ int main(int argc, char **argv, char **env)
{
auto *kwattrs = g_queue_new();
+ std::locale::global(std::locale(""));
read_cmd_line(&argc, &argv);
tty = isatty(STDOUT_FILENO);
diff --git a/src/fuzzy_storage.c b/src/fuzzy_storage.c
index 3064d45a0..f21992a94 100644
--- a/src/fuzzy_storage.c
+++ b/src/fuzzy_storage.c
@@ -2475,6 +2475,12 @@ rspamd_fuzzy_stat_to_ucl(struct rspamd_fuzzy_storage_ctx *ctx, gboolean ip_stat)
rspamd_fuzzy_key_stat_iter(pk_iter, fuzzy_key, keys_obj, ip_stat);
});
+ if (ctx->dynamic_keys) {
+ kh_foreach(ctx->dynamic_keys, pk_iter, fuzzy_key, {
+ rspamd_fuzzy_key_stat_iter(pk_iter, fuzzy_key, keys_obj, ip_stat);
+ });
+ }
+
ucl_object_insert_key(obj, keys_obj, "keys", 0, false);
/* Now generic stats */
diff --git a/src/libserver/cfg_file.h b/src/libserver/cfg_file.h
index fa784f2a2..f59c6ff89 100644
--- a/src/libserver/cfg_file.h
+++ b/src/libserver/cfg_file.h
@@ -367,6 +367,7 @@ struct rspamd_config {
gboolean public_groups_only; /**< Output merely public groups everywhere */
enum rspamd_gtube_patterns_policy gtube_patterns_policy; /**< Enable test patterns */
gboolean enable_css_parser; /**< Enable css parsing in HTML */
+ gboolean enable_mime_utf; /**< Enable utf8 mime parsing */
gsize max_cores_size; /**< maximum size occupied by rspamd core files */
gsize max_cores_count; /**< maximum number of core files */
diff --git a/src/libserver/cfg_rcl.cxx b/src/libserver/cfg_rcl.cxx
index 270678491..79509e12e 100644
--- a/src/libserver/cfg_rcl.cxx
+++ b/src/libserver/cfg_rcl.cxx
@@ -1929,7 +1929,13 @@ rspamd_rcl_config_init(struct rspamd_config *cfg, GHashTable *skip_sections)
rspamd_rcl_parse_struct_boolean,
G_STRUCT_OFFSET(struct rspamd_config, enable_css_parser),
0,
- "Enable CSS parser (experimental)");
+ "Enable CSS parser");
+ rspamd_rcl_add_default_handler(sub,
+ "enable_mime_utf",
+ rspamd_rcl_parse_struct_boolean,
+ G_STRUCT_OFFSET(struct rspamd_config, enable_mime_utf),
+ 0,
+ "Enable UTF8 mode for mime");
rspamd_rcl_add_default_handler(sub,
"enable_experimental",
rspamd_rcl_parse_struct_boolean,
diff --git a/src/libserver/cfg_utils.cxx b/src/libserver/cfg_utils.cxx
index d8696e72d..38adf8390 100644
--- a/src/libserver/cfg_utils.cxx
+++ b/src/libserver/cfg_utils.cxx
@@ -341,6 +341,7 @@ rspamd_config_new(enum rspamd_config_init_flags flags)
cfg->heartbeat_interval = 10.0;
cfg->enable_css_parser = true;
+ cfg->enable_mime_utf = false;
cfg->script_modules = g_ptr_array_new();
REF_INIT_RETAIN(cfg, rspamd_config_free);
diff --git a/src/libserver/protocol.c b/src/libserver/protocol.c
index ee2192913..a86111ff2 100644
--- a/src/libserver/protocol.c
+++ b/src/libserver/protocol.c
@@ -720,12 +720,6 @@ rspamd_protocol_handle_headers(struct rspamd_task *task,
break;
case 'm':
case 'M':
- IF_HEADER(MLEN_HEADER)
- {
- msg_debug_protocol("read message length header, value: %T",
- hv_tok);
- task->protocol_flags |= RSPAMD_TASK_PROTOCOL_FLAG_HAS_CONTROL;
- }
IF_HEADER(MTA_TAG_HEADER)
{
char *mta_tag;
@@ -782,120 +776,6 @@ if (!has_ip) {
return TRUE;
}
-#define BOOL_TO_FLAG(val, flags, flag) \
- do { \
- if ((val)) (flags) |= (flag); \
- else \
- (flags) &= ~(flag); \
- } while (0)
-
-gboolean
-rspamd_protocol_parse_task_flags(rspamd_mempool_t *pool,
- const ucl_object_t *obj,
- gpointer ud,
- struct rspamd_rcl_section *section,
- GError **err)
-{
- struct rspamd_rcl_struct_parser *pd = ud;
- int *target;
- const char *key;
- gboolean value;
-
- target = (int *) (((char *) pd->user_struct) + pd->offset);
- key = ucl_object_key(obj);
- value = ucl_object_toboolean(obj);
-
- if (key != NULL) {
- if (g_ascii_strcasecmp(key, "pass_all") == 0) {
- BOOL_TO_FLAG(value, *target, RSPAMD_TASK_FLAG_PASS_ALL);
- }
- else if (g_ascii_strcasecmp(key, "no_log") == 0) {
- BOOL_TO_FLAG(value, *target, RSPAMD_TASK_FLAG_NO_LOG);
- }
- }
-
- return TRUE;
-}
-
-static struct rspamd_rcl_sections_map *control_parser = NULL;
-
-RSPAMD_CONSTRUCTOR(rspamd_protocol_control_parser_ctor)
-{
-
- struct rspamd_rcl_section *sub = rspamd_rcl_add_section(&control_parser, NULL,
- "*",
- NULL,
- NULL,
- UCL_OBJECT,
- FALSE,
- TRUE);
- /* Default handlers */
- rspamd_rcl_add_default_handler(sub,
- "ip",
- rspamd_rcl_parse_struct_addr,
- G_STRUCT_OFFSET(struct rspamd_task, from_addr),
- 0,
- NULL);
- rspamd_rcl_add_default_handler(sub,
- "from",
- rspamd_rcl_parse_struct_mime_addr,
- G_STRUCT_OFFSET(struct rspamd_task, from_envelope),
- 0,
- NULL);
- rspamd_rcl_add_default_handler(sub,
- "rcpt",
- rspamd_rcl_parse_struct_mime_addr,
- G_STRUCT_OFFSET(struct rspamd_task, rcpt_envelope),
- 0,
- NULL);
- rspamd_rcl_add_default_handler(sub,
- "helo",
- rspamd_rcl_parse_struct_string,
- G_STRUCT_OFFSET(struct rspamd_task, helo),
- 0,
- NULL);
- rspamd_rcl_add_default_handler(sub,
- "user",
- rspamd_rcl_parse_struct_string,
- G_STRUCT_OFFSET(struct rspamd_task, auth_user),
- 0,
- NULL);
- rspamd_rcl_add_default_handler(sub,
- "pass_all",
- rspamd_protocol_parse_task_flags,
- G_STRUCT_OFFSET(struct rspamd_task, flags),
- 0,
- NULL);
- rspamd_rcl_add_default_handler(sub,
- "json",
- rspamd_protocol_parse_task_flags,
- G_STRUCT_OFFSET(struct rspamd_task, flags),
- 0,
- NULL);
-}
-
-RSPAMD_DESTRUCTOR(rspamd_protocol_control_parser_dtor)
-{
- rspamd_rcl_sections_free(control_parser);
-}
-
-gboolean
-rspamd_protocol_handle_control(struct rspamd_task *task,
- const ucl_object_t *control)
-{
- GError *err = NULL;
-
- if (!rspamd_rcl_parse(control_parser, task->cfg, task, task->task_pool,
- control, &err)) {
- msg_warn_protocol("cannot parse control block: %e", err);
- g_error_free(err);
-
- return FALSE;
- }
-
- return TRUE;
-}
-
gboolean
rspamd_protocol_handle_request(struct rspamd_task *task,
struct rspamd_http_message *msg)
diff --git a/src/libserver/protocol_internal.h b/src/libserver/protocol_internal.h
index e55e54851..11f21430e 100644
--- a/src/libserver/protocol_internal.h
+++ b/src/libserver/protocol_internal.h
@@ -79,7 +79,6 @@ extern "C" {
#define DELIVER_TO_HEADER "Deliver-To"
#define NO_LOG_HEADER "Log"
#define LOG_TAG_HEADER "Log-Tag"
-#define MLEN_HEADER "Message-Length"
#define USER_AGENT_HEADER "User-Agent"
#define MTA_TAG_HEADER "MTA-Tag"
#define PROFILE_HEADER "Profile"
diff --git a/src/libserver/task.c b/src/libserver/task.c
index 637f401a9..833046470 100644
--- a/src/libserver/task.c
+++ b/src/libserver/task.c
@@ -323,9 +323,6 @@ gboolean
rspamd_task_load_message(struct rspamd_task *task,
struct rspamd_http_message *msg, const char *start, gsize len)
{
- unsigned int control_len, r;
- struct ucl_parser *parser;
- ucl_object_t *control_obj;
char filepath[PATH_MAX], *fp;
int fd, flen;
gulong offset = 0, shmem_size = 0;
@@ -349,8 +346,8 @@ rspamd_task_load_message(struct rspamd_task *task,
if (tok) {
/* Shared memory part */
- r = rspamd_strlcpy(filepath, tok->begin,
- MIN(sizeof(filepath), tok->len + 1));
+ size_t r = rspamd_strlcpy(filepath, tok->begin,
+ MIN(sizeof(filepath), tok->len + 1));
rspamd_url_decode(filepath, filepath, r + 1);
flen = strlen(filepath);
@@ -448,8 +445,8 @@ rspamd_task_load_message(struct rspamd_task *task,
if (tok) {
debug_task("want to scan file %T", tok);
- r = rspamd_strlcpy(filepath, tok->begin,
- MIN(sizeof(filepath), tok->len + 1));
+ size_t r = rspamd_strlcpy(filepath, tok->begin,
+ MIN(sizeof(filepath), tok->len + 1));
rspamd_url_decode(filepath, filepath, r + 1);
flen = strlen(filepath);
@@ -626,41 +623,6 @@ rspamd_task_load_message(struct rspamd_task *task,
task->flags |= RSPAMD_TASK_FLAG_EMPTY;
}
- if (task->protocol_flags & RSPAMD_TASK_PROTOCOL_FLAG_HAS_CONTROL) {
- rspamd_ftok_t *hv = rspamd_task_get_request_header(task, MLEN_HEADER);
- gulong message_len = 0;
-
- if (!hv || !rspamd_strtoul(hv->begin, hv->len, &message_len) ||
- task->msg.len < message_len) {
- msg_warn_task("message has invalid message length: %ul and total len: %ul",
- message_len, task->msg.len);
- g_set_error(&task->err, rspamd_task_quark(), RSPAMD_PROTOCOL_ERROR,
- "Invalid length");
- return FALSE;
- }
-
- control_len = task->msg.len - message_len;
-
- if (control_len > 0) {
- parser = ucl_parser_new(UCL_PARSER_KEY_LOWERCASE);
-
- if (!ucl_parser_add_chunk(parser, task->msg.begin, control_len)) {
- msg_warn_task("processing of control chunk failed: %s",
- ucl_parser_get_error(parser));
- ucl_parser_free(parser);
- }
- else {
- control_obj = ucl_parser_get_object(parser);
- ucl_parser_free(parser);
- rspamd_protocol_handle_control(task, control_obj);
- ucl_object_unref(control_obj);
- }
-
- task->msg.begin += control_len;
- task->msg.len -= control_len;
- }
- }
-
return TRUE;
}
diff --git a/src/libserver/task.h b/src/libserver/task.h
index 7e6341a84..6be350098 100644
--- a/src/libserver/task.h
+++ b/src/libserver/task.h
@@ -115,9 +115,6 @@ enum rspamd_task_stage {
#define RSPAMD_TASK_FLAG_MESSAGE_REWRITE (1u << 24u)
#define RSPAMD_TASK_FLAG_MAX_SHIFT (24u)
-
-/* Request has a JSON control block */
-#define RSPAMD_TASK_PROTOCOL_FLAG_HAS_CONTROL (1u << 0u)
/* Request has been done by a local client */
#define RSPAMD_TASK_PROTOCOL_FLAG_LOCAL_CLIENT (1u << 1u)
/* Request has been sent via milter */
diff --git a/src/lua/lua_config.c b/src/lua/lua_config.c
index be4dd7081..0b4d208b4 100644
--- a/src/lua/lua_config.c
+++ b/src/lua/lua_config.c
@@ -795,6 +795,13 @@ LUA_FUNCTION_DEF(config, get_cpu_flags);
LUA_FUNCTION_DEF(config, has_torch);
/***
+ * @method rspamd_config:is_mime_utf8()
+ * Returns true if Rspamd is configured to use UTF-8 mode for mime processing (the `enable_mime_utf` option)
+ * @return {boolean} true if UTF-8 mime mode is enabled
+ */
+LUA_FUNCTION_DEF(config, is_mime_utf8);
+
+/***
* @method rspamd_config:experimental_enabled()
* Returns true if experimental plugins are enabled
* @return {boolean} true if experimental plugins are enabled
@@ -921,6 +928,7 @@ static const struct luaL_reg configlib_m[] = {
LUA_INTERFACE_DEF(config, set_peak_cb),
LUA_INTERFACE_DEF(config, get_cpu_flags),
LUA_INTERFACE_DEF(config, has_torch),
+ LUA_INTERFACE_DEF(config, is_mime_utf8),
LUA_INTERFACE_DEF(config, experimental_enabled),
LUA_INTERFACE_DEF(config, load_ucl),
LUA_INTERFACE_DEF(config, parse_rcl),
@@ -4229,6 +4237,22 @@ lua_config_has_torch(lua_State *L)
}
static int
+lua_config_is_mime_utf8(lua_State *L)
+{
+ LUA_TRACE_POINT;
+ struct rspamd_config *cfg = lua_check_config(L, 1);
+
+ if (cfg != NULL) {
+ lua_pushboolean(L, cfg->enable_mime_utf);
+ }
+ else {
+ return luaL_error(L, "invalid arguments");
+ }
+
+ return 1;
+}
+
+static int
lua_config_experimental_enabled(lua_State *L)
{
LUA_TRACE_POINT;
diff --git a/src/plugins/lua/milter_headers.lua b/src/plugins/lua/milter_headers.lua
index b53a45457..58a227fec 100644
--- a/src/plugins/lua/milter_headers.lua
+++ b/src/plugins/lua/milter_headers.lua
@@ -22,7 +22,7 @@ end
-- A plugin that provides common header manipulations
local logger = require "rspamd_logger"
-local util = require "rspamd_util"
+local rspamd_util = require "rspamd_util"
local N = 'milter_headers'
local lua_util = require "lua_util"
local lua_maps = require "lua_maps"
@@ -30,7 +30,7 @@ local lua_mime = require "lua_mime"
local ts = require("tableshape").types
local E = {}
-local HOSTNAME = util.get_hostname()
+local HOSTNAME = rspamd_util.get_hostname()
local settings = {
remove_upstream_spam_flag = true;
@@ -213,6 +213,13 @@ local function milter_headers(task)
if not add[hname] then
add[hname] = {}
end
+ if rspamd_config:is_mime_utf8() then
+ if not rspamd_util.is_valid_utf8(value) then
+ value = rspamd_util.mime_header_encode(value)
+ end
+ else
+ value = rspamd_util.mime_header_encode(value)
+ end
table.insert(add[hname], {
order = (order or settings.default_headers_order or -1),
value = lua_util.fold_header(task, hname, value, stop_chars)
diff --git a/src/ragel/content_disposition.rl b/src/ragel/content_disposition.rl
index 862015ea1..93d3c9d3d 100644
--- a/src/ragel/content_disposition.rl
+++ b/src/ragel/content_disposition.rl
@@ -7,7 +7,7 @@
balanced_ccontent := ccontent* ')' @{ fret; };
comment = "(" (FWS? ccontent)* FWS? ")";
CFWS = ((FWS? comment)+ FWS?) | FWS;
- qcontent = qtextSMTP | quoted_pairSMTP | textUTF8;
+ qcontent = qtextSMTP | quoted_pairSMTP;
quoted_string = CFWS?
(DQUOTE
(((FWS? qcontent)* FWS?) >Quoted_Str_Start %Quoted_Str_End)
diff --git a/src/ragel/smtp_address.rl b/src/ragel/smtp_address.rl
index 0caf1a65e..eb0fc2d9d 100644
--- a/src/ragel/smtp_address.rl
+++ b/src/ragel/smtp_address.rl
@@ -24,6 +24,7 @@
# SMTP address spec
# Obtained from: https://tools.ietf.org/html/rfc5321#section-4.1.2
+ # Additions from RFC 6532 (internationalized email headers, used with SMTPUTF8): https://tools.ietf.org/html/rfc6532#section-3.2
QcontentSMTP = qtextSMTP | quoted_pairSMTP %User_has_backslash;
Quoted_string = ( DQUOTE QcontentSMTP* >User_start %User_end DQUOTE ) %Quoted_addr;
diff --git a/src/ragel/smtp_base.rl b/src/ragel/smtp_base.rl
index cb4f066bc..eefc430d5 100644
--- a/src/ragel/smtp_base.rl
+++ b/src/ragel/smtp_base.rl
@@ -9,26 +9,27 @@
CRLF = "\r\n" | ("\r" [^\n]) | ([^\r] "\n");
DQUOTE = '"';
+ utf8_cont = 0x80..0xbf;
+ utf8_2c = 0xc0..0xdf utf8_cont;
+ utf8_3c = 0xe0..0xef utf8_cont utf8_cont;
+ utf8_4c = 0xf0..0xf7 utf8_cont utf8_cont utf8_cont;
+ UTF8_non_ascii = utf8_2c | utf8_3c | utf8_4c;
+
# Printable US-ASCII characters not including specials
atext = alpha | digit | "!" | "#" | "$" | "%" | "&" |
"'" | "*" | "+" | "_" | "/" | "=" | "?" | "^" |
- "-" | "`" | "{" | "|" | "}" | "~";
+ "-" | "`" | "{" | "|" | "}" | "~" | UTF8_non_ascii;
# Printable US-ASCII characters not including "[", "]", or "\"
- dtext = 33..90 | 94..126;
+ dtext = 33..90 | 94..126 | UTF8_non_ascii;
# Printable US-ASCII characters not including "(", ")", or "\"
- ctext = 33..39 | 42..91 | 93..126;
+ ctext = 33..39 | 42..91 | 93..126 | UTF8_non_ascii;
- dcontent = 33..90 | 94..126;
- Let_dig = alpha | digit;
- Ldh_str = ( alpha | digit | "_" | "-" )* Let_dig;
+ dcontent = 33..90 | 94..126 | UTF8_non_ascii;
+ Let_dig = alpha | digit | UTF8_non_ascii;
+ Ldh_str = ( Let_dig | "_" | "-" )* Let_dig;
quoted_pairSMTP = "\\" 32..126;
- qtextSMTP = 32..33 | 35..91 | 93..126;
- utf8_cont = 0x80..0xbf;
- utf8_2c = 0xc0..0xdf utf8_cont;
- utf8_3c = 0xe0..0xef utf8_cont utf8_cont;
- utf8_4c = 0xf0..0xf7 utf8_cont utf8_cont utf8_cont;
- textUTF8 = qtextSMTP | utf8_2c | utf8_3c | utf8_4c;
+ qtextSMTP = 32..33 | 35..91 | 93..126 | UTF8_non_ascii;
Atom = atext+;
Dot_string = Atom ("." Atom)*;
dot_atom_text = atext+ ("." atext+)*;
diff --git a/test/functional/lib/rspamd.robot b/test/functional/lib/rspamd.robot
index de4e5285f..68bcb66fb 100644
--- a/test/functional/lib/rspamd.robot
+++ b/test/functional/lib/rspamd.robot
@@ -106,6 +106,8 @@ Expect Added Header
... msg=add_headers block was not present in protocol response
Dictionary Should Contain Key ${SCAN_RESULT}[milter][add_headers] ${header_name}
... msg=${header_name} was not added
+ Dictionary Should Contain Key ${SCAN_RESULT}[milter][add_headers][${header_name}] value
+ ... msg=no value field in ${header_name} index: ${SCAN_RESULT}[milter][add_headers][${header_name}]
Should Be Equal ${SCAN_RESULT}[milter][add_headers][${header_name}][value] ${header_value}
Should Be Equal as Numbers ${SCAN_RESULT}[milter][add_headers][${header_name}][order] ${pos}