diff options
author | Vsevolod Stakhov <vsevolod@rspamd.com> | 2024-11-18 16:31:15 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@rspamd.com> | 2024-11-18 16:31:15 +0000 |
commit | f286abaac361de836a276172ce9e46e4c058b75d (patch) | |
tree | 235a4b6684111d6db31d527f6e7918f7f9020cf1 /src | |
parent | a282883e6f9d70a787970e92dc3d7644661cd8a3 (diff) | |
download | rspamd-f286abaac361de836a276172ce9e46e4c058b75d.tar.gz rspamd-f286abaac361de836a276172ce9e46e4c058b75d.zip |
[Fix] More fixes to rfc2047 encoding
Diffstat (limited to 'src')
-rw-r--r-- | src/libmime/mime_headers.c | 39 | ||||
-rw-r--r-- | src/libmime/mime_headers.h | 5 | ||||
-rw-r--r-- | src/libserver/protocol.c | 2 | ||||
-rw-r--r-- | src/lua/lua_util.c | 13 |
4 files changed, 43 insertions, 16 deletions
diff --git a/src/libmime/mime_headers.c b/src/libmime/mime_headers.c index e4d2ca458..a511f5e36 100644 --- a/src/libmime/mime_headers.c +++ b/src/libmime/mime_headers.c @@ -816,7 +816,7 @@ rspamd_mime_header_decode(rspamd_mempool_t *pool, const char *in, } char * -rspamd_mime_header_encode(const char *in, gsize len) +rspamd_mime_header_encode(const char *in, gsize len, bool is_structured) { static const size_t max_token_size = 76 - 12; /* 12 is the length of "=?UTF-8?Q??="; */ GString *outbuf = g_string_sized_new(len); @@ -831,16 +831,17 @@ rspamd_mime_header_encode(const char *in, gsize len) p++; } else { - size_t remain = end - p; - gsize next_offset = rspamd_memcspn(p, " \r\n()", MIN(max_token_size, remain)); - const char *q = p + next_offset; + const char *q = end; size_t piece_len = q - p, encoded_len = 0; /* Check if the piece contains non-ASCII characters */ - gboolean has_non_ascii = FALSE; + gboolean need_encoding = FALSE; + size_t unencoded_prefix = 0, unencoded_suffix = 0; for (size_t i = 0; i < piece_len; i++) { - if ((unsigned char) p[i] >= 128) { - has_non_ascii = TRUE; + unsigned char c = p[i]; + if (c >= 128 || (is_structured && !g_ascii_isalnum(c))) { + need_encoding = TRUE; + unencoded_suffix = 0; encoded_len += 3; if (encoded_len > max_token_size) { @@ -853,21 +854,41 @@ rspamd_mime_header_encode(const char *in, gsize len) else { encoded_len++; + if (!need_encoding) { + unencoded_prefix++; + } + else { + unencoded_suffix++; + } + if (encoded_len > max_token_size) { piece_len = i; q = p + piece_len; /* No more space */ break; } + + if (need_encoding && (c == '(' || c == ')')) { + /* If we need to encode, we must stop on comments characters */ + piece_len = i + 1; + q = p + piece_len; + /* No more space */ + break; + } } } - if (has_non_ascii) { + if (need_encoding) { + g_string_append_len(outbuf, p, unencoded_prefix); + p += unencoded_prefix; g_string_append(outbuf, "=?UTF-8?Q?"); /* Do encode */ - encoded_len = rspamd_encode_qp2047_buf(p, piece_len, encode_buf, max_token_size + 3); + encoded_len = rspamd_encode_qp2047_buf(p, piece_len - unencoded_prefix - unencoded_suffix, + encode_buf, max_token_size + 3); + p += piece_len - unencoded_prefix - unencoded_suffix; g_string_append_len(outbuf, encode_buf, encoded_len); g_string_append(outbuf, "?="); + g_string_append_len(outbuf, p, unencoded_suffix); } else { /* No transformation */ diff --git a/src/libmime/mime_headers.h b/src/libmime/mime_headers.h index 9f89daece..290f94799 100644 --- a/src/libmime/mime_headers.h +++ b/src/libmime/mime_headers.h @@ -1,5 +1,5 @@ /* - * Copyright 2023 Vsevolod Stakhov + * Copyright 2024 Vsevolod Stakhov * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -100,9 +100,10 @@ char *rspamd_mime_header_decode(rspamd_mempool_t *pool, const char *in, * Encode mime header if needed * @param in * @param len + * @param is_structured if true, then we encode as structured header (e.g. encode all non alpha-numeric characters) * @return newly allocated encoded header */ -char *rspamd_mime_header_encode(const char *in, gsize len); +char *rspamd_mime_header_encode(const char *in, gsize len, bool is_structured); /** * Generate new unique message id diff --git a/src/libserver/protocol.c b/src/libserver/protocol.c index 2dc641dfe..1196d2d14 100644 --- a/src/libserver/protocol.c +++ b/src/libserver/protocol.c @@ -1046,7 +1046,7 @@ rspamd_protocol_rewrite_subject(struct rspamd_task *task) g_string_append_len(subj_buf, c, p - c); } - res = rspamd_mime_header_encode(subj_buf->str, subj_buf->len); + res = rspamd_mime_header_encode(subj_buf->str, subj_buf->len, false); rspamd_mempool_add_destructor(task->task_pool, (rspamd_mempool_destruct_t) g_free, diff --git a/src/lua/lua_util.c b/src/lua/lua_util.c index 251d1e1e7..e92e4977a 100644 --- a/src/lua/lua_util.c +++ b/src/lua/lua_util.c @@ -644,9 +644,10 @@ LUA_FUNCTION_DEF(util, get_hostname); LUA_FUNCTION_DEF(util, parse_content_type); /*** - * @function util.mime_header_encode(hdr) + * @function util.mime_header_encode(hdr[, is_structured]) * Encodes header if needed * @param {string} hdr input header + * @param {boolean} is_structured if true, then we encode as structured header (e.g. encode all non alpha-numeric characters) * @return encoded header */ LUA_FUNCTION_DEF(util, mime_header_encode); @@ -2406,15 +2407,19 @@ static int lua_util_mime_header_encode(lua_State *L) { LUA_TRACE_POINT; - gsize len; - const char *hdr = luaL_checklstring(L, 1, &len); + struct rspamd_lua_text *hdr = lua_check_text_or_string(L, 1); char *encoded; + bool is_structured = false; if (!hdr) { return luaL_error(L, "invalid arguments"); } - encoded = rspamd_mime_header_encode(hdr, len); + if (lua_isboolean(L, 2)) { + is_structured = lua_toboolean(L, 2); + } + + encoded = rspamd_mime_header_encode(hdr->start, hdr->len, is_structured); lua_pushstring(L, encoded); g_free(encoded); |