about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@rspamd.com> 2024-11-18 16:31:15 +0000
committer Vsevolod Stakhov <vsevolod@rspamd.com> 2024-11-18 16:31:15 +0000
commit f286abaac361de836a276172ce9e46e4c058b75d (patch)
tree 235a4b6684111d6db31d527f6e7918f7f9020cf1
parent a282883e6f9d70a787970e92dc3d7644661cd8a3 (diff)
download rspamd-f286abaac361de836a276172ce9e46e4c058b75d.tar.gz
rspamd-f286abaac361de836a276172ce9e46e4c058b75d.zip
[Fix] More fixes to rfc2047 encoding
-rw-r--r-- src/libmime/mime_headers.c 39
-rw-r--r-- src/libmime/mime_headers.h 5
-rw-r--r-- src/libserver/protocol.c 2
-rw-r--r-- src/lua/lua_util.c 13
-rw-r--r-- test/rspamd_cxx_unit_rfc2047.hxx 29
5 files changed, 58 insertions, 30 deletions
diff --git a/src/libmime/mime_headers.c b/src/libmime/mime_headers.c
index e4d2ca458..a511f5e36 100644
--- a/src/libmime/mime_headers.c
+++ b/src/libmime/mime_headers.c
@@ -816,7 +816,7 @@ rspamd_mime_header_decode(rspamd_mempool_t *pool, const char *in,
}
char *
-rspamd_mime_header_encode(const char *in, gsize len)
+rspamd_mime_header_encode(const char *in, gsize len, bool is_structured)
{
static const size_t max_token_size = 76 - 12; /* 12 is the length of "=?UTF-8?Q??="; */
GString *outbuf = g_string_sized_new(len);
@@ -831,16 +831,17 @@ rspamd_mime_header_encode(const char *in, gsize len)
p++;
}
else {
- size_t remain = end - p;
- gsize next_offset = rspamd_memcspn(p, " \r\n()", MIN(max_token_size, remain));
- const char *q = p + next_offset;
+ const char *q = end;
size_t piece_len = q - p, encoded_len = 0;
/* Check if the piece contains non-ASCII characters */
- gboolean has_non_ascii = FALSE;
+ gboolean need_encoding = FALSE;
+ size_t unencoded_prefix = 0, unencoded_suffix = 0;
for (size_t i = 0; i < piece_len; i++) {
- if ((unsigned char) p[i] >= 128) {
- has_non_ascii = TRUE;
+ unsigned char c = p[i];
+ if (c >= 128 || (is_structured && !g_ascii_isalnum(c))) {
+ need_encoding = TRUE;
+ unencoded_suffix = 0;
encoded_len += 3;
if (encoded_len > max_token_size) {
@@ -853,21 +854,41 @@ rspamd_mime_header_encode(const char *in, gsize len)
else {
encoded_len++;
+ if (!need_encoding) {
+ unencoded_prefix++;
+ }
+ else {
+ unencoded_suffix++;
+ }
+
if (encoded_len > max_token_size) {
piece_len = i;
q = p + piece_len;
/* No more space */
break;
}
+
+ if (need_encoding && (c == '(' || c == ')')) {
+ /* If we need to encode, we must stop on comments characters */
+ piece_len = i + 1;
+ q = p + piece_len;
+ /* No more space */
+ break;
+ }
}
}
- if (has_non_ascii) {
+ if (need_encoding) {
+ g_string_append_len(outbuf, p, unencoded_prefix);
+ p += unencoded_prefix;
g_string_append(outbuf, "=?UTF-8?Q?");
/* Do encode */
- encoded_len = rspamd_encode_qp2047_buf(p, piece_len, encode_buf, max_token_size + 3);
+ encoded_len = rspamd_encode_qp2047_buf(p, piece_len - unencoded_prefix - unencoded_suffix,
+ encode_buf, max_token_size + 3);
+ p += piece_len - unencoded_prefix - unencoded_suffix;
g_string_append_len(outbuf, encode_buf, encoded_len);
g_string_append(outbuf, "?=");
+ g_string_append_len(outbuf, p, unencoded_suffix);
}
else {
/* No transformation */
diff --git a/src/libmime/mime_headers.h b/src/libmime/mime_headers.h
index 9f89daece..290f94799 100644
--- a/src/libmime/mime_headers.h
+++ b/src/libmime/mime_headers.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2023 Vsevolod Stakhov
+ * Copyright 2024 Vsevolod Stakhov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -100,9 +100,10 @@ char *rspamd_mime_header_decode(rspamd_mempool_t *pool, const char *in,
* Encode mime header if needed
* @param in
* @param len
+ * @param is_structured if true, then we encode as structured header (e.g. encode all non alpha-numeric characters)
* @return newly allocated encoded header
*/
-char *rspamd_mime_header_encode(const char *in, gsize len);
+char *rspamd_mime_header_encode(const char *in, gsize len, bool is_structured);
/**
* Generate new unique message id
diff --git a/src/libserver/protocol.c b/src/libserver/protocol.c
index 2dc641dfe..1196d2d14 100644
--- a/src/libserver/protocol.c
+++ b/src/libserver/protocol.c
@@ -1046,7 +1046,7 @@ rspamd_protocol_rewrite_subject(struct rspamd_task *task)
g_string_append_len(subj_buf, c, p - c);
}
- res = rspamd_mime_header_encode(subj_buf->str, subj_buf->len);
+ res = rspamd_mime_header_encode(subj_buf->str, subj_buf->len, false);
rspamd_mempool_add_destructor(task->task_pool,
(rspamd_mempool_destruct_t) g_free,
diff --git a/src/lua/lua_util.c b/src/lua/lua_util.c
index 251d1e1e7..e92e4977a 100644
--- a/src/lua/lua_util.c
+++ b/src/lua/lua_util.c
@@ -644,9 +644,10 @@ LUA_FUNCTION_DEF(util, get_hostname);
LUA_FUNCTION_DEF(util, parse_content_type);
/***
- * @function util.mime_header_encode(hdr)
+ * @function util.mime_header_encode(hdr[, is_structured])
* Encodes header if needed
* @param {string} hdr input header
+ * @param {boolean} is_structured if true, then we encode as structured header (e.g. encode all non alpha-numeric characters)
* @return encoded header
*/
LUA_FUNCTION_DEF(util, mime_header_encode);
@@ -2406,15 +2407,19 @@ static int
lua_util_mime_header_encode(lua_State *L)
{
LUA_TRACE_POINT;
- gsize len;
- const char *hdr = luaL_checklstring(L, 1, &len);
+ struct rspamd_lua_text *hdr = lua_check_text_or_string(L, 1);
char *encoded;
+ bool is_structured = false;
if (!hdr) {
return luaL_error(L, "invalid arguments");
}
- encoded = rspamd_mime_header_encode(hdr, len);
+ if (lua_isboolean(L, 2)) {
+ is_structured = lua_toboolean(L, 2);
+ }
+
+ encoded = rspamd_mime_header_encode(hdr->start, hdr->len, is_structured);
lua_pushstring(L, encoded);
g_free(encoded);
diff --git a/test/rspamd_cxx_unit_rfc2047.hxx b/test/rspamd_cxx_unit_rfc2047.hxx
index e66c79340..25afd15d5 100644
--- a/test/rspamd_cxx_unit_rfc2047.hxx
+++ b/test/rspamd_cxx_unit_rfc2047.hxx
@@ -21,6 +21,7 @@
#include "doctest/doctest.h"
#include <string>
+#include "libutil/mem_pool.h"
#include "libmime/mime_headers.h"
TEST_SUITE("rfc2047 encode")
@@ -28,7 +29,7 @@ TEST_SUITE("rfc2047 encode")
TEST_CASE("rspamd_mime_header_encode handles ASCII-only input")
{
const char *input = "Hello World";
- char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+ char *output_cstr = rspamd_mime_header_encode(input, strlen(input), false);
std::string output(output_cstr);
std::string expected_output = "Hello World";
CHECK(output == expected_output);
@@ -38,7 +39,7 @@ TEST_SUITE("rfc2047 encode")
TEST_CASE("rspamd_mime_header_encode handles input with non-ASCII characters")
{
const char *input = "Hello Мир";
- char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+ char *output_cstr = rspamd_mime_header_encode(input, strlen(input), false);
std::string output(output_cstr);
std::string expected_output = "Hello =?UTF-8?Q?=D0=9C=D0=B8=D1=80?=";
CHECK(output == expected_output);
@@ -48,7 +49,7 @@ TEST_SUITE("rfc2047 encode")
TEST_CASE("rspamd_mime_header_encode handles mixed input with separators")
{
const char *input = "ололо (ололо test) test";
- char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+ char *output_cstr = rspamd_mime_header_encode(input, strlen(input), false);
std::string output(output_cstr);
std::string expected_output = "=?UTF-8?Q?=D0=BE=D0=BB=D0=BE=D0=BB=D0=BE?= "
"(=?UTF-8?Q?=D0=BE=D0=BB=D0=BE=D0=BB=D0=BE?= test) test";
@@ -59,7 +60,7 @@ TEST_SUITE("rfc2047 encode")
TEST_CASE("rspamd_mime_header_encode handles multiple spaces and separators")
{
const char *input = "Привет мир\nКак дела?";
- char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+ char *output_cstr = rspamd_mime_header_encode(input, strlen(input), false);
std::string output(output_cstr);
std::string expected_output = "=?UTF-8?Q?=D0=9F=D1=80=D0=B8=D0=B2=D0=B5=D1=82?= "
"=?UTF-8?Q?=D0=BC=D0=B8=D1=80?=\n"
@@ -72,7 +73,7 @@ TEST_SUITE("rfc2047 encode")
TEST_CASE("rspamd_mime_header_encode handles empty input")
{
const char *input = "";
- char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+ char *output_cstr = rspamd_mime_header_encode(input, strlen(input), false);
std::string output(output_cstr ? output_cstr : "");
std::string expected_output = "";
CHECK(output == expected_output);
@@ -82,7 +83,7 @@ TEST_SUITE("rfc2047 encode")
TEST_CASE("rspamd_mime_header_encode handles input with only separators")
{
const char *input = " \r\n()";
- char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+ char *output_cstr = rspamd_mime_header_encode(input, strlen(input), false);
std::string output(output_cstr);
std::string expected_output = " \r\n()";
CHECK(output == expected_output);
@@ -92,7 +93,7 @@ TEST_SUITE("rfc2047 encode")
TEST_CASE("rspamd_mime_header_encode handles non-ASCII separators")
{
const char *input = "こんにちは(世界)";
- char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+ char *output_cstr = rspamd_mime_header_encode(input, strlen(input), false);
std::string output(output_cstr);
std::string expected_output = "=?UTF-8?Q?=E3=81=93=E3=82=93=E3=81=AB=E3=81=A1=E3=81=AF?="
"(=?UTF-8?Q?=E4=B8=96=E7=95=8C?=)";
@@ -103,7 +104,7 @@ TEST_SUITE("rfc2047 encode")
TEST_CASE("rspamd_mime_header_encode handles input starting with separator")
{
const char *input = " (Hello)";
- char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+ char *output_cstr = rspamd_mime_header_encode(input, strlen(input), false);
std::string output(output_cstr);
std::string expected_output = " (Hello)";
CHECK(output == expected_output);
@@ -113,7 +114,7 @@ TEST_SUITE("rfc2047 encode")
TEST_CASE("rspamd_mime_header_encode handles input ending with separator")
{
const char *input = "Hello) ";
- char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+ char *output_cstr = rspamd_mime_header_encode(input, strlen(input), false);
std::string output(output_cstr);
std::string expected_output = "Hello) ";
CHECK(output == expected_output);
@@ -123,7 +124,7 @@ TEST_SUITE("rfc2047 encode")
TEST_CASE("rspamd_mime_header_encode handles consecutive non-ASCII pieces")
{
const char *input = "你好世界";
- char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+ char *output_cstr = rspamd_mime_header_encode(input, strlen(input), false);
std::string output(output_cstr);
std::string expected_output = "=?UTF-8?Q?=E4=BD=A0=E5=A5=BD=E4=B8=96=E7=95=8C?=";
CHECK(output == expected_output);
@@ -133,7 +134,7 @@ TEST_SUITE("rfc2047 encode")
{
// Input string consisting of repeated non-ASCII characters
const char *input = "これはとても長いテキストで、エンコードされたワードが76文字を超える必要があります。";
- char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+ char *output_cstr = rspamd_mime_header_encode(input, strlen(input), false);
std::string output(output_cstr);
std::string expected_output = "=?UTF-8?Q?=E3=81=93=E3=82=8C=E3=81=AF=E3=81=A8=E3=81=A6=E3=82=82=E9=95=B7?="
"=?UTF-8?Q?=E3=81=84=E3=83=86=E3=82=AD=E3=82=B9=E3=83=88=E3=81=A7=E3=80=81?="
@@ -151,7 +152,7 @@ TEST_SUITE("rfc2047 encode")
// Input string consisting of repeated ASCII characters
std::string input_str(100, 'A');// 100 'A's
const char *input = input_str.c_str();
- char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+ char *output_cstr = rspamd_mime_header_encode(input, strlen(input), false);
std::string output(output_cstr);
std::string expected_output = input_str;
@@ -164,7 +165,7 @@ TEST_SUITE("rfc2047 encode")
// Input string with mix of ASCII and non-ASCII characters forming long pieces
const char *input = "ASCII_Text "
"これは非常に長い非ASCIIテキストで、エンコードが必要になります。";
- char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+ char *output_cstr = rspamd_mime_header_encode(input, strlen(input), false);
std::string output(output_cstr);
// Expected output: ASCII text as-is, non-ASCII text encoded and split accordingly
@@ -184,7 +185,7 @@ TEST_SUITE("rfc2047 encode")
const char *input =
"非常に長い非ASCII文字列を使用してエンコードワードの分割をテストします。"
"データが長すぎる場合、正しく分割されるべきです。";
- char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+ char *output_cstr = rspamd_mime_header_encode(input, strlen(input), false);
std::string output(output_cstr);
std::string expected_output =