diff options
author | Vsevolod Stakhov <vsevolod@rspamd.com> | 2024-11-18 16:31:15 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@rspamd.com> | 2024-11-18 16:31:15 +0000 |
commit | f286abaac361de836a276172ce9e46e4c058b75d (patch) | |
tree | 235a4b6684111d6db31d527f6e7918f7f9020cf1 | |
parent | a282883e6f9d70a787970e92dc3d7644661cd8a3 (diff) | |
download | rspamd-f286abaac361de836a276172ce9e46e4c058b75d.tar.gz rspamd-f286abaac361de836a276172ce9e46e4c058b75d.zip |
[Fix] More fixes to rfc2047 encoding
-rw-r--r-- | src/libmime/mime_headers.c | 39 | ||||
-rw-r--r-- | src/libmime/mime_headers.h | 5 | ||||
-rw-r--r-- | src/libserver/protocol.c | 2 | ||||
-rw-r--r-- | src/lua/lua_util.c | 13 | ||||
-rw-r--r-- | test/rspamd_cxx_unit_rfc2047.hxx | 29 |
5 files changed, 58 insertions, 30 deletions
diff --git a/src/libmime/mime_headers.c b/src/libmime/mime_headers.c index e4d2ca458..a511f5e36 100644 --- a/src/libmime/mime_headers.c +++ b/src/libmime/mime_headers.c @@ -816,7 +816,7 @@ rspamd_mime_header_decode(rspamd_mempool_t *pool, const char *in, } char * -rspamd_mime_header_encode(const char *in, gsize len) +rspamd_mime_header_encode(const char *in, gsize len, bool is_structured) { static const size_t max_token_size = 76 - 12; /* 12 is the length of "=?UTF-8?Q??="; */ GString *outbuf = g_string_sized_new(len); @@ -831,16 +831,17 @@ rspamd_mime_header_encode(const char *in, gsize len) p++; } else { - size_t remain = end - p; - gsize next_offset = rspamd_memcspn(p, " \r\n()", MIN(max_token_size, remain)); - const char *q = p + next_offset; + const char *q = end; size_t piece_len = q - p, encoded_len = 0; /* Check if the piece contains non-ASCII characters */ - gboolean has_non_ascii = FALSE; + gboolean need_encoding = FALSE; + size_t unencoded_prefix = 0, unencoded_suffix = 0; for (size_t i = 0; i < piece_len; i++) { - if ((unsigned char) p[i] >= 128) { - has_non_ascii = TRUE; + unsigned char c = p[i]; + if (c >= 128 || (is_structured && !g_ascii_isalnum(c))) { + need_encoding = TRUE; + unencoded_suffix = 0; encoded_len += 3; if (encoded_len > max_token_size) { @@ -853,21 +854,41 @@ rspamd_mime_header_encode(const char *in, gsize len) else { encoded_len++; + if (!need_encoding) { + unencoded_prefix++; + } + else { + unencoded_suffix++; + } + if (encoded_len > max_token_size) { piece_len = i; q = p + piece_len; /* No more space */ break; } + + if (need_encoding && (c == '(' || c == ')')) { + /* If we need to encode, we must stop on comments characters */ + piece_len = i + 1; + q = p + piece_len; + /* No more space */ + break; + } } } - if (has_non_ascii) { + if (need_encoding) { + g_string_append_len(outbuf, p, unencoded_prefix); + p += unencoded_prefix; g_string_append(outbuf, "=?UTF-8?Q?"); /* Do encode */ - encoded_len = rspamd_encode_qp2047_buf(p, piece_len, encode_buf, max_token_size + 3); + encoded_len = rspamd_encode_qp2047_buf(p, piece_len - unencoded_prefix - unencoded_suffix, + encode_buf, max_token_size + 3); + p += piece_len - unencoded_prefix - unencoded_suffix; g_string_append_len(outbuf, encode_buf, encoded_len); g_string_append(outbuf, "?="); + g_string_append_len(outbuf, p, unencoded_suffix); } else { /* No transformation */ diff --git a/src/libmime/mime_headers.h b/src/libmime/mime_headers.h index 9f89daece..290f94799 100644 --- a/src/libmime/mime_headers.h +++ b/src/libmime/mime_headers.h @@ -1,5 +1,5 @@ /* - * Copyright 2023 Vsevolod Stakhov + * Copyright 2024 Vsevolod Stakhov * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -100,9 +100,10 @@ char *rspamd_mime_header_decode(rspamd_mempool_t *pool, const char *in, * Encode mime header if needed * @param in * @param len + * @param is_structured if true, then we encode as structured header (e.g. encode all non alpha-numeric characters) * @return newly allocated encoded header */ -char *rspamd_mime_header_encode(const char *in, gsize len); +char *rspamd_mime_header_encode(const char *in, gsize len, bool is_structured); /** * Generate new unique message id diff --git a/src/libserver/protocol.c b/src/libserver/protocol.c index 2dc641dfe..1196d2d14 100644 --- a/src/libserver/protocol.c +++ b/src/libserver/protocol.c @@ -1046,7 +1046,7 @@ rspamd_protocol_rewrite_subject(struct rspamd_task *task) g_string_append_len(subj_buf, c, p - c); } - res = rspamd_mime_header_encode(subj_buf->str, subj_buf->len); + res = rspamd_mime_header_encode(subj_buf->str, subj_buf->len, false); rspamd_mempool_add_destructor(task->task_pool, (rspamd_mempool_destruct_t) g_free, diff --git a/src/lua/lua_util.c b/src/lua/lua_util.c index 251d1e1e7..e92e4977a 100644 --- a/src/lua/lua_util.c +++ b/src/lua/lua_util.c @@ -644,9 +644,10 @@ LUA_FUNCTION_DEF(util, get_hostname); LUA_FUNCTION_DEF(util, parse_content_type); /*** - * @function util.mime_header_encode(hdr) + * @function util.mime_header_encode(hdr[, is_structured]) * Encodes header if needed * @param {string} hdr input header + * @param {boolean} is_structured if true, then we encode as structured header (e.g. encode all non alpha-numeric characters) * @return encoded header */ LUA_FUNCTION_DEF(util, mime_header_encode); @@ -2406,15 +2407,19 @@ static int lua_util_mime_header_encode(lua_State *L) { LUA_TRACE_POINT; - gsize len; - const char *hdr = luaL_checklstring(L, 1, &len); + struct rspamd_lua_text *hdr = lua_check_text_or_string(L, 1); char *encoded; + bool is_structured = false; if (!hdr) { return luaL_error(L, "invalid arguments"); } - encoded = rspamd_mime_header_encode(hdr, len); + if (lua_isboolean(L, 2)) { + is_structured = lua_toboolean(L, 2); + } + + encoded = rspamd_mime_header_encode(hdr->start, hdr->len, is_structured); lua_pushstring(L, encoded); g_free(encoded); diff --git a/test/rspamd_cxx_unit_rfc2047.hxx b/test/rspamd_cxx_unit_rfc2047.hxx index e66c79340..25afd15d5 100644 --- a/test/rspamd_cxx_unit_rfc2047.hxx +++ b/test/rspamd_cxx_unit_rfc2047.hxx @@ -21,6 +21,7 @@ #include "doctest/doctest.h" #include <string> +#include "libutil/mem_pool.h" #include "libmime/mime_headers.h" TEST_SUITE("rfc2047 encode") @@ -28,7 +29,7 @@ TEST_SUITE("rfc2047 encode") TEST_CASE("rspamd_mime_header_encode handles ASCII-only input") { const char *input = "Hello World"; - char *output_cstr = rspamd_mime_header_encode(input, strlen(input)); + char *output_cstr = rspamd_mime_header_encode(input, strlen(input), false); std::string output(output_cstr); std::string expected_output = "Hello World"; CHECK(output == expected_output); @@ -38,7 +39,7 @@ TEST_SUITE("rfc2047 encode") TEST_CASE("rspamd_mime_header_encode handles input with non-ASCII characters") { const char *input = "Hello Мир"; - char *output_cstr = rspamd_mime_header_encode(input, strlen(input)); + char *output_cstr = rspamd_mime_header_encode(input, strlen(input), false); std::string output(output_cstr); std::string expected_output = "Hello =?UTF-8?Q?=D0=9C=D0=B8=D1=80?="; CHECK(output == expected_output); @@ -48,7 +49,7 @@ TEST_SUITE("rfc2047 encode") TEST_CASE("rspamd_mime_header_encode handles mixed input with separators") { const char *input = "ололо (ололо test) test"; - char *output_cstr = rspamd_mime_header_encode(input, strlen(input)); + char *output_cstr = rspamd_mime_header_encode(input, strlen(input), false); std::string output(output_cstr); std::string expected_output = "=?UTF-8?Q?=D0=BE=D0=BB=D0=BE=D0=BB=D0=BE?= " "(=?UTF-8?Q?=D0=BE=D0=BB=D0=BE=D0=BB=D0=BE?= test) test"; @@ -59,7 +60,7 @@ TEST_SUITE("rfc2047 encode") TEST_CASE("rspamd_mime_header_encode handles multiple spaces and separators") { const char *input = "Привет мир\nКак дела?"; - char *output_cstr = rspamd_mime_header_encode(input, strlen(input)); + char *output_cstr = rspamd_mime_header_encode(input, strlen(input), false); std::string output(output_cstr); std::string expected_output = "=?UTF-8?Q?=D0=9F=D1=80=D0=B8=D0=B2=D0=B5=D1=82?= " "=?UTF-8?Q?=D0=BC=D0=B8=D1=80?=\n" @@ -72,7 +73,7 @@ TEST_SUITE("rfc2047 encode") TEST_CASE("rspamd_mime_header_encode handles empty input") { const char *input = ""; - char *output_cstr = rspamd_mime_header_encode(input, strlen(input)); + char *output_cstr = rspamd_mime_header_encode(input, strlen(input), false); std::string output(output_cstr ? output_cstr : ""); std::string expected_output = ""; CHECK(output == expected_output); @@ -82,7 +83,7 @@ TEST_SUITE("rfc2047 encode") TEST_CASE("rspamd_mime_header_encode handles input with only separators") { const char *input = " \r\n()"; - char *output_cstr = rspamd_mime_header_encode(input, strlen(input)); + char *output_cstr = rspamd_mime_header_encode(input, strlen(input), false); std::string output(output_cstr); std::string expected_output = " \r\n()"; CHECK(output == expected_output); @@ -92,7 +93,7 @@ TEST_SUITE("rfc2047 encode") TEST_CASE("rspamd_mime_header_encode handles non-ASCII separators") { const char *input = "こんにちは(世界)"; - char *output_cstr = rspamd_mime_header_encode(input, strlen(input)); + char *output_cstr = rspamd_mime_header_encode(input, strlen(input), false); std::string output(output_cstr); std::string expected_output = "=?UTF-8?Q?=E3=81=93=E3=82=93=E3=81=AB=E3=81=A1=E3=81=AF?=" "(=?UTF-8?Q?=E4=B8=96=E7=95=8C?=)"; @@ -103,7 +104,7 @@ TEST_SUITE("rfc2047 encode") TEST_CASE("rspamd_mime_header_encode handles input starting with separator") { const char *input = " (Hello)"; - char *output_cstr = rspamd_mime_header_encode(input, strlen(input)); + char *output_cstr = rspamd_mime_header_encode(input, strlen(input), false); std::string output(output_cstr); std::string expected_output = " (Hello)"; CHECK(output == expected_output); @@ -113,7 +114,7 @@ TEST_SUITE("rfc2047 encode") TEST_CASE("rspamd_mime_header_encode handles input ending with separator") { const char *input = "Hello) "; - char *output_cstr = rspamd_mime_header_encode(input, strlen(input)); + char *output_cstr = rspamd_mime_header_encode(input, strlen(input), false); std::string output(output_cstr); std::string expected_output = "Hello) "; CHECK(output == expected_output); @@ -123,7 +124,7 @@ TEST_SUITE("rfc2047 encode") TEST_CASE("rspamd_mime_header_encode handles consecutive non-ASCII pieces") { const char *input = "你好世界"; - char *output_cstr = rspamd_mime_header_encode(input, strlen(input)); + char *output_cstr = rspamd_mime_header_encode(input, strlen(input), false); std::string output(output_cstr); std::string expected_output = "=?UTF-8?Q?=E4=BD=A0=E5=A5=BD=E4=B8=96=E7=95=8C?="; CHECK(output == expected_output); @@ -133,7 +134,7 @@ TEST_SUITE("rfc2047 encode") { // Input string consisting of repeated non-ASCII characters const char *input = "これはとても長いテキストで、エンコードされたワードが76文字を超える必要があります。"; - char *output_cstr = rspamd_mime_header_encode(input, strlen(input)); + char *output_cstr = rspamd_mime_header_encode(input, strlen(input), false); std::string output(output_cstr); std::string expected_output = "=?UTF-8?Q?=E3=81=93=E3=82=8C=E3=81=AF=E3=81=A8=E3=81=A6=E3=82=82=E9=95=B7?=" "=?UTF-8?Q?=E3=81=84=E3=83=86=E3=82=AD=E3=82=B9=E3=83=88=E3=81=A7=E3=80=81?=" @@ -151,7 +152,7 @@ TEST_SUITE("rfc2047 encode") // Input string consisting of repeated ASCII characters std::string input_str(100, 'A');// 100 'A's const char *input = input_str.c_str(); - char *output_cstr = rspamd_mime_header_encode(input, strlen(input)); + char *output_cstr = rspamd_mime_header_encode(input, strlen(input), false); std::string output(output_cstr); std::string expected_output = input_str; @@ -164,7 +165,7 @@ TEST_SUITE("rfc2047 encode") // Input string with mix of ASCII and non-ASCII characters forming long pieces const char *input = "ASCII_Text " "これは非常に長い非ASCIIテキストで、エンコードが必要になります。"; - char *output_cstr = rspamd_mime_header_encode(input, strlen(input)); + char *output_cstr = rspamd_mime_header_encode(input, strlen(input), false); std::string output(output_cstr); // Expected output: ASCII text as-is, non-ASCII text encoded and split accordingly @@ -184,7 +185,7 @@ TEST_SUITE("rfc2047 encode") const char *input = "非常に長い非ASCII文字列を使用してエンコードワードの分割をテストします。" "データが長すぎる場合、正しく分割されるべきです。"; - char *output_cstr = rspamd_mime_header_encode(input, strlen(input)); + char *output_cstr = rspamd_mime_header_encode(input, strlen(input), false); std::string output(output_cstr); std::string expected_output = |