}
char *
-rspamd_mime_header_encode(const char *in, gsize len)
+rspamd_mime_header_encode(const char *in, gsize len, bool is_structured)
{
static const size_t max_token_size = 76 - 12; /* 12 is the length of "=?UTF-8?Q??="; */
GString *outbuf = g_string_sized_new(len);
p++;
}
else {
- size_t remain = end - p;
- gsize next_offset = rspamd_memcspn(p, " \r\n()", MIN(max_token_size, remain));
- const char *q = p + next_offset;
+ const char *q = end;
size_t piece_len = q - p, encoded_len = 0;
/* Check if the piece contains non-ASCII characters */
- gboolean has_non_ascii = FALSE;
+ gboolean need_encoding = FALSE;
+ size_t unencoded_prefix = 0, unencoded_suffix = 0;
for (size_t i = 0; i < piece_len; i++) {
- if ((unsigned char) p[i] >= 128) {
- has_non_ascii = TRUE;
+ unsigned char c = p[i];
+ if (c >= 128 || (is_structured && !g_ascii_isalnum(c))) {
+ need_encoding = TRUE;
+ unencoded_suffix = 0;
encoded_len += 3;
if (encoded_len > max_token_size) {
else {
encoded_len++;
+ if (!need_encoding) {
+ unencoded_prefix++;
+ }
+ else {
+ unencoded_suffix++;
+ }
+
if (encoded_len > max_token_size) {
piece_len = i;
q = p + piece_len;
/* No more space */
break;
}
+
+ if (need_encoding && (c == '(' || c == ')')) {
+ /* When encoding is needed, a comment delimiter ('(' or ')') must end the piece */
+ piece_len = i + 1;
+ q = p + piece_len;
+ /* Stop at the comment delimiter */
+ break;
+ }
}
}
- if (has_non_ascii) {
+ if (need_encoding) {
+ g_string_append_len(outbuf, p, unencoded_prefix);
+ p += unencoded_prefix;
g_string_append(outbuf, "=?UTF-8?Q?");
/* Do encode */
- encoded_len = rspamd_encode_qp2047_buf(p, piece_len, encode_buf, max_token_size + 3);
+ encoded_len = rspamd_encode_qp2047_buf(p, piece_len - unencoded_prefix - unencoded_suffix,
+ encode_buf, max_token_size + 3);
+ p += piece_len - unencoded_prefix - unencoded_suffix;
g_string_append_len(outbuf, encode_buf, encoded_len);
g_string_append(outbuf, "?=");
+ g_string_append_len(outbuf, p, unencoded_suffix);
}
else {
/* No transformation */
#include "doctest/doctest.h"
#include <string>
+#include "libutil/mem_pool.h"
#include "libmime/mime_headers.h"
TEST_SUITE("rfc2047 encode")
TEST_CASE("rspamd_mime_header_encode handles ASCII-only input")
{
const char *input = "Hello World";
- char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+ char *output_cstr = rspamd_mime_header_encode(input, strlen(input), false);
std::string output(output_cstr);
std::string expected_output = "Hello World";
CHECK(output == expected_output);
TEST_CASE("rspamd_mime_header_encode handles input with non-ASCII characters")
{
const char *input = "Hello Мир";
- char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+ char *output_cstr = rspamd_mime_header_encode(input, strlen(input), false);
std::string output(output_cstr);
std::string expected_output = "Hello =?UTF-8?Q?=D0=9C=D0=B8=D1=80?=";
CHECK(output == expected_output);
TEST_CASE("rspamd_mime_header_encode handles mixed input with separators")
{
const char *input = "ололо (ололо test) test";
- char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+ char *output_cstr = rspamd_mime_header_encode(input, strlen(input), false);
std::string output(output_cstr);
std::string expected_output = "=?UTF-8?Q?=D0=BE=D0=BB=D0=BE=D0=BB=D0=BE?= "
"(=?UTF-8?Q?=D0=BE=D0=BB=D0=BE=D0=BB=D0=BE?= test) test";
TEST_CASE("rspamd_mime_header_encode handles multiple spaces and separators")
{
const char *input = "Привет мир\nКак дела?";
- char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+ char *output_cstr = rspamd_mime_header_encode(input, strlen(input), false);
std::string output(output_cstr);
std::string expected_output = "=?UTF-8?Q?=D0=9F=D1=80=D0=B8=D0=B2=D0=B5=D1=82?= "
"=?UTF-8?Q?=D0=BC=D0=B8=D1=80?=\n"
TEST_CASE("rspamd_mime_header_encode handles empty input")
{
const char *input = "";
- char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+ char *output_cstr = rspamd_mime_header_encode(input, strlen(input), false);
std::string output(output_cstr ? output_cstr : "");
std::string expected_output = "";
CHECK(output == expected_output);
TEST_CASE("rspamd_mime_header_encode handles input with only separators")
{
const char *input = " \r\n()";
- char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+ char *output_cstr = rspamd_mime_header_encode(input, strlen(input), false);
std::string output(output_cstr);
std::string expected_output = " \r\n()";
CHECK(output == expected_output);
TEST_CASE("rspamd_mime_header_encode handles non-ASCII separators")
{
const char *input = "こんにちは(世界)";
- char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+ char *output_cstr = rspamd_mime_header_encode(input, strlen(input), false);
std::string output(output_cstr);
std::string expected_output = "=?UTF-8?Q?=E3=81=93=E3=82=93=E3=81=AB=E3=81=A1=E3=81=AF?="
"(=?UTF-8?Q?=E4=B8=96=E7=95=8C?=)";
TEST_CASE("rspamd_mime_header_encode handles input starting with separator")
{
const char *input = " (Hello)";
- char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+ char *output_cstr = rspamd_mime_header_encode(input, strlen(input), false);
std::string output(output_cstr);
std::string expected_output = " (Hello)";
CHECK(output == expected_output);
TEST_CASE("rspamd_mime_header_encode handles input ending with separator")
{
const char *input = "Hello) ";
- char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+ char *output_cstr = rspamd_mime_header_encode(input, strlen(input), false);
std::string output(output_cstr);
std::string expected_output = "Hello) ";
CHECK(output == expected_output);
TEST_CASE("rspamd_mime_header_encode handles consecutive non-ASCII pieces")
{
const char *input = "你好世界";
- char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+ char *output_cstr = rspamd_mime_header_encode(input, strlen(input), false);
std::string output(output_cstr);
std::string expected_output = "=?UTF-8?Q?=E4=BD=A0=E5=A5=BD=E4=B8=96=E7=95=8C?=";
CHECK(output == expected_output);
{
// Input string consisting of repeated non-ASCII characters
const char *input = "これはとても長いテキストで、エンコードされたワードが76文字を超える必要があります。";
- char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+ char *output_cstr = rspamd_mime_header_encode(input, strlen(input), false);
std::string output(output_cstr);
std::string expected_output = "=?UTF-8?Q?=E3=81=93=E3=82=8C=E3=81=AF=E3=81=A8=E3=81=A6=E3=82=82=E9=95=B7?="
"=?UTF-8?Q?=E3=81=84=E3=83=86=E3=82=AD=E3=82=B9=E3=83=88=E3=81=A7=E3=80=81?="
// Input string consisting of repeated ASCII characters
std::string input_str(100, 'A');// 100 'A's
const char *input = input_str.c_str();
- char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+ char *output_cstr = rspamd_mime_header_encode(input, strlen(input), false);
std::string output(output_cstr);
std::string expected_output = input_str;
// Input string with mix of ASCII and non-ASCII characters forming long pieces
const char *input = "ASCII_Text "
"これは非常に長い非ASCIIテキストで、エンコードが必要になります。";
- char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+ char *output_cstr = rspamd_mime_header_encode(input, strlen(input), false);
std::string output(output_cstr);
// Expected output: ASCII text as-is, non-ASCII text encoded and split accordingly
const char *input =
"非常に長い非ASCII文字列を使用してエンコードワードの分割をテストします。"
"データが長すぎる場合、正しく分割されるべきです。";
- char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+ char *output_cstr = rspamd_mime_header_encode(input, strlen(input), false);
std::string output(output_cstr);
std::string expected_output =