aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/libmime/mime_headers.c14
-rw-r--r--test/rspamd_cxx_unit.cxx7
-rw-r--r--test/rspamd_cxx_unit_rfc2047.hxx212
3 files changed, 226 insertions, 7 deletions
diff --git a/src/libmime/mime_headers.c b/src/libmime/mime_headers.c
index 9d11210f3..3565eefba 100644
--- a/src/libmime/mime_headers.c
+++ b/src/libmime/mime_headers.c
@@ -818,10 +818,9 @@ rspamd_mime_header_decode(rspamd_mempool_t *pool, const char *in,
char *
rspamd_mime_header_encode(const char *in, gsize len)
{
- static const size_t max_token_size = 76;
+ static const size_t max_token_size = 76 - (sizeof("=?UTF-8?Q? ?=") - 3);
GString *outbuf = g_string_sized_new(len);
- size_t encode_buf_size = max_token_size;
- char *encode_buf = g_alloca(encode_buf_size + 3);
+ char *encode_buf = g_alloca(max_token_size + 3);
const char *p = in;
const char *end = in + len;
@@ -853,13 +852,20 @@ rspamd_mime_header_encode(const char *in, gsize len)
}
else {
encoded_len++;
+
+ if (encoded_len > max_token_size) {
+ piece_len = i - 1;
+ q = p + piece_len;
+ /* No more space */
+ break;
+ }
}
}
if (has_non_ascii) {
g_string_append(outbuf, "=?UTF-8?Q?");
/* Do encode */
- gssize encoded_len = rspamd_encode_qp2047_buf(p, piece_len, encode_buf, encode_buf_size);
+ encoded_len = rspamd_encode_qp2047_buf(p, piece_len, encode_buf, max_token_size);
g_string_append_len(outbuf, encode_buf, encoded_len);
g_string_append(outbuf, "?=");
}
diff --git a/test/rspamd_cxx_unit.cxx b/test/rspamd_cxx_unit.cxx
index b7cb0c6bf..ff323fb85 100644
--- a/test/rspamd_cxx_unit.cxx
+++ b/test/rspamd_cxx_unit.cxx
@@ -1,11 +1,11 @@
-/*-
- * Copyright 2021 Vsevolod Stakhov
+/*
+ * Copyright 2024 Vsevolod Stakhov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
@@ -25,6 +25,7 @@
#include "rspamd_cxx_local_ptr.hxx"
#include "rspamd_cxx_unit_dkim.hxx"
#include "rspamd_cxx_unit_cryptobox.hxx"
+#include "rspamd_cxx_unit_rfc2047.hxx"
static gboolean verbose = false;
static const GOptionEntry entries[] =
diff --git a/test/rspamd_cxx_unit_rfc2047.hxx b/test/rspamd_cxx_unit_rfc2047.hxx
new file mode 100644
index 000000000..6f2a42414
--- /dev/null
+++ b/test/rspamd_cxx_unit_rfc2047.hxx
@@ -0,0 +1,212 @@
+/*
+ * Copyright 2024 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef RSPAMD_RSPAMD_CXX_UNIT_RFC2047_HXX
+#define RSPAMD_RSPAMD_CXX_UNIT_RFC2047_HXX
+
+#define DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL
+#include "doctest/doctest.h"
+
+#include <string>
+#include "libmime/mime_headers.h"
+
+TEST_SUITE("rfc2047 encode")
+{
+ TEST_CASE("rspamd_mime_header_encode handles ASCII-only input")
+ {
+     // Pure ASCII text is expected to come back unchanged.
+     const std::string plain = "Hello World";
+     char *encoded = rspamd_mime_header_encode(plain.data(), plain.size());
+     const std::string actual(encoded);
+     g_free(encoded);// contents were already copied into `actual`
+     CHECK(actual == plain);
+ }
+
+ TEST_CASE("rspamd_mime_header_encode handles input with non-ASCII characters")
+ {
+     const std::string subject = "Hello Мир";// ASCII word followed by a Cyrillic word
+     const std::string expected = "Hello =?UTF-8?Q?=D0=9C=D0=B8=D1=80?=";
+     char *encoded = rspamd_mime_header_encode(subject.data(), subject.size());
+     const std::string actual(encoded);
+     g_free(encoded);
+     CHECK(actual == expected);
+ }
+
+ TEST_CASE("rspamd_mime_header_encode handles mixed input with separators")
+ {
+     const std::string subject = "ололо (ололо test) test";// Cyrillic and ASCII words, parentheses, spaces
+     const std::string expected = "=?UTF-8?Q?=D0=BE=D0=BB=D0=BE=D0=BB=D0=BE?= "
+                                  "(=?UTF-8?Q?=D0=BE=D0=BB=D0=BE=D0=BB=D0=BE?= test) test";
+     char *encoded = rspamd_mime_header_encode(subject.data(), subject.size());
+     const std::string actual(encoded);
+     g_free(encoded);
+     CHECK(actual == expected);
+ }
+
+ TEST_CASE("rspamd_mime_header_encode handles multiple spaces and separators")
+ {
+     const std::string subject = "Привет мир\nКак дела?";// four Cyrillic words split by spaces and a newline
+     const std::string expected = "=?UTF-8?Q?=D0=9F=D1=80=D0=B8=D0=B2=D0=B5=D1=82?= "
+                                  "=?UTF-8?Q?=D0=BC=D0=B8=D1=80?=\n"
+                                  "=?UTF-8?Q?=D0=9A=D0=B0=D0=BA?= "
+                                  "=?UTF-8?Q?=D0=B4=D0=B5=D0=BB=D0=B0=3F?=";
+     char *encoded = rspamd_mime_header_encode(subject.data(), subject.size());
+     const std::string actual(encoded);
+     g_free(encoded);
+     CHECK(actual == expected);
+ }
+
+ TEST_CASE("rspamd_mime_header_encode handles empty input")
+ {
+     const std::string nothing;// zero-length input
+     char *encoded = rspamd_mime_header_encode(nothing.data(), nothing.size());
+     // A null result is treated the same as an empty string.
+     const std::string actual(encoded != nullptr ? encoded : "");
+     g_free(encoded);
+     CHECK(actual == nothing);
+ }
+
+ TEST_CASE("rspamd_mime_header_encode handles input with only separators")
+ {
+     // Space, CR, LF and parentheses only: the output is expected to equal the input.
+     const std::string separators = " \r\n()";
+     char *encoded = rspamd_mime_header_encode(separators.data(), separators.size());
+     const std::string actual(encoded);
+     g_free(encoded);
+     CHECK(actual == separators);
+ }
+
+ TEST_CASE("rspamd_mime_header_encode handles non-ASCII separators")
+ {
+     const std::string subject = "こんにちは(世界)";// ASCII parentheses around the second Japanese word
+     const std::string expected = "=?UTF-8?Q?=E3=81=93=E3=82=93=E3=81=AB=E3=81=A1=E3=81=AF?="
+                                  "(=?UTF-8?Q?=E4=B8=96=E7=95=8C?=)";
+     char *encoded = rspamd_mime_header_encode(subject.data(), subject.size());
+     const std::string actual(encoded);
+     g_free(encoded);
+     CHECK(actual == expected);
+ }
+
+ TEST_CASE("rspamd_mime_header_encode handles input starting with separator")
+ {
+     // Leading space and parenthesis before an ASCII word: expected back verbatim.
+     const std::string subject = " (Hello)";
+     char *encoded = rspamd_mime_header_encode(subject.data(), subject.size());
+     const std::string actual(encoded);
+     g_free(encoded);
+     CHECK(actual == subject);
+ }
+
+ TEST_CASE("rspamd_mime_header_encode handles input ending with separator")
+ {
+     // ASCII word followed by a parenthesis and a trailing space: expected back verbatim.
+     const std::string subject = "Hello) ";
+     char *encoded = rspamd_mime_header_encode(subject.data(), subject.size());
+     const std::string actual(encoded);
+     g_free(encoded);
+     CHECK(actual == subject);
+ }
+
+ TEST_CASE("rspamd_mime_header_encode handles consecutive non-ASCII pieces")
+ {
+     const std::string subject = "你好世界";// four CJK characters, no separators
+     const std::string expected = "=?UTF-8?Q?=E4=BD=A0=E5=A5=BD=E4=B8=96=E7=95=8C?=";
+     char *encoded = rspamd_mime_header_encode(subject.data(), subject.size());
+     const std::string actual(encoded);
+     g_free(encoded);
+     CHECK(actual == expected);
+ }
+ TEST_CASE("rspamd_mime_header_encode handles long non-ASCII input requiring encoded-word splitting")
+ {
+ // One long Japanese sentence with no spaces; the only ASCII bytes in it are the digits "76"
+ const char *input = "これはとても長いテキストで、エンコードされたワードが76文字を超える必要があります。";
+ char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+ std::string output(output_cstr);
+
+ // Expected: four Q-encoded words joined by single spaces, followed by a literal '.'.
+ // NOTE(review): the input ends with U+3002 (bytes E3 80 82) but the expectation ends with an
+ // ASCII '.'; the long mixed-input test in this suite expects "=E3=80=82" for that same character.
+ // NOTE(review): the encoded-words below are 98-102 characters long, more than the 75 characters
+ // RFC 2047 section 2 allows per encoded-word - confirm against the encoder's real output.
+ // NOTE(review): words three and four split the three-byte sequence E8 B6 85 between them, while
+ // RFC 2047 section 5 requires whole characters per encoded-word - confirm this is intended.
+ // The adjacent string literals below concatenate into the single expected string.
+ std::string expected_output = "=?UTF-8?Q?"
+ "=E3=81=93=E3=82=8C=E3=81=AF=E3=81=A8=E3=81=A6=E3=82=82=E9=95=B7=E3=81=84=E3=83=86=E3=82=AD?= "
+ "=?UTF-8?Q?=E3=82=B9=E3=83=88=E3=81=A7=E3=80=81=E3=82=A8=E3=83=B3=E3=82=B3=E3=83=BC=E3=83=89=E3=81=95?= "
+ "=?UTF-8?Q?=E3=82=8C=E3=81=9F=E3=83=AF=E3=83=BC=E3=83=89=E3=81=8C76=E6=96=87=E5=AD=97=E3=82=92=E8?= "
+ "=?UTF-8?Q?=B6=85=E3=81=88=E3=82=8B=E5=BF=85=E8=A6=81=E3=81=8C=E3=81=82=E3=82=8A=E3=81=BE=E3=81=99?=.";
+
+ CHECK(output == expected_output);
+ g_free(output_cstr);
+ }
+
+ TEST_CASE("rspamd_mime_header_encode handles long ASCII input without encoding")
+ {
+     // 100 identical ASCII letters; the expectation is the very same text back,
+     // neither split nor wrapped in encoded-words.
+     const std::string long_ascii(100, 'A');
+
+     char *encoded = rspamd_mime_header_encode(long_ascii.data(), long_ascii.size());
+     const std::string actual(encoded);
+     g_free(encoded);
+
+     CHECK(actual == long_ascii);
+ }
+
+ TEST_CASE("rspamd_mime_header_encode handles long mixed input requiring encoded-word splitting")
+ {
+ // A short ASCII word, one space, then a long Japanese run that contains the ASCII letters "ASCII"
+ const char *input = "ASCII_Text "
+ "これは非常に長い非ASCIIテキストで、エンコードが必要になります。";
+ char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+ std::string output(output_cstr);
+ // Expected: the leading ASCII word verbatim, then three Q-encoded words joined by single spaces
+ // NOTE(review): these encoded-words are 93-107 characters long, over RFC 2047's 75-character cap - confirm
+ std::string expected_output = "ASCII_Text "
+ "=?UTF-8?Q?"
+ "=E3=81=93=E3=82=8C=E3=81=AF=E9=9D=9E=E5=B8=B8=E3=81=AB=E9=95=B7=E3=81=84=E9=9D=9EASCII=E3=83=86?= "
+ "=?UTF-8?Q?=E3=82=AD=E3=82=B9=E3=83=88=E3=81=A7=E3=80=81=E3=82=A8=E3=83=B3=E3=82=B3=E3=83=BC=E3=83=89?= "
+ "=?UTF-8?Q?=E3=81=8C=E5=BF=85=E8=A6=81=E3=81=AB=E3=81=AA=E3=82=8A=E3=81=BE=E3=81=99=E3=80=82?=";
+ // (the letters "ASCII" appear unescaped inside the first encoded-word above)
+ CHECK(output == expected_output);
+ g_free(output_cstr);
+ }
+
+ TEST_CASE("rspamd_mime_header_encode handles very long non-ASCII word requiring multiple splits")
+ {
+     const char *input =
+         "非常に長い非ASCII文字列を使用してエンコードワードの分割をテストします。"
+         "データが長すぎる場合、正しく分割されるべきです。";
+     char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+     std::string output(output_cstr);
+     // The second input character is U+5E38 (UTF-8 E5 B8 B8), hence "=E5=B8=B8" in the first word
+     std::string expected_output =
+         "=?UTF-8?Q?=E9=9D=9E=E5=B8=B8=E3=81=AB=E9=95=B7=E3=81=84=E9=9D=9EASCII=E6?="
+         "=?UTF-8?Q?=96=87=E5=AD=97=E5=88=97=E3=82=92=E4=BD=BF=E7=94=A8=E3=81=97?="
+         "=?UTF-8?Q?=E3=81=A6=E3=82=A8=E3=83=B3=E3=82=B3=E3=83=BC=E3=83=89=E3=83=AF?="
+         "=?UTF-8?Q?=E3=83=BC=E3=83=89=E3=81=AE=E5=88=86=E5=89=B2=E3=82=92=E3=83=86?="
+         "=?UTF-8?Q?=E3=82=B9=E3=83=88=E3=81=97=E3=81=BE=E3=81=99=E3=80=82=E3=83=87?="
+         "=?UTF-8?Q?=E3=83=BC=E3=82=BF=E3=81=8C=E9=95=B7=E3=81=99=E3=81=8E=E3=82=8B?="
+         "=?UTF-8?Q?=E5=A0=B4=E5=90=88=E3=80=81=E6=AD=A3=E3=81=97=E3=81=8F=E5=88=86?="
+         "=?UTF-8?Q?=E5=89=B2=E3=81=95=E3=82=8C=E3=82=8B=E3=81=B9=E3=81=8D=E3=81=A7?="
+         "=?UTF-8?Q?=E3=81=99=E3=80=82?=";// every encoded-word here is at most 75 characters
+     // NOTE(review): words are joined with no whitespace and the first boundary splits E6 | 96 87 - confirm against the encoder
+     CHECK(output == expected_output);
+     g_free(output_cstr);
+ }
+}
+#endif