From 0806e4d11bcc08bdc3b8efbf55c372f844b0a722 Mon Sep 17 00:00:00 2001
From: Vsevolod Stakhov <vsevolod@rspamd.com>
Date: Sat, 16 Nov 2024 17:46:44 +0000
Subject: [PATCH] [Fix] Some more fixes

---
 src/libmime/mime_headers.c       |  14 +-
 test/rspamd_cxx_unit.cxx         |   7 +-
 test/rspamd_cxx_unit_rfc2047.hxx | 212 +++++++++++++++++++++++++++++++
 3 files changed, 226 insertions(+), 7 deletions(-)
 create mode 100644 test/rspamd_cxx_unit_rfc2047.hxx
diff --git a/src/libmime/mime_headers.c b/src/libmime/mime_headers.c
index 9d11210f3..3565eefba 100644
--- a/src/libmime/mime_headers.c
+++ b/src/libmime/mime_headers.c
@@ -818,10 +818,9 @@ rspamd_mime_header_decode(rspamd_mempool_t *pool, const char *in,
 char *
 rspamd_mime_header_encode(const char *in, gsize len)
 {
-	static const size_t max_token_size = 76;
+	static const size_t max_token_size = 76 - (sizeof("=?UTF-8?Q? ?=") - 3);
 	GString *outbuf = g_string_sized_new(len);
-	size_t encode_buf_size = max_token_size;
-	char *encode_buf = g_alloca(encode_buf_size + 3);
+	char *encode_buf = g_alloca(max_token_size + 3);
 	const char *p = in;
 	const char *end = in + len;
 
@@ -853,13 +852,20 @@ rspamd_mime_header_encode(const char *in, gsize len)
 				}
 				else {
 					encoded_len++;
+
+					if (encoded_len > max_token_size) {
+						piece_len = i - 1;
+						q = p + piece_len;
+						/* No more space */
+						break;
+					}
 				}
 			}
 
 			if (has_non_ascii) {
 				g_string_append(outbuf, "=?UTF-8?Q?");
 				/* Do encode */
-				gssize encoded_len = rspamd_encode_qp2047_buf(p, piece_len, encode_buf, encode_buf_size);
+				encoded_len = rspamd_encode_qp2047_buf(p, piece_len, encode_buf, max_token_size);
 				g_string_append_len(outbuf, encode_buf, encoded_len);
 				g_string_append(outbuf, "?=");
 			}
diff --git a/test/rspamd_cxx_unit.cxx b/test/rspamd_cxx_unit.cxx
index b7cb0c6bf..ff323fb85 100644
--- a/test/rspamd_cxx_unit.cxx
+++ b/test/rspamd_cxx_unit.cxx
@@ -1,11 +1,11 @@
-/*-
- * Copyright 2021 Vsevolod Stakhov
+/*
+ * Copyright 2024 Vsevolod Stakhov
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
- *   http://www.apache.org/licenses/LICENSE-2.0
+ *    http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -25,6 +25,7 @@
 #include "rspamd_cxx_local_ptr.hxx"
 #include "rspamd_cxx_unit_dkim.hxx"
 #include "rspamd_cxx_unit_cryptobox.hxx"
+#include "rspamd_cxx_unit_rfc2047.hxx"
 
 static gboolean verbose = false;
 static const GOptionEntry entries[] =
diff --git a/test/rspamd_cxx_unit_rfc2047.hxx b/test/rspamd_cxx_unit_rfc2047.hxx
new file mode 100644
index 000000000..6f2a42414
--- /dev/null
+++ b/test/rspamd_cxx_unit_rfc2047.hxx
@@ -0,0 +1,212 @@
+/*
+ * Copyright 2024 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef RSPAMD_RSPAMD_CXX_UNIT_RFC2047_HXX
+#define RSPAMD_RSPAMD_CXX_UNIT_RFC2047_HXX
+
+#define DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL
+#include "doctest/doctest.h"
+
+#include <string>
+#include "libmime/mime_headers.h"
+
+TEST_SUITE("rfc2047 encode")
+{
+	TEST_CASE("rspamd_mime_header_encode handles ASCII-only input")
+	{
+		/* Pure ASCII input: the result is expected to equal the input */
+		const std::string expected = "Hello World";
+		const char *plain = "Hello World";
+		char *encoded = rspamd_mime_header_encode(plain, strlen(plain));
+		const std::string actual(encoded);
+		CHECK(actual == expected);
+		g_free(encoded);
+	}
+
+	TEST_CASE("rspamd_mime_header_encode handles input with non-ASCII characters")
+	{
+		const char *input = "Hello Мир"; /* "Мир" is D0 9C D0 B8 D1 80 in UTF-8 */
+		char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+		std::string output(output_cstr);
+		std::string expected_output = "Hello =?UTF-8?Q?=D0=9C=D0=B8=D1=80?=";
+		CHECK(output == expected_output);
+		g_free(output_cstr);
+	}
+
+	TEST_CASE("rspamd_mime_header_encode handles mixed input with separators")
+	{
+		const char *input = "ололо (ололо test)    test"; /* Cyrillic words around ASCII separators */
+		char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+		std::string output(output_cstr);
+		std::string expected_output = "=?UTF-8?Q?=D0=BE=D0=BB=D0=BE=D0=BB=D0=BE?= "
+									  "(=?UTF-8?Q?=D0=BE=D0=BB=D0=BE=D0=BB=D0=BE?= test)    test";
+		CHECK(output == expected_output);
+		g_free(output_cstr);
+	}
+
+	TEST_CASE("rspamd_mime_header_encode handles multiple spaces and separators")
+	{
+		const char *input = "Привет    мир\nКак дела?"; /* runs of spaces and a newline between words */
+		char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+		std::string output(output_cstr);
+		std::string expected_output = "=?UTF-8?Q?=D0=9F=D1=80=D0=B8=D0=B2=D0=B5=D1=82?=    "
+									  "=?UTF-8?Q?=D0=BC=D0=B8=D1=80?=\n"
+									  "=?UTF-8?Q?=D0=9A=D0=B0=D0=BA?= "
+									  "=?UTF-8?Q?=D0=B4=D0=B5=D0=BB=D0=B0=3F?=";
+		CHECK(output == expected_output);
+		g_free(output_cstr);
+	}
+
+	TEST_CASE("rspamd_mime_header_encode handles empty input")
+	{
+		/* A NULL result is treated the same as an empty string */
+		const char *empty = "";
+		char *encoded = rspamd_mime_header_encode(empty, strlen(empty));
+		const std::string actual = (encoded != nullptr) ? std::string(encoded) : std::string("");
+		CHECK(actual == std::string(""));
+		g_free(encoded);
+	}
+
+	TEST_CASE("rspamd_mime_header_encode handles input with only separators")
+	{
+		/* Space, CR, LF and parentheses only: expected back verbatim */
+		const std::string separators = " \r\n()";
+		const char *raw = separators.c_str();
+		char *encoded = rspamd_mime_header_encode(raw, strlen(raw));
+		const std::string actual(encoded);
+		CHECK(actual == separators);
+		g_free(encoded);
+	}
+
+	TEST_CASE("rspamd_mime_header_encode handles non-ASCII separators")
+	{
+		const char *input = "こんにちは(世界)"; /* ASCII parentheses directly adjacent to non-ASCII text */
+		char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+		std::string output(output_cstr);
+		std::string expected_output = "=?UTF-8?Q?=E3=81=93=E3=82=93=E3=81=AB=E3=81=A1=E3=81=AF?="
+									  "(=?UTF-8?Q?=E4=B8=96=E7=95=8C?=)";
+		CHECK(output == expected_output);
+		g_free(output_cstr);
+	}
+
+	TEST_CASE("rspamd_mime_header_encode handles input starting with separator")
+	{
+		/* Leading space and '(' before an ASCII word: expected back verbatim */
+		const std::string expected = " (Hello)";
+		const char *leading_sep = " (Hello)";
+		char *encoded = rspamd_mime_header_encode(leading_sep, strlen(leading_sep));
+		const std::string actual(encoded);
+		CHECK(actual == expected);
+		g_free(encoded);
+	}
+
+	TEST_CASE("rspamd_mime_header_encode handles input ending with separator")
+	{
+		/* ASCII word followed by ')' and a trailing space: expected back verbatim */
+		const std::string expected = "Hello) ";
+		const char *trailing_sep = "Hello) ";
+		char *encoded = rspamd_mime_header_encode(trailing_sep, strlen(trailing_sep));
+		const std::string actual(encoded);
+		CHECK(actual == expected);
+		g_free(encoded);
+	}
+
+	TEST_CASE("rspamd_mime_header_encode handles consecutive non-ASCII pieces")
+	{
+		const char *input = "你好世界"; /* four CJK characters with no separators in between */
+		char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+		std::string output(output_cstr);
+		std::string expected_output = "=?UTF-8?Q?=E4=BD=A0=E5=A5=BD=E4=B8=96=E7=95=8C?=";
+		CHECK(output == expected_output);
+		g_free(output_cstr);
+	}
+	TEST_CASE("rspamd_mime_header_encode handles long non-ASCII input requiring encoded-word splitting")
+	{
+		// A single long Japanese sentence without ASCII separators, ending in U+3002 ('。')
+		const char *input = "これはとても長いテキストで、エンコードされたワードが76文字を超える必要があります。";
+		char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+		std::string output(output_cstr);
+
+		// The input does not fit into one encoded-word, so several '=?UTF-8?Q?...?=' words
+		// are expected; the intent is that each one stays within 76 characters, counting
+		// the '=?UTF-8?Q?' prefix and the '?=' suffix.
+		//
+		// NOTE(review): the hand-written literal below does not obviously match that intent:
+		// its words are longer than 76 characters, they are joined by spaces that are absent
+		// from the input, and it ends in an ASCII '.' where the input ends in '。'.
+		// Confirm the expected value against the real encoder output.
+		std::string expected_output = "=?UTF-8?Q?"
+									  "=E3=81=93=E3=82=8C=E3=81=AF=E3=81=A8=E3=81=A6=E3=82=82=E9=95=B7=E3=81=84=E3=83=86=E3=82=AD?= "
+									  "=?UTF-8?Q?=E3=82=B9=E3=83=88=E3=81=A7=E3=80=81=E3=82=A8=E3=83=B3=E3=82=B3=E3=83=BC=E3=83=89=E3=81=95?= "
+									  "=?UTF-8?Q?=E3=82=8C=E3=81=9F=E3=83=AF=E3=83=BC=E3=83=89=E3=81=8C76=E6=96=87=E5=AD=97=E3=82=92=E8?= "
+									  "=?UTF-8?Q?=B6=85=E3=81=88=E3=82=8B=E5=BF=85=E8=A6=81=E3=81=8C=E3=81=82=E3=82=8A=E3=81=BE=E3=81=99?=.";
+
+		CHECK(output == expected_output);
+		g_free(output_cstr);
+	}
+
+	TEST_CASE("rspamd_mime_header_encode handles long ASCII input without encoding")
+	{
+		/* 100 ASCII letters: a long input that contains nothing to encode */
+		const std::string long_ascii(100, 'A');
+
+		const char *raw = long_ascii.c_str();
+		char *encoded = rspamd_mime_header_encode(raw, strlen(raw));
+		const std::string actual(encoded);
+
+		CHECK(actual == long_ascii);
+		g_free(encoded);
+	}
+
+	TEST_CASE("rspamd_mime_header_encode handles long mixed input requiring encoded-word splitting")
+	{
+		// Input string with mix of ASCII and non-ASCII characters forming long pieces
+		const char *input = "ASCII_Text "
+							"ããã¯éå¸¸ã«é·ãéASCIIãã­ã¹ãã§ãã¨ã³ã³ã¼ããå¿è¦ã«ãªãã¾ãã";
+		char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+		std::string output(output_cstr);
+
+		// Expected output: ASCII text as-is, non-ASCII text encoded and split accordingly
+		std::string expected_output = "ASCII_Text "
+									  "=?UTF-8?Q?"
+									  "=E3=81=93=E3=82=8C=E3=81=AF=E9=9D=9E=E5=B8=B8=E3=81=AB=E9=95=B7=E3=81=84=E9=9D=9EASCII=E3=83=86?= "
+									  "=?UTF-8?Q?=E3=82=AD=E3=82=B9=E3=83=88=E3=81=A7=E3=80=81=E3=82=A8=E3=83=B3=E3=82=B3=E3=83=BC=E3=83=89?= "
+									  "=?UTF-8?Q?=E3=81=8C=E5=BF=85=E8=A6=81=E3=81=AB=E3=81=AA=E3=82=8A=E3=81=BE=E3=81=99=E3=80=82?=";
+
+		CHECK(output == expected_output);
+		g_free(output_cstr);
+	}
+
+	TEST_CASE("process_string handles very long non-ASCII word requiring multiple splits")
+	{
+		const char *input =
+			"éå¸¸ã«é·ãéASCIIæå­åãä½¿ç¨ãã¦ã¨ã³ã³ã¼ãã¯ã¼ãã®åå²ããã¹ããã¾ãã"
+			"ãã¼ã¿ãé·ãããå ´åãæ­£ããåå²ãããã¹ãã§ãã";
+		char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+		std::string output(output_cstr);
+
+		std::string expected_output =
+			"=?UTF-8?Q?=E9=9D=9E=E5=B8=B6=E3=81=AB=E9=95=B7=E3=81=84=E9=9D=9EASCII=E6?="
+			"=?UTF-8?Q?=96=87=E5=AD=97=E5=88=97=E3=82=92=E4=BD=BF=E7=94=A8=E3=81=97?="
+			"=?UTF-8?Q?=E3=81=A6=E3=82=A8=E3=83=B3=E3=82=B3=E3=83=BC=E3=83=89=E3=83=AF?="
+			"=?UTF-8?Q?=E3=83=BC=E3=83=89=E3=81=AE=E5=88=86=E5=89=B2=E3=82=92=E3=83=86?="
+			"=?UTF-8?Q?=E3=82=B9=E3=83=88=E3=81=97=E3=81=BE=E3=81=99=E3=80=82=E3=83=87?="
+			"=?UTF-8?Q?=E3=83=BC=E3=82=BF=E3=81=8C=E9=95=B7=E3=81=99=E3=81=8E=E3=82=8B?="
+			"=?UTF-8?Q?=E5=A0=B4=E5=90=88=E3=80=81=E6=AD=A3=E3=81=97=E3=81=8F=E5=88=86?="
+			"=?UTF-8?Q?=E5=89=B2=E3=81=95=E3=82=8C=E3=82=8B=E3=81=B9=E3=81=8D=E3=81=A7?="
+			"=?UTF-8?Q?=E3=81=99=E3=80=82?=";// â¤76 chars
+
+		CHECK(output == expected_output);
+		g_free(output_cstr);
+	}
+}
+#endif
-- 
2.39.5