[Fix] Some more fixes

author Vsevolod Stakhov <vsevolod@rspamd.com>

Sat, 16 Nov 2024 17:46:44 +0000 (17:46 +0000)

committer Vsevolod Stakhov <vsevolod@rspamd.com>

Sat, 16 Nov 2024 17:46:44 +0000 (17:46 +0000)
author Vsevolod Stakhov <vsevolod@rspamd.com>
Sat, 16 Nov 2024 17:46:44 +0000 (17:46 +0000)
committer Vsevolod Stakhov <vsevolod@rspamd.com>
Sat, 16 Nov 2024 17:46:44 +0000 (17:46 +0000)
diff --git a/src/libmime/mime_headers.c b/src/libmime/mime_headers.c

index 9d11210f3762b2bf92fcbd5b384a36671a02c8cb..3565eefba90899980f6be8519940a2cdf1d13de0 100644 (file)
--- a/src/libmime/mime_headers.c
+++ b/src/libmime/mime_headers.c
@@ -818,10 +818,9 @@ rspamd_mime_header_decode(rspamd_mempool_t *pool, const char *in,
  char *
  rspamd_mime_header_encode(const char *in, gsize len)
  {
-       static const size_t max_token_size = 76;
+       static const size_t max_token_size = 76 - (sizeof("=?UTF-8?Q? ?=") - 3);
         GString *outbuf = g_string_sized_new(len);
-       size_t encode_buf_size = max_token_size;
-       char *encode_buf = g_alloca(encode_buf_size + 3);
+       char *encode_buf = g_alloca(max_token_size + 3);
         const char *p = in;
         const char *end = in + len;
  
@@ -853,13 +852,20 @@ rspamd_mime_header_encode(const char *in, gsize len)
                                 }
                                 else {
                                         encoded_len++;
+
+                                       if (encoded_len > max_token_size) {
+                                               piece_len = i - 1;
+                                               q = p + piece_len;
+                                               /* No more space */
+                                               break;
+                                       }
                                 }
                         }
  
                         if (has_non_ascii) {
                                 g_string_append(outbuf, "=?UTF-8?Q?");
                                 /* Do encode */
-                               gssize encoded_len = rspamd_encode_qp2047_buf(p, piece_len, encode_buf, encode_buf_size);
+                               encoded_len = rspamd_encode_qp2047_buf(p, piece_len, encode_buf, max_token_size);
                                 g_string_append_len(outbuf, encode_buf, encoded_len);
                                 g_string_append(outbuf, "?=");
                         }
diff --git a/test/rspamd_cxx_unit.cxx b/test/rspamd_cxx_unit.cxx

index b7cb0c6bf40c8d22540fc0af7693e6e406d4b2e6..ff323fb85d3384c83944b5ddc453f0f068fb1e7f 100644 (file)
--- a/test/rspamd_cxx_unit.cxx
+++ b/test/rspamd_cxx_unit.cxx
@@ -1,11 +1,11 @@
-/*-
- * Copyright 2021 Vsevolod Stakhov
+/*
+ * Copyright 2024 Vsevolod Stakhov
   *
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   *
- *   http://www.apache.org/licenses/LICENSE-2.0
+ *    http://www.apache.org/licenses/LICENSE-2.0
   *
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
@@ -25,6 +25,7 @@
  #include "rspamd_cxx_local_ptr.hxx"
  #include "rspamd_cxx_unit_dkim.hxx"
  #include "rspamd_cxx_unit_cryptobox.hxx"
+#include "rspamd_cxx_unit_rfc2047.hxx"
  
  static gboolean verbose = false;
  static const GOptionEntry entries[] =
diff --git a/test/rspamd_cxx_unit_rfc2047.hxx b/test/rspamd_cxx_unit_rfc2047.hxx

new file mode 100644 (file)

index 0000000..6f2a424
--- /dev/null
+++ b/test/rspamd_cxx_unit_rfc2047.hxx
@@ -0,0 +1,212 @@
+/*
+ * Copyright 2024 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef RSPAMD_RSPAMD_CXX_UNIT_RFC2047_HXX
+#define RSPAMD_RSPAMD_CXX_UNIT_RFC2047_HXX
+
+#define DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL
+#include "doctest/doctest.h"
+
+#include <string>
+#include "libmime/mime_headers.h"
+
+TEST_SUITE("rfc2047 encode")
+{
+       TEST_CASE("rspamd_mime_header_encode handles ASCII-only input")
+       {
+               const char *input = "Hello World";
+               char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+               std::string output(output_cstr);
+               std::string expected_output = "Hello World";
+               CHECK(output == expected_output);
+               g_free(output_cstr);
+       }
+
+       TEST_CASE("rspamd_mime_header_encode handles input with non-ASCII characters")
+       {
+               const char *input = "Hello Мир";
+               char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+               std::string output(output_cstr);
+               std::string expected_output = "Hello =?UTF-8?Q?=D0=9C=D0=B8=D1=80?=";
+               CHECK(output == expected_output);
+               g_free(output_cstr);
+       }
+
+       TEST_CASE("rspamd_mime_header_encode handles mixed input with separators")
+       {
+               const char *input = "ололо (ололо test)    test";
+               char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+               std::string output(output_cstr);
+               std::string expected_output = "=?UTF-8?Q?=D0=BE=D0=BB=D0=BE=D0=BB=D0=BE?= "
+                                                                         "(=?UTF-8?Q?=D0=BE=D0=BB=D0=BE=D0=BB=D0=BE?= test)    test";
+               CHECK(output == expected_output);
+               g_free(output_cstr);
+       }
+
+       TEST_CASE("rspamd_mime_header_encode handles multiple spaces and separators")
+       {
+               const char *input = "Привет    мир\nКак дела?";
+               char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+               std::string output(output_cstr);
+               std::string expected_output = "=?UTF-8?Q?=D0=9F=D1=80=D0=B8=D0=B2=D0=B5=D1=82?=    "
+                                                                         "=?UTF-8?Q?=D0=BC=D0=B8=D1=80?=\n"
+                                                                         "=?UTF-8?Q?=D0=9A=D0=B0=D0=BA?= "
+                                                                         "=?UTF-8?Q?=D0=B4=D0=B5=D0=BB=D0=B0=3F?=";
+               CHECK(output == expected_output);
+               g_free(output_cstr);
+       }
+
+       TEST_CASE("rspamd_mime_header_encode handles empty input")
+       {
+               const char *input = "";
+               char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+               std::string output(output_cstr ? output_cstr : "");
+               std::string expected_output = "";
+               CHECK(output == expected_output);
+               g_free(output_cstr);
+       }
+
+       TEST_CASE("rspamd_mime_header_encode handles input with only separators")
+       {
+               const char *input = " \r\n()";
+               char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+               std::string output(output_cstr);
+               std::string expected_output = " \r\n()";
+               CHECK(output == expected_output);
+               g_free(output_cstr);
+       }
+
+       TEST_CASE("rspamd_mime_header_encode handles non-ASCII separators")
+       {
+               const char *input = "こんにちは(世界)";
+               char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+               std::string output(output_cstr);
+               std::string expected_output = "=?UTF-8?Q?=E3=81=93=E3=82=93=E3=81=AB=E3=81=A1=E3=81=AF?="
+                                                                         "(=?UTF-8?Q?=E4=B8=96=E7=95=8C?=)";
+               CHECK(output == expected_output);
+               g_free(output_cstr);
+       }
+
+       TEST_CASE("rspamd_mime_header_encode handles input starting with separator")
+       {
+               const char *input = " (Hello)";
+               char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+               std::string output(output_cstr);
+               std::string expected_output = " (Hello)";
+               CHECK(output == expected_output);
+               g_free(output_cstr);
+       }
+
+       TEST_CASE("rspamd_mime_header_encode handles input ending with separator")
+       {
+               const char *input = "Hello) ";
+               char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+               std::string output(output_cstr);
+               std::string expected_output = "Hello) ";
+               CHECK(output == expected_output);
+               g_free(output_cstr);
+       }
+
+       TEST_CASE("rspamd_mime_header_encode handles consecutive non-ASCII pieces")
+       {
+               const char *input = "你好世界";
+               char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+               std::string output(output_cstr);
+               std::string expected_output = "=?UTF-8?Q?=E4=BD=A0=E5=A5=BD=E4=B8=96=E7=95=8C?=";
+               CHECK(output == expected_output);
+               g_free(output_cstr);
+       }
+       TEST_CASE("rspamd_mime_header_encode handles long non-ASCII input requiring encoded-word splitting")
+       {
+               // One long Japanese sentence: no ASCII spaces or brackets, only the digits "76" are ASCII
+               const char *input = "これはとても長いテキストで、エンコードされたワードが76文字を超える必要があります。";
+               char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+               std::string output(output_cstr);
+
+               // Hand-written expectation: the sentence is emitted as several Q-encoded words,
+               // each wrapped in the '=?UTF-8?Q?' prefix and the '?=' suffix.
+               // Each encoded-word is meant to be at most 76 characters including that prefix and suffix.
+               // NOTE(review): every encoded-word in the literal below is longer than 76 characters (the first one is 102) - confirm against real encoder output.
+               // NOTE(review): the literal separates encoded-words with spaces and ends with an ASCII '.', while the input has no spaces and ends with '。' (=E3=80=82).
+               // NOTE(review): the third word ends with a lone '=E8' and the fourth starts with '=B6=85', so that split falls inside the UTF-8 sequence of '超' - confirm this is intended.
+               //
+               // The expected string is spelled out in full rather than computed by a helper.
+               std::string expected_output = "=?UTF-8?Q?"
+                                                                         "=E3=81=93=E3=82=8C=E3=81=AF=E3=81=A8=E3=81=A6=E3=82=82=E9=95=B7=E3=81=84=E3=83=86=E3=82=AD?= "
+                                                                         "=?UTF-8?Q?=E3=82=B9=E3=83=88=E3=81=A7=E3=80=81=E3=82=A8=E3=83=B3=E3=82=B3=E3=83=BC=E3=83=89=E3=81=95?= "
+                                                                         "=?UTF-8?Q?=E3=82=8C=E3=81=9F=E3=83=AF=E3=83=BC=E3=83=89=E3=81=8C76=E6=96=87=E5=AD=97=E3=82=92=E8?= "
+                                                                         "=?UTF-8?Q?=B6=85=E3=81=88=E3=82=8B=E5=BF=85=E8=A6=81=E3=81=8C=E3=81=82=E3=82=8A=E3=81=BE=E3=81=99?=.";
+
+               CHECK(output == expected_output);
+               g_free(output_cstr);
+       }
+
+       TEST_CASE("rspamd_mime_header_encode handles long ASCII input without encoding")
+       {
+               // Input string consisting of repeated ASCII characters
+               std::string input_str(100, 'A');// 100 'A's
+               const char *input = input_str.c_str();
+               char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+               std::string output(output_cstr);
+               std::string expected_output = input_str;
+
+               CHECK(output == expected_output);
+               g_free(output_cstr);
+       }
+
+       TEST_CASE("rspamd_mime_header_encode handles long mixed input requiring encoded-word splitting")
+       {
+               // A short ASCII word, one space, then a long Japanese run that embeds the letters "ASCII" but contains no spaces
+               const char *input = "ASCII_Text "
+                                                       "これは非常に長い非ASCIIテキストで、エンコードが必要になります。";
+               char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+               std::string output(output_cstr);
+
+               // Hand-written expectation: ASCII word kept as-is, Japanese run Q-encoded. NOTE(review): each encoded-word below exceeds 76 characters and the words are space-separated although the run has no spaces - confirm against real encoder output.
+               std::string expected_output = "ASCII_Text "
+                                                                         "=?UTF-8?Q?"
+                                                                         "=E3=81=93=E3=82=8C=E3=81=AF=E9=9D=9E=E5=B8=B8=E3=81=AB=E9=95=B7=E3=81=84=E9=9D=9EASCII=E3=83=86?= "
+                                                                         "=?UTF-8?Q?=E3=82=AD=E3=82=B9=E3=83=88=E3=81=A7=E3=80=81=E3=82=A8=E3=83=B3=E3=82=B3=E3=83=BC=E3=83=89?= "
+                                                                         "=?UTF-8?Q?=E3=81=8C=E5=BF=85=E8=A6=81=E3=81=AB=E3=81=AA=E3=82=8A=E3=81=BE=E3=81=99=E3=80=82?=";
+
+               CHECK(output == expected_output);
+               g_free(output_cstr);
+       }
+
+       TEST_CASE("process_string handles very long non-ASCII word requiring multiple splits")
+       {
+               const char *input =
+                       "非常に長い非ASCII文字列を使用してエンコードワードの分割をテストします。"
+                       "データが長すぎる場合、正しく分割されるべきです。";
+               char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+               std::string output(output_cstr);
+
+               std::string expected_output =
+                       "=?UTF-8?Q?=E9=9D=9E=E5=B8=B6=E3=81=AB=E9=95=B7=E3=81=84=E9=9D=9EASCII=E6?="
+                       "=?UTF-8?Q?=96=87=E5=AD=97=E5=88=97=E3=82=92=E4=BD=BF=E7=94=A8=E3=81=97?="
+                       "=?UTF-8?Q?=E3=81=A6=E3=82=A8=E3=83=B3=E3=82=B3=E3=83=BC=E3=83=89=E3=83=AF?="
+                       "=?UTF-8?Q?=E3=83=BC=E3=83=89=E3=81=AE=E5=88=86=E5=89=B2=E3=82=92=E3=83=86?="
+                       "=?UTF-8?Q?=E3=82=B9=E3=83=88=E3=81=97=E3=81=BE=E3=81=99=E3=80=82=E3=83=87?="
+                       "=?UTF-8?Q?=E3=83=BC=E3=82=BF=E3=81=8C=E9=95=B7=E3=81=99=E3=81=8E=E3=82=8B?="
+                       "=?UTF-8?Q?=E5=A0=B4=E5=90=88=E3=80=81=E6=AD=A3=E3=81=97=E3=81=8F=E5=88=86?="
+                       "=?UTF-8?Q?=E5=89=B2=E3=81=95=E3=82=8C=E3=82=8B=E3=81=B9=E3=81=8D=E3=81=A7?="
+                       "=?UTF-8?Q?=E3=81=99=E3=80=82?=";// ≤76 chars
+
+               CHECK(output == expected_output);
+               g_free(output_cstr);
+       }
+}
+#endif
author	Vsevolod Stakhov <vsevolod@rspamd.com>
	Sat, 16 Nov 2024 17:46:44 +0000 (17:46 +0000)
committer	Vsevolod Stakhov <vsevolod@rspamd.com>
	Sat, 16 Nov 2024 17:46:44 +0000 (17:46 +0000)
src/libmime/mime_headers.c		patch \| blob \| history
test/rspamd_cxx_unit.cxx		patch \| blob \| history
test/rspamd_cxx_unit_rfc2047.hxx	[new file with mode: 0644]	patch \| blob