From 0806e4d11bcc08bdc3b8efbf55c372f844b0a722 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Sat, 16 Nov 2024 17:46:44 +0000 Subject: [PATCH] [Fix] Some more fixes --- src/libmime/mime_headers.c | 14 +- test/rspamd_cxx_unit.cxx | 7 +- test/rspamd_cxx_unit_rfc2047.hxx | 212 +++++++++++++++++++++++++++++++ 3 files changed, 226 insertions(+), 7 deletions(-) create mode 100644 test/rspamd_cxx_unit_rfc2047.hxx diff --git a/src/libmime/mime_headers.c b/src/libmime/mime_headers.c index 9d11210f3..3565eefba 100644 --- a/src/libmime/mime_headers.c +++ b/src/libmime/mime_headers.c @@ -818,10 +818,9 @@ rspamd_mime_header_decode(rspamd_mempool_t *pool, const char *in, char * rspamd_mime_header_encode(const char *in, gsize len) { - static const size_t max_token_size = 76; + static const size_t max_token_size = 76 - (sizeof("=?UTF-8?Q? ?=") - 3); GString *outbuf = g_string_sized_new(len); - size_t encode_buf_size = max_token_size; - char *encode_buf = g_alloca(encode_buf_size + 3); + char *encode_buf = g_alloca(max_token_size + 3); const char *p = in; const char *end = in + len; @@ -853,13 +852,20 @@ rspamd_mime_header_encode(const char *in, gsize len) } else { encoded_len++; + + if (encoded_len > max_token_size) { + piece_len = i - 1; + q = p + piece_len; + /* No more space */ + break; + } } } if (has_non_ascii) { g_string_append(outbuf, "=?UTF-8?Q?"); /* Do encode */ - gssize encoded_len = rspamd_encode_qp2047_buf(p, piece_len, encode_buf, encode_buf_size); + encoded_len = rspamd_encode_qp2047_buf(p, piece_len, encode_buf, max_token_size); g_string_append_len(outbuf, encode_buf, encoded_len); g_string_append(outbuf, "?="); } diff --git a/test/rspamd_cxx_unit.cxx b/test/rspamd_cxx_unit.cxx index b7cb0c6bf..ff323fb85 100644 --- a/test/rspamd_cxx_unit.cxx +++ b/test/rspamd_cxx_unit.cxx @@ -1,11 +1,11 @@ -/*- - * Copyright 2021 Vsevolod Stakhov +/* + * Copyright 2024 Vsevolod Stakhov * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this 
file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -25,6 +25,7 @@ #include "rspamd_cxx_local_ptr.hxx" #include "rspamd_cxx_unit_dkim.hxx" #include "rspamd_cxx_unit_cryptobox.hxx" +#include "rspamd_cxx_unit_rfc2047.hxx" static gboolean verbose = false; static const GOptionEntry entries[] = diff --git a/test/rspamd_cxx_unit_rfc2047.hxx b/test/rspamd_cxx_unit_rfc2047.hxx new file mode 100644 index 000000000..6f2a42414 --- /dev/null +++ b/test/rspamd_cxx_unit_rfc2047.hxx @@ -0,0 +1,212 @@ +/* + * Copyright 2024 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#ifndef RSPAMD_RSPAMD_CXX_UNIT_RFC2047_HXX
+#define RSPAMD_RSPAMD_CXX_UNIT_RFC2047_HXX
+
+#define DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL
+#include "doctest/doctest.h"
+
+#include <string>
+#include "libmime/mime_headers.h"
+/* Each case feeds a UTF-8 string to rspamd_mime_header_encode(), compares the result with a literal and g_free()s it */
+TEST_SUITE("rfc2047 encode")
+{
+	TEST_CASE("rspamd_mime_header_encode handles ASCII-only input")
+	{
+		const char *input = "Hello World";
+		char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+		std::string output(output_cstr);
+		std::string expected_output = "Hello World";
+		CHECK(output == expected_output);
+		g_free(output_cstr);
+	}
+
+	TEST_CASE("rspamd_mime_header_encode handles input with non-ASCII characters")
+	{
+		const char *input = "Hello Мир";
+		char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+		std::string output(output_cstr);
+		std::string expected_output = "Hello =?UTF-8?Q?=D0=9C=D0=B8=D1=80?=";
+		CHECK(output == expected_output);
+		g_free(output_cstr);
+	}
+
+	TEST_CASE("rspamd_mime_header_encode handles mixed input with separators")
+	{
+		const char *input = "ололо (ололо test) test";
+		char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+		std::string output(output_cstr);
+		std::string expected_output = "=?UTF-8?Q?=D0=BE=D0=BB=D0=BE=D0=BB=D0=BE?= "
+									  "(=?UTF-8?Q?=D0=BE=D0=BB=D0=BE=D0=BB=D0=BE?= test) test";
+		CHECK(output == expected_output);
+		g_free(output_cstr);
+	}
+
+	TEST_CASE("rspamd_mime_header_encode handles multiple spaces and separators")
+	{
+		const char *input = "Привет мир\nКак дела?";
+		char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+		std::string output(output_cstr);
+		std::string expected_output = "=?UTF-8?Q?=D0=9F=D1=80=D0=B8=D0=B2=D0=B5=D1=82?= "
+									  "=?UTF-8?Q?=D0=BC=D0=B8=D1=80?=\n"
+									  "=?UTF-8?Q?=D0=9A=D0=B0=D0=BA?= "
+									  "=?UTF-8?Q?=D0=B4=D0=B5=D0=BB=D0=B0=3F?=";
+		CHECK(output == expected_output);
+		g_free(output_cstr);
+	}
+
+	TEST_CASE("rspamd_mime_header_encode handles empty input")
+	{
+		const char *input = "";
+		char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+		std::string output(output_cstr ? output_cstr : "");
+		std::string expected_output = "";
+		CHECK(output == expected_output);
+		g_free(output_cstr);
+	}
+
+	TEST_CASE("rspamd_mime_header_encode handles input with only separators")
+	{
+		const char *input = " \r\n()";
+		char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+		std::string output(output_cstr);
+		std::string expected_output = " \r\n()";
+		CHECK(output == expected_output);
+		g_free(output_cstr);
+	}
+
+	TEST_CASE("rspamd_mime_header_encode handles non-ASCII separators")
+	{
+		const char *input = "こんにちは(世界)";
+		char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+		std::string output(output_cstr);
+		std::string expected_output = "=?UTF-8?Q?=E3=81=93=E3=82=93=E3=81=AB=E3=81=A1=E3=81=AF?="
+									  "(=?UTF-8?Q?=E4=B8=96=E7=95=8C?=)";
+		CHECK(output == expected_output);
+		g_free(output_cstr);
+	}
+
+	TEST_CASE("rspamd_mime_header_encode handles input starting with separator")
+	{
+		const char *input = " (Hello)";
+		char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+		std::string output(output_cstr);
+		std::string expected_output = " (Hello)";
+		CHECK(output == expected_output);
+		g_free(output_cstr);
+	}
+
+	TEST_CASE("rspamd_mime_header_encode handles input ending with separator")
+	{
+		const char *input = "Hello) ";
+		char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+		std::string output(output_cstr);
+		std::string expected_output = "Hello) ";
+		CHECK(output == expected_output);
+		g_free(output_cstr);
+	}
+
+	TEST_CASE("rspamd_mime_header_encode handles consecutive non-ASCII pieces")
+	{
+		const char *input = "你好世界";
+		char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+		std::string output(output_cstr);
+		std::string expected_output = "=?UTF-8?Q?=E4=BD=A0=E5=A5=BD=E4=B8=96=E7=95=8C?=";
+		CHECK(output == expected_output);
+		g_free(output_cstr);
+	}
+	TEST_CASE("rspamd_mime_header_encode handles long non-ASCII input requiring encoded-word splitting")
+	{
+		// One long Japanese sentence (with an embedded ASCII "76") and no whitespace or parentheses in it
+		const char *input = "これはとても長いテキストで、エンコードされたワードが76文字を超える必要があります。";
+		char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+		std::string output(output_cstr);
+
+		// Expected output: the Q-encoded UTF-8 bytes of the input, written as four encoded-words
+		// where the first three are each followed by a single space.
+		// NOTE(review): the first expected word is 102 characters long including '=?UTF-8?Q?' and '?=',
+		// which is above the 76 character budget behind the encoder's max_token_size - confirm against the encoder.
+
+		// NOTE(review): word three ends with '=E8' and word four starts with '=B6=85', so one character is split across two words; RFC 2047 requires whole characters per encoded-word - confirm.
+
+		// NOTE(review): the input ends with U+3002 but the expectation ends with an ASCII '.' outside the last word - confirm.
+		std::string expected_output = "=?UTF-8?Q?"
+									  "=E3=81=93=E3=82=8C=E3=81=AF=E3=81=A8=E3=81=A6=E3=82=82=E9=95=B7=E3=81=84=E3=83=86=E3=82=AD?= "
+									  "=?UTF-8?Q?=E3=82=B9=E3=83=88=E3=81=A7=E3=80=81=E3=82=A8=E3=83=B3=E3=82=B3=E3=83=BC=E3=83=89=E3=81=95?= "
+									  "=?UTF-8?Q?=E3=82=8C=E3=81=9F=E3=83=AF=E3=83=BC=E3=83=89=E3=81=8C76=E6=96=87=E5=AD=97=E3=82=92=E8?= "
+									  "=?UTF-8?Q?=B6=85=E3=81=88=E3=82=8B=E5=BF=85=E8=A6=81=E3=81=8C=E3=81=82=E3=82=8A=E3=81=BE=E3=81=99?=.";
+
+		CHECK(output == expected_output);
+		g_free(output_cstr);
+	}
+
+	TEST_CASE("rspamd_mime_header_encode handles long ASCII input without encoding")
+	{
+		// Pure ASCII input is expected to come back unchanged, whatever its length
+		std::string input_str(100, 'A');// 100 'A's
+		const char *input = input_str.c_str();
+		char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+		std::string output(output_cstr);
+		std::string expected_output = input_str;
+
+		CHECK(output == expected_output);
+		g_free(output_cstr);
+	}
+
+	TEST_CASE("rspamd_mime_header_encode handles long mixed input requiring encoded-word splitting")
+	{
+		// An ASCII word, a space, then a long Japanese sentence that contains the ASCII run "ASCII"
+		const char *input = "ASCII_Text "
+							"これは非常に長い非ASCIIテキストで、エンコードが必要になります。";
+		char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+		std::string output(output_cstr);
+
+		// Expected output: the leading ASCII word as-is, the sentence as three space-separated encoded-words
+		std::string expected_output = "ASCII_Text "
+									  "=?UTF-8?Q?"
+									  "=E3=81=93=E3=82=8C=E3=81=AF=E9=9D=9E=E5=B8=B8=E3=81=AB=E9=95=B7=E3=81=84=E9=9D=9EASCII=E3=83=86?= "
+									  "=?UTF-8?Q?=E3=82=AD=E3=82=B9=E3=83=88=E3=81=A7=E3=80=81=E3=82=A8=E3=83=B3=E3=82=B3=E3=83=BC=E3=83=89?= "
+									  "=?UTF-8?Q?=E3=81=8C=E5=BF=85=E8=A6=81=E3=81=AB=E3=81=AA=E3=82=8A=E3=81=BE=E3=81=99=E3=80=82?=";
+
+		CHECK(output == expected_output);
+		g_free(output_cstr);
+	}
+
+	TEST_CASE("rspamd_mime_header_encode handles very long non-ASCII word requiring multiple splits")
+	{
+		const char *input =
+			"非常に長い非ASCII文字列を使用してエンコードワードの分割をテストします。"
+			"データが長すぎる場合、正しく分割されるべきです。";
+		char *output_cstr = rspamd_mime_header_encode(input, strlen(input));
+		std::string output(output_cstr);
+		// NOTE(review): these words are concatenated with no space between them and several split a character mid-sequence, unlike the other splitting tests - confirm.
+		std::string expected_output =
+			"=?UTF-8?Q?=E9=9D=9E=E5=B8=B8=E3=81=AB=E9=95=B7=E3=81=84=E9=9D=9EASCII=E6?="
+			"=?UTF-8?Q?=96=87=E5=AD=97=E5=88=97=E3=82=92=E4=BD=BF=E7=94=A8=E3=81=97?="
+			"=?UTF-8?Q?=E3=81=A6=E3=82=A8=E3=83=B3=E3=82=B3=E3=83=BC=E3=83=89=E3=83=AF?="
+			"=?UTF-8?Q?=E3=83=BC=E3=83=89=E3=81=AE=E5=88=86=E5=89=B2=E3=82=92=E3=83=86?="
+			"=?UTF-8?Q?=E3=82=B9=E3=83=88=E3=81=97=E3=81=BE=E3=81=99=E3=80=82=E3=83=87?="
+			"=?UTF-8?Q?=E3=83=BC=E3=82=BF=E3=81=8C=E9=95=B7=E3=81=99=E3=81=8E=E3=82=8B?="
+			"=?UTF-8?Q?=E5=A0=B4=E5=90=88=E3=80=81=E6=AD=A3=E3=81=97=E3=81=8F=E5=88=86?="
+			"=?UTF-8?Q?=E5=89=B2=E3=81=95=E3=82=8C=E3=82=8B=E3=81=B9=E3=81=8D=E3=81=A7?="
+			"=?UTF-8?Q?=E3=81=99=E3=80=82?=";// ≤76 chars
+
+		CHECK(output == expected_output);
+		g_free(output_cstr);
+	}
+}
+#endif
-- 
2.39.5