From: Vsevolod Stakhov Date: Mon, 18 Nov 2024 16:51:44 +0000 (+0000) Subject: [Test] Test encode with decode X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=4c1bfe96ff449fb68b8b363f8679ada84a2f82de;p=rspamd.git [Test] Test encode with decode --- diff --git a/test/rspamd_cxx_unit_rfc2047.hxx b/test/rspamd_cxx_unit_rfc2047.hxx index cdd37d882..e403f99ed 100644 --- a/test/rspamd_cxx_unit_rfc2047.hxx +++ b/test/rspamd_cxx_unit_rfc2047.hxx @@ -32,7 +32,44 @@ TEST_SUITE("rfc2047 encode") rspamd_mempool_t *pool = rspamd_mempool_new(rspamd_mempool_suggest_size(), "rfc2047", 0); std::vector> cases = { {"Hello World", "Hello World"}, - {"Hello Мир", "Hello =?UTF-8?Q?=D0=9C=D0=B8=D1=80?="}}; + {"Hello Мир", "Hello =?UTF-8?Q?=D0=9C=D0=B8=D1=80?="}, + {"ололо (ололо test) test", "=?UTF-8?Q?=D0=BE=D0=BB=D0=BE=D0=BB=D0=BE?= " + "(=?UTF-8?Q?=D0=BE=D0=BB=D0=BE=D0=BB=D0=BE?= test) test"}, + {"Привет мир Как дела?", "=?UTF-8?Q?=D0=9F=D1=80=D0=B8=D0=B2=D0=B5=D1=82____=D0=BC=D0=B8=D1=80_=D0?=" + "=?UTF-8?Q?=9A=D0=B0=D0=BA_=D0=B4=D0=B5=D0=BB=D0=B0?=?"}, + {"", ""}, + {"こんにちは(世界)", "=?UTF-8?Q?=E3=81=93=E3=82=93=E3=81=AB=E3=81=A1=E3=81=AF?=" + "(=?UTF-8?Q?=E4=B8=96=E7=95=8C?=)"}, + {"(Hello)", "(Hello)"}, + {"Hello)", "Hello)"}, + {"你好世界", "=?UTF-8?Q?=E4=BD=A0=E5=A5=BD=E4=B8=96=E7=95=8C?="}, + {"これはとても長いテキストで、エンコードされたワードが76文字を超える必要があります。", + "=?UTF-8?Q?=E3=81=93=E3=82=8C=E3=81=AF=E3=81=A8=E3=81=A6=E3=82=82=E9=95=B7?=" + "=?UTF-8?Q?=E3=81=84=E3=83=86=E3=82=AD=E3=82=B9=E3=83=88=E3=81=A7=E3=80=81?=" + "=?UTF-8?Q?=E3=82=A8=E3=83=B3=E3=82=B3=E3=83=BC=E3=83=89=E3=81=95=E3=82=8C?=" + "=?UTF-8?Q?=E3=81=9F=E3=83=AF=E3=83=BC=E3=83=89=E3=81=8C76=E6=96=87=E5=AD?=" + "=?UTF-8?Q?=97=E3=82=92=E8=B6=85=E3=81=88=E3=82=8B=E5=BF=85=E8=A6=81=E3=81?=" + "=?UTF-8?Q?=8C=E3=81=82=E3=82=8A=E3=81=BE=E3=81=99=E3=80=82?="}, + {"ASCII_Text " + "これは非常に長い非ASCIIテキストで、エンコードが必要になります。", + "ASCII_Text " + "=?UTF-8?Q?=E3=81=93=E3=82=8C=E3=81=AF=E9=9D=9E=E5=B8=B8=E3=81?=" + "=?UTF-8?Q?=AB=E9=95=B7=E3=81=84=E9=9D=9EASCII=E3=83=86=E3=82=AD=E3=82=B9?=" + "=?UTF-8?Q?=E3=83=88=E3=81=A7=E3=80=81=E3=82=A8=E3=83=B3=E3=82=B3=E3=83=BC?=" + "=?UTF-8?Q?=E3=83=89=E3=81=8C=E5=BF=85=E8=A6=81=E3=81=AB=E3=81=AA=E3=82=8A?=" + "=?UTF-8?Q?=E3=81=BE=E3=81=99=E3=80=82?="}, + {"非常に長い非ASCII文字列を使用してエンコードワードの分割をテストします。" + "データが長すぎる場合、正しく分割されるべきです。", + "=?UTF-8?Q?=E9=9D=9E=E5=B8=B8=E3=81=AB=E9=95=B7=E3=81=84=E9=9D=9EASCII=E6?=" + "=?UTF-8?Q?=96=87=E5=AD=97=E5=88=97=E3=82=92=E4=BD=BF=E7=94=A8=E3=81=97=E3?=" + "=?UTF-8?Q?=81=A6=E3=82=A8=E3=83=B3=E3=82=B3=E3=83=BC=E3=83=89=E3=83=AF=E3?=" + "=?UTF-8?Q?=83=BC=E3=83=89=E3=81=AE=E5=88=86=E5=89=B2=E3=82=92=E3=83=86=E3?=" + "=?UTF-8?Q?=82=B9=E3=83=88=E3=81=97=E3=81=BE=E3=81=99=E3=80=82=E3=83=87=E3?=" + "=?UTF-8?Q?=83=BC=E3=82=BF=E3=81=8C=E9=95=B7=E3=81=99=E3=81=8E=E3=82=8B=E5?=" + "=?UTF-8?Q?=A0=B4=E5=90=88=E3=80=81=E6=AD=A3=E3=81=97=E3=81=8F=E5=88=86=E5?=" + "=?UTF-8?Q?=89=B2=E3=81=95=E3=82=8C=E3=82=8B=E3=81=B9=E3=81=8D=E3=81=A7=E3?=" + "=?UTF-8?Q?=81=99=E3=80=82?="}, + }; for (const auto &c: cases) { SUBCASE(c.first.c_str()) @@ -46,7 +83,7 @@ TEST_SUITE("rfc2047 encode") char *decoded_cstr = rspamd_mime_header_decode(pool, output_cstr, strlen(output_cstr), &invalid_utf); std::string decoded(decoded_cstr); CHECK(invalid_utf == FALSE); - CHECK(decoded == input); + CHECK(decoded == c.first); g_free(output_cstr); } } @@ -54,117 +91,6 @@ TEST_SUITE("rfc2047 encode") rspamd_mempool_delete(pool); } - TEST_CASE("rspamd_mime_header_encode handles input with non-ASCII characters") - { - const char *input = "Hello Мир"; - char *output_cstr = rspamd_mime_header_encode(input, strlen(input), false); - std::string output(output_cstr); - std::string expected_output = "Hello =?UTF-8?Q?=D0=9C=D0=B8=D1=80?="; - CHECK(output == expected_output); - g_free(output_cstr); - } - - TEST_CASE("rspamd_mime_header_encode handles mixed input with separators") - { - const char *input = "ололо (ололо test) test"; - char *output_cstr = rspamd_mime_header_encode(input, strlen(input), false); - std::string output(output_cstr); - std::string expected_output = "=?UTF-8?Q?=D0=BE=D0=BB=D0=BE=D0=BB=D0=BE?= " - "(=?UTF-8?Q?=D0=BE=D0=BB=D0=BE=D0=BB=D0=BE?= test) test"; - CHECK(output == expected_output); - g_free(output_cstr); - } - - TEST_CASE("rspamd_mime_header_encode handles multiple spaces and separators") - { - const char *input = "Привет мир\nКак дела?"; - char *output_cstr = rspamd_mime_header_encode(input, strlen(input), false); - std::string output(output_cstr); - std::string expected_output = "=?UTF-8?Q?=D0=9F=D1=80=D0=B8=D0=B2=D0=B5=D1=82?= " - "=?UTF-8?Q?=D0=BC=D0=B8=D1=80?=\n" - "=?UTF-8?Q?=D0=9A=D0=B0=D0=BA?= " - "=?UTF-8?Q?=D0=B4=D0=B5=D0=BB=D0=B0=3F?="; - CHECK(output == expected_output); - g_free(output_cstr); - } - - TEST_CASE("rspamd_mime_header_encode handles empty input") - { - const char *input = ""; - char *output_cstr = rspamd_mime_header_encode(input, strlen(input), false); - std::string output(output_cstr ? output_cstr : ""); - std::string expected_output = ""; - CHECK(output == expected_output); - g_free(output_cstr); - } - - TEST_CASE("rspamd_mime_header_encode handles input with only separators") - { - const char *input = " \r\n()"; - char *output_cstr = rspamd_mime_header_encode(input, strlen(input), false); - std::string output(output_cstr); - std::string expected_output = " \r\n()"; - CHECK(output == expected_output); - g_free(output_cstr); - } - - TEST_CASE("rspamd_mime_header_encode handles non-ASCII separators") - { - const char *input = "こんにちは(世界)"; - char *output_cstr = rspamd_mime_header_encode(input, strlen(input), false); - std::string output(output_cstr); - std::string expected_output = "=?UTF-8?Q?=E3=81=93=E3=82=93=E3=81=AB=E3=81=A1=E3=81=AF?=" - "(=?UTF-8?Q?=E4=B8=96=E7=95=8C?=)"; - CHECK(output == expected_output); - g_free(output_cstr); - } - - TEST_CASE("rspamd_mime_header_encode handles input starting with separator") - { - const char *input = " (Hello)"; - char *output_cstr = rspamd_mime_header_encode(input, strlen(input), false); - std::string output(output_cstr); - std::string expected_output = " (Hello)"; - CHECK(output == expected_output); - g_free(output_cstr); - } - - TEST_CASE("rspamd_mime_header_encode handles input ending with separator") - { - const char *input = "Hello) "; - char *output_cstr = rspamd_mime_header_encode(input, strlen(input), false); - std::string output(output_cstr); - std::string expected_output = "Hello) "; - CHECK(output == expected_output); - g_free(output_cstr); - } - - TEST_CASE("rspamd_mime_header_encode handles consecutive non-ASCII pieces") - { - const char *input = "你好世界"; - char *output_cstr = rspamd_mime_header_encode(input, strlen(input), false); - std::string output(output_cstr); - std::string expected_output = "=?UTF-8?Q?=E4=BD=A0=E5=A5=BD=E4=B8=96=E7=95=8C?="; - CHECK(output == expected_output); - g_free(output_cstr); - } - TEST_CASE("rspamd_mime_header_encode handles long non-ASCII input requiring encoded-word splitting") - { - // Input string consisting of repeated non-ASCII characters - const char *input = "これはとても長いテキストで、エンコードされたワードが76文字を超える必要があります。"; - char *output_cstr = rspamd_mime_header_encode(input, strlen(input), false); - std::string output(output_cstr); - std::string expected_output = "=?UTF-8?Q?=E3=81=93=E3=82=8C=E3=81=AF=E3=81=A8=E3=81=A6=E3=82=82=E9=95=B7?=" - "=?UTF-8?Q?=E3=81=84=E3=83=86=E3=82=AD=E3=82=B9=E3=83=88=E3=81=A7=E3=80=81?=" - "=?UTF-8?Q?=E3=82=A8=E3=83=B3=E3=82=B3=E3=83=BC=E3=83=89=E3=81=95=E3=82=8C?=" - "=?UTF-8?Q?=E3=81=9F=E3=83=AF=E3=83=BC=E3=83=89=E3=81=8C76=E6=96=87=E5=AD?=" - "=?UTF-8?Q?=97=E3=82=92=E8=B6=85=E3=81=88=E3=82=8B=E5=BF=85=E8=A6=81=E3=81?=" - "=?UTF-8?Q?=8C=E3=81=82=E3=82=8A=E3=81=BE=E3=81=99=E3=80=82?="; - - CHECK(output == expected_output); - g_free(output_cstr); - } - TEST_CASE("rspamd_mime_header_encode handles long ASCII input without encoding") { // Input string consisting of repeated ASCII characters @@ -177,48 +103,5 @@ TEST_SUITE("rfc2047 encode") CHECK(output == expected_output); g_free(output_cstr); } - - TEST_CASE("rspamd_mime_header_encode handles long mixed input requiring encoded-word splitting") - { - // Input string with mix of ASCII and non-ASCII characters forming long pieces - const char *input = "ASCII_Text " - "これは非常に長い非ASCIIテキストで、エンコードが必要になります。"; - char *output_cstr = rspamd_mime_header_encode(input, strlen(input), false); - std::string output(output_cstr); - - // Expected output: ASCII text as-is, non-ASCII text encoded and split accordingly - std::string expected_output = "ASCII_Text " - "=?UTF-8?Q?=E3=81=93=E3=82=8C=E3=81=AF=E9=9D=9E=E5=B8=B8=E3=81=AB=E9=95=B7?=" - "=?UTF-8?Q?=E3=81=84=E9=9D=9EASCII=E3=83=86=E3=82=AD=E3=82=B9=E3=83=88=E3?=" - "=?UTF-8?Q?=81=A7=E3=80=81=E3=82=A8=E3=83=B3=E3=82=B3=E3=83=BC=E3=83=89=E3?=" - "=?UTF-8?Q?=81=8C=E5=BF=85=E8=A6=81=E3=81=AB=E3=81=AA=E3=82=8A=E3=81=BE=E3?=" - "=?UTF-8?Q?=81=99=E3=80=82?="; - - CHECK(output == expected_output); - g_free(output_cstr); - } - - TEST_CASE("process_string handles very long non-ASCII word requiring multiple splits") - { - const char *input = - "非常に長い非ASCII文字列を使用してエンコードワードの分割をテストします。" - "データが長すぎる場合、正しく分割されるべきです。"; - char *output_cstr = rspamd_mime_header_encode(input, strlen(input), false); - std::string output(output_cstr); - - std::string expected_output = - "=?UTF-8?Q?=E9=9D=9E=E5=B8=B8=E3=81=AB=E9=95=B7=E3=81=84=E9=9D=9EASCII=E6?=" - "=?UTF-8?Q?=96=87=E5=AD=97=E5=88=97=E3=82=92=E4=BD=BF=E7=94=A8=E3=81=97=E3?=" - "=?UTF-8?Q?=81=A6=E3=82=A8=E3=83=B3=E3=82=B3=E3=83=BC=E3=83=89=E3=83=AF=E3?=" - "=?UTF-8?Q?=83=BC=E3=83=89=E3=81=AE=E5=88=86=E5=89=B2=E3=82=92=E3=83=86=E3?=" - "=?UTF-8?Q?=82=B9=E3=83=88=E3=81=97=E3=81=BE=E3=81=99=E3=80=82=E3=83=87=E3?=" - "=?UTF-8?Q?=83=BC=E3=82=BF=E3=81=8C=E9=95=B7=E3=81=99=E3=81=8E=E3=82=8B=E5?=" - "=?UTF-8?Q?=A0=B4=E5=90=88=E3=80=81=E6=AD=A3=E3=81=97=E3=81=8F=E5=88=86=E5?=" - "=?UTF-8?Q?=89=B2=E3=81=95=E3=82=8C=E3=82=8B=E3=81=B9=E3=81=8D=E3=81=A7=E3?=" - "=?UTF-8?Q?=81=99=E3=80=82?=";// ≤76 chars - - CHECK(output == expected_output); - g_free(output_cstr); - } } #endif