diff options
author | Vsevolod Stakhov <vsevolod@rspamd.com> | 2023-07-26 10:49:23 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@rspamd.com> | 2023-07-26 10:49:23 +0100 |
commit | 537a7180a0d5132c11636c4fd8b1450cd99d352c (patch) | |
tree | fb9f8c84955a411bdffbd6371ea32f2716fb3687 /src/libserver/html/html_tests.cxx | |
parent | 5fd7a90fdaa33f52c59bdb0ca84451e5c1e22365 (diff) | |
download | rspamd-537a7180a0d5132c11636c4fd8b1450cd99d352c.tar.gz rspamd-537a7180a0d5132c11636c4fd8b1450cd99d352c.zip |
[Rework] Use clang-format to unify formatting in all sources
No meaningful changes.
Diffstat (limited to 'src/libserver/html/html_tests.cxx')
-rw-r--r-- | src/libserver/html/html_tests.cxx | 250 |
1 files changed, 134 insertions, 116 deletions
diff --git a/src/libserver/html/html_tests.cxx b/src/libserver/html/html_tests.cxx index 2492337bf..2fe6702df 100644 --- a/src/libserver/html/html_tests.cxx +++ b/src/libserver/html/html_tests.cxx @@ -31,48 +31,50 @@ namespace rspamd::html { * Tests part */ -TEST_SUITE("html") { -TEST_CASE("html parsing") +TEST_SUITE("html") { + TEST_CASE("html parsing") + { - const std::vector<std::pair<std::string, std::string>> cases{ - {"<html><!DOCTYPE html><body>", "+html;++xml;++body;"}, - {"<html><div><div></div></div></html>", "+html;++div;+++div;"}, - {"<html><div><div></div></html>", "+html;++div;+++div;"}, - {"<html><div><div></div></html></div>", "+html;++div;+++div;"}, - {"<p><p><a></p></a></a>", "+p;++p;+++a;"}, + const std::vector<std::pair<std::string, std::string>> cases{ + {"<html><!DOCTYPE html><body>", "+html;++xml;++body;"}, + {"<html><div><div></div></div></html>", "+html;++div;+++div;"}, + {"<html><div><div></div></html>", "+html;++div;+++div;"}, + {"<html><div><div></div></html></div>", "+html;++div;+++div;"}, + {"<p><p><a></p></a></a>", "+p;++p;+++a;"}, {"<div><a href=\"http://example.com\"></div></a>", "+div;++a;"}, /* Broken, as I don't know how the hell this should be really parsed */ //{"<html><!DOCTYPE html><body><head><body></body></html></body></html>", // "+html;++xml;++body;+++head;+++body;"} - }; + }; - rspamd_url_init(NULL); - auto *pool = rspamd_mempool_new(rspamd_mempool_suggest_size(), - "html", 0); - struct rspamd_task fake_task; - memset(&fake_task, 0, sizeof(fake_task)); - fake_task.task_pool = pool; + rspamd_url_init(NULL); + auto *pool = rspamd_mempool_new(rspamd_mempool_suggest_size(), + "html", 0); + struct rspamd_task fake_task; + memset(&fake_task, 0, sizeof(fake_task)); + fake_task.task_pool = pool; - for (const auto &c : cases) { - SUBCASE((std::string("extract tags from: ") + c.first).c_str()) { - GByteArray *tmp = g_byte_array_sized_new(c.first.size()); - g_byte_array_append(tmp, (const guint8 *) c.first.data(), c.first.size()); - auto *hc = html_process_input(&fake_task, tmp, nullptr, nullptr, nullptr, true, nullptr); - CHECK(hc != nullptr); - auto dump = html_debug_structure(*hc); - CHECK(c.second == dump); - g_byte_array_free(tmp, TRUE); + for (const auto &c: cases) { + SUBCASE((std::string("extract tags from: ") + c.first).c_str()) + { + GByteArray *tmp = g_byte_array_sized_new(c.first.size()); + g_byte_array_append(tmp, (const guint8 *) c.first.data(), c.first.size()); + auto *hc = html_process_input(&fake_task, tmp, nullptr, nullptr, nullptr, true, nullptr); + CHECK(hc != nullptr); + auto dump = html_debug_structure(*hc); + CHECK(c.second == dump); + g_byte_array_free(tmp, TRUE); + } } - } - rspamd_mempool_delete(pool); -} + rspamd_mempool_delete(pool); + } -TEST_CASE("html text extraction") -{ - using namespace std::string_literals; - const std::vector<std::pair<std::string, std::string>> cases{ + TEST_CASE("html text extraction") + { + using namespace std::string_literals; + const std::vector<std::pair<std::string, std::string>> cases{ {"test", "test"}, {"test\0"s, "test\uFFFD"s}, {"test\0test"s, "test\uFFFDtest"s}, @@ -90,13 +92,15 @@ TEST_CASE("html text extraction") {" <body>\n" " <!-- escape content -->\n" " a b a > b a < b a & b 'a "a"\n" - " </body>", R"|(a b a > b a < b a & b 'a "a")|"}, + " </body>", + R"|(a b a > b a < b a & b 'a "a")|"}, /* XML tags */ {"<?xml version=\"1.0\" encoding=\"iso-8859-1\"?>\n" " <!DOCTYPE html\n" " PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n" " \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n" - "<body>test</body>", "test"}, + "<body>test</body>", + "test"}, {"<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=UTF-8\"></head>" " <body>\n" " <p><br>\n" @@ -104,9 +108,11 @@ TEST_CASE("html text extraction") " <div class=\"moz-forward-container\"><br>\n" " <br>\n" " test</div>" - "</body>", "\n\n\ntest\n"}, + "</body>", + "\n\n\ntest\n"}, {"<div>fi<span style=\"FONT-SIZE: 0px\">le </span>" - "sh<span style=\"FONT-SIZE: 0px\">aring </span></div>", "fish\n"}, + "sh<span style=\"FONT-SIZE: 0px\">aring </span></div>", + "fish\n"}, /* FIXME: broken until rework of css parser */ //{"<div>fi<span style=\"FONT-SIZE: 0px\">le </span>" // "sh<span style=\"FONT-SIZE: 0px\">aring </div>foo</span>", "fish\nfoo"}, @@ -126,7 +132,8 @@ TEST_CASE("html text extraction") " </P>\n" " <b>stuff</p>?\n" " </body>\n" - "</html>", "Hello, world! test \ndata<>\nstuff\n?"}, + "</html>", + "Hello, world! test \ndata<>\nstuff\n?"}, {"<p><!--comment-->test</br></hr><br>", "test\n"}, /* Tables */ {"<table>\n" @@ -138,7 +145,8 @@ TEST_CASE("html text extraction") " <td>data1</td>\n" " <td>data2</td>\n" " </tr>\n" - " </table>", "heada headb\ndata1 data2\n"}, + " </table>", + "heada headb\ndata1 data2\n"}, /* Invalid closing br and hr + comment */ {" <body>\n" " <!-- page content -->\n" @@ -146,17 +154,20 @@ TEST_CASE("html text extraction") " <div>\n" " content inside div\n" " </div>\n" - " </body>", "Hello, world!\ntest\ncontentmore content\ncontent inside div\n"}, + " </body>", + "Hello, world!\ntest\ncontentmore content\ncontent inside div\n"}, /* First closing tag */ {"</head>\n" "<body>\n" "<p> Hello. I have some bad news.\n" "<br /> <br /> <br /> <strong> <br /> <br /> <br /> <br /> <br /> <br /> <br /> <br /> </strong><span> <br /> </span>test</p>\n" "</body>\n" - "</html>", "Hello. I have some bad news. \n\n\n\n\n\n\n\n\n\n\n\ntest\n"}, + "</html>", + "Hello. I have some bad news. \n\n\n\n\n\n\n\n\n\n\n\ntest\n"}, /* Invalid tags */ {"lol <sht> omg </sht> oh my!\n" - "<name>words words</name> goodbye","lol omg oh my! words words goodbye"}, + "<name>words words</name> goodbye", + "lol omg oh my! words words goodbye"}, /* Invisible stuff */ {"<div style=\"color:#555555;font-family:Arial, 'Helvetica Neue', Helvetica, sans-serif;line-height:1.2;padding-top:10px;padding-right:10px;padding-bottom:10px;padding-left:10px;font-style: italic;\">\n" "<p style=\"font-size: 11px; line-height: 1.2; color: #555555; font-family: Arial, 'Helvetica Neue', Helvetica, sans-serif; mso-line-height-alt: 14px; margin: 0;\">\n" @@ -166,75 +177,81 @@ TEST_CASE("html text extraction") "<span style=\"color:#FFFFFF; \">kreyes</span>\n" "<p style=\"font-size: 11px; line-height: 1.2; color: #555555; font-family: Arial, 'Helvetica Neue', Helvetica, sans-serif; mso-line-height-alt: 14px; margin: 0;\">\n" " </p>", - " Sincerely,\n Skype Web\n"}, + " Sincerely,\n Skype Web\n"}, {"lala<p hidden>fafa</p>", "lala"}, {"<table style=\"FONT-SIZE: 0px;\"><tbody><tr><td>\n" "DONKEY\n" - "</td></tr></tbody></table>", ""}, + "</td></tr></tbody></table>", + ""}, /* bgcolor propagation */ {"<a style=\"display: inline-block; color: #ffffff; background-color: #00aff0;\">\n" "<span style=\"color: #00aff0;\">F</span>Rev<span style=\"opacity: 1;\"></span></span>ie<span style=\"opacity: 1;\"></span>" "</span>w<span style=\"color: #00aff0;\">F<span style=\"opacity: 1;\">ΜΉ</span></span>", - " Review"}, + " Review"}, {"<td style=\"color:#ffffff\" bgcolor=\"#005595\">\n" "hello world\n" - "</td>", "hello world"}, + "</td>", + "hello world"}, /* Colors */ {"goodbye <span style=\"COLOR: rgb(64,64,64)\">cruel</span>" - "<span>world</span>", "goodbye cruelworld"}, + "<span>world</span>", + "goodbye cruelworld"}, /* Font-size propagation */ {"<p style=\"font-size: 11pt;line-height:22px\">goodbye <span style=\"font-size:0px\">cruel</span>world</p>", - "goodbye world\n"}, + "goodbye world\n"}, /* Newline before tag -> must be space */ {"goodbye <span style=\"COLOR: rgb(64,64,64)\">cruel</span>\n" - "<span>world</span>", "goodbye cruel world"}, + "<span>world</span>", + "goodbye cruel world"}, /* Head tag with some stuff */ {"<html><head><p>oh my god</head><body></body></html>", "oh my god\n"}, {"<html><head><title>oh my god</head><body></body></html>", ""}, {"<html><body><html><head>displayed</body></html></body></html>", "displayed"}, - }; + }; - rspamd_url_init(NULL); - auto *pool = rspamd_mempool_new(rspamd_mempool_suggest_size(), - "html", 0); - struct rspamd_task fake_task; - memset(&fake_task, 0, sizeof(fake_task)); - fake_task.task_pool = pool; + rspamd_url_init(NULL); + auto *pool = rspamd_mempool_new(rspamd_mempool_suggest_size(), + "html", 0); + struct rspamd_task fake_task; + memset(&fake_task, 0, sizeof(fake_task)); + fake_task.task_pool = pool; - auto replace_newlines = [](std::string &str) { - auto start_pos = 0; - while((start_pos = str.find("\n", start_pos, 1)) != std::string::npos) { - str.replace(start_pos, 1, "\\n", 2); - start_pos += 2; - } - }; + auto replace_newlines = [](std::string &str) { + auto start_pos = 0; + while ((start_pos = str.find("\n", start_pos, 1)) != std::string::npos) { + str.replace(start_pos, 1, "\\n", 2); + start_pos += 2; + } + }; - auto i = 1; - for (const auto &c : cases) { - SUBCASE((fmt::format("html extraction case {}", i)).c_str()) { - GByteArray *tmp = g_byte_array_sized_new(c.first.size()); - g_byte_array_append(tmp, (const guint8 *) c.first.data(), c.first.size()); - auto *hc = html_process_input(&fake_task, tmp, nullptr, nullptr, nullptr, true, nullptr); - CHECK(hc != nullptr); - replace_newlines(hc->parsed); - auto expected = c.second; - replace_newlines(expected); - CHECK(hc->parsed == expected); - g_byte_array_free(tmp, TRUE); + auto i = 1; + for (const auto &c: cases) { + SUBCASE((fmt::format("html extraction case {}", i)).c_str()) + { + GByteArray *tmp = g_byte_array_sized_new(c.first.size()); + g_byte_array_append(tmp, (const guint8 *) c.first.data(), c.first.size()); + auto *hc = html_process_input(&fake_task, tmp, nullptr, nullptr, nullptr, true, nullptr); + CHECK(hc != nullptr); + replace_newlines(hc->parsed); + auto expected = c.second; + replace_newlines(expected); + CHECK(hc->parsed == expected); + g_byte_array_free(tmp, TRUE); + } + i++; } - i ++; - } - rspamd_mempool_delete(pool); -} + rspamd_mempool_delete(pool); + } -TEST_CASE("html urls extraction") -{ - using namespace std::string_literals; - const std::vector<std::tuple<std::string, std::vector<std::string>, std::optional<std::string>>> cases{ + TEST_CASE("html urls extraction") + { + using namespace std::string_literals; + const std::vector<std::tuple<std::string, std::vector<std::string>, std::optional<std::string>>> cases{ {"<style></style><a href=\"https://www.example.com\">yolo</a>", - {"https://www.example.com"}, "yolo"}, + {"https://www.example.com"}, + "yolo"}, {"<a href=\"https://example.com\">test</a>", {"https://example.com"}, "test"}, {"<a <poo href=\"http://example.com\">hello</a>", {"http://example.com"}, "hello"}, {"<html>\n" @@ -242,45 +259,46 @@ TEST_CASE("html urls extraction") "<body>\n" "<a href=\"https://www.example.com\">hello</a>\n" "</body>\n" - "</html>", {"https://www.example.com"}, "hello"}, - }; + "</html>", + {"https://www.example.com"}, + "hello"}, + }; - rspamd_url_init(NULL); - auto *pool = rspamd_mempool_new(rspamd_mempool_suggest_size(), - "html", 0); - struct rspamd_task fake_task; - memset(&fake_task, 0, sizeof(fake_task)); - fake_task.task_pool = pool; + rspamd_url_init(NULL); + auto *pool = rspamd_mempool_new(rspamd_mempool_suggest_size(), + "html", 0); + struct rspamd_task fake_task; + memset(&fake_task, 0, sizeof(fake_task)); + fake_task.task_pool = pool; - auto i = 1; - for (const auto &c : cases) { - SUBCASE((fmt::format("html url extraction case {}", i)).c_str()) { - GPtrArray *purls = g_ptr_array_new(); - auto input = std::get<0>(c); - GByteArray *tmp = g_byte_array_sized_new(input.size()); - g_byte_array_append(tmp, (const guint8 *)input.data(), input.size()); - auto *hc = html_process_input(&fake_task, tmp, nullptr, nullptr, purls, true, nullptr); - CHECK(hc != nullptr); - auto &expected_text = std::get<2>(c); - if (expected_text.has_value()) { - CHECK(hc->parsed == expected_text.value()); + auto i = 1; + for (const auto &c: cases) { + SUBCASE((fmt::format("html url extraction case {}", i)).c_str()) + { + GPtrArray *purls = g_ptr_array_new(); + auto input = std::get<0>(c); + GByteArray *tmp = g_byte_array_sized_new(input.size()); + g_byte_array_append(tmp, (const guint8 *) input.data(), input.size()); + auto *hc = html_process_input(&fake_task, tmp, nullptr, nullptr, purls, true, nullptr); + CHECK(hc != nullptr); + auto &expected_text = std::get<2>(c); + if (expected_text.has_value()) { + CHECK(hc->parsed == expected_text.value()); + } + const auto &expected_urls = std::get<1>(c); + CHECK(expected_urls.size() == purls->len); + for (auto j = 0; j < expected_urls.size(); ++j) { + auto *url = (rspamd_url *) g_ptr_array_index(purls, j); + CHECK(expected_urls[j] == std::string{url->string, url->urllen}); + } + g_byte_array_free(tmp, TRUE); + g_ptr_array_free(purls, TRUE); } - const auto &expected_urls = std::get<1>(c); - CHECK(expected_urls.size() == purls->len); - for (auto j = 0; j < expected_urls.size(); ++j) { - auto *url = (rspamd_url *)g_ptr_array_index(purls, j); - CHECK(expected_urls[j] == std::string{url->string, url->urllen}); - } - g_byte_array_free(tmp, TRUE); - g_ptr_array_free(purls, TRUE); + ++i; } - ++i; - } - - rspamd_mempool_delete(pool); -} + rspamd_mempool_delete(pool); + } } } /* namespace rspamd::html */ - |