about | summary | refs | log | tree | commit | diff | stats
path: root/src/libserver/html/html_tests.cxx
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@rspamd.com> 2023-07-26 10:49:23 +0100
committer Vsevolod Stakhov <vsevolod@rspamd.com> 2023-07-26 10:49:23 +0100
commit 537a7180a0d5132c11636c4fd8b1450cd99d352c (patch)
tree fb9f8c84955a411bdffbd6371ea32f2716fb3687 /src/libserver/html/html_tests.cxx
parent 5fd7a90fdaa33f52c59bdb0ca84451e5c1e22365 (diff)
download rspamd-537a7180a0d5132c11636c4fd8b1450cd99d352c.tar.gz
rspamd-537a7180a0d5132c11636c4fd8b1450cd99d352c.zip
[Rework] Use clang-format to unify formatting in all sources
No meaningful changes.
Diffstat (limited to 'src/libserver/html/html_tests.cxx')
-rw-r--r-- src/libserver/html/html_tests.cxx 250
1 files changed, 134 insertions, 116 deletions
diff --git a/src/libserver/html/html_tests.cxx b/src/libserver/html/html_tests.cxx
index 2492337bf..2fe6702df 100644
--- a/src/libserver/html/html_tests.cxx
+++ b/src/libserver/html/html_tests.cxx
@@ -31,48 +31,50 @@ namespace rspamd::html {
* Tests part
*/
-TEST_SUITE("html") {
-TEST_CASE("html parsing")
+TEST_SUITE("html")
{
+ TEST_CASE("html parsing")
+ {
- const std::vector<std::pair<std::string, std::string>> cases{
- {"<html><!DOCTYPE html><body>", "+html;++xml;++body;"},
- {"<html><div><div></div></div></html>", "+html;++div;+++div;"},
- {"<html><div><div></div></html>", "+html;++div;+++div;"},
- {"<html><div><div></div></html></div>", "+html;++div;+++div;"},
- {"<p><p><a></p></a></a>", "+p;++p;+++a;"},
+ const std::vector<std::pair<std::string, std::string>> cases{
+ {"<html><!DOCTYPE html><body>", "+html;++xml;++body;"},
+ {"<html><div><div></div></div></html>", "+html;++div;+++div;"},
+ {"<html><div><div></div></html>", "+html;++div;+++div;"},
+ {"<html><div><div></div></html></div>", "+html;++div;+++div;"},
+ {"<p><p><a></p></a></a>", "+p;++p;+++a;"},
{"<div><a href=\"http://example.com\"></div></a>", "+div;++a;"},
/* Broken, as I don't know how the hell this should be really parsed */
//{"<html><!DOCTYPE html><body><head><body></body></html></body></html>",
// "+html;++xml;++body;+++head;+++body;"}
- };
+ };
- rspamd_url_init(NULL);
- auto *pool = rspamd_mempool_new(rspamd_mempool_suggest_size(),
- "html", 0);
- struct rspamd_task fake_task;
- memset(&fake_task, 0, sizeof(fake_task));
- fake_task.task_pool = pool;
+ rspamd_url_init(NULL);
+ auto *pool = rspamd_mempool_new(rspamd_mempool_suggest_size(),
+ "html", 0);
+ struct rspamd_task fake_task;
+ memset(&fake_task, 0, sizeof(fake_task));
+ fake_task.task_pool = pool;
- for (const auto &c : cases) {
- SUBCASE((std::string("extract tags from: ") + c.first).c_str()) {
- GByteArray *tmp = g_byte_array_sized_new(c.first.size());
- g_byte_array_append(tmp, (const guint8 *) c.first.data(), c.first.size());
- auto *hc = html_process_input(&fake_task, tmp, nullptr, nullptr, nullptr, true, nullptr);
- CHECK(hc != nullptr);
- auto dump = html_debug_structure(*hc);
- CHECK(c.second == dump);
- g_byte_array_free(tmp, TRUE);
+ for (const auto &c: cases) {
+ SUBCASE((std::string("extract tags from: ") + c.first).c_str())
+ {
+ GByteArray *tmp = g_byte_array_sized_new(c.first.size());
+ g_byte_array_append(tmp, (const guint8 *) c.first.data(), c.first.size());
+ auto *hc = html_process_input(&fake_task, tmp, nullptr, nullptr, nullptr, true, nullptr);
+ CHECK(hc != nullptr);
+ auto dump = html_debug_structure(*hc);
+ CHECK(c.second == dump);
+ g_byte_array_free(tmp, TRUE);
+ }
}
- }
- rspamd_mempool_delete(pool);
-}
+ rspamd_mempool_delete(pool);
+ }
-TEST_CASE("html text extraction")
-{
- using namespace std::string_literals;
- const std::vector<std::pair<std::string, std::string>> cases{
+ TEST_CASE("html text extraction")
+ {
+ using namespace std::string_literals;
+ const std::vector<std::pair<std::string, std::string>> cases{
{"test", "test"},
{"test\0"s, "test\uFFFD"s},
{"test\0test"s, "test\uFFFDtest"s},
@@ -90,13 +92,15 @@ TEST_CASE("html text extraction")
{" <body>\n"
" <!-- escape content -->\n"
" a&nbsp;b a &gt; b a &lt; b a &amp; b &apos;a &quot;a&quot;\n"
- " </body>", R"|(a b a > b a < b a & b 'a "a")|"},
+ " </body>",
+ R"|(a b a > b a < b a & b 'a "a")|"},
/* XML tags */
{"<?xml version=\"1.0\" encoding=\"iso-8859-1\"?>\n"
" <!DOCTYPE html\n"
" PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n"
" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n"
- "<body>test</body>", "test"},
+ "<body>test</body>",
+ "test"},
{"<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=UTF-8\"></head>"
" <body>\n"
" <p><br>\n"
@@ -104,9 +108,11 @@ TEST_CASE("html text extraction")
" <div class=\"moz-forward-container\"><br>\n"
" <br>\n"
" test</div>"
- "</body>", "\n\n\ntest\n"},
+ "</body>",
+ "\n\n\ntest\n"},
{"<div>fi<span style=\"FONT-SIZE: 0px\">le </span>"
- "sh<span style=\"FONT-SIZE: 0px\">aring </span></div>", "fish\n"},
+ "sh<span style=\"FONT-SIZE: 0px\">aring </span></div>",
+ "fish\n"},
/* FIXME: broken until rework of css parser */
//{"<div>fi<span style=\"FONT-SIZE: 0px\">le </span>"
// "sh<span style=\"FONT-SIZE: 0px\">aring </div>foo</span>", "fish\nfoo"},
@@ -126,7 +132,8 @@ TEST_CASE("html text extraction")
" </P>\n"
" <b>stuff</p>?\n"
" </body>\n"
- "</html>", "Hello, world! test \ndata<>\nstuff\n?"},
+ "</html>",
+ "Hello, world! test \ndata<>\nstuff\n?"},
{"<p><!--comment-->test</br></hr><br>", "test\n"},
/* Tables */
{"<table>\n"
@@ -138,7 +145,8 @@ TEST_CASE("html text extraction")
" <td>data1</td>\n"
" <td>data2</td>\n"
" </tr>\n"
- " </table>", "heada headb\ndata1 data2\n"},
+ " </table>",
+ "heada headb\ndata1 data2\n"},
/* Invalid closing br and hr + comment */
{" <body>\n"
" <!-- page content -->\n"
@@ -146,17 +154,20 @@ TEST_CASE("html text extraction")
" <div>\n"
" content inside div\n"
" </div>\n"
- " </body>", "Hello, world!\ntest\ncontentmore content\ncontent inside div\n"},
+ " </body>",
+ "Hello, world!\ntest\ncontentmore content\ncontent inside div\n"},
/* First closing tag */
{"</head>\n"
"<body>\n"
"<p> Hello. I have some bad news.\n"
"<br /> <br /> <br /> <strong> <br /> <br /> <br /> <br /> <br /> <br /> <br /> <br /> </strong><span> <br /> </span>test</p>\n"
"</body>\n"
- "</html>", "Hello. I have some bad news. \n\n\n\n\n\n\n\n\n\n\n\ntest\n"},
+ "</html>",
+ "Hello. I have some bad news. \n\n\n\n\n\n\n\n\n\n\n\ntest\n"},
/* Invalid tags */
{"lol <sht> omg </sht> oh my!\n"
- "<name>words words</name> goodbye","lol omg oh my! words words goodbye"},
+ "<name>words words</name> goodbye",
+ "lol omg oh my! words words goodbye"},
/* Invisible stuff */
{"<div style=\"color:#555555;font-family:Arial, 'Helvetica Neue', Helvetica, sans-serif;line-height:1.2;padding-top:10px;padding-right:10px;padding-bottom:10px;padding-left:10px;font-style: italic;\">\n"
"<p style=\"font-size: 11px; line-height: 1.2; color: #555555; font-family: Arial, 'Helvetica Neue', Helvetica, sans-serif; mso-line-height-alt: 14px; margin: 0;\">\n"
@@ -166,75 +177,81 @@ TEST_CASE("html text extraction")
"<span style=\"color:#FFFFFF; \">kreyes</span>\n"
"<p style=\"font-size: 11px; line-height: 1.2; color: #555555; font-family: Arial, 'Helvetica Neue', Helvetica, sans-serif; mso-line-height-alt: 14px; margin: 0;\">\n"
"&nbsp;</p>",
- " Sincerely,\n Skype Web\n"},
+ " Sincerely,\n Skype Web\n"},
{"lala<p hidden>fafa</p>", "lala"},
{"<table style=\"FONT-SIZE: 0px;\"><tbody><tr><td>\n"
"DONKEY\n"
- "</td></tr></tbody></table>", ""},
+ "</td></tr></tbody></table>",
+ ""},
/* bgcolor propagation */
{"<a style=\"display: inline-block; color: #ffffff; background-color: #00aff0;\">\n"
"<span style=\"color: #00aff0;\">F</span>Rev<span style=\"opacity: 1;\"></span></span>ie<span style=\"opacity: 1;\"></span>"
"</span>w<span style=\"color: #00aff0;\">F<span style=\"opacity: 1;\">ΜΉ</span></span>",
- " Review"},
+ " Review"},
{"<td style=\"color:#ffffff\" bgcolor=\"#005595\">\n"
"hello world\n"
- "</td>", "hello world"},
+ "</td>",
+ "hello world"},
/* Colors */
{"goodbye <span style=\"COLOR: rgb(64,64,64)\">cruel</span>"
- "<span>world</span>", "goodbye cruelworld"},
+ "<span>world</span>",
+ "goodbye cruelworld"},
/* Font-size propagation */
{"<p style=\"font-size: 11pt;line-height:22px\">goodbye <span style=\"font-size:0px\">cruel</span>world</p>",
- "goodbye world\n"},
+ "goodbye world\n"},
/* Newline before tag -> must be space */
{"goodbye <span style=\"COLOR: rgb(64,64,64)\">cruel</span>\n"
- "<span>world</span>", "goodbye cruel world"},
+ "<span>world</span>",
+ "goodbye cruel world"},
/* Head tag with some stuff */
{"<html><head><p>oh my god</head><body></body></html>", "oh my god\n"},
{"<html><head><title>oh my god</head><body></body></html>", ""},
{"<html><body><html><head>displayed</body></html></body></html>", "displayed"},
- };
+ };
- rspamd_url_init(NULL);
- auto *pool = rspamd_mempool_new(rspamd_mempool_suggest_size(),
- "html", 0);
- struct rspamd_task fake_task;
- memset(&fake_task, 0, sizeof(fake_task));
- fake_task.task_pool = pool;
+ rspamd_url_init(NULL);
+ auto *pool = rspamd_mempool_new(rspamd_mempool_suggest_size(),
+ "html", 0);
+ struct rspamd_task fake_task;
+ memset(&fake_task, 0, sizeof(fake_task));
+ fake_task.task_pool = pool;
- auto replace_newlines = [](std::string &str) {
- auto start_pos = 0;
- while((start_pos = str.find("\n", start_pos, 1)) != std::string::npos) {
- str.replace(start_pos, 1, "\\n", 2);
- start_pos += 2;
- }
- };
+ auto replace_newlines = [](std::string &str) {
+ auto start_pos = 0;
+ while ((start_pos = str.find("\n", start_pos, 1)) != std::string::npos) {
+ str.replace(start_pos, 1, "\\n", 2);
+ start_pos += 2;
+ }
+ };
- auto i = 1;
- for (const auto &c : cases) {
- SUBCASE((fmt::format("html extraction case {}", i)).c_str()) {
- GByteArray *tmp = g_byte_array_sized_new(c.first.size());
- g_byte_array_append(tmp, (const guint8 *) c.first.data(), c.first.size());
- auto *hc = html_process_input(&fake_task, tmp, nullptr, nullptr, nullptr, true, nullptr);
- CHECK(hc != nullptr);
- replace_newlines(hc->parsed);
- auto expected = c.second;
- replace_newlines(expected);
- CHECK(hc->parsed == expected);
- g_byte_array_free(tmp, TRUE);
+ auto i = 1;
+ for (const auto &c: cases) {
+ SUBCASE((fmt::format("html extraction case {}", i)).c_str())
+ {
+ GByteArray *tmp = g_byte_array_sized_new(c.first.size());
+ g_byte_array_append(tmp, (const guint8 *) c.first.data(), c.first.size());
+ auto *hc = html_process_input(&fake_task, tmp, nullptr, nullptr, nullptr, true, nullptr);
+ CHECK(hc != nullptr);
+ replace_newlines(hc->parsed);
+ auto expected = c.second;
+ replace_newlines(expected);
+ CHECK(hc->parsed == expected);
+ g_byte_array_free(tmp, TRUE);
+ }
+ i++;
}
- i ++;
- }
- rspamd_mempool_delete(pool);
-}
+ rspamd_mempool_delete(pool);
+ }
-TEST_CASE("html urls extraction")
-{
- using namespace std::string_literals;
- const std::vector<std::tuple<std::string, std::vector<std::string>, std::optional<std::string>>> cases{
+ TEST_CASE("html urls extraction")
+ {
+ using namespace std::string_literals;
+ const std::vector<std::tuple<std::string, std::vector<std::string>, std::optional<std::string>>> cases{
{"<style></style><a href=\"https://www.example.com\">yolo</a>",
- {"https://www.example.com"}, "yolo"},
+ {"https://www.example.com"},
+ "yolo"},
{"<a href=\"https://example.com\">test</a>", {"https://example.com"}, "test"},
{"<a <poo href=\"http://example.com\">hello</a>", {"http://example.com"}, "hello"},
{"<html>\n"
@@ -242,45 +259,46 @@ TEST_CASE("html urls extraction")
"<body>\n"
"<a href=\"https://www.example.com\">hello</a>\n"
"</body>\n"
- "</html>", {"https://www.example.com"}, "hello"},
- };
+ "</html>",
+ {"https://www.example.com"},
+ "hello"},
+ };
- rspamd_url_init(NULL);
- auto *pool = rspamd_mempool_new(rspamd_mempool_suggest_size(),
- "html", 0);
- struct rspamd_task fake_task;
- memset(&fake_task, 0, sizeof(fake_task));
- fake_task.task_pool = pool;
+ rspamd_url_init(NULL);
+ auto *pool = rspamd_mempool_new(rspamd_mempool_suggest_size(),
+ "html", 0);
+ struct rspamd_task fake_task;
+ memset(&fake_task, 0, sizeof(fake_task));
+ fake_task.task_pool = pool;
- auto i = 1;
- for (const auto &c : cases) {
- SUBCASE((fmt::format("html url extraction case {}", i)).c_str()) {
- GPtrArray *purls = g_ptr_array_new();
- auto input = std::get<0>(c);
- GByteArray *tmp = g_byte_array_sized_new(input.size());
- g_byte_array_append(tmp, (const guint8 *)input.data(), input.size());
- auto *hc = html_process_input(&fake_task, tmp, nullptr, nullptr, purls, true, nullptr);
- CHECK(hc != nullptr);
- auto &expected_text = std::get<2>(c);
- if (expected_text.has_value()) {
- CHECK(hc->parsed == expected_text.value());
+ auto i = 1;
+ for (const auto &c: cases) {
+ SUBCASE((fmt::format("html url extraction case {}", i)).c_str())
+ {
+ GPtrArray *purls = g_ptr_array_new();
+ auto input = std::get<0>(c);
+ GByteArray *tmp = g_byte_array_sized_new(input.size());
+ g_byte_array_append(tmp, (const guint8 *) input.data(), input.size());
+ auto *hc = html_process_input(&fake_task, tmp, nullptr, nullptr, purls, true, nullptr);
+ CHECK(hc != nullptr);
+ auto &expected_text = std::get<2>(c);
+ if (expected_text.has_value()) {
+ CHECK(hc->parsed == expected_text.value());
+ }
+ const auto &expected_urls = std::get<1>(c);
+ CHECK(expected_urls.size() == purls->len);
+ for (auto j = 0; j < expected_urls.size(); ++j) {
+ auto *url = (rspamd_url *) g_ptr_array_index(purls, j);
+ CHECK(expected_urls[j] == std::string{url->string, url->urllen});
+ }
+ g_byte_array_free(tmp, TRUE);
+ g_ptr_array_free(purls, TRUE);
}
- const auto &expected_urls = std::get<1>(c);
- CHECK(expected_urls.size() == purls->len);
- for (auto j = 0; j < expected_urls.size(); ++j) {
- auto *url = (rspamd_url *)g_ptr_array_index(purls, j);
- CHECK(expected_urls[j] == std::string{url->string, url->urllen});
- }
- g_byte_array_free(tmp, TRUE);
- g_ptr_array_free(purls, TRUE);
+ ++i;
}
- ++i;
- }
-
- rspamd_mempool_delete(pool);
-}
+ rspamd_mempool_delete(pool);
+ }
}
} /* namespace rspamd::html */
-