about | summary | refs | log | tree | commit | diff | stats
path: root/src/libserver/html
diff options
context:
space:
mode:
author: Vsevolod Stakhov <vsevolod@highsecure.ru> 2021-07-13 16:52:09 +0100
committer: Vsevolod Stakhov <vsevolod@highsecure.ru> 2021-07-13 16:52:09 +0100
commit: 87ef0c44cef19ce6498fe5e595097fd09aeaf396 (patch)
tree: 6b1f366b01cc713e4a714b4cff45195cfeb02ba2 /src/libserver/html
parent: edf974f4d07d08009fe51409d834cf4a0352e792 (diff)
download: rspamd-87ef0c44cef19ce6498fe5e595097fd09aeaf396.tar.gz
          rspamd-87ef0c44cef19ce6498fe5e595097fd09aeaf396.zip
[Minor] Ignore bogus head tags inside body
Diffstat (limited to 'src/libserver/html')
-rw-r--r-- src/libserver/html/html.cxx | 3
-rw-r--r-- src/libserver/html/html_tests.cxx | 2
2 files changed, 4 insertions, 1 deletion
diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx
index cf12b0a01..51f8589e2 100644
--- a/src/libserver/html/html.cxx
+++ b/src/libserver/html/html.cxx
@@ -1085,7 +1085,7 @@ html_append_tag_content(rspamd_mempool_t *pool,
return ret;
}
- else if (tag->id == Tag_HEAD) {
+ else if (tag->id == Tag_HEAD && (tag->flags & FL_IGNORE)) {
auto ret = tag->closing.end;
calculate_final_tag_offsets();
@@ -1706,6 +1706,7 @@ html_process_input(rspamd_mempool_t *pool,
if (html_document_state == html_document_state::doctype) {
if (cur_tag->id == Tag_HEAD || (cur_tag->flags & CM_HEAD)) {
html_document_state = html_document_state::head;
+ cur_tag->flags |= FL_IGNORE;
}
else if (cur_tag->id != Tag_HTML) {
html_document_state = html_document_state::body;
diff --git a/src/libserver/html/html_tests.cxx b/src/libserver/html/html_tests.cxx
index ac06a353b..1181e79ac 100644
--- a/src/libserver/html/html_tests.cxx
+++ b/src/libserver/html/html_tests.cxx
@@ -69,6 +69,7 @@ TEST_CASE("html text extraction")
{
using namespace std::string_literals;
const std::vector<std::pair<std::string, std::string>> cases{
+ {"<html><body><html><head>displayed</body></html></body></html>", "displayed"},
{"test", "test"},
{"test\0"s, "test\uFFFD"s},
{"test\0test"s, "test\uFFFDtest"s},
@@ -184,6 +185,7 @@ TEST_CASE("html text extraction")
/* Head tag with some stuff */
{"<html><head><p>oh my god</head><body></body></html>", "oh my god\n"},
{"<html><head><title>oh my god</head><body></body></html>", ""},
+
};
rspamd_url_init(NULL);