diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2021-07-13 16:52:09 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2021-07-13 16:52:09 +0100 |
commit | 87ef0c44cef19ce6498fe5e595097fd09aeaf396 (patch) | |
tree | 6b1f366b01cc713e4a714b4cff45195cfeb02ba2 /src/libserver/html | |
parent | edf974f4d07d08009fe51409d834cf4a0352e792 (diff) | |
download | rspamd-87ef0c44cef19ce6498fe5e595097fd09aeaf396.tar.gz rspamd-87ef0c44cef19ce6498fe5e595097fd09aeaf396.zip |
[Minor] Ignore bogus head tags inside body
Diffstat (limited to 'src/libserver/html')
-rw-r--r-- | src/libserver/html/html.cxx | 3 | ||||
-rw-r--r-- | src/libserver/html/html_tests.cxx | 2 |
2 files changed, 4 insertions, 1 deletion
```diff
diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx
index cf12b0a01..51f8589e2 100644
--- a/src/libserver/html/html.cxx
+++ b/src/libserver/html/html.cxx
@@ -1085,7 +1085,7 @@ html_append_tag_content(rspamd_mempool_t *pool,
 		return ret;
 	}
-	else if (tag->id == Tag_HEAD) {
+	else if (tag->id == Tag_HEAD && (tag->flags & FL_IGNORE)) {
 		auto ret = tag->closing.end;
 		calculate_final_tag_offsets();
@@ -1706,6 +1706,7 @@ html_process_input(rspamd_mempool_t *pool,
 	if (html_document_state == html_document_state::doctype) {
 		if (cur_tag->id == Tag_HEAD || (cur_tag->flags & CM_HEAD)) {
 			html_document_state = html_document_state::head;
+			cur_tag->flags |= FL_IGNORE;
 		}
 		else if (cur_tag->id != Tag_HTML) {
 			html_document_state = html_document_state::body;
diff --git a/src/libserver/html/html_tests.cxx b/src/libserver/html/html_tests.cxx
index ac06a353b..1181e79ac 100644
--- a/src/libserver/html/html_tests.cxx
+++ b/src/libserver/html/html_tests.cxx
@@ -69,6 +69,7 @@ TEST_CASE("html text extraction")
 {
 	using namespace std::string_literals;
 	const std::vector<std::pair<std::string, std::string>> cases{
+		{"<html><body><html><head>displayed</body></html></body></html>", "displayed"},
 		{"test", "test"},
 		{"test\0"s, "test\uFFFD"s},
 		{"test\0test"s, "test\uFFFDtest"s},
@@ -184,6 +185,7 @@ TEST_CASE("html text extraction")
 		/* Head tag with some stuff */
 		{"<html><head><p>oh my god</head><body></body></html>", "oh my god\n"},
 		{"<html><head><title>oh my god</head><body></body></html>", ""},
+
 	};
 	rspamd_url_init(NULL);
```