source.dussan.org Git - rspamd.git/commitdiff
[Minor] Ignore bogus head tags inside body
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 13 Jul 2021 15:52:09 +0000 (16:52 +0100)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 13 Jul 2021 15:52:09 +0000 (16:52 +0100)
src/libserver/html/html.cxx
src/libserver/html/html_tests.cxx

index cf12b0a0186f22b475f0af4f6a4cb04bf10cdffc..51f8589e2bc70aed10a1551099def76590bb4b64 100644 (file)
@@ -1085,7 +1085,7 @@ html_append_tag_content(rspamd_mempool_t *pool,
 
                return ret;
        }
-       else if (tag->id == Tag_HEAD) {
+       else if (tag->id == Tag_HEAD && (tag->flags & FL_IGNORE)) {
                auto ret = tag->closing.end;
                calculate_final_tag_offsets();
 
@@ -1706,6 +1706,7 @@ html_process_input(rspamd_mempool_t *pool,
                                if (html_document_state == html_document_state::doctype) {
                                        if (cur_tag->id == Tag_HEAD || (cur_tag->flags & CM_HEAD)) {
                                                html_document_state = html_document_state::head;
+                                               cur_tag->flags |= FL_IGNORE;
                                        }
                                        else if (cur_tag->id != Tag_HTML) {
                                                html_document_state = html_document_state::body;
index ac06a353bb78ccd1a8997d30a548d3d21e35b123..1181e79ac004a399a5a017dd7eeff819120154e5 100644 (file)
@@ -69,6 +69,7 @@ TEST_CASE("html text extraction")
 {
        using namespace std::string_literals;
        const std::vector<std::pair<std::string, std::string>> cases{
+                       {"<html><body><html><head>displayed</body></html></body></html>", "displayed"},
                        {"test", "test"},
                        {"test\0"s, "test\uFFFD"s},
                        {"test\0test"s, "test\uFFFDtest"s},
@@ -184,6 +185,7 @@ TEST_CASE("html text extraction")
                        /* Head tag with some stuff */
                        {"<html><head><p>oh my god</head><body></body></html>", "oh my god\n"},
                        {"<html><head><title>oh my god</head><body></body></html>", ""},
+
        };
 
        rspamd_url_init(NULL);