source.dussan.org Git - rspamd.git/commitdiff
[Minor] Html: Fix one more corner case
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Wed, 14 Jul 2021 16:33:48 +0000 (17:33 +0100)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Wed, 14 Jul 2021 16:34:04 +0000 (17:34 +0100)
src/libserver/html/html.cxx
src/libserver/html/html_tests.cxx

index 51f8589e2bc70aed10a1551099def76590bb4b64..332229b506951b4f8acad670e3c9ef40071068d8 100644 (file)
@@ -1714,7 +1714,7 @@ html_process_input(rspamd_mempool_t *pool,
                                }
                                else if (html_document_state == html_document_state::head) {
                                        if (!(cur_tag->flags & (CM_EMPTY | CM_HEAD))) {
-                                               if (parent_tag && parent_tag->id == Tag_HEAD) {
+                                               if (parent_tag && (parent_tag->id == Tag_HEAD || !(parent_tag->flags & CM_HEAD))) {
                                                        /*
                                                         * As by standard, we have to close the HEAD tag
                                                         * and switch to the body state
@@ -1728,6 +1728,13 @@ html_process_input(rspamd_mempool_t *pool,
                                                else if (cur_tag->id == Tag_BODY) {
                                                        html_document_state = html_document_state::body;
                                                }
+                                               else {
+                                                       /*
+                                                        * For propagation in something like
+                                                        * <title><p><a>ololo</a></p></title> - should be unprocessed
+                                                        */
+                                                       cur_tag->flags |= CM_HEAD;
+                                               }
                                        }
                                }
 
index 4e87d7e2df33d2b1b28177edbae3fa1d1b6424d5..73f2ad81b072101e4014622add09f55d15d706bd 100644 (file)
@@ -223,6 +223,8 @@ TEST_CASE("html urls extraction")
 {
        using namespace std::string_literals;
        const std::vector<std::tuple<std::string, std::vector<std::string>, std::optional<std::string>>> cases{
+                       {"<style></style><a href=\"https://www.example.com\">yolo</a>",
+                                       {"https://www.example.com"}, "yolo"},
                        {"<a href=\"https://example.com\">test</a>", {"https://example.com"}, "test"},
                        {"<a <poo href=\"http://example.com\">hello</a>", {"http://example.com"}, "hello"},
                        {"<html>\n"
@@ -230,7 +232,7 @@ TEST_CASE("html urls extraction")
                         "<body>\n"
                         "<a href=\"https://www.example.com\">hello</a>\n"
                         "</body>\n"
-                        "</html>", {"https://www.example.com"}, "hello"}
+                        "</html>", {"https://www.example.com"}, "hello"},
        };
 
        rspamd_url_init(NULL);