From 8d6010f86f77c07645319ddca16bd3000f0dcca6 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Wed, 14 Jul 2021 17:33:48 +0100 Subject: [PATCH] [Minor] Html: Fix one more corner case --- src/libserver/html/html.cxx | 9 ++++++++- src/libserver/html/html_tests.cxx | 4 +++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx index 51f8589e2..332229b50 100644 --- a/src/libserver/html/html.cxx +++ b/src/libserver/html/html.cxx @@ -1714,7 +1714,7 @@ html_process_input(rspamd_mempool_t *pool, } else if (html_document_state == html_document_state::head) { if (!(cur_tag->flags & (CM_EMPTY | CM_HEAD))) { - if (parent_tag && parent_tag->id == Tag_HEAD) { + if (parent_tag && (parent_tag->id == Tag_HEAD || !(parent_tag->flags & CM_HEAD))) { /* * As by standard, we have to close the HEAD tag * and switch to the body state @@ -1728,6 +1728,13 @@ html_process_input(rspamd_mempool_t *pool, else if (cur_tag->id == Tag_BODY) { html_document_state = html_document_state::body; } + else { + /* + * For propagation in something like + * <p><a>ololo</a></p> - should be unprocessed + */ + cur_tag->flags |= CM_HEAD; + } } } diff --git a/src/libserver/html/html_tests.cxx b/src/libserver/html/html_tests.cxx index 4e87d7e2d..73f2ad81b 100644 --- a/src/libserver/html/html_tests.cxx +++ b/src/libserver/html/html_tests.cxx @@ -223,6 +223,8 @@ TEST_CASE("html urls extraction") { using namespace std::string_literals; const std::vector<std::tuple<std::string, std::vector<std::string>, std::optional<std::string>>> cases{ + {"yolo", + {"https://www.example.com"}, "yolo"}, {"test", {"https://example.com"}, "test"}, {"hello", {"http://example.com"}, "hello"}, {"\n" @@ -230,7 +232,7 @@ TEST_CASE("html urls extraction") "\n" "hello\n" "\n" - "", {"https://www.example.com"}, "hello"} + "", {"https://www.example.com"}, "hello"}, }; rspamd_url_init(NULL); -- 2.39.5