[Minor] Html: Fix one more corner case

author: Vsevolod Stakhov <vsevolod@highsecure.ru> 2021-07-14 17:33:48 +0100
committer: Vsevolod Stakhov <vsevolod@highsecure.ru> 2021-07-14 17:34:04 +0100
commit: 8d6010f86f77c07645319ddca16bd3000f0dcca6 (patch)
tree: 7cdf7c782a975cb654469b75cb91463715c26b3c /src
parent: 4278c58ed2a4c5b779d809a27491125a8d80cc6e (diff)
download: rspamd-8d6010f86f77c07645319ddca16bd3000f0dcca6.tar.gz rspamd-8d6010f86f77c07645319ddca16bd3000f0dcca6.zip
2 files changed, 11 insertions, 2 deletions
diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx
index 51f8589e2..332229b50 100644
--- a/src/libserver/html/html.cxx
+++ b/src/libserver/html/html.cxx
@@ -1714,7 +1714,7 @@ html_process_input(rspamd_mempool_t *pool,
 				}
 				else if (html_document_state == html_document_state::head) {
 					if (!(cur_tag->flags & (CM_EMPTY | CM_HEAD))) {
-						if (parent_tag && parent_tag->id == Tag_HEAD) {
+						if (parent_tag && (parent_tag->id == Tag_HEAD || !(parent_tag->flags & CM_HEAD))) {
 							/*
 							 * As by standard, we have to close the HEAD tag
 							 * and switch to the body state
@@ -1728,6 +1728,13 @@ html_process_input(rspamd_mempool_t *pool,
 						else if (cur_tag->id == Tag_BODY) {
 							html_document_state = html_document_state::body;
 						}
+						else {
+							/*
+							 * Propagate CM_HEAD to nested tags: in something like
+							 * <title><p><a>ololo</a></p></title> they should stay unprocessed
+							 */
+							cur_tag->flags |= CM_HEAD;
+						}
 					}
 				}
 
diff --git a/src/libserver/html/html_tests.cxx b/src/libserver/html/html_tests.cxx
index 4e87d7e2d..73f2ad81b 100644
--- a/src/libserver/html/html_tests.cxx
+++ b/src/libserver/html/html_tests.cxx
@@ -223,6 +223,8 @@ TEST_CASE("html urls extraction")
 {
 	using namespace std::string_literals;
 	const std::vector<std::tuple<std::string, std::vector<std::string>, std::optional<std::string>>> cases{
+			{"<style></style><a href=\"https://www.example.com\">yolo</a>",
+					{"https://www.example.com"}, "yolo"},
 			{"<a href=\"https://example.com\">test</a>", {"https://example.com"}, "test"},
 			{"<a <poo href=\"http://example.com\">hello</a>", {"http://example.com"}, "hello"},
 			{"<html>\n"
@@ -230,7 +232,7 @@ TEST_CASE("html urls extraction")
 			 "<body>\n"
 			 "<a href=\"https://www.example.com\">hello</a>\n"
 			 "</body>\n"
-			 "</html>", {"https://www.example.com"}, "hello"}
+			 "</html>", {"https://www.example.com"}, "hello"},
 	};
 
 	rspamd_url_init(NULL);
author	Vsevolod Stakhov <vsevolod@highsecure.ru>	2021-07-14 17:33:48 +0100
committer	Vsevolod Stakhov <vsevolod@highsecure.ru>	2021-07-14 17:34:04 +0100
commit	8d6010f86f77c07645319ddca16bd3000f0dcca6 (patch)
tree	7cdf7c782a975cb654469b75cb91463715c26b3c /src
parent	4278c58ed2a4c5b779d809a27491125a8d80cc6e (diff)
download	rspamd-8d6010f86f77c07645319ddca16bd3000f0dcca6.tar.gz rspamd-8d6010f86f77c07645319ddca16bd3000f0dcca6.zip