diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2021-07-14 17:33:48 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2021-07-14 17:34:04 +0100 |
commit | 8d6010f86f77c07645319ddca16bd3000f0dcca6 (patch) | |
tree | 7cdf7c782a975cb654469b75cb91463715c26b3c /src | |
parent | 4278c58ed2a4c5b779d809a27491125a8d80cc6e (diff) | |
download | rspamd-8d6010f86f77c07645319ddca16bd3000f0dcca6.tar.gz rspamd-8d6010f86f77c07645319ddca16bd3000f0dcca6.zip |
[Minor] Html: Fix one more corner case
Diffstat (limited to 'src')
-rw-r--r-- | src/libserver/html/html.cxx | 9 | ||||
-rw-r--r-- | src/libserver/html/html_tests.cxx | 4 |
2 files changed, 11 insertions, 2 deletions
diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx index 51f8589e2..332229b50 100644 --- a/src/libserver/html/html.cxx +++ b/src/libserver/html/html.cxx @@ -1714,7 +1714,7 @@ html_process_input(rspamd_mempool_t *pool, } else if (html_document_state == html_document_state::head) { if (!(cur_tag->flags & (CM_EMPTY | CM_HEAD))) { - if (parent_tag && parent_tag->id == Tag_HEAD) { + if (parent_tag && (parent_tag->id == Tag_HEAD || !(parent_tag->flags & CM_HEAD))) { /* * As by standard, we have to close the HEAD tag * and switch to the body state @@ -1728,6 +1728,13 @@ html_process_input(rspamd_mempool_t *pool, else if (cur_tag->id == Tag_BODY) { html_document_state = html_document_state::body; } + else { + /* + * For propagation in something like + * <title><p><a>ololo</a></p></title> - should be unprocessed + */ + cur_tag->flags |= CM_HEAD; + } } } diff --git a/src/libserver/html/html_tests.cxx b/src/libserver/html/html_tests.cxx index 4e87d7e2d..73f2ad81b 100644 --- a/src/libserver/html/html_tests.cxx +++ b/src/libserver/html/html_tests.cxx @@ -223,6 +223,8 @@ TEST_CASE("html urls extraction") { using namespace std::string_literals; const std::vector<std::tuple<std::string, std::vector<std::string>, std::optional<std::string>>> cases{ + {"<style></style><a href=\"https://www.example.com\">yolo</a>", + {"https://www.example.com"}, "yolo"}, {"<a href=\"https://example.com\">test</a>", {"https://example.com"}, "test"}, {"<a <poo href=\"http://example.com\">hello</a>", {"http://example.com"}, "hello"}, {"<html>\n" @@ -230,7 +232,7 @@ TEST_CASE("html urls extraction") "<body>\n" "<a href=\"https://www.example.com\">hello</a>\n" "</body>\n" - "</html>", {"https://www.example.com"}, "hello"} + "</html>", {"https://www.example.com"}, "hello"}, }; rspamd_url_init(NULL); |