From c93a78780f425a36cb3d9d2a7b0a8eca8707831e Mon Sep 17 00:00:00 2001
From: Vsevolod Stakhov
Date: Thu, 24 Jun 2021 17:30:20 +0100
Subject: [PATCH] [Project] Fix xml/sgml tags processing

---
Review notes (this section is ignored by `git am`):
 * SGML (`<!`) and XML (`<?`) constructs now get their own entry in
   hc->all_tags, flagged FL_COMMENT / FL_XML respectively, and
   html_append_tag_content treats block-less tags carrying either flag
   as invisible.
 * The public FL_* flags move from html.h to html_tag.hxx; FL_IMAGE is
   dropped.
 * FL_COMMENT uses bit 30 (released by dropping FL_IMAGE) so that it does
   not alias FL_HREF (bit 29).

 src/libserver/html/html.cxx     | 17 ++++++++++++++---
 src/libserver/html/html.h       | 13 -------------
 src/libserver/html/html_tag.hxx | 14 ++++++++++++++
 3 files changed, 28 insertions(+), 16 deletions(-)

diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx
index e427c73bf..45094e7f8 100644
--- a/src/libserver/html/html.cxx
+++ b/src/libserver/html/html.cxx
@@ -851,8 +851,6 @@ html_process_img_tag(rspamd_mempool_t *pool,
 
 	img = rspamd_mempool_alloc0_type (pool, struct html_image);
 	img->tag = tag;
-	tag->flags |= FL_IMAGE;
-
 
 	for (const auto &param : tag->parameters) {
 
@@ -1096,7 +1094,12 @@ html_append_tag_content(rspamd_mempool_t *pool,
 	}
 
 	if (!tag->block) {
-		is_visible = true;
+		if ((tag->flags & (FL_COMMENT|FL_XML))) {
+			is_visible = false;
+		}
+		else {
+			is_visible = true;
+		}
 	}
 	else if (!tag->block->is_visible()) {
 		is_visible = false;
@@ -1328,10 +1331,17 @@ html_process_input(rspamd_mempool_t *pool,
 				break;
 			case '!':
 				state = sgml_tag;
+				hc->all_tags.emplace_back(std::make_unique<html_tag>());
+				cur_tag = hc->all_tags.back().get();
+				cur_tag->tag_start = c - start;
 				p ++;
 				break;
 			case '?':
 				state = xml_tag;
+				hc->all_tags.emplace_back(std::make_unique<html_tag>());
+				cur_tag = hc->all_tags.back().get();
+				cur_tag->tag_start = c - start;
+				cur_tag->flags |= FL_XML;
 				hc->flags |= RSPAMD_HTML_FLAG_XML;
 				p ++;
 				break;
@@ -1365,6 +1375,7 @@ html_process_input(rspamd_mempool_t *pool,
 				p ++;
 				break;
 			case '-':
+				cur_tag->flags |= FL_COMMENT;
 				state = comment_tag;
 				p ++;
 				break;
diff --git a/src/libserver/html/html.h b/src/libserver/html/html.h
index 291e0cfda..b6307f88f 100644
--- a/src/libserver/html/html.h
+++ b/src/libserver/html/html.h
@@ -58,19 +58,6 @@ struct html_image {
 };
 
 
-/* Public tags flags */
-/* XML tag */
-#define FL_XML (1 << 23)
-/* Closing tag */
-#define FL_CLOSING (1 << 24)
-/* Fully closed tag (e.g. <a attrs />) */
-#define FL_CLOSED (1 << 25)
-#define FL_BROKEN (1 << 26)
-#define FL_IGNORE (1 << 27)
-#define FL_BLOCK (1 << 28)
-#define FL_HREF (1 << 29)
-#define FL_IMAGE (1 << 30)
-
 /* Forwarded declaration */
 struct rspamd_task;
 
diff --git a/src/libserver/html/html_tag.hxx b/src/libserver/html/html_tag.hxx
index cad5368cf..9091b9060 100644
--- a/src/libserver/html/html_tag.hxx
+++ b/src/libserver/html/html_tag.hxx
@@ -40,6 +40,20 @@ enum class html_component_type : std::uint8_t {
 	RSPAMD_HTML_COMPONENT_ALT,
 	RSPAMD_HTML_COMPONENT_ID,
 };
+
+/* Public tags flags */
+/* XML tag */
+#define FL_XML (1 << 23)
+/* Closing tag */
+#define FL_CLOSING (1 << 24)
+/* Fully closed tag (e.g. <a attrs />) */
+#define FL_CLOSED (1 << 25)
+#define FL_BROKEN (1 << 26)
+#define FL_IGNORE (1 << 27)
+#define FL_BLOCK (1 << 28)
+#define FL_HREF (1 << 29)
+#define FL_COMMENT (1 << 30) /* SGML comment; must not alias FL_HREF */
+
 /**
  * Returns component type from a string
  * @param st
-- 
2.39.5