From: Vsevolod Stakhov
Date: Mon, 28 Jun 2021 10:51:31 +0000 (+0100)
Subject: [Minor] Fix xml tags and comments processing
X-Git-Tag: 3.0~235
X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=ddda2007228617f8689f815e6a5f944b284ec5b0;p=rspamd.git

[Minor] Fix xml tags and comments processing
---

diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx
index 8d312b733..c5d35105c 100644
--- a/src/libserver/html/html.cxx
+++ b/src/libserver/html/html.cxx
@@ -244,7 +244,7 @@ html_process_tag(rspamd_mempool_t *pool,
 
 	if (!(tag->flags & (CM_EMPTY))) {
 		/* Block tag */
-		if ((tag->flags & (FL_CLOSING | FL_CLOSED))) {
+		if (tag->flags & FL_CLOSING) {
 			/* Closed block tag */
 			if (parent == nullptr) {
 				msg_debug_html ("bad parent node");
@@ -1178,21 +1178,21 @@ html_append_tag_content(rspamd_mempool_t *pool,
 		return tag->content_offset;
 	}
 
-	if (!tag->block) {
-		if ((tag->flags & (FL_COMMENT|FL_XML))) {
+	if ((tag->flags & (FL_COMMENT|FL_XML))) {
+		is_visible = false;
+	}
+	else {
+		if (!tag->block) {
+			is_visible = true;
+		}
+		else if (!tag->block->is_visible()) {
 			is_visible = false;
 		}
 		else {
-			is_visible = true;
+			is_block = tag->block->has_display() &&
+				tag->block->display == css::css_display_value::DISPLAY_BLOCK;
 		}
 	}
-	else if (!tag->block->is_visible()) {
-		is_visible = false;
-	}
-	else {
-		is_block = tag->block->has_display() &&
-			tag->block->display == css::css_display_value::DISPLAY_BLOCK;
-	}
 
 	if (is_block) {
 		if (!hc->parsed.empty() && hc->parsed.back() != '\n') {
@@ -1913,6 +1913,12 @@ TEST_CASE("html text extraction")
 {
 
 	const std::vector<std::pair<std::string, std::string>> cases{
+			/* XML tags */
+			{"\n"
+			 " \n"
+			 "test", "test"},
 			{"test", "test"},
 			{"test ", "test"},
 			{"test foo, bar", "test foo, bar"},
@@ -1938,6 +1944,7 @@ TEST_CASE("html text extraction")
 			//{"
file " // "sharing
foo", "fish\nfoo"}, {"

test", "test"},
+			/* Complex html with bad tags */
 			{"\n"
 			 "\n"
 			 " \n"
@@ -1953,7 +1960,7 @@ TEST_CASE("html text extraction")
 			 "

\n" " stuff

?\n"
 			 " \n"
-			 "", "Hello, world! test\ndata<> \nstuff?"}
+			 "", "Hello, world! test\ndata<> \nstuff?"},
 	};
 
 	rspamd_url_init(NULL);