From ddda2007228617f8689f815e6a5f944b284ec5b0 Mon Sep 17 00:00:00 2001
From: Vsevolod Stakhov
Date: Mon, 28 Jun 2021 11:51:31 +0100
Subject: [PATCH] [Minor] Fix xml tags and comments processing
---
src/libserver/html/html.cxx | 31 +++++++++++++++++++------------
1 file changed, 19 insertions(+), 12 deletions(-)
diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx
index 8d312b733..c5d35105c 100644
--- a/src/libserver/html/html.cxx
+++ b/src/libserver/html/html.cxx
@@ -244,7 +244,7 @@ html_process_tag(rspamd_mempool_t *pool,
if (!(tag->flags & (CM_EMPTY))) {
/* Block tag */
- if ((tag->flags & (FL_CLOSING | FL_CLOSED))) {
+ if (tag->flags & FL_CLOSING) {
/* Closed block tag */
if (parent == nullptr) {
msg_debug_html ("bad parent node");
@@ -1178,21 +1178,21 @@ html_append_tag_content(rspamd_mempool_t *pool,
return tag->content_offset;
}
- if (!tag->block) {
- if ((tag->flags & (FL_COMMENT|FL_XML))) {
+ if ((tag->flags & (FL_COMMENT|FL_XML))) {
+ is_visible = false;
+ }
+ else {
+ if (!tag->block) {
+ is_visible = true;
+ }
+ else if (!tag->block->is_visible()) {
is_visible = false;
}
else {
- is_visible = true;
+ is_block = tag->block->has_display() &&
+ tag->block->display == css::css_display_value::DISPLAY_BLOCK;
}
}
- else if (!tag->block->is_visible()) {
- is_visible = false;
- }
- else {
- is_block = tag->block->has_display() &&
- tag->block->display == css::css_display_value::DISPLAY_BLOCK;
- }
if (is_block) {
if (!hc->parsed.empty() && hc->parsed.back() != '\n') {
@@ -1913,6 +1913,12 @@ TEST_CASE("html text extraction")
{
const std::vector> cases{
+ /* XML tags */
+ {"\n"
+ " \n"
+ "test", "test"},
{"test", "test"},
{"test ", "test"},
{"test foo, bar", "test foo, bar"},
@@ -1938,6 +1944,7 @@ TEST_CASE("html text extraction")
//{"file "
// "sharing
foo", "fish\nfoo"},
{"test", "test"},
+ /* Complex html with bad tags */
{"\n"
"\n"
"
\n"
@@ -1953,7 +1960,7 @@ TEST_CASE("html text extraction")
"
\n"
" stuff?\n"
" \n"
- "", "Hello, world! test\ndata<> \nstuff?"}
+ "