source.dussan.org Git - rspamd.git/commitdiff
[Project] Fix xml/sgml tags processing
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 24 Jun 2021 16:30:20 +0000 (17:30 +0100)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 24 Jun 2021 16:30:20 +0000 (17:30 +0100)
src/libserver/html/html.cxx
src/libserver/html/html.h
src/libserver/html/html_tag.hxx

index e427c73bfa22f228efad7ddfbe381bddfbad2e61..45094e7f82ad02de228316090c20b1197d090e5f 100644 (file)
@@ -851,8 +851,6 @@ html_process_img_tag(rspamd_mempool_t *pool,
 
        img = rspamd_mempool_alloc0_type (pool, struct html_image);
        img->tag = tag;
-       tag->flags |= FL_IMAGE;
-
 
        for (const auto &param : tag->parameters) {
 
@@ -1096,7 +1094,12 @@ html_append_tag_content(rspamd_mempool_t *pool,
        }
 
        if (!tag->block) {
-               is_visible = true;
+               if ((tag->flags & (FL_COMMENT|FL_XML))) {
+                       is_visible = false;
+               }
+               else {
+                       is_visible = true;
+               }
        }
        else if (!tag->block->is_visible()) {
                is_visible = false;
@@ -1328,10 +1331,17 @@ html_process_input(rspamd_mempool_t *pool,
                                break;
                        case '!':
                                state = sgml_tag;
+                               hc->all_tags.emplace_back(std::make_unique<html_tag>());
+                               cur_tag = hc->all_tags.back().get();
+                               cur_tag->tag_start = c - start;
                                p ++;
                                break;
                        case '?':
                                state = xml_tag;
+                               hc->all_tags.emplace_back(std::make_unique<html_tag>());
+                               cur_tag = hc->all_tags.back().get();
+                               cur_tag->tag_start = c - start;
+                               cur_tag->flags |= FL_XML;
                                hc->flags |= RSPAMD_HTML_FLAG_XML;
                                p ++;
                                break;
@@ -1365,6 +1375,7 @@ html_process_input(rspamd_mempool_t *pool,
                                p ++;
                                break;
                        case '-':
+                               cur_tag->flags |= FL_COMMENT;
                                state = comment_tag;
                                p ++;
                                break;
index 291e0cfda1bd1d8d6d537079c402ee3598a96e07..b6307f88f88021eb6f180e2dd49404128ed627bd 100644 (file)
@@ -58,19 +58,6 @@ struct html_image {
 };
 
 
-/* Public tags flags */
-/* XML tag */
-#define FL_XML          (1 << 23)
-/* Closing tag */
-#define FL_CLOSING      (1 << 24)
-/* Fully closed tag (e.g. <a attrs />) */
-#define FL_CLOSED       (1 << 25)
-#define FL_BROKEN       (1 << 26)
-#define FL_IGNORE       (1 << 27)
-#define FL_BLOCK        (1 << 28)
-#define FL_HREF         (1 << 29)
-#define FL_IMAGE        (1 << 30)
-
 /* Forwarded declaration */
 struct rspamd_task;
 
index cad5368cf65b710d61f7673a06c211e260d484da..9091b9060abc0a45413eeb60b5258ae0ffa60dc1 100644 (file)
@@ -40,6 +40,20 @@ enum class html_component_type : std::uint8_t {
        RSPAMD_HTML_COMPONENT_ALT,
        RSPAMD_HTML_COMPONENT_ID,
 };
+
+/* Public tags flags (each flag must occupy its own distinct bit) */
+/* XML tag */
+#define FL_XML          (1 << 23)
+/* Closing tag */
+#define FL_CLOSING      (1 << 24)
+/* Fully closed tag (e.g. <a attrs />) */
+#define FL_CLOSED       (1 << 25)
+#define FL_BROKEN       (1 << 26)
+#define FL_IGNORE       (1 << 27)
+#define FL_BLOCK        (1 << 28)
+#define FL_HREF         (1 << 29)
+#define FL_COMMENT      (1 << 30) /* SGML comment; was (1 << 29), which aliased FL_HREF */
+
 /**
  * Returns component type from a string
  * @param st