]> source.dussan.org Git - rspamd.git/commitdiff
[Project] Add process exceptions for invisible text
author: Vsevolod Stakhov <vsevolod@highsecure.ru>
Wed, 16 Jun 2021 14:22:48 +0000 (15:22 +0100)
committer: Vsevolod Stakhov <vsevolod@highsecure.ru>
Wed, 16 Jun 2021 14:22:48 +0000 (15:22 +0100)
src/libserver/html/html.cxx
src/libstat/tokenizers/tokenizers.c
src/libutil/util.h
src/rspamd.h
test/functional/messages/zerofont.eml

index 5c5157740c6b6c741eb49410b488c98433cddc43..b9c1a41cc3de3860a3301aefdef46a33ef7339d2 100644 (file)
@@ -1658,7 +1658,7 @@ html_process_input(rspamd_mempool_t *pool,
        }, html_content::traverse_type::POST_ORDER);
 
        /* Propagate styles */
-       hc->traverse_block_tags([&hc](const html_tag *tag) -> bool {
+       hc->traverse_block_tags([&hc, &exceptions,&pool](const html_tag *tag) -> bool {
                if (hc->css_style) {
                        auto *css_block = hc->css_style->check_tag_block(tag);
 
@@ -1674,6 +1674,60 @@ html_process_input(rspamd_mempool_t *pool,
                if (tag->block) {
                        tag->block->compute_visibility();
 
+                       if (exceptions) {
+                               if (!tag->block->is_visible()) {
+                                       if (tag->parent == nullptr || (tag->parent->block && tag->parent->block->is_visible())) {
+                                               /* Add exception for an invisible element */
+                                               auto * ex = rspamd_mempool_alloc_type (pool,struct rspamd_process_exception);
+                                               ex->pos = tag->content_offset;
+                                               ex->len = tag->content_length;
+                                               ex->type = RSPAMD_EXCEPTION_INVISIBLE;
+                                               ex->ptr = (void *)tag;
+
+                                               *exceptions = g_list_prepend(*exceptions, ex);
+                                       }
+                               }
+                               else if (*exceptions && tag->parent) {
+                                       /* Current block is visible, check if parent is invisible */
+                                       auto *ex = (struct rspamd_process_exception*)g_list_first(*exceptions)->data;
+
+                                       /*
+                                        * TODO: we need to handle the following cases:
+                                        * <inv><vis><inv> -< insert one more exception
+                                        * <vis><inv> -< increase content_offset decrease length
+                                        * <inv><vis> -< decrease length
+                                        */
+                                       if (ex && ex->type == RSPAMD_EXCEPTION_INVISIBLE &&
+                                               ex->ptr == (void *)tag->parent) {
+                                               auto *parent = tag->parent;
+
+                                               if (tag->content_offset + tag->content_length ==
+                                                       parent->content_offset + parent->content_length) {
+                                                       /* <inv><vis> */
+                                                       ex->len -= tag->content_length;
+                                               }
+                                               else if (tag->content_offset == parent->content_offset) {
+                                                       /* <vis><inv> */
+                                                       ex->len -= tag->content_length;
+                                                       ex->pos += tag->content_length;
+                                               }
+                                               else if (tag->content_offset > ex->pos) {
+                                                       auto *nex = rspamd_mempool_alloc_type (pool,
+                                                                       struct rspamd_process_exception);
+                                                       auto start_len = tag->content_offset - ex->pos;
+                                                       auto end_len = ex->len - tag->content_length - tag->content_length;
+                                                       nex->pos = tag->content_offset + tag->content_length;
+                                                       nex->len = end_len;
+                                                       nex->type = RSPAMD_EXCEPTION_INVISIBLE;
+                                                       nex->ptr = (void *)parent; /* ! */
+                                                       ex->len = start_len;
+                                                       *exceptions = g_list_prepend(*exceptions, ex);
+                                               }
+
+                                       }
+                               }
+                       }
+
                        for (const auto *cld_tag : tag->children) {
                                if (cld_tag->block) {
                                        cld_tag->block->propagate_block(*tag->block);
index f3b05240c81b6ea65c587c4153ef98d8142f239b..8d6d93addad6848003ba59973b9ddb60208c4e82 100644 (file)
@@ -275,6 +275,14 @@ rspamd_tokenize_exception (struct rspamd_process_exception *ex, GArray *res)
                g_array_append_val (res, token);
                token.flags = 0;
        }
+       else if (ex->type == RSPAMD_EXCEPTION_INVISIBLE) {
+               token.original.begin = "!!INV!!";
+               token.original.len = sizeof ("!!INV!!") - 1;
+               token.flags = RSPAMD_STAT_TOKEN_FLAG_EXCEPTION;
+
+               g_array_append_val (res, token);
+               token.flags = 0;
+       }
 }
 
 
index e947b0a54378b7ef5c3ee5c87e19f73c5b1360b1..9ee8a09ae9eeb9b27e6ef79c4b3e62aa99d1b199 100644 (file)
@@ -21,6 +21,22 @@ extern "C" {
 
 struct rspamd_config;
 
+enum rspamd_exception_type { /* Kind tag stored in rspamd_process_exception::type */
+       RSPAMD_EXCEPTION_NEWLINE = 0, /* NOTE(review): presumably a line break; producer not shown in this diff */
+       RSPAMD_EXCEPTION_URL, /* NOTE(review): presumably a span occupied by an URL; confirm against producers */
+       RSPAMD_EXCEPTION_GENERIC, /* NOTE(review): meaning not visible in this diff */
+       RSPAMD_EXCEPTION_INVISIBLE, /* Content of an invisible HTML element; tokenized as the "!!INV!!" placeholder */
+};
+/**
+ * Describes a span of text (position, length, kind) that is excepted from regular processing
+ */
+struct rspamd_process_exception {
+       goffset pos; /* Start of the span; the HTML code stores tag->content_offset here */
+       guint len; /* Length of the span; the HTML code stores tag->content_length here */
+       gpointer ptr; /* Opaque payload; for RSPAMD_EXCEPTION_INVISIBLE it is the originating html_tag */
+       enum rspamd_exception_type type; /* Which kind of exception this span represents */
+};
+
 /**
  * Create generic socket
  * @param af address family
index bc1ed8a862c620b2e56116e05f895a4900ac7b07..eb5ce541e6a2dfeaca51879e2d894d3b54bb752e 100644 (file)
@@ -316,21 +316,6 @@ struct rspamd_main {
        struct rspamd_http_context *http_ctx;
 };
 
-enum rspamd_exception_type {
-       RSPAMD_EXCEPTION_NEWLINE = 0,
-       RSPAMD_EXCEPTION_URL,
-       RSPAMD_EXCEPTION_GENERIC,
-};
-/**
- * Structure to point exception in text from processing
- */
-struct rspamd_process_exception {
-       goffset pos;
-       guint len;
-       gpointer ptr;
-       enum rspamd_exception_type type;
-};
-
 /**
  * Control session object
  */
index 79fa5ede4aa840d1c445f8a093c695e7d52d0c28..c5242d95d83f605852128bdf8bbc300522baa9d3 100644 (file)
@@ -13,5 +13,6 @@ Content-Type: text/html
   </head>
   <body class="activity-stream">
     <div>fi<span style="FONT-SIZE: 0px">le </span>sh<span style="FONT-SIZE: 0px">aring </span></div>
+  <a href="https://example.com">test url</a>
   </body>
 </html>