From acc4b6480944600f47dcc0458214afe5b569ab33 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Wed, 16 Jun 2021 15:22:48 +0100 Subject: [PATCH] [Project] Add process exceptions for invisible text --- src/libserver/html/html.cxx | 56 ++++++++++++++++++++++++++- src/libstat/tokenizers/tokenizers.c | 8 ++++ src/libutil/util.h | 16 ++++++++ src/rspamd.h | 15 ------- test/functional/messages/zerofont.eml | 1 + 5 files changed, 80 insertions(+), 16 deletions(-) diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx index 5c5157740..b9c1a41cc 100644 --- a/src/libserver/html/html.cxx +++ b/src/libserver/html/html.cxx @@ -1658,7 +1658,7 @@ html_process_input(rspamd_mempool_t *pool, }, html_content::traverse_type::POST_ORDER); /* Propagate styles */ - hc->traverse_block_tags([&hc](const html_tag *tag) -> bool { + hc->traverse_block_tags([&hc, &exceptions,&pool](const html_tag *tag) -> bool { if (hc->css_style) { auto *css_block = hc->css_style->check_tag_block(tag); @@ -1674,6 +1674,60 @@ html_process_input(rspamd_mempool_t *pool, if (tag->block) { tag->block->compute_visibility(); + if (exceptions) { /* Exceptions are recorded only when the caller supplied a list to fill */ + if (!tag->block->is_visible()) { + if (tag->parent == nullptr || (tag->parent->block && tag->parent->block->is_visible())) { + /* Topmost invisible element (no parent, or a visible parent): add an exception covering its content */ + auto * ex = rspamd_mempool_alloc_type (pool,struct rspamd_process_exception); + ex->pos = tag->content_offset; + ex->len = tag->content_length; + ex->type = RSPAMD_EXCEPTION_INVISIBLE; + ex->ptr = (void *)tag; + + *exceptions = g_list_prepend(*exceptions, ex); + } + } + else if (*exceptions && tag->parent) { + /* Current block is visible: check whether the most recently prepended exception belongs to its invisible parent */ + auto *ex = (struct rspamd_process_exception*)g_list_first(*exceptions)->data; + + /* + * TODO: we need to handle the following cases for a visible child of an invisible parent: + * - child in the middle of the parent: insert one more exception + * - child at the start of the parent: increase content_offset, decrease length + * - child at the end of the parent: decrease length + */ + if (ex && ex->type == RSPAMD_EXCEPTION_INVISIBLE && + ex->ptr == (void 
*)tag->parent) { + auto *parent = tag->parent; + + if (tag->content_offset + tag->content_length == + parent->content_offset + parent->content_length) { + /* Visible child ends exactly where the parent ends: trim the tail of the exception */ + ex->len -= tag->content_length; + } + else if (tag->content_offset == parent->content_offset) { + /* Visible child starts exactly where the parent starts: move the exception start past the child */ + ex->len -= tag->content_length; + ex->pos += tag->content_length; + } + else if (tag->content_offset > ex->pos) { /* Visible child in the middle: split into a head (ex) and a tail (nex) around it */ + auto *nex = rspamd_mempool_alloc_type (pool, + struct rspamd_process_exception); + auto start_len = tag->content_offset - ex->pos; + auto end_len = ex->len - start_len - tag->content_length; /* what remains after the head and the visible child */ + nex->pos = tag->content_offset + tag->content_length; + nex->len = end_len; + nex->type = RSPAMD_EXCEPTION_INVISIBLE; + nex->ptr = (void *)parent; /* keep pointing at the parent so that later visible siblings still match the ptr check */ + ex->len = start_len; + *exceptions = g_list_prepend(*exceptions, nex); /* link the new tail; the head (ex) is already in the list */ + } + + } + } + } + for (const auto *cld_tag : tag->children) { if (cld_tag->block) { cld_tag->block->propagate_block(*tag->block); diff --git a/src/libstat/tokenizers/tokenizers.c b/src/libstat/tokenizers/tokenizers.c index f3b05240c..8d6d93add 100644 --- a/src/libstat/tokenizers/tokenizers.c +++ b/src/libstat/tokenizers/tokenizers.c @@ -275,6 +275,14 @@ rspamd_tokenize_exception (struct rspamd_process_exception *ex, GArray *res) g_array_append_val (res, token); token.flags = 0; } + else if (ex->type == RSPAMD_EXCEPTION_INVISIBLE) { /* append one fixed marker token flagged as an exception */ + token.original.begin = "!!INV!!"; + token.original.len = sizeof ("!!INV!!") - 1; + token.flags = RSPAMD_STAT_TOKEN_FLAG_EXCEPTION; + + g_array_append_val (res, token); + token.flags = 0; + } } diff --git a/src/libutil/util.h b/src/libutil/util.h index e947b0a54..9ee8a09ae 100644 --- a/src/libutil/util.h +++ b/src/libutil/util.h @@ -21,6 +21,22 @@ extern "C" { struct rspamd_config; +enum rspamd_exception_type { + RSPAMD_EXCEPTION_NEWLINE = 0, + RSPAMD_EXCEPTION_URL, + RSPAMD_EXCEPTION_GENERIC, + RSPAMD_EXCEPTION_INVISIBLE, /* content of an HTML block computed as not visible */ +}; +/** + * Structure to point exception in text from processing + */ +struct rspamd_process_exception { +
goffset pos; /* start offset of the excepted span; html.cxx stores tag->content_offset here */ + guint len; /* length of the excepted span; html.cxx stores tag->content_length here */ + gpointer ptr; /* type-specific payload; for invisible-text exceptions html.cxx stores the html_tag pointer */ + enum rspamd_exception_type type; /* which kind of exception this entry describes */ +}; + /** * Create generic socket * @param af address family diff --git a/src/rspamd.h b/src/rspamd.h index bc1ed8a86..eb5ce541e 100644 --- a/src/rspamd.h +++ b/src/rspamd.h @@ -316,21 +316,6 @@ struct rspamd_main { struct rspamd_http_context *http_ctx; }; -enum rspamd_exception_type { - RSPAMD_EXCEPTION_NEWLINE = 0, - RSPAMD_EXCEPTION_URL, - RSPAMD_EXCEPTION_GENERIC, -}; -/** - * Structure to point exception in text from processing - */ -struct rspamd_process_exception { - goffset pos; - guint len; - gpointer ptr; - enum rspamd_exception_type type; -}; - /** * Control session object */ diff --git a/test/functional/messages/zerofont.eml b/test/functional/messages/zerofont.eml index 79fa5ede4..c5242d95d 100644 --- a/test/functional/messages/zerofont.eml +++ b/test/functional/messages/zerofont.eml @@ -13,5 +13,6 @@ Content-Type: text/html
file sharing
+ test url -- 2.39.5