From 40db5f6260c874c1bc5a2f1d8234310df10990f7 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Fri, 26 Mar 2021 20:54:37 +0000 Subject: [PATCH] [Project] Css: Enable conditional css parsing support from the HTML parser --- src/libmime/message.c | 3 ++- src/libserver/cfg_file.h | 1 + src/libserver/cfg_rcl.c | 6 ++++++ src/libserver/html.c | 45 +++++++++++++++++++++++++++++++++++++--- src/libserver/html.h | 3 ++- 5 files changed, 53 insertions(+), 5 deletions(-) diff --git a/src/libmime/message.c b/src/libmime/message.c index 8a9601fa7..9713a6bf5 100644 --- a/src/libmime/message.c +++ b/src/libmime/message.c @@ -769,7 +769,8 @@ rspamd_message_process_html_text_part (struct rspamd_task *task, text_part->utf_raw_content, &text_part->exceptions, MESSAGE_FIELD (task, urls), - text_part->mime_part->urls); + text_part->mime_part->urls, + task->cfg->enable_css_parser); if (text_part->utf_content->len == 0) { text_part->flags |= RSPAMD_MIME_TEXT_PART_FLAG_EMPTY; diff --git a/src/libserver/cfg_file.h b/src/libserver/cfg_file.h index 9ef795d05..67f18e1e9 100644 --- a/src/libserver/cfg_file.h +++ b/src/libserver/cfg_file.h @@ -377,6 +377,7 @@ struct rspamd_config { gboolean soft_reject_on_timeout; /**< If true emit soft reject on task timeout (if not reject) */ gboolean public_groups_only; /**< Output merely public groups everywhere */ gboolean enable_test_patterns; /**< Enable test patterns */ + gboolean enable_css_parser; /**< Enable css parsing in HTML */ gsize max_cores_size; /**< maximum size occupied by rspamd core files */ gsize max_cores_count; /**< maximum number of core files */ diff --git a/src/libserver/cfg_rcl.c b/src/libserver/cfg_rcl.c index ffdc5e596..4891c4194 100644 --- a/src/libserver/cfg_rcl.c +++ b/src/libserver/cfg_rcl.c @@ -1999,6 +1999,12 @@ rspamd_rcl_config_init (struct rspamd_config *cfg, GHashTable *skip_sections) G_STRUCT_OFFSET (struct rspamd_config, enable_test_patterns), 0, "Enable test GTUBE like patterns (not for production!)"); + rspamd_rcl_add_default_handler (sub, + "enable_css_parser", + rspamd_rcl_parse_struct_boolean, + G_STRUCT_OFFSET (struct rspamd_config, enable_css_parser), + 0, + "Enable CSS parser (experimental)"); rspamd_rcl_add_default_handler (sub, "enable_experimental", rspamd_rcl_parse_struct_boolean, diff --git a/src/libserver/html.c b/src/libserver/html.c index 3d9d540f5..b56f3ef32 100644 --- a/src/libserver/html.c +++ b/src/libserver/html.c @@ -24,6 +24,7 @@ #include "url.h" #include "contrib/libucl/khash.h" #include "libmime/images.h" +#include "css/css.h" #include #include @@ -2781,7 +2782,8 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool, GByteArray *in, GList **exceptions, khash_t (rspamd_url_hash) *url_set, - GPtrArray *part_urls) + GPtrArray *part_urls, + bool allow_css) { const guchar *p, *c, *end, *savep = NULL; guchar t; @@ -2809,6 +2811,7 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool, xml_tag_end, content_ignore, content_write, + content_style, content_ignore_sp } state = parse_start; @@ -3118,6 +3121,36 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool, p ++; break; + case content_style: { + + /* + * We just search for the first id == Tag_STYLE) { + state = content_style; + } + else { + state = content_ignore; + } } if (cur_tag->id != -1 && cur_tag->id < N_TAGS) { @@ -3387,5 +3425,6 @@ rspamd_html_process_part (rspamd_mempool_t *pool, struct html_content *hc, GByteArray *in) { - return rspamd_html_process_part_full (pool, hc, in, NULL, NULL, NULL); + return rspamd_html_process_part_full (pool, hc, in, NULL, + NULL, NULL, FALSE); } diff --git a/src/libserver/html.h b/src/libserver/html.h index fba412cb3..f8a5e18e4 100644 --- a/src/libserver/html.h +++ b/src/libserver/html.h @@ -147,7 +147,8 @@ GByteArray *rspamd_html_process_part_full (rspamd_mempool_t *pool, struct html_content *hc, GByteArray *in, GList **exceptions, khash_t (rspamd_url_hash) *url_set, - GPtrArray *part_urls); + GPtrArray *part_urls, + bool allow_css); /* * Returns true if a specified tag has been seen in a part -- 2.39.5