diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2021-03-26 20:54:37 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2021-03-26 20:55:10 +0000 |
commit | 40db5f6260c874c1bc5a2f1d8234310df10990f7 (patch) | |
tree | 25f450a7ccc02010411ecfefd0b04f2904275096 /src/libserver | |
parent | 533946781a68fc227d61bd6333ef0214e20c05ba (diff) | |
download | rspamd-40db5f6260c874c1bc5a2f1d8234310df10990f7.tar.gz rspamd-40db5f6260c874c1bc5a2f1d8234310df10990f7.zip |
[Project] Css: Enable conditional css parsing support from the HTML parser
Diffstat (limited to 'src/libserver')
-rw-r--r-- | src/libserver/cfg_file.h | 1 | ||||
-rw-r--r-- | src/libserver/cfg_rcl.c | 6 | ||||
-rw-r--r-- | src/libserver/html.c | 45 | ||||
-rw-r--r-- | src/libserver/html.h | 3 |
4 files changed, 51 insertions, 4 deletions
diff --git a/src/libserver/cfg_file.h b/src/libserver/cfg_file.h index 9ef795d05..67f18e1e9 100644 --- a/src/libserver/cfg_file.h +++ b/src/libserver/cfg_file.h @@ -377,6 +377,7 @@ struct rspamd_config { gboolean soft_reject_on_timeout; /**< If true emit soft reject on task timeout (if not reject) */ gboolean public_groups_only; /**< Output merely public groups everywhere */ gboolean enable_test_patterns; /**< Enable test patterns */ + gboolean enable_css_parser; /**< Enable css parsing in HTML */ gsize max_cores_size; /**< maximum size occupied by rspamd core files */ gsize max_cores_count; /**< maximum number of core files */ diff --git a/src/libserver/cfg_rcl.c b/src/libserver/cfg_rcl.c index ffdc5e596..4891c4194 100644 --- a/src/libserver/cfg_rcl.c +++ b/src/libserver/cfg_rcl.c @@ -2000,6 +2000,12 @@ rspamd_rcl_config_init (struct rspamd_config *cfg, GHashTable *skip_sections) 0, "Enable test GTUBE like patterns (not for production!)"); rspamd_rcl_add_default_handler (sub, + "enable_css_parser", + rspamd_rcl_parse_struct_boolean, + G_STRUCT_OFFSET (struct rspamd_config, enable_css_parser), + 0, + "Enable CSS parser (experimental)"); + rspamd_rcl_add_default_handler (sub, "enable_experimental", rspamd_rcl_parse_struct_boolean, G_STRUCT_OFFSET (struct rspamd_config, enable_experimental), diff --git a/src/libserver/html.c b/src/libserver/html.c index 3d9d540f5..b56f3ef32 100644 --- a/src/libserver/html.c +++ b/src/libserver/html.c @@ -24,6 +24,7 @@ #include "url.h" #include "contrib/libucl/khash.h" #include "libmime/images.h" +#include "css/css.h" #include <unicode/uversion.h> #include <unicode/ucnv.h> @@ -2781,7 +2782,8 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool, GByteArray *in, GList **exceptions, khash_t (rspamd_url_hash) *url_set, - GPtrArray *part_urls) + GPtrArray *part_urls, + bool allow_css) { const guchar *p, *c, *end, *savep = NULL; guchar t; @@ -2809,6 +2811,7 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool, xml_tag_end, content_ignore, content_write, + content_style, content_ignore_sp } state = parse_start; @@ -3118,6 +3121,36 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool, p ++; break; + case content_style: { + + /* + * We just search for the first </s substring and then pass + * the content to the parser (if needed) + */ + goffset end_style = rspamd_substring_search (p, end - p, + "</", 2); + if (end_style == -1 || g_ascii_tolower (p[end_style + 2]) != 's') { + /* Invalid style */ + state = content_ignore; + } + else { + + if (allow_css) { + GError *err = NULL; + (void)rspamd_css_parse_style (pool, p, end_style, &err); + + if (err) { + msg_info_pool ("cannot parse css: %e", err); + g_error_free (err); + } + } + + p += end_style; + state = tag_begin; + } + break; + } + case content_ignore_sp: if (!g_ascii_isspace (t)) { c = p; @@ -3173,7 +3206,12 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool, need_decode = FALSE; } else { - state = content_ignore; + if (cur_tag->id == Tag_STYLE) { + state = content_style; + } + else { + state = content_ignore; + } } if (cur_tag->id != -1 && cur_tag->id < N_TAGS) { @@ -3387,5 +3425,6 @@ rspamd_html_process_part (rspamd_mempool_t *pool, struct html_content *hc, GByteArray *in) { - return rspamd_html_process_part_full (pool, hc, in, NULL, NULL, NULL); + return rspamd_html_process_part_full (pool, hc, in, NULL, + NULL, NULL, FALSE); } diff --git a/src/libserver/html.h b/src/libserver/html.h index fba412cb3..f8a5e18e4 100644 --- a/src/libserver/html.h +++ b/src/libserver/html.h @@ -147,7 +147,8 @@ GByteArray *rspamd_html_process_part_full (rspamd_mempool_t *pool, struct html_content *hc, GByteArray *in, GList **exceptions, khash_t (rspamd_url_hash) *url_set, - GPtrArray *part_urls); + GPtrArray *part_urls, + bool allow_css); /* * Returns true if a specified tag has been seen in a part |