about summary refs log tree commit diff stats
path: root/src/libserver
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2021-03-26 20:54:37 +0000
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2021-03-26 20:55:10 +0000
commit 40db5f6260c874c1bc5a2f1d8234310df10990f7 (patch)
tree 25f450a7ccc02010411ecfefd0b04f2904275096 /src/libserver
parent 533946781a68fc227d61bd6333ef0214e20c05ba (diff)
download rspamd-40db5f6260c874c1bc5a2f1d8234310df10990f7.tar.gz
rspamd-40db5f6260c874c1bc5a2f1d8234310df10990f7.zip
[Project] Css: Enable conditional css parsing support from the HTML parser
Diffstat (limited to 'src/libserver')
-rw-r--r-- src/libserver/cfg_file.h 1
-rw-r--r-- src/libserver/cfg_rcl.c 6
-rw-r--r-- src/libserver/html.c 45
-rw-r--r-- src/libserver/html.h 3
4 files changed, 51 insertions, 4 deletions
diff --git a/src/libserver/cfg_file.h b/src/libserver/cfg_file.h
index 9ef795d05..67f18e1e9 100644
--- a/src/libserver/cfg_file.h
+++ b/src/libserver/cfg_file.h
@@ -377,6 +377,7 @@ struct rspamd_config {
gboolean soft_reject_on_timeout; /**< If true emit soft reject on task timeout (if not reject) */
gboolean public_groups_only; /**< Output merely public groups everywhere */
gboolean enable_test_patterns; /**< Enable test patterns */
+ gboolean enable_css_parser; /**< Enable css parsing in HTML */
gsize max_cores_size; /**< maximum size occupied by rspamd core files */
gsize max_cores_count; /**< maximum number of core files */
diff --git a/src/libserver/cfg_rcl.c b/src/libserver/cfg_rcl.c
index ffdc5e596..4891c4194 100644
--- a/src/libserver/cfg_rcl.c
+++ b/src/libserver/cfg_rcl.c
@@ -2000,6 +2000,12 @@ rspamd_rcl_config_init (struct rspamd_config *cfg, GHashTable *skip_sections)
0,
"Enable test GTUBE like patterns (not for production!)");
rspamd_rcl_add_default_handler (sub,
+ "enable_css_parser",
+ rspamd_rcl_parse_struct_boolean,
+ G_STRUCT_OFFSET (struct rspamd_config, enable_css_parser),
+ 0,
+ "Enable CSS parser (experimental)");
+ rspamd_rcl_add_default_handler (sub,
"enable_experimental",
rspamd_rcl_parse_struct_boolean,
G_STRUCT_OFFSET (struct rspamd_config, enable_experimental),
diff --git a/src/libserver/html.c b/src/libserver/html.c
index 3d9d540f5..b56f3ef32 100644
--- a/src/libserver/html.c
+++ b/src/libserver/html.c
@@ -24,6 +24,7 @@
#include "url.h"
#include "contrib/libucl/khash.h"
#include "libmime/images.h"
+#include "css/css.h"
#include <unicode/uversion.h>
#include <unicode/ucnv.h>
@@ -2781,7 +2782,8 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool,
GByteArray *in,
GList **exceptions,
khash_t (rspamd_url_hash) *url_set,
- GPtrArray *part_urls)
+ GPtrArray *part_urls,
+ bool allow_css)
{
const guchar *p, *c, *end, *savep = NULL;
guchar t;
@@ -2809,6 +2811,7 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool,
xml_tag_end,
content_ignore,
content_write,
+ content_style,
content_ignore_sp
} state = parse_start;
@@ -3118,6 +3121,36 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool,
p ++;
break;
+		case content_style: {
+			/*
+			 * Find the first "</" after <style>; it closes the block only when
+			 * an 's' follows within the input. A "</" that ends the buffer has
+			 * no next byte, so the bound is checked before p[end_style + 2].
+			 */
+			goffset end_style = rspamd_substring_search (p, end - p,
+					"</", 2);
+			if (end_style == -1 || end_style + 2 >= end - p ||
+					g_ascii_tolower (p[end_style + 2]) != 's') {
+				/* Invalid style */
+				state = content_ignore;
+			}
+			else {
+				if (allow_css) {
+					GError *err = NULL;
+					(void)rspamd_css_parse_style (pool, p, end_style, &err);
+
+					if (err) {
+						msg_info_pool ("cannot parse css: %e", err);
+						g_error_free (err);
+					}
+				}
+
+				p += end_style;
+				state = tag_begin;
+			}
+			break;
+		}
+
case content_ignore_sp:
if (!g_ascii_isspace (t)) {
c = p;
@@ -3173,7 +3206,12 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool,
need_decode = FALSE;
}
else {
- state = content_ignore;
+ if (cur_tag->id == Tag_STYLE) {
+ state = content_style;
+ }
+ else {
+ state = content_ignore;
+ }
}
if (cur_tag->id != -1 && cur_tag->id < N_TAGS) {
@@ -3387,5 +3425,6 @@ rspamd_html_process_part (rspamd_mempool_t *pool,
struct html_content *hc,
GByteArray *in)
{
- return rspamd_html_process_part_full (pool, hc, in, NULL, NULL, NULL);
+ return rspamd_html_process_part_full (pool, hc, in, NULL,
+ NULL, NULL, FALSE);
}
diff --git a/src/libserver/html.h b/src/libserver/html.h
index fba412cb3..f8a5e18e4 100644
--- a/src/libserver/html.h
+++ b/src/libserver/html.h
@@ -147,7 +147,8 @@ GByteArray *rspamd_html_process_part_full (rspamd_mempool_t *pool,
struct html_content *hc,
GByteArray *in, GList **exceptions,
khash_t (rspamd_url_hash) *url_set,
- GPtrArray *part_urls);
+ GPtrArray *part_urls,
+ bool allow_css);
/*
* Returns true if a specified tag has been seen in a part