diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2017-12-13 20:22:07 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2017-12-13 20:22:07 +0000 |
commit | 85c3e693bfeae78f1ffa549eab64da8ac6c05ad1 (patch) | |
tree | f41e0e410567a3f087f6feb1d9a79f254d59fdb4 /src | |
parent | 5aa7af16f9f3b6cc27c831109b422261875f230d (diff) | |
download | rspamd-85c3e693bfeae78f1ffa549eab64da8ac6c05ad1.tar.gz rspamd-85c3e693bfeae78f1ffa549eab64da8ac6c05ad1.zip |
[CritFix] Add sanity guards for badly broken HTML
Diffstat (limited to 'src')
-rw-r--r-- | src/libserver/html.c | 13 |
1 file changed, 11 insertions, 2 deletions
diff --git a/src/libserver/html.c b/src/libserver/html.c
index 8ff6b6fad..fc08baeb5 100644
--- a/src/libserver/html.c
+++ b/src/libserver/html.c
@@ -997,6 +997,11 @@ rspamd_html_process_tag (rspamd_mempool_t *pool, struct html_content *hc,
 				nnode);
 	}
 
+	if (tag->id == -1) {
+		/* Ignore unknown tags */
+		return FALSE;
+	}
+
 	tag->parent = *cur_level;
 
 	if (!(tag->flags & CM_INLINE)) {
@@ -1178,6 +1183,8 @@ rspamd_html_parse_tag_content (rspamd_mempool_t *pool,
 		if (!g_ascii_isalpha (*in) && !g_ascii_isspace (*in)) {
 			hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
 			state = ignore_bad_tag;
+			tag->id = -1;
+			tag->flags |= FL_BROKEN;
 		}
 		else if (g_ascii_isalpha (*in)) {
 			state = parse_name;
@@ -1197,6 +1204,7 @@ rspamd_html_parse_tag_content (rspamd_mempool_t *pool,
 
 			if (tag->name.len == 0) {
 				hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
+				tag->id = -1;
 				tag->flags |= FL_BROKEN;
 				state = ignore_bad_tag;
 			}
@@ -1206,8 +1214,7 @@ rspamd_html_parse_tag_content (rspamd_mempool_t *pool,
 				s = rspamd_mempool_alloc (pool, tag->name.len);
 				memcpy (s, tag->name.start, tag->name.len);
 
-				tag->name.len = rspamd_html_decode_entitles_inplace (
-						s,
+				tag->name.len = rspamd_html_decode_entitles_inplace (s,
 						tag->name.len);
 				tag->name.start = s;
 
@@ -2430,6 +2437,8 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool, struct html_content *hc,
 			/* TODO: parse DOCTYPE here */
 			if (t == '>') {
 				state = tag_end;
+				/* We don't know a lot about sgml tags, ignore them */
+				cur_tag = NULL;
 				continue;
 			}
 			p ++;