From: Vsevolod Stakhov
Date: Wed, 13 Dec 2017 20:22:07 +0000 (+0000)
Subject: [CritFix] Add sanity guards for badly broken HTML
X-Git-Tag: 1.6.6~3^2~24
X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=fd71b4b325baf01188b3b5fb6d1051a4d8737221;p=rspamd.git

[CritFix] Add sanity guards for badly broken HTML
---

diff --git a/src/libserver/html.c b/src/libserver/html.c
index 8ff6b6fad..fc08baeb5 100644
--- a/src/libserver/html.c
+++ b/src/libserver/html.c
@@ -997,6 +997,11 @@ rspamd_html_process_tag (rspamd_mempool_t *pool, struct html_content *hc,
 				nnode);
 	}
 
+	if (tag->id == -1) {
+		/* Ignore unknown tags */
+		return FALSE;
+	}
+
 	tag->parent = *cur_level;
 
 	if (!(tag->flags & CM_INLINE)) {
@@ -1178,6 +1183,8 @@ rspamd_html_parse_tag_content (rspamd_mempool_t *pool,
 		if (!g_ascii_isalpha (*in) && !g_ascii_isspace (*in)) {
 			hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
 			state = ignore_bad_tag;
+			tag->id = -1;
+			tag->flags |= FL_BROKEN;
 		}
 		else if (g_ascii_isalpha (*in)) {
 			state = parse_name;
@@ -1197,6 +1204,7 @@ rspamd_html_parse_tag_content (rspamd_mempool_t *pool,
 
 			if (tag->name.len == 0) {
 				hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
+				tag->id = -1;
 				tag->flags |= FL_BROKEN;
 				state = ignore_bad_tag;
 			}
@@ -1206,8 +1214,7 @@ rspamd_html_parse_tag_content (rspamd_mempool_t *pool,
 
 				s = rspamd_mempool_alloc (pool, tag->name.len);
 				memcpy (s, tag->name.start, tag->name.len);
-				tag->name.len = rspamd_html_decode_entitles_inplace (
-						s,
+				tag->name.len = rspamd_html_decode_entitles_inplace (s,
 						tag->name.len);
 				tag->name.start = s;
 
@@ -2430,6 +2437,8 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool, struct html_content *hc,
 			/* TODO: parse DOCTYPE here */
 			if (t == '>') {
 				state = tag_end;
+				/* We don't know a lot about sgml tags, ignore them */
+				cur_tag = NULL;
 				continue;
 			}
 			p ++;