From ac1385950ae72d5293385e4564917947a44e23c0 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Mon, 25 Feb 2019 12:06:58 +0000 Subject: [PATCH] [Fix] HTML: Another HTML comments exception fix --- src/libserver/html.c | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/src/libserver/html.c b/src/libserver/html.c index 3353db7b7..6840b9a1a 100644 --- a/src/libserver/html.c +++ b/src/libserver/html.c @@ -2690,10 +2690,33 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool, struct html_content *hc, case comment_tag: if (t != '-') { hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS; + state = tag_end; + } + else { + p++; + ebrace = 0; + /* + * https://www.w3.org/TR/2012/WD-html5-20120329/syntax.html#syntax-comments + * ... the text must not start with a single + * U+003E GREATER-THAN SIGN character (>), + * nor start with a "-" (U+002D) character followed by + * a U+003E GREATER-THAN SIGN (>) character, + * nor contain two consecutive U+002D HYPHEN-MINUS + * characters (--), nor end with a "-" (U+002D) character. + */ + if (p[0] == '-' && p + 1 < end && p[1] == '>') { + hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS; + p ++; + state = tag_end; + } + else if (*p == '>') { + hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS; + state = tag_end; + } + else { + state = comment_content; + } } - p ++; - ebrace = 0; - state = comment_content; break; case comment_content: -- 2.39.5