From 4c6234a1a07c3fd777551c6789ad0b44523da210 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Wed, 8 Sep 2021 14:45:33 +0100 Subject: [PATCH] [Fix] Fix some complicated case with the closing tags parsing --- src/libserver/html/html.cxx | 46 ++++++++++++++++++++++++++++++++++--- 1 file changed, 43 insertions(+), 3 deletions(-) diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx index 97009749f..b9729a71e 100644 --- a/src/libserver/html/html.cxx +++ b/src/libserver/html/html.cxx @@ -1752,9 +1752,49 @@ html_process_input(rspamd_mempool_t *pool, break; case tag_raw_text_less_than: if (t == '/') { - /* Shift back */ - p = c; - state = tag_begin; + /* Special handling here: we have seen an opening brace followed by '/', + * so we ensure that there is a closing brace nearby + * (we look ahead at most 30 characters). We also need to ensure + * that we have no special characters, such as punctuation marks and + * so on. + * Basically, we validate the input to be sane. + * Since closing tags must not have attributes, these assumptions + * seem to be reasonable enough for our toy parser. + */ + gint cur_lookahead = 1; + gint max_lookahead = MIN (end - p, 30); + bool valid_closing_tag = true; + + if (p + 1 < end && !g_ascii_isalpha (p[1])) { + valid_closing_tag = false; + } + else { + while (cur_lookahead < max_lookahead) { + gchar tt = p[cur_lookahead]; + if (tt == '>') { + break; + } + else if (tt < '\n' || tt == ',') { + valid_closing_tag = false; + break; + } + cur_lookahead ++; + } + + if (cur_lookahead == max_lookahead) { + valid_closing_tag = false; + } + } + + if (valid_closing_tag) { + /* Shift back */ + p = c; + state = tag_begin; + } + else { + p ++; + state = tag_raw_text; + } } else { p ++; -- 2.39.5