From 4f254839f829ec18b2ec144a6de6777b1f5688f7 Mon Sep 17 00:00:00 2001
From: Vsevolod Stakhov
Date: Tue, 5 Oct 2021 15:28:43 +0100
Subject: [PATCH] [Fix] Fix parsing of the unquoted attributes in HTML

---
 src/libserver/html/html.cxx | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx
index f8b3e96ea..1e5d52241 100644
--- a/src/libserver/html/html.cxx
+++ b/src/libserver/html/html.cxx
@@ -250,6 +250,7 @@ html_parse_tag_content(rspamd_mempool_t *pool,
 		ignore_bad_tag,
 		tag_end,
 		slash_after_value,
+		slash_in_unqouted_value,
 	} state;
 
 	state = static_cast(parser_env.cur_state);
@@ -513,8 +514,7 @@ html_parse_tag_content(rspamd_mempool_t *pool,
 
 	case parse_value:
 		if (*in == '/') {
-			state = slash_after_value;
-			store_component_value();
+			state = slash_in_unqouted_value;
 		}
 		else if (g_ascii_isspace (*in) || *in == '>' || *in == '"') {
 			store_component_value();
@@ -570,6 +570,20 @@ html_parse_tag_content(rspamd_mempool_t *pool,
 			state = parse_attr_name;
 		}
 		break;
+	case slash_in_unqouted_value:
+		if (*in == '>') {
+			/* That slash was in fact closing tag slash, wohoo */
+			tag->flags |= FL_CLOSED;
+			state = tag_end;
+			store_component_value();
+		}
+		else {
+			/* Welcome to the world of html, revert state and save missing / */
+			parser_env.buf.push_back('/');
+			store_value_character(false);
+			state = parse_value;
+		}
+		break;
 	case ignore_bad_tag:
 	case tag_end:
 		break;
-- 
2.39.5