source.dussan.org Git - rspamd.git/commitdiff
[Fix] Fix parsing of the unquoted attributes in HTML
author: Vsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 5 Oct 2021 14:28:43 +0000 (15:28 +0100)
committer: Vsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 5 Oct 2021 14:28:43 +0000 (15:28 +0100)
src/libserver/html/html.cxx

index f8b3e96ea1039725968565dd8a4d8d6ae121a506..1e5d5224194f4b9a939a136bc334e23756a25334 100644 (file)
@@ -250,6 +250,7 @@ html_parse_tag_content(rspamd_mempool_t *pool,
                ignore_bad_tag,
                tag_end,
                slash_after_value,
+               slash_in_unqouted_value,
        } state;
 
        state = static_cast<enum tag_parser_state>(parser_env.cur_state);
@@ -513,8 +514,7 @@ html_parse_tag_content(rspamd_mempool_t *pool,
 
        case parse_value:
                if (*in == '/') {
-                       state = slash_after_value;
-                       store_component_value();
+                       state = slash_in_unqouted_value;
                }
                else if (g_ascii_isspace (*in) || *in == '>' || *in == '"') {
                        store_component_value();
@@ -570,6 +570,20 @@ html_parse_tag_content(rspamd_mempool_t *pool,
                        state = parse_attr_name;
                }
                break;
+       case slash_in_unqouted_value:
+               if (*in == '>') {
+                       /* That slash was in fact closing tag slash, wohoo */
+                       tag->flags |= FL_CLOSED;
+                       state = tag_end;
+                       store_component_value();
+               }
+               else {
+                       /* Welcome to the world of html, revert state and save missing / */
+                       parser_env.buf.push_back('/');
+                       store_value_character(false);
+                       state = parse_value;
+               }
+               break;
        case ignore_bad_tag:
        case tag_end:
                break;