diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2020-03-02 16:52:39 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2020-03-02 16:52:39 +0000 |
commit | b82366d9eff3791c986c5d04d107d0fb38a65c3c (patch) | |
tree | a9b114b1dd6c9fbf61021f0fa6929577ad9eda68 /src/libserver | |
parent | d32aceb3b3fefc24c13a821c8eca0c4923cdbb48 (diff) | |
download | rspamd-b82366d9eff3791c986c5d04d107d0fb38a65c3c.tar.gz rspamd-b82366d9eff3791c986c5d04d107d0fb38a65c3c.zip |
[Fix] Another brain damage html standard adoptions
Diffstat (limited to 'src/libserver')
-rw-r--r-- | src/libserver/html.c | 32 |
1 files changed, 29 insertions, 3 deletions
diff --git a/src/libserver/html.c b/src/libserver/html.c index 78c69406c..b7e78e57b 100644 --- a/src/libserver/html.c +++ b/src/libserver/html.c @@ -1147,9 +1147,35 @@ rspamd_html_parse_tag_content (rspamd_mempool_t *pool, state = parse_equal; } else if (!g_ascii_isspace (*in)) { - hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS; - tag->flags |= FL_BROKEN; - state = ignore_bad_tag; + /* + * HTML defines that crap could still be restored and + * calculated somehow... So we have to follow this stupid behaviour + */ + /* + * TODO: estimate what insane things do email clients in each case + */ + if (*in == '>') { + /* + * Attribute name followed by end of tag + * Should be okay (empty attribute). The rest is handled outside + * this automaton. + */ + + } + else if (*in == '"' || *in == '\'') { + /* Attribute followed by quote... Missing '=' ? Dunno, need to test */ + hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS; + tag->flags |= FL_BROKEN; + state = ignore_bad_tag; + } + else { + /* + * Just start another attribute ignoring empty attributes for + * now. We don't use them in fact... + */ + state = parse_attr_name; + *savep = in; + } } break; |