summaryrefslogtreecommitdiffstats
path: root/src/libserver
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2020-03-02 16:52:39 +0000
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2020-03-02 16:52:39 +0000
commit b82366d9eff3791c986c5d04d107d0fb38a65c3c (patch)
tree a9b114b1dd6c9fbf61021f0fa6929577ad9eda68 /src/libserver
parent d32aceb3b3fefc24c13a821c8eca0c4923cdbb48 (diff)
download rspamd-b82366d9eff3791c986c5d04d107d0fb38a65c3c.tar.gz
rspamd-b82366d9eff3791c986c5d04d107d0fb38a65c3c.zip
[Fix] Another brain damage html standard adoptions
Diffstat (limited to 'src/libserver')
-rw-r--r-- src/libserver/html.c 32
1 files changed, 29 insertions, 3 deletions
diff --git a/src/libserver/html.c b/src/libserver/html.c
index 78c69406c..b7e78e57b 100644
--- a/src/libserver/html.c
+++ b/src/libserver/html.c
@@ -1147,9 +1147,35 @@ rspamd_html_parse_tag_content (rspamd_mempool_t *pool,
state = parse_equal;
}
else if (!g_ascii_isspace (*in)) {
- hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
- tag->flags |= FL_BROKEN;
- state = ignore_bad_tag;
+ /*
+ * HTML defines that crap could still be restored and
+ * calculated somehow... So we have to follow this stupid behaviour
+ */
+ /*
+ * TODO: estimate what insane things email clients do in each case
+ */
+ if (*in == '>') {
+ /*
+ * Attribute name followed by end of tag.
+ * Should be okay (empty attribute). The rest is handled outside
+ * this automaton.
+ */
+
+ }
+ else if (*in == '"' || *in == '\'') {
+ /* Attribute followed by quote... Missing '=' ? Dunno, need to test */
+ hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
+ tag->flags |= FL_BROKEN;
+ state = ignore_bad_tag;
+ }
+ else {
+ /*
+ * Just start another attribute, ignoring empty attributes for
+ * now. We don't use them in fact...
+ */
+ state = parse_attr_name;
+ *savep = in;
+ }
}
break;