diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2014-11-19 14:19:50 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2014-11-19 14:19:50 +0000 |
commit | 51e8a87ce9f7db083b5f3ac74067b08fe9ea5385 (patch) | |
tree | 1581f1a376a2f88e106cf2833e64e7694a53d1fd /src/libmime | |
parent | 87d9d615fe6954850c56aa3a84dfa19087fb1898 (diff) | |
download | rspamd-51e8a87ce9f7db083b5f3ac74067b08fe9ea5385.tar.gz rspamd-51e8a87ce9f7db083b5f3ac74067b08fe9ea5385.zip |
Decode entities when normalizing HTML parts.
Diffstat (limited to 'src/libmime')
-rw-r--r-- | src/libmime/message.c | 26 |
1 files changed, 22 insertions, 4 deletions
diff --git a/src/libmime/message.c b/src/libmime/message.c index 382567f77..164a1bd9b 100644 --- a/src/libmime/message.c +++ b/src/libmime/message.c @@ -42,12 +42,13 @@ strip_html_tags (struct rspamd_task *task, GByteArray * src, gint *stateptr) { - uint8_t *p, *rp, *tbegin = NULL, *end, c, lc; + uint8_t *p, *rp, *tbegin = NULL, *end, c, lc, *estart; gint br, i = 0, depth = 0, in_q = 0; gint state = 0; + guint dlen; GByteArray *buf; GNode *level_ptr = NULL; - gboolean erase = FALSE; + gboolean erase = FALSE, html_decode = FALSE; if (stateptr) state = *stateptr; @@ -204,6 +205,25 @@ unbreak_tag: } break; + case '&': + /* Decode entitle */ + html_decode = TRUE; + estart = rp; + goto reg_char; + break; + + case ';': + if (html_decode) { + html_decode = FALSE; + *rp = ';'; + if (rp - estart > 0) { + dlen = rp - estart + 1; + decode_entitles (estart, &dlen); + rp = estart + dlen; + } + } + break; + case '?': if (state == 1 && *(p - 1) == '<') { @@ -211,7 +231,6 @@ unbreak_tag: state = 2; break; } - case 'E': case 'e': /* !DOCTYPE exception */ @@ -226,7 +245,6 @@ unbreak_tag: break; } /* fall-through */ - case 'l': /* swm: If we encounter '<?xml' then we shouldn't be in |