aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2014-11-19 14:19:50 +0000
committerVsevolod Stakhov <vsevolod@highsecure.ru>2014-11-19 14:19:50 +0000
commit51e8a87ce9f7db083b5f3ac74067b08fe9ea5385 (patch)
tree1581f1a376a2f88e106cf2833e64e7694a53d1fd
parent87d9d615fe6954850c56aa3a84dfa19087fb1898 (diff)
downloadrspamd-51e8a87ce9f7db083b5f3ac74067b08fe9ea5385.tar.gz
rspamd-51e8a87ce9f7db083b5f3ac74067b08fe9ea5385.zip
Decode entities when normalizing HTML parts.
-rw-r--r--src/libmime/message.c26
1 files changed, 22 insertions, 4 deletions
diff --git a/src/libmime/message.c b/src/libmime/message.c
index 382567f77..164a1bd9b 100644
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -42,12 +42,13 @@ strip_html_tags (struct rspamd_task *task,
GByteArray * src,
gint *stateptr)
{
- uint8_t *p, *rp, *tbegin = NULL, *end, c, lc;
+ uint8_t *p, *rp, *tbegin = NULL, *end, c, lc, *estart;
gint br, i = 0, depth = 0, in_q = 0;
gint state = 0;
+ guint dlen;
GByteArray *buf;
GNode *level_ptr = NULL;
- gboolean erase = FALSE;
+ gboolean erase = FALSE, html_decode = FALSE;
if (stateptr)
state = *stateptr;
@@ -204,6 +205,25 @@ unbreak_tag:
}
break;
+ case '&':
+ /* Decode entitle */
+ html_decode = TRUE;
+ estart = rp;
+ goto reg_char;
+ break;
+
+ case ';':
+ if (html_decode) {
+ html_decode = FALSE;
+ *rp = ';';
+ if (rp - estart > 0) {
+ dlen = rp - estart + 1;
+ decode_entitles (estart, &dlen);
+ rp = estart + dlen;
+ }
+ }
+ break;
+
case '?':
if (state == 1 && *(p - 1) == '<') {
@@ -211,7 +231,6 @@ unbreak_tag:
state = 2;
break;
}
-
case 'E':
case 'e':
/* !DOCTYPE exception */
@@ -226,7 +245,6 @@ unbreak_tag:
break;
}
/* fall-through */
-
case 'l':
/* swm: If we encounter '<?xml' then we shouldn't be in