diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2019-01-24 14:09:07 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2019-01-24 14:09:07 +0000 |
commit | b1dca3c8bdba396ed5dc692cbcd22186f7dcc525 (patch) | |
tree | 769af0dba00677777042c54e6a36e9564ec67925 | |
parent | b29c55a1945c81036a02c62d0329a63877ac73b4 (diff) | |
download | rspamd-b1dca3c8bdba396ed5dc692cbcd22186f7dcc525.tar.gz rspamd-b1dca3c8bdba396ed5dc692cbcd22186f7dcc525.zip |
[Fix] HTML: Fix entities in HTML attributes
-rw-r--r-- | src/libserver/html.c | 45 | ||||
-rw-r--r-- | src/libserver/html.h | 2 |
2 files changed, 37 insertions, 10 deletions
```diff
diff --git a/src/libserver/html.c b/src/libserver/html.c
index ee276d813..63d913762 100644
--- a/src/libserver/html.c
+++ b/src/libserver/html.c
@@ -337,9 +337,9 @@ rspamd_html_tag_by_id (gint id)
 
 /* Decode HTML entitles in text */
 guint
-rspamd_html_decode_entitles_inplace (gchar *s, guint len)
+rspamd_html_decode_entitles_inplace (gchar *s, gsize len)
 {
-	guint l, rep_len;
+	goffset l, rep_len;
 	gchar *t = s, *h = s, *e = s, *end_ptr;
 	const gchar *end;
 	const gchar *entity;
@@ -355,7 +355,7 @@ rspamd_html_decode_entitles_inplace (gchar *s, guint len)
 
 	end = s + l;
 
-	while (h - s < (gint)l) {
+	while (h - s < l) {
 		switch (state) {
 		/* Out of entity */
 		case 0:
@@ -448,7 +448,11 @@ rspamd_html_decode_entitles_inplace (gchar *s, guint len)
 						t += g_unichar_to_utf8 (val, t);
 					}
 					else {
-						/* Remove unknown entities */
+						/* Leave unknown entities as is */
+						if (end - t >= h - e) {
+							memmove (t, e, h - e);
+							t += h - e;
+						}
 					}
 				}
 			}
@@ -463,6 +467,15 @@ rspamd_html_decode_entitles_inplace (gchar *s, guint len)
 		}
 	}
 
+	/* Leftover */
+	if (state == 1 && h > e) {
+		/* Unfinished entity, copy as is */
+		if (end - t >= h - e) {
+			memmove (t, e, h - e);
+			t += h - e;
+		}
+	}
+
 	return (t - s);
 }
 
@@ -898,7 +911,7 @@ rspamd_html_parse_tag_component (rspamd_mempool_t *pool,
 	return ret;
 }
 
-static void
+static inline void
 rspamd_html_parse_tag_content (rspamd_mempool_t *pool,
 		struct html_content *hc, struct html_tag *tag, const guchar *in,
 		gint *statep, guchar const **savep)
@@ -1151,12 +1164,16 @@ rspamd_html_parse_tag_content (rspamd_mempool_t *pool,
 
 		if (store) {
 			if (*savep != NULL) {
+				gchar *s;
+
 				g_assert (tag->params != NULL);
 				comp = g_queue_peek_tail (tag->params);
 				g_assert (comp != NULL);
 				comp->len = in - *savep;
-				comp->start = *savep;
-				/* We cannot use entities inside tag values ! */
+				s = rspamd_mempool_alloc (pool, comp->len);
+				memcpy (s, *savep, comp->len);
+				comp->len = rspamd_html_decode_entitles_inplace (s, comp->len);
+				comp->start = s;
 				*savep = NULL;
 			}
 		}
@@ -1169,11 +1186,16 @@ rspamd_html_parse_tag_content (rspamd_mempool_t *pool,
 		}
 		if (store) {
 			if (*savep != NULL) {
+				gchar *s;
+
 				g_assert (tag->params != NULL);
 				comp = g_queue_peek_tail (tag->params);
 				g_assert (comp != NULL);
 				comp->len = in - *savep;
-				comp->start = *savep;
+				s = rspamd_mempool_alloc (pool, comp->len);
+				memcpy (s, *savep, comp->len);
+				comp->len = rspamd_html_decode_entitles_inplace (s, comp->len);
+				comp->start = s;
 				*savep = NULL;
 			}
 		}
@@ -1191,11 +1213,16 @@ rspamd_html_parse_tag_content (rspamd_mempool_t *pool,
 
 		if (store) {
 			if (*savep != NULL) {
+				gchar *s;
+
 				g_assert (tag->params != NULL);
 				comp = g_queue_peek_tail (tag->params);
 				g_assert (comp != NULL);
 				comp->len = in - *savep;
-				comp->start = *savep;
+				s = rspamd_mempool_alloc (pool, comp->len);
+				memcpy (s, *savep, comp->len);
+				comp->len = rspamd_html_decode_entitles_inplace (s, comp->len);
+				comp->start = s;
 				*savep = NULL;
 			}
 		}
diff --git a/src/libserver/html.h b/src/libserver/html.h
index a2f3a0b1d..f816567bd 100644
--- a/src/libserver/html.h
+++ b/src/libserver/html.h
@@ -127,7 +127,7 @@ struct html_content {
 /*
  * Decode HTML entitles in text. Text is modified in place.
  */
-guint rspamd_html_decode_entitles_inplace (gchar *s, guint len);
+guint rspamd_html_decode_entitles_inplace (gchar *s, gsize len);
 
 GByteArray* rspamd_html_process_part (rspamd_mempool_t *pool,
 		struct html_content *hc,
```