about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2019-01-24 14:09:07 +0000
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2019-01-24 14:09:07 +0000
commit b1dca3c8bdba396ed5dc692cbcd22186f7dcc525 (patch)
tree 769af0dba00677777042c54e6a36e9564ec67925
parent b29c55a1945c81036a02c62d0329a63877ac73b4 (diff)
download rspamd-b1dca3c8bdba396ed5dc692cbcd22186f7dcc525.tar.gz
rspamd-b1dca3c8bdba396ed5dc692cbcd22186f7dcc525.zip
[Fix] HTML: Fix entities in HTML attributes
-rw-r--r-- src/libserver/html.c 45
-rw-r--r-- src/libserver/html.h 2
2 files changed, 37 insertions, 10 deletions
diff --git a/src/libserver/html.c b/src/libserver/html.c
index ee276d813..63d913762 100644
--- a/src/libserver/html.c
+++ b/src/libserver/html.c
@@ -337,9 +337,9 @@ rspamd_html_tag_by_id (gint id)
/* Decode HTML entitles in text */
guint
-rspamd_html_decode_entitles_inplace (gchar *s, guint len)
+rspamd_html_decode_entitles_inplace (gchar *s, gsize len)
{
- guint l, rep_len;
+ goffset l, rep_len;
gchar *t = s, *h = s, *e = s, *end_ptr;
const gchar *end;
const gchar *entity;
@@ -355,7 +355,7 @@ rspamd_html_decode_entitles_inplace (gchar *s, guint len)
end = s + l;
- while (h - s < (gint)l) {
+ while (h - s < l) {
switch (state) {
/* Out of entity */
case 0:
@@ -448,7 +448,11 @@ rspamd_html_decode_entitles_inplace (gchar *s, guint len)
t += g_unichar_to_utf8 (val, t);
}
else {
- /* Remove unknown entities */
+ /* Leave unknown entities as is */
+ if (end - t >= h - e) {
+ memmove (t, e, h - e);
+ t += h - e;
+ }
}
}
}
@@ -463,6 +467,15 @@ rspamd_html_decode_entitles_inplace (gchar *s, guint len)
}
}
+ /* Leftover */
+ if (state == 1 && h > e) {
+ /* Unfinished entity, copy as is */
+ if (end - t >= h - e) {
+ memmove (t, e, h - e);
+ t += h - e;
+ }
+ }
+
return (t - s);
}
@@ -898,7 +911,7 @@ rspamd_html_parse_tag_component (rspamd_mempool_t *pool,
return ret;
}
-static void
+static inline void
rspamd_html_parse_tag_content (rspamd_mempool_t *pool,
struct html_content *hc, struct html_tag *tag, const guchar *in,
gint *statep, guchar const **savep)
@@ -1151,12 +1164,16 @@ rspamd_html_parse_tag_content (rspamd_mempool_t *pool,
if (store) {
if (*savep != NULL) {
+ gchar *s;
+
g_assert (tag->params != NULL);
comp = g_queue_peek_tail (tag->params);
g_assert (comp != NULL);
comp->len = in - *savep;
- comp->start = *savep;
- /* We cannot use entities inside tag values ! */
+ s = rspamd_mempool_alloc (pool, comp->len);
+ memcpy (s, *savep, comp->len);
+ comp->len = rspamd_html_decode_entitles_inplace (s, comp->len);
+ comp->start = s;
*savep = NULL;
}
}
@@ -1169,11 +1186,16 @@ rspamd_html_parse_tag_content (rspamd_mempool_t *pool,
}
if (store) {
if (*savep != NULL) {
+ gchar *s;
+
g_assert (tag->params != NULL);
comp = g_queue_peek_tail (tag->params);
g_assert (comp != NULL);
comp->len = in - *savep;
- comp->start = *savep;
+ s = rspamd_mempool_alloc (pool, comp->len);
+ memcpy (s, *savep, comp->len);
+ comp->len = rspamd_html_decode_entitles_inplace (s, comp->len);
+ comp->start = s;
*savep = NULL;
}
}
@@ -1191,11 +1213,16 @@ rspamd_html_parse_tag_content (rspamd_mempool_t *pool,
if (store) {
if (*savep != NULL) {
+ gchar *s;
+
g_assert (tag->params != NULL);
comp = g_queue_peek_tail (tag->params);
g_assert (comp != NULL);
comp->len = in - *savep;
- comp->start = *savep;
+ s = rspamd_mempool_alloc (pool, comp->len);
+ memcpy (s, *savep, comp->len);
+ comp->len = rspamd_html_decode_entitles_inplace (s, comp->len);
+ comp->start = s;
*savep = NULL;
}
}
diff --git a/src/libserver/html.h b/src/libserver/html.h
index a2f3a0b1d..f816567bd 100644
--- a/src/libserver/html.h
+++ b/src/libserver/html.h
@@ -127,7 +127,7 @@ struct html_content {
/*
* Decode HTML entitles in text. Text is modified in place.
*/
-guint rspamd_html_decode_entitles_inplace (gchar *s, guint len);
+guint rspamd_html_decode_entitles_inplace (gchar *s, gsize len);
GByteArray* rspamd_html_process_part (rspamd_mempool_t *pool,
struct html_content *hc,