source.dussan.org Git - rspamd.git/commitdiff
[Fix] HTML: Fix entities in HTML attributes
author: Vsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 24 Jan 2019 14:09:07 +0000 (14:09 +0000)
committer: Vsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 24 Jan 2019 14:09:07 +0000 (14:09 +0000)
src/libserver/html.c
src/libserver/html.h

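diff --git a/src/libserver/html.c b/src/libserver/html.c
index ee276d813fb55ff1467f35166c4d422f720ddfd9..63d9137624bb9ec12131e6d0cc15ef4b6e3c4898 100644
--- a/src/libserver/html.c
+++ b/src/libserver/html.c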
@@ -337,9 +337,9 @@ rspamd_html_tag_by_id (gint id)
 
 /* Decode HTML entitles in text */
 guint
-rspamd_html_decode_entitles_inplace (gchar *s, guint len)
+rspamd_html_decode_entitles_inplace (gchar *s, gsize len)
 {
-       guint l, rep_len;
+       goffset l, rep_len;
        gchar *t = s, *h = s, *e = s, *end_ptr;
        const gchar *end;
        const gchar *entity;
@@ -355,7 +355,7 @@ rspamd_html_decode_entitles_inplace (gchar *s, guint len)
 
        end = s + l;
 
-       while (h - s < (gint)l) {
+       while (h - s < l) {
                switch (state) {
                /* Out of entity */
                case 0:
@@ -448,7 +448,11 @@ rspamd_html_decode_entitles_inplace (gchar *s, guint len)
                                                                t += g_unichar_to_utf8 (val, t);
                                                        }
                                                        else {
-                                                               /* Remove unknown entities */
+                                                               /* Leave unknown entities as is */
+                                                               if (end - t >= h - e) {
+                                                                       memmove (t, e, h - e);
+                                                                       t += h - e;
+                                                               }
                                                        }
                                                }
                                        }
@@ -463,6 +467,15 @@ rspamd_html_decode_entitles_inplace (gchar *s, guint len)
                }
        }
 
+       /* Leftover */
+       if (state == 1 && h > e) {
+               /* Unfinished entity, copy as is */
+               if (end - t >= h - e) {
+                       memmove (t, e, h - e);
+                       t += h - e;
+               }
+       }
+
        return (t - s);
 }
 
@@ -898,7 +911,7 @@ rspamd_html_parse_tag_component (rspamd_mempool_t *pool,
        return ret;
 }
 
-static void
+static inline void
 rspamd_html_parse_tag_content (rspamd_mempool_t *pool,
                struct html_content *hc, struct html_tag *tag, const guchar *in,
                gint *statep, guchar const **savep)
@@ -1151,12 +1164,16 @@ rspamd_html_parse_tag_content (rspamd_mempool_t *pool,
 
                if (store) {
                        if (*savep != NULL) {
+                               gchar *s;
+
                                g_assert (tag->params != NULL);
                                comp = g_queue_peek_tail (tag->params);
                                g_assert (comp != NULL);
                                comp->len = in - *savep;
-                               comp->start = *savep;
-                               /* We cannot use entities inside tag values ! */
+                               s = rspamd_mempool_alloc (pool, comp->len);
+                               memcpy (s, *savep, comp->len);
+                               comp->len = rspamd_html_decode_entitles_inplace (s, comp->len);
+                               comp->start = s;
                                *savep = NULL;
                        }
                }
@@ -1169,11 +1186,16 @@ rspamd_html_parse_tag_content (rspamd_mempool_t *pool,
                }
                if (store) {
                        if (*savep != NULL) {
+                               gchar *s;
+
                                g_assert (tag->params != NULL);
                                comp = g_queue_peek_tail (tag->params);
                                g_assert (comp != NULL);
                                comp->len = in - *savep;
-                               comp->start = *savep;
+                               s = rspamd_mempool_alloc (pool, comp->len);
+                               memcpy (s, *savep, comp->len);
+                               comp->len = rspamd_html_decode_entitles_inplace (s, comp->len);
+                               comp->start = s;
                                *savep = NULL;
                        }
                }
@@ -1191,11 +1213,16 @@ rspamd_html_parse_tag_content (rspamd_mempool_t *pool,
 
                if (store) {
                        if (*savep != NULL) {
+                               gchar *s;
+
                                g_assert (tag->params != NULL);
                                comp = g_queue_peek_tail (tag->params);
                                g_assert (comp != NULL);
                                comp->len = in - *savep;
-                               comp->start = *savep;
+                               s = rspamd_mempool_alloc (pool, comp->len);
+                               memcpy (s, *savep, comp->len);
+                               comp->len = rspamd_html_decode_entitles_inplace (s, comp->len);
+                               comp->start = s;
                                *savep = NULL;
                        }
                }
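diff --git a/src/libserver/html.h b/src/libserver/html.h
index a2f3a0b1d52be8fd0d24ab3538702b9b187228e9..f816567bdc18f18cb1cb0467d1ea5dc2eee756b1 100644
--- a/src/libserver/html.h
+++ b/src/libserver/html.h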
@@ -127,7 +127,7 @@ struct html_content {
 /*
  * Decode HTML entitles in text. Text is modified in place.
  */
-guint rspamd_html_decode_entitles_inplace (gchar *s, guint len);
+guint rspamd_html_decode_entitles_inplace (gchar *s, gsize len);
 
 GByteArray* rspamd_html_process_part (rspamd_mempool_t *pool,
                struct html_content *hc,