]> source.dussan.org Git - rspamd.git/commitdiff
[CritFix] Fix decoding of UTF HTML entities
authorVsevolod Stakhov <vsevolod@highsecure.ru>
Mon, 11 Jul 2016 14:20:17 +0000 (15:20 +0100)
committerVsevolod Stakhov <vsevolod@highsecure.ru>
Mon, 11 Jul 2016 14:20:17 +0000 (15:20 +0100)
src/libserver/html.c

index 33ee10d8306066bf0e218aab687520fd1ee152de..7fbe585233b55597619643136d0f717633108be5 100644 (file)
@@ -612,7 +612,7 @@ rspamd_html_decode_entitles_inplace (gchar *s, guint len)
                        }
                        break;
                case 1:
-                       if (*h == ';') {
+                       if (*h == ';' && h > e) {
                                /* Determine base */
                                /* First find in entities table */
 
@@ -621,14 +621,18 @@ rspamd_html_decode_entitles_inplace (gchar *s, guint len)
                                if (*(e + 1) != '#' &&
                                        (found =
                                        bsearch (&key, entities_defs, G_N_ELEMENTS (entities_defs),
-                                       sizeof (entity), entity_cmp)) != NULL) {
+                                                       sizeof (entity), entity_cmp)) != NULL) {
                                        if (found->replacement) {
                                                rep_len = strlen (found->replacement);
                                                memcpy (t, found->replacement, rep_len);
                                                t += rep_len;
                                        }
+                                       else {
+                                               memcpy (t, e, h - e);
+                                               t += h - e;
+                                       }
                                }
-                               else {
+                               else if (e + 2 < h) {
                                        if (*(e + 2) == 'x' || *(e + 2) == 'X') {
                                                base = 16;
                                        }
@@ -646,7 +650,8 @@ rspamd_html_decode_entitles_inplace (gchar *s, guint len)
                                        }
                                        if (end_ptr != NULL && *end_ptr != '\0') {
                                                /* Skip undecoded */
-                                               t = h;
+                                               memcpy (t, e, h - e);
+                                               t += h - e;
                                        }
                                        else {
                                                /* Search for a replacement */
@@ -662,12 +667,24 @@ rspamd_html_decode_entitles_inplace (gchar *s, guint len)
                                                                t += rep_len;
                                                        }
                                                }
+                                               else {
+                                                       /* Unicode point */
+                                                       if (g_unichar_isgraph (val)) {
+                                                               t += g_unichar_to_utf8 (val, t);
+                                                       }
+                                                       else {
+                                                               memcpy (t, e, h - e);
+                                                               t += h - e;
+                                                       }
+                                               }
                                        }
                                }
+
                                *h = ';';
                                state = 0;
                        }
                        h++;
+
                        break;
                }
        }