source.dussan.org Git - rspamd.git/commitdiff
Decode entities when normalizing HTML parts.
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Wed, 19 Nov 2014 14:19:50 +0000 (14:19 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Wed, 19 Nov 2014 14:19:50 +0000 (14:19 +0000)
src/libmime/message.c

index 382567f774b99fc16b9e6195610666f920ff7a23..164a1bd9bd4e45ff8bafe6fca440883fbd2d4197 100644 (file)
@@ -42,12 +42,13 @@ strip_html_tags (struct rspamd_task *task,
        GByteArray * src,
        gint *stateptr)
 {
-       uint8_t *p, *rp, *tbegin = NULL, *end, c, lc;
+       uint8_t *p, *rp, *tbegin = NULL, *end, c, lc, *estart;
        gint br, i = 0, depth = 0, in_q = 0;
        gint state = 0;
+       guint dlen;
        GByteArray *buf;
        GNode *level_ptr = NULL;
-       gboolean erase = FALSE;
+       gboolean erase = FALSE, html_decode = FALSE;
 
        if (stateptr)
                state = *stateptr;
@@ -204,6 +205,25 @@ unbreak_tag:
                        }
                        break;
 
+               case '&':
+                       /* Decode entitle */
+                       html_decode = TRUE;
+                       estart = rp;
+                       goto reg_char;
+                       break;
+
+               case ';':
+                       if (html_decode) {
+                               html_decode = FALSE;
+                               *rp = ';';
+                               if (rp - estart > 0) {
+                                       dlen = rp - estart + 1;
+                                       decode_entitles (estart, &dlen);
+                                       rp = estart + dlen;
+                               }
+                       }
+                       break;
+
                case '?':
 
                        if (state == 1 && *(p - 1) == '<') {
@@ -211,7 +231,6 @@ unbreak_tag:
                                state = 2;
                                break;
                        }
-
                case 'E':
                case 'e':
                        /* !DOCTYPE exception */
@@ -226,7 +245,6 @@ unbreak_tag:
                                break;
                        }
                /* fall-through */
-
                case 'l':
 
                        /* swm: If we encounter '<?xml' then we shouldn't be in