source.dussan.org Git - rspamd.git/commitdiff
[Fix] Deal with 8bit characters in email addresses
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 25 Apr 2017 12:21:51 +0000 (13:21 +0100)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Mon, 1 May 2017 13:03:04 +0000 (14:03 +0100)
src/libmime/email_addr.c
src/libmime/email_addr.h
src/libmime/mime_encoding.c
src/libutil/str_util.h
src/libutil/util.h
src/ragel/smtp_addr_parser.rl

index c857794c8da5fb45a6d6efc4fecec46fca3030ab..59e99f531934369f4afd7009aeff1c10ecd6ba15 100644 (file)
@@ -169,7 +169,8 @@ static gboolean
 rspamd_email_address_parse_heuristic (const char *data, size_t len,
                struct rspamd_email_address *addr)
 {
-       const gchar *p = data;
+       const gchar *p = data, *at = NULL, *end = data + len;
+       gboolean ret = FALSE;
 
        memset (addr, 0, sizeof (*addr));
 
@@ -179,19 +180,36 @@ rspamd_email_address_parse_heuristic (const char *data, size_t len,
                addr->addr = p + 1;
                addr->raw = p;
                addr->raw_len = len;
+               ret = TRUE;
 
-               return TRUE;
+               p = p + 1;
+               len = addr->addr_len;
+               end = p + len;
        }
        else if (len > 0) {
                addr->addr = p;
                addr->addr_len = len;
                addr->raw = p;
                addr->raw_len = len;
+               ret = TRUE;
+       }
+
+       if (ret) {
+               at = memchr (p, '@', len);
+
+               if (at != NULL && at + 1 < end) {
+                       addr->domain = at + 1;
+                       addr->domain_len = end - (at + 1);
+                       addr->user = p;
+                       addr->user_len = at - p;
+               }
 
-               return TRUE;
+               if (rspamd_str_has_8bit (p, len)) {
+                       addr->flags |= RSPAMD_EMAIL_ADDR_HAS_8BIT;
+               }
        }
 
-       return FALSE;
+       return ret;
 }
 
 GPtrArray *
@@ -329,6 +347,9 @@ rspamd_email_address_from_mime (rspamd_mempool_t *pool,
                                state = skip_spaces;
                                next_state = parse_name;
                        }
+                       else if (*p == '@') {
+                               seen_at = TRUE;
+                       }
                        p ++;
                        break;
                case skip_spaces:
index f6a16d6823ccc94a33bebcb1abe6566643624576..b4f192ee7453d9cb9f9793fc2331d44fe84ccfa8 100644 (file)
@@ -32,6 +32,7 @@ enum rspamd_email_address_flags {
        RSPAMD_EMAIL_ADDR_HAS_BACKSLASH = (1 << 6),
        RSPAMD_EMAIL_ADDR_ADDR_ALLOCATED = (1 << 7),
        RSPAMD_EMAIL_ADDR_USER_ALLOCATED = (1 << 8),
+       RSPAMD_EMAIL_ADDR_HAS_8BIT = (1 << 9),
 };
 
 /*
index 11f764e11771a9468bd9b8600b1358e5487a65a6..97eede7261cd812d6f446af570c37e96241b1ba5 100644 (file)
@@ -455,36 +455,6 @@ rspamd_mime_charset_utf_check (rspamd_ftok_t *charset,
        return FALSE;
 }
 
-/* https://graphics.stanford.edu/~seander/bithacks.html#HasMoreInWord */
-#define hasmore(x,n) (((x)+~0UL/255*(127-(n))|(x))&~0UL/255*128)
-
-static inline gboolean
-rspamd_mime_has_8bit (const guchar *beg, gsize len)
-{
-       unsigned long *w;
-       gsize i, leftover = len % sizeof (*w);
-
-       w = (unsigned long *)beg;
-
-       for (i = 0; i < len / sizeof (*w); i ++) {
-               if (hasmore (*w, 127)) {
-                       return TRUE;
-               }
-
-               w ++;
-       }
-
-       beg = (const guchar *)w;
-
-       for (i = 0; i < leftover; i ++) {
-               if (beg[i] > 127) {
-                       return TRUE;
-               }
-       }
-
-       return FALSE;
-}
-
 GByteArray *
 rspamd_mime_text_part_maybe_convert (struct rspamd_task *task,
                struct rspamd_mime_text_part *text_part)
@@ -498,7 +468,7 @@ rspamd_mime_text_part_maybe_convert (struct rspamd_task *task,
        rspamd_ftok_t charset_tok;
        struct rspamd_mime_part *part = text_part->mime_part;
 
-       if (rspamd_mime_has_8bit (text_part->raw.begin, text_part->raw.len)) {
+       if (rspamd_str_has_8bit (text_part->raw.begin, text_part->raw.len)) {
                text_part->flags |= RSPAMD_MIME_TEXT_PART_FLAG_8BIT;
        }
 
@@ -508,7 +478,7 @@ rspamd_mime_text_part_maybe_convert (struct rspamd_task *task,
        memcpy (part_content->data, text_part->parsed.begin, text_part->parsed.len);
        part_content->len = text_part->parsed.len;
 
-       if (rspamd_mime_has_8bit (text_part->parsed.begin, text_part->parsed.len)) {
+       if (rspamd_str_has_8bit (text_part->parsed.begin, text_part->parsed.len)) {
                text_part->flags |= RSPAMD_MIME_TEXT_PART_FLAG_8BIT_ENCODED;
        }
 
index 2fec4298793ecf6460ae45b49002d081364c93c6..473b5cbbb2913dcac9732392f8a3aba2a5614012 100644 (file)
@@ -339,4 +339,35 @@ const void *rspamd_memrchr (const void *m, gint c, gsize len);
  */
 gsize rspamd_memcspn (const gchar *s, const gchar *e, gsize len);
 
+
+/* https://graphics.stanford.edu/~seander/bithacks.html#HasMoreInWord */
+#define rspamd_str_hasmore(x,n) ((((x)+~0UL/255*(127-(n)))|(x))&~0UL/255*128)
+
+static inline gboolean
+rspamd_str_has_8bit (const guchar *beg, gsize len)
+{
+       unsigned long *w;
+       gsize i, leftover = len % sizeof (*w);
+
+       w = (unsigned long *)beg;
+
+       for (i = 0; i < len / sizeof (*w); i ++) {
+               if (rspamd_str_hasmore (*w, 127)) {
+                       return TRUE;
+               }
+
+               w ++;
+       }
+
+       beg = (const guchar *)w;
+
+       for (i = 0; i < leftover; i ++) {
+               if (beg[i] > 127) {
+                       return TRUE;
+               }
+       }
+
+       return FALSE;
+}
+
 #endif /* SRC_LIBUTIL_STR_UTIL_H_ */
index 48381ed92db88005adffcc1085ea9d3202b1eb32..605822fee25c6b6480d2fde2a88160d705980eb5 100644 (file)
@@ -517,4 +517,6 @@ gdouble rspamd_normalize_probability (gdouble x, gdouble bias);
 guint64 rspamd_tm_to_time (const struct tm *tm, glong tz);
 
 #define PTR_ARRAY_FOREACH(ar, i, cur) for ((i) = 0; (ar) != NULL && (i) < (ar)->len && (((cur) = g_ptr_array_index((ar), (i))) || 1); ++(i))
+
+
 #endif
index 7e8498966bcd6aceebb40571ba3afa637a5feea1..501ee82f49b46f7a2129ecb7e47c66a4fb2474bb 100644 (file)
@@ -54,7 +54,9 @@
   }
 
   action Valid_addr {
-    addr->flags |= RSPAMD_EMAIL_ADDR_VALID;
+    if (addr->addr_len > 0) {
+      addr->flags |= RSPAMD_EMAIL_ADDR_VALID;
+    }
   }
 
   action Addr_has_angle {