From a03b2c328b0c9cb332527adb584b6e3496de5e46 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Tue, 25 Apr 2017 13:21:51 +0100 Subject: [PATCH] [Fix] Deal with 8bit characters in email addresses --- src/libmime/email_addr.c | 29 +++++++++++++++++++++++++---- src/libmime/email_addr.h | 1 + src/libmime/mime_encoding.c | 34 ++-------------------------------- src/libutil/str_util.h | 31 +++++++++++++++++++++++++++++++ src/libutil/util.h | 2 ++ src/ragel/smtp_addr_parser.rl | 4 +++- 6 files changed, 64 insertions(+), 37 deletions(-) diff --git a/src/libmime/email_addr.c b/src/libmime/email_addr.c index c857794c8..59e99f531 100644 --- a/src/libmime/email_addr.c +++ b/src/libmime/email_addr.c @@ -169,7 +169,8 @@ static gboolean rspamd_email_address_parse_heuristic (const char *data, size_t len, struct rspamd_email_address *addr) { - const gchar *p = data; + const gchar *p = data, *at = NULL, *end = data + len; + gboolean ret = FALSE; memset (addr, 0, sizeof (*addr)); @@ -179,19 +180,36 @@ rspamd_email_address_parse_heuristic (const char *data, size_t len, addr->addr = p + 1; addr->raw = p; addr->raw_len = len; + ret = TRUE; - return TRUE; + p = p + 1; + len = addr->addr_len; + end = p + len; } else if (len > 0) { addr->addr = p; addr->addr_len = len; addr->raw = p; addr->raw_len = len; + ret = TRUE; + } + + if (ret) { + at = memchr (p, '@', len); + + if (at != NULL && at + 1 < end) { + addr->domain = at + 1; + addr->domain_len = end - (at + 1); + addr->user = p; + addr->user_len = at - p; + } - return TRUE; + if (rspamd_str_has_8bit (p, len)) { + addr->flags |= RSPAMD_EMAIL_ADDR_HAS_8BIT; + } } - return FALSE; + return ret; } GPtrArray * @@ -329,6 +347,9 @@ rspamd_email_address_from_mime (rspamd_mempool_t *pool, state = skip_spaces; next_state = parse_name; } + else if (*p == '@') { + seen_at = TRUE; + } p ++; break; case skip_spaces: diff --git a/src/libmime/email_addr.h b/src/libmime/email_addr.h index f6a16d682..b4f192ee7 100644 --- a/src/libmime/email_addr.h +++ 
b/src/libmime/email_addr.h @@ -32,6 +32,7 @@ enum rspamd_email_address_flags { RSPAMD_EMAIL_ADDR_HAS_BACKSLASH = (1 << 6), RSPAMD_EMAIL_ADDR_ADDR_ALLOCATED = (1 << 7), RSPAMD_EMAIL_ADDR_USER_ALLOCATED = (1 << 8), + RSPAMD_EMAIL_ADDR_HAS_8BIT = (1 << 9), }; /* diff --git a/src/libmime/mime_encoding.c b/src/libmime/mime_encoding.c index 11f764e11..97eede726 100644 --- a/src/libmime/mime_encoding.c +++ b/src/libmime/mime_encoding.c @@ -455,36 +455,6 @@ rspamd_mime_charset_utf_check (rspamd_ftok_t *charset, return FALSE; } -/* https://graphics.stanford.edu/~seander/bithacks.html#HasMoreInWord */ -#define hasmore(x,n) (((x)+~0UL/255*(127-(n))|(x))&~0UL/255*128) - -static inline gboolean -rspamd_mime_has_8bit (const guchar *beg, gsize len) -{ - unsigned long *w; - gsize i, leftover = len % sizeof (*w); - - w = (unsigned long *)beg; - - for (i = 0; i < len / sizeof (*w); i ++) { - if (hasmore (*w, 127)) { - return TRUE; - } - - w ++; - } - - beg = (const guchar *)w; - - for (i = 0; i < leftover; i ++) { - if (beg[i] > 127) { - return TRUE; - } - } - - return FALSE; -} - GByteArray * rspamd_mime_text_part_maybe_convert (struct rspamd_task *task, struct rspamd_mime_text_part *text_part) @@ -498,7 +468,7 @@ rspamd_mime_text_part_maybe_convert (struct rspamd_task *task, rspamd_ftok_t charset_tok; struct rspamd_mime_part *part = text_part->mime_part; - if (rspamd_mime_has_8bit (text_part->raw.begin, text_part->raw.len)) { + if (rspamd_str_has_8bit (text_part->raw.begin, text_part->raw.len)) { text_part->flags |= RSPAMD_MIME_TEXT_PART_FLAG_8BIT; } @@ -508,7 +478,7 @@ rspamd_mime_text_part_maybe_convert (struct rspamd_task *task, memcpy (part_content->data, text_part->parsed.begin, text_part->parsed.len); part_content->len = text_part->parsed.len; - if (rspamd_mime_has_8bit (text_part->parsed.begin, text_part->parsed.len)) { + if (rspamd_str_has_8bit (text_part->parsed.begin, text_part->parsed.len)) { text_part->flags |= RSPAMD_MIME_TEXT_PART_FLAG_8BIT_ENCODED; } diff --git 
a/src/libutil/str_util.h b/src/libutil/str_util.h
index 2fec42987..473b5cbbb 100644
--- a/src/libutil/str_util.h
+++ b/src/libutil/str_util.h
@@ -339,4 +339,51 @@ const void *rspamd_memrchr (const void *m, gint c, gsize len);
  */
 gsize rspamd_memcspn (const gchar *s, const gchar *e, gsize len);
 
+
+/* https://graphics.stanford.edu/~seander/bithacks.html#HasMoreInWord */
+#define rspamd_str_hasmore(x,n) ((((x)+~0UL/255*(127-(n)))|(x))&~0UL/255*128)
+
+/**
+ * Checks whether a buffer contains at least one byte greater than 127
+ * @param beg start of the buffer to scan
+ * @param len number of bytes to scan
+ * @return TRUE if any byte in [beg, beg + len) has its high bit set
+ */
+static inline gboolean
+rspamd_str_has_8bit (const guchar *beg, gsize len)
+{
+	const unsigned long *w;
+	gsize i, nwords;
+
+	/* Go byte by byte until `beg` is word aligned: callers pass arbitrary offsets */
+	while (len > 0 && ((gsize)beg % sizeof (*w)) != 0) {
+		if (*beg > 127) {
+			return TRUE;
+		}
+
+		beg ++;
+		len --;
+	}
+
+	w = (const unsigned long *)beg;
+	nwords = len / sizeof (*w);
+
+	for (i = 0; i < nwords; i ++) {
+		if (rspamd_str_hasmore (w[i], 127)) {
+			return TRUE;
+		}
+	}
+
+	beg = (const guchar *)(w + nwords);
+	len %= sizeof (*w);
+
+	for (i = 0; i < len; i ++) {
+		if (beg[i] > 127) {
+			return TRUE;
+		}
+	}
+
+	return FALSE;
+}
+
 #endif /* SRC_LIBUTIL_STR_UTIL_H_ */
diff --git a/src/libutil/util.h b/src/libutil/util.h
index 48381ed92..605822fee 100644
--- a/src/libutil/util.h
+++ b/src/libutil/util.h
@@ -517,4 +517,6 @@ gdouble rspamd_normalize_probability (gdouble x, gdouble bias);
 guint64 rspamd_tm_to_time (const struct tm *tm, glong tz);
 
 #define PTR_ARRAY_FOREACH(ar, i, cur) for ((i) = 0; (ar) != NULL && (i) < (ar)->len && (((cur) = g_ptr_array_index((ar), (i))) || 1); ++(i))
+
+
 #endif
diff --git a/src/ragel/smtp_addr_parser.rl b/src/ragel/smtp_addr_parser.rl
index 7e8498966..501ee82f4 100644
--- a/src/ragel/smtp_addr_parser.rl
+++ b/src/ragel/smtp_addr_parser.rl
@@ -54,7 +54,9 @@
   }
 
   action Valid_addr {
-    addr->flags |= RSPAMD_EMAIL_ADDR_VALID;
+    if (addr->addr_len > 0) {
+      addr->flags |= RSPAMD_EMAIL_ADDR_VALID;
+    }
   }
 
   action Addr_has_angle {
-- 
2.39.5