about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2017-04-25 13:21:51 +0100
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2017-04-25 13:21:51 +0100
commit a03b2c328b0c9cb332527adb584b6e3496de5e46 (patch)
tree a46ff1eabb22c94e76324805be0d38d6ae8efa72
parent d21fdd376f883510ae2bf5de7307eb59d84cb614 (diff)
download rspamd-a03b2c328b0c9cb332527adb584b6e3496de5e46.tar.gz
rspamd-a03b2c328b0c9cb332527adb584b6e3496de5e46.zip
[Fix] Deal with 8bit characters in email addresses
-rw-r--r-- src/libmime/email_addr.c 29
-rw-r--r-- src/libmime/email_addr.h 1
-rw-r--r-- src/libmime/mime_encoding.c 34
-rw-r--r-- src/libutil/str_util.h 31
-rw-r--r-- src/libutil/util.h 2
-rw-r--r-- src/ragel/smtp_addr_parser.rl 4
6 files changed, 64 insertions, 37 deletions
diff --git a/src/libmime/email_addr.c b/src/libmime/email_addr.c
index c857794c8..59e99f531 100644
--- a/src/libmime/email_addr.c
+++ b/src/libmime/email_addr.c
@@ -169,7 +169,8 @@ static gboolean
rspamd_email_address_parse_heuristic (const char *data, size_t len,
struct rspamd_email_address *addr)
{
- const gchar *p = data;
+ const gchar *p = data, *at = NULL, *end = data + len;
+ gboolean ret = FALSE;
memset (addr, 0, sizeof (*addr));
@@ -179,19 +180,36 @@ rspamd_email_address_parse_heuristic (const char *data, size_t len,
addr->addr = p + 1;
addr->raw = p;
addr->raw_len = len;
+ ret = TRUE;
- return TRUE;
+ p = p + 1;
+ len = addr->addr_len;
+ end = p + len;
}
else if (len > 0) {
addr->addr = p;
addr->addr_len = len;
addr->raw = p;
addr->raw_len = len;
+ ret = TRUE;
+ }
+
+ if (ret) {
+ at = memchr (p, '@', len);
+
+ if (at != NULL && at + 1 < end) {
+ addr->domain = at + 1;
+ addr->domain_len = end - (at + 1);
+ addr->user = p;
+ addr->user_len = at - p;
+ }
- return TRUE;
+ if (rspamd_str_has_8bit (p, len)) {
+ addr->flags |= RSPAMD_EMAIL_ADDR_HAS_8BIT;
+ }
}
- return FALSE;
+ return ret;
}
GPtrArray *
@@ -329,6 +347,9 @@ rspamd_email_address_from_mime (rspamd_mempool_t *pool,
state = skip_spaces;
next_state = parse_name;
}
+ else if (*p == '@') {
+ seen_at = TRUE;
+ }
p ++;
break;
case skip_spaces:
diff --git a/src/libmime/email_addr.h b/src/libmime/email_addr.h
index f6a16d682..b4f192ee7 100644
--- a/src/libmime/email_addr.h
+++ b/src/libmime/email_addr.h
@@ -32,6 +32,7 @@ enum rspamd_email_address_flags {
RSPAMD_EMAIL_ADDR_HAS_BACKSLASH = (1 << 6),
RSPAMD_EMAIL_ADDR_ADDR_ALLOCATED = (1 << 7),
RSPAMD_EMAIL_ADDR_USER_ALLOCATED = (1 << 8),
+ RSPAMD_EMAIL_ADDR_HAS_8BIT = (1 << 9),
};
/*
diff --git a/src/libmime/mime_encoding.c b/src/libmime/mime_encoding.c
index 11f764e11..97eede726 100644
--- a/src/libmime/mime_encoding.c
+++ b/src/libmime/mime_encoding.c
@@ -455,36 +455,6 @@ rspamd_mime_charset_utf_check (rspamd_ftok_t *charset,
return FALSE;
}
-/* https://graphics.stanford.edu/~seander/bithacks.html#HasMoreInWord */
-#define hasmore(x,n) (((x)+~0UL/255*(127-(n))|(x))&~0UL/255*128)
-
-static inline gboolean
-rspamd_mime_has_8bit (const guchar *beg, gsize len)
-{
- unsigned long *w;
- gsize i, leftover = len % sizeof (*w);
-
- w = (unsigned long *)beg;
-
- for (i = 0; i < len / sizeof (*w); i ++) {
- if (hasmore (*w, 127)) {
- return TRUE;
- }
-
- w ++;
- }
-
- beg = (const guchar *)w;
-
- for (i = 0; i < leftover; i ++) {
- if (beg[i] > 127) {
- return TRUE;
- }
- }
-
- return FALSE;
-}
-
GByteArray *
rspamd_mime_text_part_maybe_convert (struct rspamd_task *task,
struct rspamd_mime_text_part *text_part)
@@ -498,7 +468,7 @@ rspamd_mime_text_part_maybe_convert (struct rspamd_task *task,
rspamd_ftok_t charset_tok;
struct rspamd_mime_part *part = text_part->mime_part;
- if (rspamd_mime_has_8bit (text_part->raw.begin, text_part->raw.len)) {
+ if (rspamd_str_has_8bit (text_part->raw.begin, text_part->raw.len)) {
text_part->flags |= RSPAMD_MIME_TEXT_PART_FLAG_8BIT;
}
@@ -508,7 +478,7 @@ rspamd_mime_text_part_maybe_convert (struct rspamd_task *task,
memcpy (part_content->data, text_part->parsed.begin, text_part->parsed.len);
part_content->len = text_part->parsed.len;
- if (rspamd_mime_has_8bit (text_part->parsed.begin, text_part->parsed.len)) {
+ if (rspamd_str_has_8bit (text_part->parsed.begin, text_part->parsed.len)) {
text_part->flags |= RSPAMD_MIME_TEXT_PART_FLAG_8BIT_ENCODED;
}
diff --git a/src/libutil/str_util.h b/src/libutil/str_util.h
index 2fec42987..473b5cbbb 100644
--- a/src/libutil/str_util.h
+++ b/src/libutil/str_util.h
@@ -339,4 +339,35 @@ const void *rspamd_memrchr (const void *m, gint c, gsize len);
*/
gsize rspamd_memcspn (const gchar *s, const gchar *e, gsize len);
+
+/* https://graphics.stanford.edu/~seander/bithacks.html#HasMoreInWord */
+#define rspamd_str_hasmore(x,n) ((((x)+~0UL/255*(127-(n)))|(x))&~0UL/255*128)
+
+static inline gboolean
+rspamd_str_has_8bit (const guchar *beg, gsize len)
+{
+ unsigned long *w;
+ gsize i, leftover = len % sizeof (*w);
+
+ w = (unsigned long *)beg;
+
+ for (i = 0; i < len / sizeof (*w); i ++) {
+ if (rspamd_str_hasmore (*w, 127)) {
+ return TRUE;
+ }
+
+ w ++;
+ }
+
+ beg = (const guchar *)w;
+
+ for (i = 0; i < leftover; i ++) {
+ if (beg[i] > 127) {
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
+
#endif /* SRC_LIBUTIL_STR_UTIL_H_ */
diff --git a/src/libutil/util.h b/src/libutil/util.h
index 48381ed92..605822fee 100644
--- a/src/libutil/util.h
+++ b/src/libutil/util.h
@@ -517,4 +517,6 @@ gdouble rspamd_normalize_probability (gdouble x, gdouble bias);
guint64 rspamd_tm_to_time (const struct tm *tm, glong tz);
#define PTR_ARRAY_FOREACH(ar, i, cur) for ((i) = 0; (ar) != NULL && (i) < (ar)->len && (((cur) = g_ptr_array_index((ar), (i))) || 1); ++(i))
+
+
#endif
diff --git a/src/ragel/smtp_addr_parser.rl b/src/ragel/smtp_addr_parser.rl
index 7e8498966..501ee82f4 100644
--- a/src/ragel/smtp_addr_parser.rl
+++ b/src/ragel/smtp_addr_parser.rl
@@ -54,7 +54,9 @@
}
action Valid_addr {
- addr->flags |= RSPAMD_EMAIL_ADDR_VALID;
+ if (addr->addr_len > 0) {
+ addr->flags |= RSPAMD_EMAIL_ADDR_VALID;
+ }
}
action Addr_has_angle {