/*- * Copyright 2016 Vsevolod Stakhov * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include "config.h" #include "email_addr.h" #include "message.h" #include "printf.h" #include "smtp_parsers.h" static void rspamd_email_address_unescape(struct rspamd_email_address *addr) { const char *h, *end; char *t, *d; if (addr->user_len == 0) { return; } d = g_malloc(addr->user_len); t = d; h = addr->user; end = h + addr->user_len; while (h < end) { if (*h != '\\') { *t++ = *h; } h++; } addr->user = d; addr->user_len = t - d; addr->flags |= RSPAMD_EMAIL_ADDR_USER_ALLOCATED; } struct rspamd_email_address * rspamd_email_address_from_smtp(const char *str, unsigned int len) { struct rspamd_email_address addr, *ret; gsize nlen; if (str == NULL || len == 0) { return NULL; } rspamd_smtp_addr_parse(str, len, &addr); if (addr.flags & RSPAMD_EMAIL_ADDR_VALID) { ret = g_malloc(sizeof(*ret)); memcpy(ret, &addr, sizeof(addr)); if ((ret->flags & RSPAMD_EMAIL_ADDR_QUOTED) && ret->addr[0] == '"') { if (ret->flags & RSPAMD_EMAIL_ADDR_HAS_BACKSLASH) { /* We also need to unquote user */ rspamd_email_address_unescape(ret); } /* We need to unquote addr */ nlen = ret->domain_len + ret->user_len + 2; ret->addr = g_malloc(nlen + 1); ret->addr_len = rspamd_snprintf((char *) ret->addr, nlen, "%*s@%*s", (int) ret->user_len, ret->user, (int) ret->domain_len, ret->domain); ret->flags |= RSPAMD_EMAIL_ADDR_ADDR_ALLOCATED; } return ret; } return NULL; } void rspamd_email_address_free(struct rspamd_email_address *addr) { if (addr) { if (addr->flags & RSPAMD_EMAIL_ADDR_ADDR_ALLOCATED) { g_free((void *) addr->addr); } if (addr->flags & RSPAMD_EMAIL_ADDR_USER_ALLOCATED) { g_free((void *) addr->user); } g_free(addr); } } static inline void rspamd_email_address_add(rspamd_mempool_t *pool, GPtrArray *ar, struct rspamd_email_address *addr, GString *name) { struct rspamd_email_address *elt; unsigned int nlen; elt = g_malloc0(sizeof(*elt)); rspamd_mempool_notify_alloc(pool, sizeof(*elt)); if (addr != NULL) { memcpy(elt, addr, sizeof(*addr)); } else { elt->addr = ""; elt->domain = ""; elt->raw = "<>"; elt->raw_len = 2; elt->user = ""; elt->flags |= RSPAMD_EMAIL_ADDR_EMPTY; } if ((elt->flags & RSPAMD_EMAIL_ADDR_QUOTED) && elt->addr[0] == '"') { if (elt->flags & RSPAMD_EMAIL_ADDR_HAS_BACKSLASH) { /* We also need to unquote user */ rspamd_email_address_unescape(elt); } /* We need to unquote addr */ nlen = elt->domain_len + elt->user_len + 2; elt->addr = g_malloc(nlen + 1); rspamd_mempool_notify_alloc(pool, nlen + 1); elt->addr_len = rspamd_snprintf((char *) elt->addr, nlen, "%*s@%*s", (int) elt->user_len, elt->user, (int) elt->domain_len, elt->domain); elt->flags |= RSPAMD_EMAIL_ADDR_ADDR_ALLOCATED; } if (name->len > 0) { rspamd_gstring_strip(name, " \t\v"); elt->name = rspamd_mime_header_decode(pool, name->str, name->len, NULL); } rspamd_mempool_notify_alloc(pool, name->len); g_ptr_array_add(ar, elt); } /* * Tries to parse an email address that doesn't conform RFC */ static gboolean rspamd_email_address_parse_heuristic(const char *data, size_t len, struct rspamd_email_address *addr) { const char *p = data, *at = NULL, *end = data + len; gboolean ret = FALSE; memset(addr, 0, sizeof(*addr)); if (*p == '<' && len > 1) { /* Angled address */ addr->addr_len = rspamd_memcspn(p + 1, ">", len - 1); addr->addr = p + 1; addr->raw = p; addr->raw_len = len; ret = TRUE; p = p + 1; len = addr->addr_len; end = p + len; } else if (len > 0) { addr->addr = p; addr->addr_len = len; addr->raw = p; addr->raw_len = len; ret = TRUE; } if (ret) { at = rspamd_memrchr(p, '@', len); if (at != NULL && at + 1 < end) { addr->domain = at + 1; addr->domain_len = end - (at + 1); addr->user = p; addr->user_len = at - p; } if (rspamd_str_has_8bit(p, len)) { addr->flags |= RSPAMD_EMAIL_ADDR_HAS_8BIT; } } return ret; } static inline int rspamd_email_address_check_and_add(const char *start, gsize len, GPtrArray *res, rspamd_mempool_t *pool, GString *ns, int max_elements) { struct rspamd_email_address addr; g_assert(res != NULL); if (max_elements > 0 && res->len >= max_elements) { msg_info_pool_check("reached maximum number of elements %d when adding %v", max_elements, ns); return -1; } /* The whole email is likely address */ memset(&addr, 0, sizeof(addr)); rspamd_smtp_addr_parse(start, len, &addr); if (addr.flags & RSPAMD_EMAIL_ADDR_VALID) { rspamd_email_address_add(pool, res, &addr, ns); } else { /* Try heuristic */ if (rspamd_email_address_parse_heuristic(start, len, &addr)) { rspamd_email_address_add(pool, res, &addr, ns); return 1; } else { return 0; } } return 1; } GPtrArray * rspamd_email_address_from_mime(rspamd_mempool_t *pool, const char *hdr, unsigned int len, GPtrArray *src, int max_elements) { GPtrArray *res = src; gboolean seen_at = FALSE, seen_obrace = FALSE; const char *p = hdr, *end = hdr + len, *c = hdr, *t; GString *ns, *cpy; int obraces, ebraces; enum { parse_name = 0, parse_quoted, parse_addr, skip_spaces } state = parse_name, next_state = parse_name; if (res == NULL) { res = g_ptr_array_sized_new(2); rspamd_mempool_add_destructor(pool, rspamd_email_address_list_destroy, res); } else if (max_elements > 0 && res->len >= max_elements) { msg_info_pool_check("reached maximum number of elements %d", max_elements); return res; } ns = g_string_sized_new(len); cpy = g_string_sized_new(len); rspamd_mempool_add_destructor(pool, rspamd_gstring_free_hard, cpy); /* First, we need to remove all comments as they are terrible */ obraces = 0; ebraces = 0; while (p < end) { if (state == parse_name) { if (*p == '\\') { if (obraces == 0) { g_string_append_c(cpy, *p); } p++; } else { if (*p == '"') { state = parse_quoted; } else if (*p == '(') { obraces++; /* To avoid ) itself being copied */ } else if (*p == ')') { ebraces++; p++; } if (obraces == ebraces) { obraces = 0; ebraces = 0; } } if (p < end && obraces == 0) { g_string_append_c(cpy, *p); } } else { /* Quoted elt */ if (*p == '\\') { g_string_append_c(cpy, *p); p++; } else { if (*p == '"') { state = parse_name; } } if (p < end) { g_string_append_c(cpy, *p); } } p++; } state = parse_name; p = cpy->str; c = p; end = p + cpy->len; while (p < end) { switch (state) { case parse_name: if (*p == '"') { /* We need to strip last spaces and update `ns` */ if (p > c) { unsigned int nspaces = 0; t = p - 1; while (t > c && g_ascii_isspace(*t)) { t--; nspaces++; } g_string_append_len(ns, c, t - c + 1); if (nspaces > 0) { g_string_append_c(ns, ' '); } } state = parse_quoted; c = p + 1; } else if (*p == '<') { if (p > c) { t = p - 1; while (t > c && g_ascii_isspace(*t)) { t--; } g_string_append_len(ns, c, t - c + 1); } c = p; state = parse_addr; } else if (*p == ',') { if (p > c && seen_at) { /* * Last token must be the address: * e.g. Some name name@domain.com */ t = p - 1; while (t > c && g_ascii_isspace(*t)) { t--; } int check = rspamd_email_address_check_and_add(c, t - c + 1, res, pool, ns, max_elements); if (check == 0 && res->len == 0) { /* Insert fake address */ rspamd_email_address_add(pool, res, NULL, ns); } else if (check != 1) { goto end; } /* Cleanup for the next use */ g_string_set_size(ns, 0); seen_at = FALSE; } state = skip_spaces; next_state = parse_name; } else if (*p == '@') { seen_at = TRUE; } p++; break; case parse_quoted: if (*p == '\\') { if (p > c) { g_string_append_len(ns, c, p - c); } p++; c = p; } else if (*p == '"') { if (p > c) { g_string_append_len(ns, c, p - c); } if (p + 1 < end && g_ascii_isspace(p[1])) { g_string_append_c(ns, ' '); } state = skip_spaces; next_state = parse_name; } else if (*p == '@' && seen_obrace) { seen_at = TRUE; } else if (*p == '<') { seen_obrace = TRUE; } p++; break; case parse_addr: if (*p == '>') { int check = rspamd_email_address_check_and_add(c, p - c + 1, res, pool, ns, max_elements); if (check == 0 && res->len == 0) { /* Insert a fake address */ rspamd_email_address_add(pool, res, NULL, ns); } else if (check != 1) { goto end; } /* Cleanup for the next use */ g_string_set_size(ns, 0); seen_at = FALSE; state = skip_spaces; next_state = parse_name; } else if (*p == '@') { seen_at = TRUE; } p++; break; case skip_spaces: if (!g_ascii_isspace(*p)) { c = p; state = next_state; } else { p++; } break; } } /* Handle leftover */ switch (state) { case parse_name: /* Assume the whole header as name (bad thing) */ if (p > c) { while (p > c && g_ascii_isspace(*p)) { p--; } if (p > c) { if (seen_at) { /* The whole email is likely address */ int check = rspamd_email_address_check_and_add(c, p - c, res, pool, ns, max_elements); if (check == 0 && res->len == 0) { /* Insert a fake address */ rspamd_email_address_add(pool, res, NULL, ns); } else if (check != 1) { goto end; } } else { /* No @ seen */ g_string_append_len(ns, c, p - c); if (res->len == 0) { rspamd_email_address_add(pool, res, NULL, ns); } } } else if (res->len == 0) { rspamd_email_address_add(pool, res, NULL, ns); } } break; case parse_addr: if (p > c) { if (rspamd_email_address_check_and_add(c, p - c, res, pool, ns, max_elements) == 0) { if (res->len == 0) { rspamd_email_address_add(pool, res, NULL, ns); } } } break; case parse_quoted: /* Unfinished quoted string or a comment */ /* If we have seen obrace + at, then we still can try to resolve address */ if (seen_at && seen_obrace) { p = rspamd_memrchr(cpy->str, '<', cpy->len); g_assert(p != NULL); if (rspamd_email_address_check_and_add(p, end - p, res, pool, ns, max_elements) == 0) { if (res->len == 0) { rspamd_email_address_add(pool, res, NULL, ns); } } } break; default: /* Do nothing */ break; } end: rspamd_mempool_notify_alloc(pool, cpy->len); g_string_free(ns, TRUE); return res; } void rspamd_email_address_list_destroy(gpointer ptr) { GPtrArray *ar = ptr; unsigned int i; struct rspamd_email_address *addr; PTR_ARRAY_FOREACH(ar, i, addr) { rspamd_email_address_free(addr); } g_ptr_array_free(ar, TRUE); }