@@ -1,5 +1,6 @@ | |||
# Librspamd mime | |||
SET(LIBRSPAMDMIMESRC | |||
${CMAKE_CURRENT_SOURCE_DIR}/received.cxx | |||
${CMAKE_CURRENT_SOURCE_DIR}/email_addr.c | |||
${CMAKE_CURRENT_SOURCE_DIR}/mime_expressions.c | |||
${CMAKE_CURRENT_SOURCE_DIR}/scan_result.c | |||
@@ -11,6 +12,7 @@ SET(LIBRSPAMDMIMESRC | |||
${CMAKE_CURRENT_SOURCE_DIR}/mime_parser.c | |||
${CMAKE_CURRENT_SOURCE_DIR}/mime_encoding.c | |||
${CMAKE_CURRENT_SOURCE_DIR}/lang_detection.c | |||
${CMAKE_CURRENT_SOURCE_DIR}/mime_string.cxx) | |||
${CMAKE_CURRENT_SOURCE_DIR}/mime_string.cxx | |||
) | |||
SET(RSPAMD_MIME ${LIBRSPAMDMIMESRC} PARENT_SCOPE) |
@@ -58,7 +58,6 @@ struct rspamd_email_address { | |||
guint flags; | |||
}; | |||
struct rspamd_received_header; | |||
struct rspamd_task; | |||
/** |
@@ -174,7 +174,7 @@ struct rspamd_message { | |||
GPtrArray *parts; /**< list of parsed parts */ | |||
GPtrArray *text_parts; /**< list of text parts */ | |||
struct rspamd_message_raw_headers_content raw_headers_content; | |||
struct rspamd_received_header *received; /**< list of received headers */ | |||
void *received_headers; /**< list of received headers */ | |||
khash_t (rspamd_url_hash) *urls; | |||
struct rspamd_mime_headers_table *raw_headers; /**< list of raw headers */ | |||
struct rspamd_mime_header *headers_order; /**< order of raw headers */ |
@@ -17,9 +17,9 @@ | |||
#include "mime_headers.h" | |||
#include "smtp_parsers.h" | |||
#include "mime_encoding.h" | |||
#include "received.h" | |||
#include "contrib/uthash/utlist.h" | |||
#include "libserver/mempool_vars_internal.h" | |||
#include "libserver/url.h" | |||
#include "libserver/cfg_file.h" | |||
#include "libutil/util.h" | |||
#include <unicode/utf8.h> | |||
@@ -33,9 +33,6 @@ struct rspamd_mime_headers_table { | |||
ref_entry_t ref; | |||
}; | |||
/*
 * Address parsing flags used for IP addresses found in Received headers:
 * the union of RSPAMD_INET_ADDRESS_PARSE_REMOTE and
 * RSPAMD_INET_ADDRESS_PARSE_NO_UNIX.
 */
#define RSPAMD_INET_ADDRESS_PARSE_RECEIVED \
	(RSPAMD_INET_ADDRESS_PARSE_REMOTE|RSPAMD_INET_ADDRESS_PARSE_NO_UNIX)
static void | |||
rspamd_mime_header_check_special (struct rspamd_task *task, | |||
struct rspamd_mime_header *rh) | |||
@@ -913,794 +910,6 @@ rspamd_mime_message_id_generate (const gchar *fqdn) | |||
return g_string_free (out, FALSE); | |||
} | |||
/* Kinds of clauses that the Received header splitter distinguishes. */
enum rspamd_received_part_type {
	RSPAMD_RECEIVED_PART_FROM,    /* "from" clause */
	RSPAMD_RECEIVED_PART_BY,      /* "by" clause */
	RSPAMD_RECEIVED_PART_FOR,     /* "for" clause */
	RSPAMD_RECEIVED_PART_WITH,    /* "with" clause */
	RSPAMD_RECEIVED_PART_ID,      /* "id" clause */
	RSPAMD_RECEIVED_PART_UNKNOWN, /* anything else; its data is not stored */
};
struct rspamd_received_comment { | |||
gchar *data; | |||
gsize dlen; | |||
struct rspamd_received_comment *prev; | |||
}; | |||
struct rspamd_received_part { | |||
enum rspamd_received_part_type type; | |||
gchar *data; | |||
gsize dlen; | |||
struct rspamd_received_comment *tail_comment; | |||
struct rspamd_received_comment *head_comment; | |||
struct rspamd_received_part *prev, *next; | |||
}; | |||
static void | |||
rspamd_smtp_received_part_set_or_append (struct rspamd_task *task, | |||
const gchar *begin, | |||
gsize len, | |||
gchar **dest, | |||
gsize *destlen) | |||
{ | |||
if (len == 0) { | |||
return; | |||
} | |||
if (*dest) { | |||
/* Append */ | |||
gsize total_len = *destlen + len; | |||
gchar *new_dest; | |||
new_dest = rspamd_mempool_alloc (task->task_pool, total_len); | |||
memcpy (new_dest, *dest, *destlen); | |||
memcpy (new_dest + *destlen, begin, len); | |||
rspamd_str_lc (new_dest + *destlen, len); | |||
*dest = new_dest; | |||
*destlen = total_len; | |||
} | |||
else { | |||
/* Set */ | |||
*dest = rspamd_mempool_alloc (task->task_pool, len); | |||
memcpy (*dest, begin, len); | |||
rspamd_str_lc (*dest, len); | |||
*dest = (gchar *)rspamd_string_len_strip (*dest, &len, " \t"); | |||
*destlen = len; | |||
} | |||
} | |||
static struct rspamd_received_part * | |||
rspamd_smtp_received_process_part (struct rspamd_task *task, | |||
const char *data, | |||
size_t len, | |||
enum rspamd_received_part_type type, | |||
goffset *last) | |||
{ | |||
struct rspamd_received_part *npart; | |||
const guchar *p, *c, *end; | |||
guint obraces = 0, ebraces = 0; | |||
gboolean seen_tcpinfo = FALSE; | |||
enum _parse_state { | |||
skip_spaces, | |||
in_comment, | |||
read_data, | |||
read_tcpinfo, | |||
all_done | |||
} state, next_state; | |||
npart = rspamd_mempool_alloc0 (task->task_pool, sizeof (*npart)); | |||
npart->type = type; | |||
/* In this function, we just process comments and data separately */ | |||
p = data; | |||
end = data + len; | |||
c = data; | |||
state = skip_spaces; | |||
next_state = read_data; | |||
while (p < end) { | |||
switch (state) { | |||
case skip_spaces: | |||
if (!g_ascii_isspace (*p)) { | |||
c = p; | |||
state = next_state; | |||
} | |||
else { | |||
p ++; | |||
} | |||
break; | |||
case in_comment: | |||
if (*p == '(') { | |||
obraces ++; | |||
} | |||
else if (*p == ')') { | |||
ebraces ++; | |||
if (ebraces >= obraces) { | |||
if (type != RSPAMD_RECEIVED_PART_UNKNOWN) { | |||
if (p > c) { | |||
struct rspamd_received_comment *comment; | |||
comment = rspamd_mempool_alloc0 (task->task_pool, | |||
sizeof (*comment)); | |||
rspamd_smtp_received_part_set_or_append (task, | |||
c, p - c, | |||
&comment->data, &comment->dlen); | |||
if (!npart->head_comment) { | |||
comment->prev = NULL; | |||
npart->head_comment = comment; | |||
npart->tail_comment = comment; | |||
} | |||
else { | |||
comment->prev = npart->tail_comment; | |||
npart->tail_comment = comment; | |||
} | |||
} | |||
} | |||
p ++; | |||
c = p; | |||
state = skip_spaces; | |||
next_state = read_data; | |||
continue; | |||
} | |||
} | |||
p ++; | |||
break; | |||
case read_data: | |||
if (*p == '(') { | |||
if (p > c) { | |||
if (type != RSPAMD_RECEIVED_PART_UNKNOWN) { | |||
rspamd_smtp_received_part_set_or_append (task, | |||
c, p - c, | |||
&npart->data, &npart->dlen); | |||
} | |||
} | |||
state = in_comment; | |||
obraces = 1; | |||
ebraces = 0; | |||
p ++; | |||
c = p; | |||
} | |||
else if (g_ascii_isspace (*p)) { | |||
if (p > c) { | |||
if (type != RSPAMD_RECEIVED_PART_UNKNOWN) { | |||
rspamd_smtp_received_part_set_or_append (task, | |||
c, p - c, | |||
&npart->data, &npart->dlen); | |||
} | |||
} | |||
state = skip_spaces; | |||
next_state = read_data; | |||
c = p; | |||
} | |||
else if (*p == ';') { | |||
/* It is actually delimiter of date part if not in the comments */ | |||
if (p > c) { | |||
if (type != RSPAMD_RECEIVED_PART_UNKNOWN) { | |||
rspamd_smtp_received_part_set_or_append (task, | |||
c, p - c, | |||
&npart->data, &npart->dlen); | |||
} | |||
} | |||
state = all_done; | |||
continue; | |||
} | |||
else if (npart->dlen > 0) { | |||
/* We have already received data and find something with no ( */ | |||
if (!seen_tcpinfo && type == RSPAMD_RECEIVED_PART_FROM) { | |||
/* Check if we have something special here, such as TCPinfo */ | |||
if (*c == '[') { | |||
state = read_tcpinfo; | |||
p ++; | |||
} | |||
else { | |||
state = all_done; | |||
continue; | |||
} | |||
} | |||
else { | |||
state = all_done; | |||
continue; | |||
} | |||
} | |||
else { | |||
p ++; | |||
} | |||
break; | |||
case read_tcpinfo: | |||
if (*p == ']') { | |||
rspamd_smtp_received_part_set_or_append (task, | |||
c, p - c + 1, | |||
&npart->data, &npart->dlen); | |||
seen_tcpinfo = TRUE; | |||
state = skip_spaces; | |||
next_state = read_data; | |||
c = p; | |||
} | |||
p ++; | |||
break; | |||
case all_done: | |||
if (p > (const guchar *)data) { | |||
*last = p - (const guchar *) data; | |||
return npart; | |||
} | |||
else { | |||
/* Empty element */ | |||
return NULL; | |||
} | |||
break; | |||
} | |||
} | |||
/* Leftover */ | |||
switch (state) { | |||
case read_data: | |||
if (p > c) { | |||
if (type != RSPAMD_RECEIVED_PART_UNKNOWN) { | |||
rspamd_smtp_received_part_set_or_append (task, | |||
c, p - c, | |||
&npart->data, &npart->dlen); | |||
} | |||
*last = p - (const guchar *)data; | |||
return npart; | |||
} | |||
break; | |||
case skip_spaces: | |||
if (p > (const guchar *)data) { | |||
*last = p - (const guchar *) data; | |||
return npart; | |||
} | |||
default: | |||
break; | |||
} | |||
return NULL; | |||
} | |||
static struct rspamd_received_part * | |||
rspamd_smtp_received_spill (struct rspamd_task *task, | |||
const char *data, | |||
size_t len, | |||
goffset *date_pos) | |||
{ | |||
const guchar *p, *end; | |||
struct rspamd_received_part *cur_part, *head = NULL; | |||
goffset pos = 0; | |||
p = data; | |||
end = data + len; | |||
while (p < end && g_ascii_isspace (*p)) { | |||
p ++; | |||
} | |||
len = end - p; | |||
/* Ignore all received but those started from from part */ | |||
if (len <= 4 || (lc_map[p[0]] != 'f' && | |||
lc_map[p[1]] != 'r' && | |||
lc_map[p[2]] != 'o' && | |||
lc_map[p[3]] != 'm')) { | |||
return NULL; | |||
} | |||
p += sizeof ("from") - 1; | |||
/* We can now store from part */ | |||
cur_part = rspamd_smtp_received_process_part (task, p, end - p, | |||
RSPAMD_RECEIVED_PART_FROM, &pos); | |||
if (!cur_part) { | |||
return NULL; | |||
} | |||
g_assert (pos != 0); | |||
p += pos; | |||
len = end > p ? end - p : 0; | |||
DL_APPEND (head, cur_part); | |||
if (len > 2 && (lc_map[p[0]] == 'b' && | |||
lc_map[p[1]] == 'y')) { | |||
p += sizeof ("by") - 1; | |||
cur_part = rspamd_smtp_received_process_part (task, p, end - p, | |||
RSPAMD_RECEIVED_PART_BY, &pos); | |||
if (!cur_part) { | |||
return NULL; | |||
} | |||
g_assert (pos != 0); | |||
p += pos; | |||
len = end > p ? end - p : 0; | |||
DL_APPEND (head, cur_part); | |||
} | |||
while (p < end) { | |||
if (*p == ';') { | |||
/* We are at the date separator, stop here */ | |||
*date_pos = p - (const guchar *)data + 1; | |||
break; | |||
} | |||
else { | |||
if (len > sizeof ("with") && (lc_map[p[0]] == 'w' && | |||
lc_map[p[1]] == 'i' && | |||
lc_map[p[2]] == 't' && | |||
lc_map[p[3]] == 'h')) { | |||
p += sizeof ("with") - 1; | |||
cur_part = rspamd_smtp_received_process_part (task, p, end - p, | |||
RSPAMD_RECEIVED_PART_WITH, &pos); | |||
} | |||
else if (len > sizeof ("for") && (lc_map[p[0]] == 'f' && | |||
lc_map[p[1]] == 'o' && | |||
lc_map[p[2]] == 'r')) { | |||
p += sizeof ("for") - 1; | |||
cur_part = rspamd_smtp_received_process_part (task, p, end - p, | |||
RSPAMD_RECEIVED_PART_FOR, &pos); | |||
} | |||
else if (len > sizeof ("id") && (lc_map[p[0]] == 'i' && | |||
lc_map[p[1]] == 'd')) { | |||
p += sizeof ("id") - 1; | |||
cur_part = rspamd_smtp_received_process_part (task, p, end - p, | |||
RSPAMD_RECEIVED_PART_ID, &pos); | |||
} | |||
else { | |||
while (p < end) { | |||
if (!(g_ascii_isspace (*p) || *p == '(' || *p == ';')) { | |||
p ++; | |||
} | |||
else { | |||
break; | |||
} | |||
} | |||
if (p == end) { | |||
return NULL; | |||
} | |||
else if (*p == ';') { | |||
*date_pos = p - (const guchar *)data + 1; | |||
break; | |||
} | |||
else { | |||
cur_part = rspamd_smtp_received_process_part (task, p, end - p, | |||
RSPAMD_RECEIVED_PART_UNKNOWN, &pos); | |||
} | |||
} | |||
if (!cur_part) { | |||
p ++; | |||
len = end > p ? end - p : 0; | |||
} | |||
else { | |||
g_assert (pos != 0); | |||
p += pos; | |||
len = end > p ? end - p : 0; | |||
DL_APPEND (head, cur_part); | |||
} | |||
} | |||
} | |||
return head; | |||
} | |||
static gboolean | |||
rspamd_smtp_received_process_rdns (struct rspamd_task *task, | |||
const gchar *begin, | |||
gsize len, | |||
const gchar **pdest) | |||
{ | |||
const gchar *p, *end; | |||
gsize hlen = 0; | |||
gboolean seen_dot = FALSE; | |||
p = begin; | |||
end = begin + len; | |||
if (len == 0) { | |||
return FALSE; | |||
} | |||
if (*p == '[' && *(end - 1) == ']' && len > 2) { | |||
/* We have enclosed ip address */ | |||
rspamd_inet_addr_t *addr = rspamd_parse_inet_address_pool (p + 1, | |||
(end - p) - 2, | |||
task->task_pool, | |||
RSPAMD_INET_ADDRESS_PARSE_RECEIVED); | |||
if (addr) { | |||
const gchar *addr_str; | |||
gchar *dest; | |||
if (rspamd_inet_address_get_port (addr) != 0) { | |||
addr_str = rspamd_inet_address_to_string_pretty (addr); | |||
} | |||
else { | |||
addr_str = rspamd_inet_address_to_string (addr); | |||
} | |||
dest = rspamd_mempool_strdup (task->task_pool, addr_str); | |||
*pdest = dest; | |||
return TRUE; | |||
} | |||
} | |||
while (p < end) { | |||
if (!g_ascii_isspace (*p) && rspamd_url_is_domain (*p)) { | |||
if (*p == '.') { | |||
seen_dot = TRUE; | |||
} | |||
hlen ++; | |||
} | |||
else { | |||
break; | |||
} | |||
p ++; | |||
} | |||
if (hlen > 0) { | |||
if (p == end) { | |||
/* All data looks like a hostname */ | |||
gchar *dest; | |||
dest = rspamd_mempool_alloc (task->task_pool, | |||
hlen + 1); | |||
rspamd_strlcpy (dest, begin, hlen + 1); | |||
*pdest = dest; | |||
return TRUE; | |||
} | |||
else if (seen_dot && (g_ascii_isspace (*p) || *p == '[' || *p == '(')) { | |||
gchar *dest; | |||
dest = rspamd_mempool_alloc (task->task_pool, | |||
hlen + 1); | |||
rspamd_strlcpy (dest, begin, hlen + 1); | |||
*pdest = dest; | |||
return TRUE; | |||
} | |||
} | |||
return FALSE; | |||
} | |||
static gboolean | |||
rspamd_smtp_received_process_host_tcpinfo (struct rspamd_task *task, | |||
struct rspamd_received_header *rh, | |||
const gchar *data, | |||
gsize len) | |||
{ | |||
rspamd_inet_addr_t *addr = NULL; | |||
gboolean ret = FALSE; | |||
if (data[0] == '[') { | |||
/* Likely Exim version */ | |||
const gchar *brace_pos = memchr (data, ']', len); | |||
if (brace_pos) { | |||
addr = rspamd_parse_inet_address_pool (data + 1, | |||
brace_pos - data - 1, | |||
task->task_pool, | |||
RSPAMD_INET_ADDRESS_PARSE_RECEIVED); | |||
if (addr) { | |||
rh->addr = addr; | |||
rh->real_ip = rspamd_mempool_strdup (task->task_pool, | |||
rspamd_inet_address_to_string (addr)); | |||
rh->from_ip = rh->real_ip; | |||
} | |||
} | |||
} | |||
else { | |||
if (g_ascii_isxdigit (data[0])) { | |||
/* Try to parse IP address */ | |||
addr = rspamd_parse_inet_address_pool (data, | |||
len, task->task_pool, RSPAMD_INET_ADDRESS_PARSE_RECEIVED); | |||
if (addr) { | |||
rh->addr = addr; | |||
rh->real_ip = rspamd_mempool_strdup (task->task_pool, | |||
rspamd_inet_address_to_string (addr)); | |||
rh->from_ip = rh->real_ip; | |||
} | |||
} | |||
if (!addr) { | |||
/* Try canonical Postfix version: rdns [ip] */ | |||
const gchar *obrace_pos = memchr (data, '[', len), | |||
*ebrace_pos, *dend; | |||
if (obrace_pos) { | |||
dend = data + len; | |||
ebrace_pos = memchr (obrace_pos, ']', dend - obrace_pos); | |||
if (ebrace_pos) { | |||
addr = rspamd_parse_inet_address_pool (obrace_pos + 1, | |||
ebrace_pos - obrace_pos - 1, | |||
task->task_pool, | |||
RSPAMD_INET_ADDRESS_PARSE_RECEIVED); | |||
if (addr) { | |||
rh->addr = addr; | |||
rh->real_ip = rspamd_mempool_strdup (task->task_pool, | |||
rspamd_inet_address_to_string (addr)); | |||
rh->from_ip = rh->real_ip; | |||
/* Process with rDNS */ | |||
if (rspamd_smtp_received_process_rdns (task, | |||
data, | |||
obrace_pos - data, | |||
&rh->real_hostname)) { | |||
ret = TRUE; | |||
} | |||
} | |||
} | |||
} | |||
else { | |||
/* Hostname or some crap, sigh... */ | |||
if (rspamd_smtp_received_process_rdns (task, | |||
data, | |||
len, | |||
&rh->real_hostname)) { | |||
ret = TRUE; | |||
} | |||
} | |||
} | |||
} | |||
return ret; | |||
} | |||
static void | |||
rspamd_smtp_received_process_from (struct rspamd_task *task, | |||
struct rspamd_received_part *rpart, | |||
struct rspamd_received_header *rh) | |||
{ | |||
if (rpart->dlen > 0) { | |||
/* We have seen multiple cases: | |||
* - [ip] (hostname/unknown [real_ip]) | |||
* - helo (hostname/unknown [real_ip]) | |||
* - [ip] | |||
* - hostname | |||
* - hostname ([ip]:port helo=xxx) | |||
* Maybe more... | |||
*/ | |||
gboolean seen_ip_in_data = FALSE; | |||
if (rpart->head_comment && rpart->head_comment->dlen > 0) { | |||
/* We can have info within comment as part of RFC */ | |||
rspamd_smtp_received_process_host_tcpinfo ( | |||
task, rh, | |||
rpart->head_comment->data, rpart->head_comment->dlen); | |||
} | |||
if (!rh->real_ip) { | |||
if (rpart->data[0] == '[') { | |||
/* No comment, just something that looks like SMTP IP */ | |||
const gchar *brace_pos = memchr (rpart->data, ']', rpart->dlen); | |||
rspamd_inet_addr_t *addr; | |||
if (brace_pos) { | |||
addr = rspamd_parse_inet_address_pool (rpart->data + 1, | |||
brace_pos - rpart->data - 1, | |||
task->task_pool, | |||
RSPAMD_INET_ADDRESS_PARSE_RECEIVED); | |||
if (addr) { | |||
seen_ip_in_data = TRUE; | |||
rh->addr = addr; | |||
rh->real_ip = rspamd_mempool_strdup (task->task_pool, | |||
rspamd_inet_address_to_string (addr)); | |||
rh->from_ip = rh->real_ip; | |||
} | |||
} | |||
} | |||
else if (g_ascii_isxdigit (rpart->data[0])) { | |||
/* Try to parse IP address */ | |||
rspamd_inet_addr_t *addr; | |||
addr = rspamd_parse_inet_address_pool (rpart->data, | |||
rpart->dlen, task->task_pool, | |||
RSPAMD_INET_ADDRESS_PARSE_RECEIVED); | |||
if (addr) { | |||
seen_ip_in_data = TRUE; | |||
rh->addr = addr; | |||
rh->real_ip = rspamd_mempool_strdup (task->task_pool, | |||
rspamd_inet_address_to_string (addr)); | |||
rh->from_ip = rh->real_ip; | |||
} | |||
} | |||
} | |||
if (!seen_ip_in_data) { | |||
if (rh->real_ip) { | |||
/* Get anounced hostname (usually helo) */ | |||
rspamd_smtp_received_process_rdns (task, | |||
rpart->data, | |||
rpart->dlen, | |||
&rh->from_hostname); | |||
} | |||
else { | |||
rspamd_smtp_received_process_host_tcpinfo (task, | |||
rh, rpart->data, rpart->dlen); | |||
} | |||
} | |||
} | |||
else { | |||
/* rpart->dlen = 0 */ | |||
if (rpart->head_comment && rpart->head_comment->dlen > 0) { | |||
rspamd_smtp_received_process_host_tcpinfo (task, | |||
rh, | |||
rpart->head_comment->data, | |||
rpart->head_comment->dlen); | |||
} | |||
} | |||
} | |||
int | |||
rspamd_smtp_received_parse (struct rspamd_task *task, | |||
const char *data, | |||
size_t len, | |||
struct rspamd_received_header *rh) | |||
{ | |||
goffset date_pos = -1; | |||
struct rspamd_received_part *head, *cur; | |||
rspamd_ftok_t t1, t2; | |||
head = rspamd_smtp_received_spill (task, data, len, &date_pos); | |||
if (head == NULL) { | |||
return -1; | |||
} | |||
rh->flags = RSPAMD_RECEIVED_UNKNOWN; | |||
DL_FOREACH (head, cur) { | |||
switch (cur->type) { | |||
case RSPAMD_RECEIVED_PART_FROM: | |||
rspamd_smtp_received_process_from (task, cur, rh); | |||
break; | |||
case RSPAMD_RECEIVED_PART_BY: | |||
rspamd_smtp_received_process_rdns (task, | |||
cur->data, | |||
cur->dlen, | |||
&rh->by_hostname); | |||
break; | |||
case RSPAMD_RECEIVED_PART_WITH: | |||
t1.begin = cur->data; | |||
t1.len = cur->dlen; | |||
if (t1.len > 0) { | |||
RSPAMD_FTOK_ASSIGN (&t2, "smtp"); | |||
if (rspamd_ftok_cmp (&t1, &t2) == 0) { | |||
rh->flags = RSPAMD_RECEIVED_SMTP; | |||
} | |||
RSPAMD_FTOK_ASSIGN (&t2, "esmtp"); | |||
if (rspamd_ftok_starts_with (&t1, &t2)) { | |||
/* | |||
* esmtp, esmtps, esmtpsa | |||
*/ | |||
if (t1.len == t2.len + 1) { | |||
if (t1.begin[t2.len] == 'a') { | |||
rh->flags = RSPAMD_RECEIVED_ESMTPA; | |||
rh->flags |= RSPAMD_RECEIVED_FLAG_AUTHENTICATED; | |||
} | |||
else if (t1.begin[t2.len] == 's') { | |||
rh->flags = RSPAMD_RECEIVED_ESMTPS; | |||
rh->flags |= RSPAMD_RECEIVED_FLAG_SSL; | |||
} | |||
continue; | |||
} | |||
else if (t1.len == t2.len + 2) { | |||
if (t1.begin[t2.len] == 's' && | |||
t1.begin[t2.len + 1] == 'a') { | |||
rh->flags = RSPAMD_RECEIVED_ESMTPSA; | |||
rh->flags |= RSPAMD_RECEIVED_FLAG_AUTHENTICATED; | |||
rh->flags |= RSPAMD_RECEIVED_FLAG_SSL; | |||
} | |||
continue; | |||
} | |||
else if (t1.len == t2.len) { | |||
rh->flags = RSPAMD_RECEIVED_ESMTP; | |||
continue; | |||
} | |||
} | |||
RSPAMD_FTOK_ASSIGN (&t2, "lmtp"); | |||
if (rspamd_ftok_cmp (&t1, &t2) == 0) { | |||
rh->flags = RSPAMD_RECEIVED_LMTP; | |||
continue; | |||
} | |||
RSPAMD_FTOK_ASSIGN (&t2, "imap"); | |||
if (rspamd_ftok_cmp (&t1, &t2) == 0) { | |||
rh->flags = RSPAMD_RECEIVED_IMAP; | |||
continue; | |||
} | |||
RSPAMD_FTOK_ASSIGN (&t2, "local"); | |||
if (rspamd_ftok_cmp (&t1, &t2) == 0) { | |||
rh->flags = RSPAMD_RECEIVED_LOCAL; | |||
continue; | |||
} | |||
RSPAMD_FTOK_ASSIGN (&t2, "http"); | |||
if (rspamd_ftok_starts_with (&t1, &t2)) { | |||
if (t1.len == t2.len + 1) { | |||
if (t1.begin[t2.len] == 's') { | |||
rh->flags = RSPAMD_RECEIVED_HTTP; | |||
rh->flags |= RSPAMD_RECEIVED_FLAG_SSL; | |||
} | |||
} | |||
else if (t1.len == t2.len) { | |||
rh->flags = RSPAMD_RECEIVED_HTTP; | |||
} | |||
continue; | |||
} | |||
} | |||
break; | |||
case RSPAMD_RECEIVED_PART_FOR: | |||
rh->for_addr = rspamd_email_address_from_smtp (cur->data, cur->dlen); | |||
if (rh->for_addr) { | |||
if (rh->for_addr->addr_len > 0) { | |||
t1.begin = rh->for_addr->addr; | |||
t1.len = rh->for_addr->addr_len; | |||
rh->for_mbox = rspamd_mempool_ftokdup (task->task_pool, | |||
&t1); | |||
} | |||
rspamd_mempool_add_destructor (task->task_pool, | |||
(rspamd_mempool_destruct_t)rspamd_email_address_free, | |||
rh->for_addr); | |||
} | |||
break; | |||
default: | |||
/* Do nothing */ | |||
break; | |||
} | |||
} | |||
if (rh->real_ip && !rh->from_ip) { | |||
rh->from_ip = rh->real_ip; | |||
} | |||
if (rh->real_hostname && !rh->from_hostname) { | |||
rh->from_hostname = rh->real_hostname; | |||
} | |||
if (date_pos > 0 && date_pos < len) { | |||
rh->timestamp = rspamd_parse_smtp_date (data + date_pos, | |||
len - date_pos, NULL); | |||
} | |||
return 0; | |||
} | |||
struct rspamd_mime_header * | |||
rspamd_message_get_header_from_hash (struct rspamd_mime_headers_table *hdrs, | |||
const gchar *field, |
@@ -72,52 +72,6 @@ struct rspamd_mime_header { | |||
struct rspamd_mime_headers_table; | |||
/*
 * Bit flags describing a Received header: one protocol type bit
 * (bits 0..10) optionally combined with RSPAMD_RECEIVED_FLAG_* modifiers.
 */
enum rspamd_received_type {
	RSPAMD_RECEIVED_SMTP = 1u << 0u,
	RSPAMD_RECEIVED_ESMTP = 1u << 1u,
	RSPAMD_RECEIVED_ESMTPA = 1u << 2u,
	RSPAMD_RECEIVED_ESMTPS = 1u << 3u,
	RSPAMD_RECEIVED_ESMTPSA = 1u << 4u,
	RSPAMD_RECEIVED_LMTP = 1u << 5u,
	RSPAMD_RECEIVED_IMAP = 1u << 6u,
	RSPAMD_RECEIVED_LOCAL = 1u << 7u,
	RSPAMD_RECEIVED_HTTP = 1u << 8u,
	RSPAMD_RECEIVED_MAPI = 1u << 9u,
	RSPAMD_RECEIVED_UNKNOWN = 1u << 10u,
	/* Modifier flags, may be OR-ed with a type bit */
	RSPAMD_RECEIVED_FLAG_ARTIFICIAL = (1u << 11u),
	RSPAMD_RECEIVED_FLAG_SSL = (1u << 12u),
	RSPAMD_RECEIVED_FLAG_AUTHENTICATED = (1u << 13u),
};
/* Mask selecting the protocol type bits (everything except RSPAMD_RECEIVED_FLAG_*). */
#define RSPAMD_RECEIVED_FLAG_TYPE_MASK (RSPAMD_RECEIVED_SMTP| \
	RSPAMD_RECEIVED_ESMTP| \
	RSPAMD_RECEIVED_ESMTPA| \
	RSPAMD_RECEIVED_ESMTPS| \
	RSPAMD_RECEIVED_ESMTPSA| \
	RSPAMD_RECEIVED_LMTP| \
	RSPAMD_RECEIVED_IMAP| \
	RSPAMD_RECEIVED_LOCAL| \
	RSPAMD_RECEIVED_HTTP| \
	RSPAMD_RECEIVED_MAPI| \
	RSPAMD_RECEIVED_UNKNOWN)
struct rspamd_email_address; | |||
struct rspamd_received_header { | |||
const gchar *from_hostname; | |||
const gchar *from_ip; | |||
const gchar *real_hostname; | |||
const gchar *real_ip; | |||
const gchar *by_hostname; | |||
const gchar *for_mbox; | |||
struct rspamd_email_address *for_addr; | |||
rspamd_inet_addr_t *addr; | |||
struct rspamd_mime_header *hdr; | |||
time_t timestamp; | |||
gint flags; /* See enum rspamd_received_type */ | |||
struct rspamd_received_header *prev, *next; | |||
}; | |||
/** | |||
* Process headers and store them in `target` | |||
* @param task |
@@ -0,0 +1,745 @@ | |||
/*- | |||
* Copyright 2021 Vsevolod Stakhov | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#include "config.h" | |||
#include "received.h" | |||
#include "libserver/task.h" | |||
#include "libserver/url.h" | |||
#include "mime_string.hxx" | |||
#include "smtp_parsers.h" | |||
#include "message.h" | |||
#include <vector> | |||
#include <string_view> | |||
#include <utility> | |||
#include "frozen/string.h" | |||
#include "frozen/unordered_map.h" | |||
namespace rspamd::mime { | |||
/** Kinds of clauses that the Received header splitter distinguishes. */
enum class received_part_type {
	RSPAMD_RECEIVED_PART_FROM,
	RSPAMD_RECEIVED_PART_BY,
	RSPAMD_RECEIVED_PART_FOR,
	RSPAMD_RECEIVED_PART_WITH,
	RSPAMD_RECEIVED_PART_ID,
	RSPAMD_RECEIVED_PART_UNKNOWN, /* anything else; its data is not stored */
};
static inline auto | |||
received_char_filter(UChar32 uc) -> UChar32 | |||
{ | |||
if (u_isprint(uc)) { | |||
return u_tolower(uc); | |||
} | |||
return 0; | |||
} | |||
struct received_header { | |||
mime_string from_hostname; | |||
std::string_view from_ip; | |||
mime_string real_hostname; | |||
mime_string real_ip; | |||
mime_string by_hostname; | |||
std::string_view for_mbox; | |||
struct rspamd_email_address *for_addr = nullptr; | |||
rspamd_inet_addr_t *addr = nullptr; | |||
struct rspamd_mime_header *hdr = nullptr; | |||
time_t timestamp = 0; | |||
int flags = 0; /* See enum rspamd_received_type */ | |||
received_header() noexcept | |||
: from_hostname(received_char_filter), | |||
real_hostname(received_char_filter), | |||
real_ip(received_char_filter), | |||
by_hostname(received_char_filter), | |||
for_mbox(received_char_filter) {} | |||
~received_header() { | |||
if (for_addr) { | |||
rspamd_email_address_free(for_addr); | |||
} | |||
} | |||
}; | |||
class received_header_chain { | |||
public: | |||
explicit received_header_chain(struct rspamd_task *_task) : task(_task) { | |||
headers.reserve(2); | |||
rspamd_mempool_add_destructor(task->task_pool, | |||
received_header_chain::received_header_chain_pool_dtor, this); | |||
} | |||
auto new_received() -> received_header & { | |||
headers.emplace_back(); | |||
return headers.back(); | |||
} | |||
private: | |||
static auto received_header_chain_pool_dtor(void *ptr) -> void { | |||
delete static_cast<received_header_chain *>(ptr); | |||
} | |||
std::vector<received_header> headers; | |||
struct rspamd_task *task; | |||
}; | |||
struct received_part { | |||
received_part_type type; | |||
mime_string data; | |||
std::vector<mime_string> comments; | |||
explicit received_part(received_part_type t) | |||
: type(t), | |||
data(received_char_filter) {} | |||
}; | |||
static inline auto | |||
received_part_set_or_append(struct rspamd_task *task, | |||
const gchar *begin, | |||
gsize len, | |||
mime_string &dest) -> void | |||
{ | |||
if (len == 0) { | |||
return; | |||
} | |||
dest.append(begin, len); | |||
dest.trim(" \t"); | |||
} | |||
static auto | |||
received_process_part(struct rspamd_task *task, | |||
const std::string_view &data, | |||
received_part_type type, | |||
std::ptrdiff_t &last, | |||
received_part &npart) -> bool | |||
{ | |||
auto obraces = 0, ebraces = 0; | |||
auto seen_tcpinfo = false; | |||
enum _parse_state { | |||
skip_spaces, | |||
in_comment, | |||
read_data, | |||
read_tcpinfo, | |||
all_done | |||
} state, next_state; | |||
/* In this function, we just process comments and data separately */ | |||
const auto *p = data.data(); | |||
const auto *end = p + data.size(); | |||
const auto *c = p; | |||
state = skip_spaces; | |||
next_state = read_data; | |||
while (p < end) { | |||
switch (state) { | |||
case skip_spaces: | |||
if (!g_ascii_isspace(*p)) { | |||
c = p; | |||
state = next_state; | |||
} | |||
else { | |||
p++; | |||
} | |||
break; | |||
case in_comment: | |||
if (*p == '(') { | |||
obraces++; | |||
} | |||
else if (*p == ')') { | |||
ebraces++; | |||
if (ebraces >= obraces) { | |||
if (type != received_part_type::RSPAMD_RECEIVED_PART_UNKNOWN) { | |||
if (p > c) { | |||
npart.comments.emplace_back(received_char_filter); | |||
auto &comment = npart.comments.back(); | |||
received_part_set_or_append(task, | |||
c, p - c, | |||
comment); | |||
} | |||
} | |||
p++; | |||
c = p; | |||
state = skip_spaces; | |||
next_state = read_data; | |||
continue; | |||
} | |||
} | |||
p++; | |||
break; | |||
case read_data: | |||
if (*p == '(') { | |||
if (p > c) { | |||
if (type != received_part_type::RSPAMD_RECEIVED_PART_UNKNOWN) { | |||
received_part_set_or_append(task, | |||
c, p - c, | |||
npart.data); | |||
} | |||
} | |||
state = in_comment; | |||
obraces = 1; | |||
ebraces = 0; | |||
p++; | |||
c = p; | |||
} | |||
else if (g_ascii_isspace (*p)) { | |||
if (p > c) { | |||
if (type != received_part_type::RSPAMD_RECEIVED_PART_UNKNOWN) { | |||
received_part_set_or_append(task, | |||
c, p - c, | |||
npart.data); | |||
} | |||
} | |||
state = skip_spaces; | |||
next_state = read_data; | |||
c = p; | |||
} | |||
else if (*p == ';') { | |||
/* It is actually delimiter of date part if not in the comments */ | |||
if (p > c) { | |||
if (type != received_part_type::RSPAMD_RECEIVED_PART_UNKNOWN) { | |||
received_part_set_or_append(task, | |||
c, p - c, | |||
npart.data); | |||
} | |||
} | |||
state = all_done; | |||
continue; | |||
} | |||
else if (npart.data.size() > 0) { | |||
/* We have already received data and find something with no ( */ | |||
if (!seen_tcpinfo && type == received_part_type::RSPAMD_RECEIVED_PART_FROM) { | |||
/* Check if we have something special here, such as TCPinfo */ | |||
if (*c == '[') { | |||
state = read_tcpinfo; | |||
p++; | |||
} | |||
else { | |||
state = all_done; | |||
continue; | |||
} | |||
} | |||
else { | |||
state = all_done; | |||
continue; | |||
} | |||
} | |||
else { | |||
p++; | |||
} | |||
break; | |||
case read_tcpinfo: | |||
if (*p == ']') { | |||
received_part_set_or_append(task, | |||
c, p - c + 1, | |||
npart.data); | |||
seen_tcpinfo = TRUE; | |||
state = skip_spaces; | |||
next_state = read_data; | |||
c = p; | |||
} | |||
p++; | |||
break; | |||
case all_done: | |||
if (p > data.data()) { | |||
last = p - data.data(); | |||
return true; | |||
} | |||
else { | |||
/* Empty element */ | |||
return false; | |||
} | |||
break; | |||
} | |||
} | |||
/* Leftover */ | |||
switch (state) { | |||
case read_data: | |||
if (p > c) { | |||
if (type != received_part_type::RSPAMD_RECEIVED_PART_UNKNOWN) { | |||
received_part_set_or_append(task, | |||
c, p - c, | |||
npart.data); | |||
} | |||
last = p - data.data(); | |||
return true; | |||
} | |||
break; | |||
case skip_spaces: | |||
if (p > data.data()) { | |||
last = p - data.data(); | |||
return true; | |||
} | |||
default: | |||
break; | |||
} | |||
return false; | |||
} | |||
template <std::size_t N> | |||
constexpr auto lit_compare_lowercase(const char lit[N], const char *in) -> bool | |||
{ | |||
for (auto i = 0; i < N; i ++) { | |||
if (lc_map[(unsigned char)in[i]] != lit[i]) { | |||
return false; | |||
} | |||
} | |||
return true; | |||
} | |||
static auto | |||
received_spill(struct rspamd_task *task, | |||
const std::string_view &in, | |||
std::ptrdiff_t &date_pos) -> std::vector<received_part> | |||
{ | |||
std::vector<received_part> parts; | |||
std::ptrdiff_t pos = 0; | |||
const auto *p = in.data(); | |||
const auto *end = p + in.size(); | |||
while (p < end && g_ascii_isspace (*p)) { | |||
p++; | |||
} | |||
auto len = end - p; | |||
/* Ignore all received but those started from from part */ | |||
if (len <= 4 || !lit_compare_lowercase<4>("from", p)) { | |||
return {}; | |||
} | |||
p += sizeof("from") - 1; | |||
auto maybe_process_part = [&](received_part_type what) -> bool { | |||
parts.emplace_back(what); | |||
auto &rcvd_part = parts.back(); | |||
auto chunk = std::string_view{p, (std::size_t)(end - p)}; | |||
if (!received_process_part(task, chunk, what, pos, rcvd_part)) { | |||
parts.pop_back(); | |||
return false; | |||
} | |||
return true; | |||
}; | |||
/* We can now store from part */ | |||
if (!maybe_process_part(received_part_type::RSPAMD_RECEIVED_PART_FROM)) { | |||
return {}; | |||
} | |||
g_assert (pos != 0); | |||
p += pos; | |||
len = end > p ? end - p : 0; | |||
if (len > 2 && lit_compare_lowercase<2>("by", p)) { | |||
p += sizeof("by") - 1; | |||
if (!maybe_process_part(received_part_type::RSPAMD_RECEIVED_PART_BY)) { | |||
return {}; | |||
} | |||
g_assert (pos != 0); | |||
p += pos; | |||
len = end > p ? end - p : 0; | |||
} | |||
while (p < end) { | |||
bool got_part = false; | |||
if (*p == ';') { | |||
/* We are at the date separator, stop here */ | |||
date_pos = p - in.data() + 1; | |||
break; | |||
} | |||
else { | |||
if (len > sizeof("with") && lit_compare_lowercase<4>("with", p)) { | |||
p += sizeof("with") - 1; | |||
got_part = maybe_process_part(received_part_type::RSPAMD_RECEIVED_PART_WITH); | |||
} | |||
else if (len > sizeof("for") && lit_compare_lowercase<3>("for", p)) { | |||
p += sizeof("for") - 1; | |||
got_part = maybe_process_part(received_part_type::RSPAMD_RECEIVED_PART_FOR); | |||
} | |||
else if (len > sizeof("id") && lit_compare_lowercase<2>("id", p)) { | |||
p += sizeof("id") - 1; | |||
got_part = maybe_process_part(received_part_type::RSPAMD_RECEIVED_PART_ID); | |||
} | |||
else { | |||
while (p < end) { | |||
if (!(g_ascii_isspace (*p) || *p == '(' || *p == ';')) { | |||
p++; | |||
} | |||
else { | |||
break; | |||
} | |||
} | |||
if (p == end) { | |||
return {}; | |||
} | |||
else if (*p == ';') { | |||
date_pos = p - in.data() + 1; | |||
break; | |||
} | |||
else { | |||
got_part = maybe_process_part(received_part_type::RSPAMD_RECEIVED_PART_UNKNOWN); | |||
} | |||
} | |||
if (!got_part) { | |||
p++; | |||
len = end > p ? end - p : 0; | |||
} | |||
else { | |||
g_assert (pos != 0); | |||
p += pos; | |||
len = end > p ? end - p : 0; | |||
} | |||
} | |||
} | |||
return parts; | |||
} | |||
#define RSPAMD_INET_ADDRESS_PARSE_RECEIVED \ | |||
(rspamd_inet_address_parse_flags)(RSPAMD_INET_ADDRESS_PARSE_REMOTE|RSPAMD_INET_ADDRESS_PARSE_NO_UNIX) | |||
static auto | |||
received_process_rdns(struct rspamd_task *task, | |||
const std::string_view &in, | |||
mime_string &dest) -> bool | |||
{ | |||
auto seen_dot = false; | |||
const auto *p = in.data(); | |||
const auto *end = p + in.size(); | |||
if (in.empty()) { | |||
return false; | |||
} | |||
if (*p == '[' && *(end - 1) == ']' && in.size() > 2) { | |||
/* We have enclosed ip address */ | |||
auto *addr = rspamd_parse_inet_address_pool(p + 1, | |||
(end - p) - 2, | |||
task->task_pool, | |||
RSPAMD_INET_ADDRESS_PARSE_RECEIVED); | |||
if (addr) { | |||
const gchar *addr_str; | |||
if (rspamd_inet_address_get_port(addr) != 0) { | |||
addr_str = rspamd_inet_address_to_string_pretty(addr); | |||
} | |||
else { | |||
addr_str = rspamd_inet_address_to_string(addr); | |||
} | |||
dest.assign_copy(std::string_view{addr_str}); | |||
return true; | |||
} | |||
} | |||
auto hlen = 0u; | |||
while (p < end) { | |||
if (!g_ascii_isspace(*p) && rspamd_url_is_domain(*p)) { | |||
if (*p == '.') { | |||
seen_dot = true; | |||
} | |||
hlen++; | |||
} | |||
else { | |||
break; | |||
} | |||
p++; | |||
} | |||
if (hlen > 0) { | |||
if (p == end || (seen_dot && (g_ascii_isspace(*p) || *p == '[' || *p == '('))) { | |||
/* All data looks like a hostname */ | |||
dest.assign_copy(std::string_view{in.data(), hlen}); | |||
return true; | |||
} | |||
} | |||
return false; | |||
} | |||
static auto | |||
received_process_host_tcpinfo(struct rspamd_task *task, | |||
received_header &rh, | |||
const std::string_view &in) -> bool | |||
{ | |||
rspamd_inet_addr_t *addr = nullptr; | |||
auto ret = false; | |||
if (in.empty()) { | |||
return false; | |||
} | |||
if (in[0] == '[') { | |||
/* Likely Exim version */ | |||
auto brace_pos = in.find(']'); | |||
if (brace_pos != std::string_view::npos) { | |||
auto substr_addr = in.substr(1, brace_pos - 1); | |||
addr = rspamd_parse_inet_address_pool(substr_addr.data(), | |||
substr_addr.size(), | |||
task->task_pool, | |||
RSPAMD_INET_ADDRESS_PARSE_RECEIVED); | |||
if (addr) { | |||
rh.addr = addr; | |||
rh.real_ip.assign_copy(std::string_view(rspamd_inet_address_to_string(addr))); | |||
rh.from_ip = rh.real_ip.as_view(); | |||
} | |||
} | |||
} | |||
else { | |||
if (g_ascii_isxdigit(in[0])) { | |||
/* Try to parse IP address */ | |||
addr = rspamd_parse_inet_address_pool(in.data(), | |||
in.size(), task->task_pool, RSPAMD_INET_ADDRESS_PARSE_RECEIVED); | |||
if (addr) { | |||
rh.addr = addr; | |||
rh.real_ip.assign_copy(std::string_view(rspamd_inet_address_to_string(addr))); | |||
rh.from_ip = rh.real_ip.as_view(); | |||
} | |||
} | |||
if (!addr) { | |||
/* Try canonical Postfix version: rdns [ip] */ | |||
auto obrace_pos = in.find('['); | |||
if (obrace_pos != std::string_view::npos) { | |||
auto ebrace_pos = in.rfind(']', obrace_pos); | |||
if (ebrace_pos != std::string_view::npos) { | |||
auto substr_addr = in.substr(obrace_pos + 1, | |||
ebrace_pos - obrace_pos - 1); | |||
addr = rspamd_parse_inet_address_pool(substr_addr.data(), | |||
substr_addr.size(), | |||
task->task_pool, | |||
RSPAMD_INET_ADDRESS_PARSE_RECEIVED); | |||
if (addr) { | |||
rh.addr = addr; | |||
rh.real_ip.assign_copy(std::string_view(rspamd_inet_address_to_string(addr))); | |||
rh.from_ip = rh.real_ip.as_view(); | |||
/* Process with rDNS */ | |||
auto rdns_substr = in.substr(0, obrace_pos); | |||
if (received_process_rdns(task, | |||
rdns_substr, | |||
rh.real_hostname)) { | |||
ret = true; | |||
} | |||
} | |||
} | |||
} | |||
else { | |||
/* Hostname or some crap, sigh... */ | |||
if (received_process_rdns(task, in, rh.real_hostname)) { | |||
ret = true; | |||
} | |||
} | |||
} | |||
} | |||
return ret; | |||
} | |||
static void | |||
received_process_from(struct rspamd_task *task, | |||
const received_part &rpart, | |||
received_header &rh) | |||
{ | |||
if (rpart.data.size() > 0) { | |||
/* We have seen multiple cases: | |||
* - [ip] (hostname/unknown [real_ip]) | |||
* - helo (hostname/unknown [real_ip]) | |||
* - [ip] | |||
* - hostname | |||
* - hostname ([ip]:port helo=xxx) | |||
* Maybe more... | |||
*/ | |||
auto seen_ip_in_data = false; | |||
if (!rpart.comments.empty()) { | |||
/* We can have info within comment as part of RFC */ | |||
received_process_host_tcpinfo( | |||
task, rh, | |||
rpart.comments[0].as_view()); | |||
} | |||
if (rh.real_ip.size() == 0) { | |||
/* Try to do the same with data */ | |||
if (received_process_host_tcpinfo( | |||
task, rh, | |||
rpart.data.as_view())) { | |||
seen_ip_in_data = true; | |||
} | |||
} | |||
if (!seen_ip_in_data) { | |||
if (rh.real_ip.size() != 0) { | |||
/* Get anounced hostname (usually helo) */ | |||
received_process_rdns(task, | |||
rpart.data.as_view(), | |||
rh.from_hostname); | |||
} | |||
else { | |||
received_process_host_tcpinfo(task, | |||
rh, rpart.data.as_view()); | |||
} | |||
} | |||
} | |||
else { | |||
/* rpart->dlen = 0 */ | |||
if (!rpart.comments.empty()) { | |||
received_process_host_tcpinfo( | |||
task, rh, | |||
rpart.comments[0].as_view()); | |||
} | |||
} | |||
} | |||
auto | |||
received_header_parse(struct rspamd_task *task, const std::string_view &in, | |||
struct rspamd_mime_header *hdr) -> bool | |||
{ | |||
std::ptrdiff_t date_pos = -1; | |||
static constexpr const auto protos_map = frozen::make_unordered_map<frozen::string, int>({ | |||
{"smtp", RSPAMD_RECEIVED_SMTP}, | |||
{"esmtp", RSPAMD_RECEIVED_ESMTP}, | |||
{"esmtpa", RSPAMD_RECEIVED_ESMTPA | RSPAMD_RECEIVED_FLAG_AUTHENTICATED}, | |||
{"esmtpsa", RSPAMD_RECEIVED_ESMTPSA | RSPAMD_RECEIVED_FLAG_SSL | RSPAMD_RECEIVED_FLAG_AUTHENTICATED}, | |||
{"esmtps", RSPAMD_RECEIVED_ESMTPS | RSPAMD_RECEIVED_FLAG_SSL}, | |||
{"lmtp", RSPAMD_RECEIVED_LMTP}, | |||
{"imap", RSPAMD_RECEIVED_IMAP}, | |||
{"imaps", RSPAMD_RECEIVED_IMAP | RSPAMD_RECEIVED_FLAG_SSL}, | |||
{"http", RSPAMD_RECEIVED_HTTP}, | |||
{"https", RSPAMD_RECEIVED_HTTP | RSPAMD_RECEIVED_FLAG_SSL}, | |||
{"local", RSPAMD_RECEIVED_LOCAL} | |||
}); | |||
auto parts = received_spill(task, in, date_pos); | |||
if (parts.empty()) { | |||
return false; | |||
} | |||
auto *recv_chain_ptr = static_cast<received_header_chain *>(MESSAGE_FIELD(task, received_headers)); | |||
if (recv_chain_ptr == nullptr) { | |||
/* This constructor automatically registers dtor in mempool */ | |||
recv_chain_ptr = new received_header_chain(task); | |||
MESSAGE_FIELD(task, received_headers) = (void *)recv_chain_ptr; | |||
} | |||
auto &rh = recv_chain_ptr->new_received(); | |||
rh.flags = RSPAMD_RECEIVED_UNKNOWN; | |||
rh.hdr = hdr; | |||
for (const auto &part : parts) { | |||
switch (part.type) { | |||
case received_part_type::RSPAMD_RECEIVED_PART_FROM: | |||
received_process_from(task, part, rh); | |||
break; | |||
case received_part_type::RSPAMD_RECEIVED_PART_BY: | |||
received_process_rdns(task, | |||
part.data.as_view(), | |||
rh.by_hostname); | |||
break; | |||
case received_part_type::RSPAMD_RECEIVED_PART_WITH: | |||
if (part.data.size() > 0) { | |||
auto proto_flag_it = protos_map.find(part.data.as_view()); | |||
if (proto_flag_it != protos_map.end()) { | |||
rh.flags = proto_flag_it->second; | |||
} | |||
} | |||
break; | |||
case received_part_type::RSPAMD_RECEIVED_PART_FOR: | |||
rh.for_addr = rspamd_email_address_from_smtp(part.data.data(), | |||
part.data.size()); | |||
if (rh.for_addr) { | |||
if (rh.for_addr->addr_len > 0) { | |||
rh.for_mbox = std::string_view{rh.for_addr->addr, | |||
rh.for_addr->addr_len}; | |||
} | |||
} | |||
break; | |||
default: | |||
/* Do nothing */ | |||
break; | |||
} | |||
} | |||
if (!rh.real_ip.empty() && rh.from_ip.empty()) { | |||
rh.from_ip = rh.real_ip.as_view(); | |||
} | |||
if (!rh.real_hostname.empty() && rh.from_hostname.empty()) { | |||
rh.from_hostname.assign_copy(rh.real_hostname); | |||
} | |||
if (date_pos > 0 && date_pos < in.size()) { | |||
auto date_sub = in.substr(date_pos); | |||
rh.timestamp = rspamd_parse_smtp_date((const unsigned char*)date_sub.data(), | |||
date_sub.size(), nullptr); | |||
} | |||
return true; | |||
} | |||
} // namespace rspamd::mime | |||
bool | |||
rspamd_received_header_parse(struct rspamd_task *task, | |||
const char *data, size_t sz, | |||
struct rspamd_mime_header *hdr) | |||
{ | |||
return rspamd::mime::received_header_parse(task, std::string_view{data, sz}, hdr); | |||
} |
@@ -0,0 +1,69 @@ | |||
/*- | |||
* Copyright 2021 Vsevolod Stakhov | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef RSPAMD_RECEIVED_H | |||
#define RSPAMD_RECEIVED_H | |||
#include "config.h" | |||
#include "libutil/addr.h" | |||
#ifdef __cplusplus | |||
extern "C" { | |||
#endif | |||
/*
 * Bits describing a parsed Received header. The low bits (up to and
 * including RSPAMD_RECEIVED_UNKNOWN) name the protocol; the
 * RSPAMD_RECEIVED_FLAG_* bits are modifiers OR-ed on top of it.
 */
enum rspamd_received_type {
	/* Protocol type bits */
	RSPAMD_RECEIVED_SMTP = 1u << 0u,
	RSPAMD_RECEIVED_ESMTP = 1u << 1u,
	RSPAMD_RECEIVED_ESMTPA = 1u << 2u,
	RSPAMD_RECEIVED_ESMTPS = 1u << 3u,
	RSPAMD_RECEIVED_ESMTPSA = 1u << 4u,
	RSPAMD_RECEIVED_LMTP = 1u << 5u,
	RSPAMD_RECEIVED_IMAP = 1u << 6u,
	RSPAMD_RECEIVED_LOCAL = 1u << 7u,
	RSPAMD_RECEIVED_HTTP = 1u << 8u,
	RSPAMD_RECEIVED_MAPI = 1u << 9u,
	RSPAMD_RECEIVED_UNKNOWN = 1u << 10u,

	/* Modifier bits, not covered by RSPAMD_RECEIVED_FLAG_TYPE_MASK */
	RSPAMD_RECEIVED_FLAG_ARTIFICIAL = 1u << 11u,
	RSPAMD_RECEIVED_FLAG_SSL = 1u << 12u,
	RSPAMD_RECEIVED_FLAG_AUTHENTICATED = 1u << 13u,
};

/* Selects the protocol type bits out of a flags value */
#define RSPAMD_RECEIVED_FLAG_TYPE_MASK \
	(RSPAMD_RECEIVED_SMTP | \
	 RSPAMD_RECEIVED_ESMTP | \
	 RSPAMD_RECEIVED_ESMTPA | \
	 RSPAMD_RECEIVED_ESMTPS | \
	 RSPAMD_RECEIVED_ESMTPSA | \
	 RSPAMD_RECEIVED_LMTP | \
	 RSPAMD_RECEIVED_IMAP | \
	 RSPAMD_RECEIVED_LOCAL | \
	 RSPAMD_RECEIVED_HTTP | \
	 RSPAMD_RECEIVED_MAPI | \
	 RSPAMD_RECEIVED_UNKNOWN)
/*
 * Forward declarations. `struct rspamd_task` is declared here as well so
 * that the prototype below does not introduce it with prototype scope when
 * this header is the first one to mention it in a C translation unit.
 */
struct rspamd_task;
struct rspamd_email_address;
struct rspamd_received_header_chain;
struct rspamd_mime_header;

/**
 * Parses the value of a single Received header and, on success, appends the
 * parsed entry to the received headers stored in the task's message.
 *
 * @param task task that owns the message being processed
 * @param data start of the raw header value (does not need to be NUL terminated)
 * @param sz length of `data` in bytes
 * @param hdr MIME header the value belongs to; stored in the parsed entry
 * @return true if the value has been parsed and stored, false if it has
 *         been rejected (for instance, when it does not start with `from`)
 */
bool rspamd_received_header_parse(struct rspamd_task *task,
		const char *data, size_t sz, struct rspamd_mime_header *hdr);
#ifdef __cplusplus | |||
} | |||
#endif | |||
#endif //RSPAMD_RECEIVED_H |
@@ -27,10 +27,6 @@ | |||
extern "C" { | |||
#endif | |||
int rspamd_smtp_received_parse (struct rspamd_task *task, | |||
const char *data, size_t len, | |||
struct rspamd_received_header *rh); | |||
int rspamd_smtp_addr_parse (const char *data, size_t len, | |||
struct rspamd_email_address *addr); | |||