aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2021-10-03 12:52:45 +0100
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2021-10-03 12:52:45 +0100
commit e040d66c354b135e1281cd438958ecb3e7a8983e (patch)
tree de70116cca0d94668ab9ec9637f3c67fdef9552d
parent 028bda7293b7ed3c908b0a35db9d48e4e411cfb5 (diff)
download rspamd-e040d66c354b135e1281cd438958ecb3e7a8983e.tar.gz
rspamd-e040d66c354b135e1281cd438958ecb3e7a8983e.zip
[Project] Rework received headers parsing to C++
-rw-r--r-- src/libmime/CMakeLists.txt 4
-rw-r--r-- src/libmime/email_addr.h 1
-rw-r--r-- src/libmime/message.h 2
-rw-r--r-- src/libmime/mime_headers.c 793
-rw-r--r-- src/libmime/mime_headers.h 46
-rw-r--r-- src/libmime/received.cxx 745
-rw-r--r-- src/libmime/received.h 69
-rw-r--r-- src/libmime/smtp_parsers.h 4
8 files changed, 819 insertions, 845 deletions
diff --git a/src/libmime/CMakeLists.txt b/src/libmime/CMakeLists.txt
index 878ac8149..4a64aac58 100644
--- a/src/libmime/CMakeLists.txt
+++ b/src/libmime/CMakeLists.txt
@@ -1,5 +1,6 @@
# Librspamd mime
SET(LIBRSPAMDMIMESRC
+ ${CMAKE_CURRENT_SOURCE_DIR}/received.cxx
${CMAKE_CURRENT_SOURCE_DIR}/email_addr.c
${CMAKE_CURRENT_SOURCE_DIR}/mime_expressions.c
${CMAKE_CURRENT_SOURCE_DIR}/scan_result.c
@@ -11,6 +12,7 @@ SET(LIBRSPAMDMIMESRC
${CMAKE_CURRENT_SOURCE_DIR}/mime_parser.c
${CMAKE_CURRENT_SOURCE_DIR}/mime_encoding.c
${CMAKE_CURRENT_SOURCE_DIR}/lang_detection.c
- ${CMAKE_CURRENT_SOURCE_DIR}/mime_string.cxx)
+ ${CMAKE_CURRENT_SOURCE_DIR}/mime_string.cxx
+ )
SET(RSPAMD_MIME ${LIBRSPAMDMIMESRC} PARENT_SCOPE) \ No newline at end of file
diff --git a/src/libmime/email_addr.h b/src/libmime/email_addr.h
index fe9fd9e9d..7e150f80d 100644
--- a/src/libmime/email_addr.h
+++ b/src/libmime/email_addr.h
@@ -58,7 +58,6 @@ struct rspamd_email_address {
guint flags;
};
-struct rspamd_received_header;
struct rspamd_task;
/**
diff --git a/src/libmime/message.h b/src/libmime/message.h
index a391daf0d..d5329efa7 100644
--- a/src/libmime/message.h
+++ b/src/libmime/message.h
@@ -174,7 +174,7 @@ struct rspamd_message {
GPtrArray *parts; /**< list of parsed parts */
GPtrArray *text_parts; /**< list of text parts */
struct rspamd_message_raw_headers_content raw_headers_content;
- struct rspamd_received_header *received; /**< list of received headers */
+ void *received_headers; /**< list of received headers */
khash_t (rspamd_url_hash) *urls;
struct rspamd_mime_headers_table *raw_headers; /**< list of raw headers */
struct rspamd_mime_header *headers_order; /**< order of raw headers */
diff --git a/src/libmime/mime_headers.c b/src/libmime/mime_headers.c
index 7b5011be4..7afb0e7a6 100644
--- a/src/libmime/mime_headers.c
+++ b/src/libmime/mime_headers.c
@@ -17,9 +17,9 @@
#include "mime_headers.h"
#include "smtp_parsers.h"
#include "mime_encoding.h"
+#include "received.h"
#include "contrib/uthash/utlist.h"
#include "libserver/mempool_vars_internal.h"
-#include "libserver/url.h"
#include "libserver/cfg_file.h"
#include "libutil/util.h"
#include <unicode/utf8.h>
@@ -33,9 +33,6 @@ struct rspamd_mime_headers_table {
ref_entry_t ref;
};
-#define RSPAMD_INET_ADDRESS_PARSE_RECEIVED \
- (RSPAMD_INET_ADDRESS_PARSE_REMOTE|RSPAMD_INET_ADDRESS_PARSE_NO_UNIX)
-
static void
rspamd_mime_header_check_special (struct rspamd_task *task,
struct rspamd_mime_header *rh)
@@ -913,794 +910,6 @@ rspamd_mime_message_id_generate (const gchar *fqdn)
return g_string_free (out, FALSE);
}
-enum rspamd_received_part_type {
- RSPAMD_RECEIVED_PART_FROM,
- RSPAMD_RECEIVED_PART_BY,
- RSPAMD_RECEIVED_PART_FOR,
- RSPAMD_RECEIVED_PART_WITH,
- RSPAMD_RECEIVED_PART_ID,
- RSPAMD_RECEIVED_PART_UNKNOWN,
-};
-
-struct rspamd_received_comment {
- gchar *data;
- gsize dlen;
- struct rspamd_received_comment *prev;
-};
-
-struct rspamd_received_part {
- enum rspamd_received_part_type type;
- gchar *data;
- gsize dlen;
- struct rspamd_received_comment *tail_comment;
- struct rspamd_received_comment *head_comment;
- struct rspamd_received_part *prev, *next;
-};
-
-static void
-rspamd_smtp_received_part_set_or_append (struct rspamd_task *task,
- const gchar *begin,
- gsize len,
- gchar **dest,
- gsize *destlen)
-{
- if (len == 0) {
- return;
- }
-
- if (*dest) {
- /* Append */
- gsize total_len = *destlen + len;
- gchar *new_dest;
-
- new_dest = rspamd_mempool_alloc (task->task_pool, total_len);
- memcpy (new_dest, *dest, *destlen);
- memcpy (new_dest + *destlen, begin, len);
- rspamd_str_lc (new_dest + *destlen, len);
- *dest = new_dest;
- *destlen = total_len;
- }
- else {
- /* Set */
- *dest = rspamd_mempool_alloc (task->task_pool, len);
- memcpy (*dest, begin, len);
- rspamd_str_lc (*dest, len);
- *dest = (gchar *)rspamd_string_len_strip (*dest, &len, " \t");
- *destlen = len;
- }
-}
-
-static struct rspamd_received_part *
-rspamd_smtp_received_process_part (struct rspamd_task *task,
- const char *data,
- size_t len,
- enum rspamd_received_part_type type,
- goffset *last)
-{
- struct rspamd_received_part *npart;
- const guchar *p, *c, *end;
- guint obraces = 0, ebraces = 0;
- gboolean seen_tcpinfo = FALSE;
- enum _parse_state {
- skip_spaces,
- in_comment,
- read_data,
- read_tcpinfo,
- all_done
- } state, next_state;
-
- npart = rspamd_mempool_alloc0 (task->task_pool, sizeof (*npart));
- npart->type = type;
-
- /* In this function, we just process comments and data separately */
- p = data;
- end = data + len;
- c = data;
- state = skip_spaces;
- next_state = read_data;
-
- while (p < end) {
- switch (state) {
- case skip_spaces:
- if (!g_ascii_isspace (*p)) {
- c = p;
- state = next_state;
- }
- else {
- p ++;
- }
- break;
- case in_comment:
- if (*p == '(') {
- obraces ++;
- }
- else if (*p == ')') {
- ebraces ++;
-
- if (ebraces >= obraces) {
- if (type != RSPAMD_RECEIVED_PART_UNKNOWN) {
- if (p > c) {
- struct rspamd_received_comment *comment;
-
-
- comment = rspamd_mempool_alloc0 (task->task_pool,
- sizeof (*comment));
- rspamd_smtp_received_part_set_or_append (task,
- c, p - c,
- &comment->data, &comment->dlen);
-
- if (!npart->head_comment) {
- comment->prev = NULL;
- npart->head_comment = comment;
- npart->tail_comment = comment;
- }
- else {
- comment->prev = npart->tail_comment;
- npart->tail_comment = comment;
- }
- }
- }
-
- p ++;
- c = p;
- state = skip_spaces;
- next_state = read_data;
-
- continue;
- }
- }
-
- p ++;
- break;
- case read_data:
- if (*p == '(') {
- if (p > c) {
- if (type != RSPAMD_RECEIVED_PART_UNKNOWN) {
- rspamd_smtp_received_part_set_or_append (task,
- c, p - c,
- &npart->data, &npart->dlen);
- }
- }
-
- state = in_comment;
- obraces = 1;
- ebraces = 0;
- p ++;
- c = p;
- }
- else if (g_ascii_isspace (*p)) {
- if (p > c) {
- if (type != RSPAMD_RECEIVED_PART_UNKNOWN) {
- rspamd_smtp_received_part_set_or_append (task,
- c, p - c,
- &npart->data, &npart->dlen);
- }
- }
-
- state = skip_spaces;
- next_state = read_data;
- c = p;
- }
- else if (*p == ';') {
- /* It is actually delimiter of date part if not in the comments */
- if (p > c) {
- if (type != RSPAMD_RECEIVED_PART_UNKNOWN) {
- rspamd_smtp_received_part_set_or_append (task,
- c, p - c,
- &npart->data, &npart->dlen);
- }
- }
-
- state = all_done;
- continue;
- }
- else if (npart->dlen > 0) {
- /* We have already received data and find something with no ( */
- if (!seen_tcpinfo && type == RSPAMD_RECEIVED_PART_FROM) {
- /* Check if we have something special here, such as TCPinfo */
- if (*c == '[') {
- state = read_tcpinfo;
- p ++;
- }
- else {
- state = all_done;
- continue;
- }
- }
- else {
- state = all_done;
- continue;
- }
- }
- else {
- p ++;
- }
- break;
- case read_tcpinfo:
- if (*p == ']') {
- rspamd_smtp_received_part_set_or_append (task,
- c, p - c + 1,
- &npart->data, &npart->dlen);
- seen_tcpinfo = TRUE;
- state = skip_spaces;
- next_state = read_data;
- c = p;
- }
- p ++;
- break;
- case all_done:
- if (p > (const guchar *)data) {
- *last = p - (const guchar *) data;
- return npart;
- }
- else {
- /* Empty element */
- return NULL;
- }
- break;
- }
- }
-
- /* Leftover */
- switch (state) {
- case read_data:
- if (p > c) {
- if (type != RSPAMD_RECEIVED_PART_UNKNOWN) {
- rspamd_smtp_received_part_set_or_append (task,
- c, p - c,
- &npart->data, &npart->dlen);
- }
-
- *last = p - (const guchar *)data;
-
- return npart;
- }
- break;
- case skip_spaces:
- if (p > (const guchar *)data) {
- *last = p - (const guchar *) data;
-
- return npart;
- }
- default:
- break;
- }
-
- return NULL;
-}
-
-static struct rspamd_received_part *
-rspamd_smtp_received_spill (struct rspamd_task *task,
- const char *data,
- size_t len,
- goffset *date_pos)
-{
- const guchar *p, *end;
- struct rspamd_received_part *cur_part, *head = NULL;
- goffset pos = 0;
-
- p = data;
- end = data + len;
-
- while (p < end && g_ascii_isspace (*p)) {
- p ++;
- }
-
- len = end - p;
-
- /* Ignore all received but those started from from part */
- if (len <= 4 || (lc_map[p[0]] != 'f' &&
- lc_map[p[1]] != 'r' &&
- lc_map[p[2]] != 'o' &&
- lc_map[p[3]] != 'm')) {
- return NULL;
- }
-
- p += sizeof ("from") - 1;
-
- /* We can now store from part */
- cur_part = rspamd_smtp_received_process_part (task, p, end - p,
- RSPAMD_RECEIVED_PART_FROM, &pos);
-
- if (!cur_part) {
- return NULL;
- }
-
- g_assert (pos != 0);
- p += pos;
- len = end > p ? end - p : 0;
- DL_APPEND (head, cur_part);
-
- if (len > 2 && (lc_map[p[0]] == 'b' &&
- lc_map[p[1]] == 'y')) {
- p += sizeof ("by") - 1;
-
- cur_part = rspamd_smtp_received_process_part (task, p, end - p,
- RSPAMD_RECEIVED_PART_BY, &pos);
-
- if (!cur_part) {
- return NULL;
- }
-
- g_assert (pos != 0);
- p += pos;
- len = end > p ? end - p : 0;
- DL_APPEND (head, cur_part);
- }
-
- while (p < end) {
- if (*p == ';') {
- /* We are at the date separator, stop here */
- *date_pos = p - (const guchar *)data + 1;
- break;
- }
- else {
- if (len > sizeof ("with") && (lc_map[p[0]] == 'w' &&
- lc_map[p[1]] == 'i' &&
- lc_map[p[2]] == 't' &&
- lc_map[p[3]] == 'h')) {
- p += sizeof ("with") - 1;
-
- cur_part = rspamd_smtp_received_process_part (task, p, end - p,
- RSPAMD_RECEIVED_PART_WITH, &pos);
- }
- else if (len > sizeof ("for") && (lc_map[p[0]] == 'f' &&
- lc_map[p[1]] == 'o' &&
- lc_map[p[2]] == 'r')) {
- p += sizeof ("for") - 1;
- cur_part = rspamd_smtp_received_process_part (task, p, end - p,
- RSPAMD_RECEIVED_PART_FOR, &pos);
- }
- else if (len > sizeof ("id") && (lc_map[p[0]] == 'i' &&
- lc_map[p[1]] == 'd')) {
- p += sizeof ("id") - 1;
- cur_part = rspamd_smtp_received_process_part (task, p, end - p,
- RSPAMD_RECEIVED_PART_ID, &pos);
- }
- else {
- while (p < end) {
- if (!(g_ascii_isspace (*p) || *p == '(' || *p == ';')) {
- p ++;
- }
- else {
- break;
- }
- }
-
- if (p == end) {
- return NULL;
- }
- else if (*p == ';') {
- *date_pos = p - (const guchar *)data + 1;
- break;
- }
- else {
- cur_part = rspamd_smtp_received_process_part (task, p, end - p,
- RSPAMD_RECEIVED_PART_UNKNOWN, &pos);
- }
- }
-
- if (!cur_part) {
- p ++;
- len = end > p ? end - p : 0;
- }
- else {
- g_assert (pos != 0);
- p += pos;
- len = end > p ? end - p : 0;
- DL_APPEND (head, cur_part);
- }
- }
- }
-
- return head;
-}
-
-static gboolean
-rspamd_smtp_received_process_rdns (struct rspamd_task *task,
- const gchar *begin,
- gsize len,
- const gchar **pdest)
-{
- const gchar *p, *end;
- gsize hlen = 0;
- gboolean seen_dot = FALSE;
-
- p = begin;
- end = begin + len;
-
- if (len == 0) {
- return FALSE;
- }
-
- if (*p == '[' && *(end - 1) == ']' && len > 2) {
- /* We have enclosed ip address */
- rspamd_inet_addr_t *addr = rspamd_parse_inet_address_pool (p + 1,
- (end - p) - 2,
- task->task_pool,
- RSPAMD_INET_ADDRESS_PARSE_RECEIVED);
-
- if (addr) {
- const gchar *addr_str;
- gchar *dest;
-
- if (rspamd_inet_address_get_port (addr) != 0) {
- addr_str = rspamd_inet_address_to_string_pretty (addr);
- }
- else {
- addr_str = rspamd_inet_address_to_string (addr);
- }
- dest = rspamd_mempool_strdup (task->task_pool, addr_str);
- *pdest = dest;
-
- return TRUE;
- }
- }
-
- while (p < end) {
- if (!g_ascii_isspace (*p) && rspamd_url_is_domain (*p)) {
- if (*p == '.') {
- seen_dot = TRUE;
- }
-
- hlen ++;
- }
- else {
- break;
- }
-
- p ++;
- }
-
- if (hlen > 0) {
- if (p == end) {
- /* All data looks like a hostname */
- gchar *dest;
-
- dest = rspamd_mempool_alloc (task->task_pool,
- hlen + 1);
- rspamd_strlcpy (dest, begin, hlen + 1);
- *pdest = dest;
-
- return TRUE;
- }
- else if (seen_dot && (g_ascii_isspace (*p) || *p == '[' || *p == '(')) {
- gchar *dest;
-
- dest = rspamd_mempool_alloc (task->task_pool,
- hlen + 1);
- rspamd_strlcpy (dest, begin, hlen + 1);
- *pdest = dest;
-
- return TRUE;
- }
- }
-
- return FALSE;
-}
-
-static gboolean
-rspamd_smtp_received_process_host_tcpinfo (struct rspamd_task *task,
- struct rspamd_received_header *rh,
- const gchar *data,
- gsize len)
-{
- rspamd_inet_addr_t *addr = NULL;
- gboolean ret = FALSE;
-
- if (data[0] == '[') {
- /* Likely Exim version */
-
- const gchar *brace_pos = memchr (data, ']', len);
-
- if (brace_pos) {
- addr = rspamd_parse_inet_address_pool (data + 1,
- brace_pos - data - 1,
- task->task_pool,
- RSPAMD_INET_ADDRESS_PARSE_RECEIVED);
-
- if (addr) {
- rh->addr = addr;
- rh->real_ip = rspamd_mempool_strdup (task->task_pool,
- rspamd_inet_address_to_string (addr));
- rh->from_ip = rh->real_ip;
- }
- }
- }
- else {
- if (g_ascii_isxdigit (data[0])) {
- /* Try to parse IP address */
- addr = rspamd_parse_inet_address_pool (data,
- len, task->task_pool, RSPAMD_INET_ADDRESS_PARSE_RECEIVED);
- if (addr) {
- rh->addr = addr;
- rh->real_ip = rspamd_mempool_strdup (task->task_pool,
- rspamd_inet_address_to_string (addr));
- rh->from_ip = rh->real_ip;
- }
- }
-
- if (!addr) {
- /* Try canonical Postfix version: rdns [ip] */
- const gchar *obrace_pos = memchr (data, '[', len),
- *ebrace_pos, *dend;
-
- if (obrace_pos) {
- dend = data + len;
- ebrace_pos = memchr (obrace_pos, ']', dend - obrace_pos);
-
- if (ebrace_pos) {
- addr = rspamd_parse_inet_address_pool (obrace_pos + 1,
- ebrace_pos - obrace_pos - 1,
- task->task_pool,
- RSPAMD_INET_ADDRESS_PARSE_RECEIVED);
-
- if (addr) {
- rh->addr = addr;
- rh->real_ip = rspamd_mempool_strdup (task->task_pool,
- rspamd_inet_address_to_string (addr));
- rh->from_ip = rh->real_ip;
-
- /* Process with rDNS */
- if (rspamd_smtp_received_process_rdns (task,
- data,
- obrace_pos - data,
- &rh->real_hostname)) {
- ret = TRUE;
- }
- }
- }
- }
- else {
- /* Hostname or some crap, sigh... */
- if (rspamd_smtp_received_process_rdns (task,
- data,
- len,
- &rh->real_hostname)) {
- ret = TRUE;
- }
- }
- }
- }
-
- return ret;
-}
-
-static void
-rspamd_smtp_received_process_from (struct rspamd_task *task,
- struct rspamd_received_part *rpart,
- struct rspamd_received_header *rh)
-{
- if (rpart->dlen > 0) {
- /* We have seen multiple cases:
- * - [ip] (hostname/unknown [real_ip])
- * - helo (hostname/unknown [real_ip])
- * - [ip]
- * - hostname
- * - hostname ([ip]:port helo=xxx)
- * Maybe more...
- */
- gboolean seen_ip_in_data = FALSE;
-
- if (rpart->head_comment && rpart->head_comment->dlen > 0) {
- /* We can have info within comment as part of RFC */
- rspamd_smtp_received_process_host_tcpinfo (
- task, rh,
- rpart->head_comment->data, rpart->head_comment->dlen);
- }
-
- if (!rh->real_ip) {
- if (rpart->data[0] == '[') {
- /* No comment, just something that looks like SMTP IP */
- const gchar *brace_pos = memchr (rpart->data, ']', rpart->dlen);
- rspamd_inet_addr_t *addr;
-
- if (brace_pos) {
- addr = rspamd_parse_inet_address_pool (rpart->data + 1,
- brace_pos - rpart->data - 1,
- task->task_pool,
- RSPAMD_INET_ADDRESS_PARSE_RECEIVED);
-
- if (addr) {
- seen_ip_in_data = TRUE;
- rh->addr = addr;
- rh->real_ip = rspamd_mempool_strdup (task->task_pool,
- rspamd_inet_address_to_string (addr));
- rh->from_ip = rh->real_ip;
- }
- }
- }
- else if (g_ascii_isxdigit (rpart->data[0])) {
- /* Try to parse IP address */
- rspamd_inet_addr_t *addr;
- addr = rspamd_parse_inet_address_pool (rpart->data,
- rpart->dlen, task->task_pool,
- RSPAMD_INET_ADDRESS_PARSE_RECEIVED);
- if (addr) {
- seen_ip_in_data = TRUE;
- rh->addr = addr;
- rh->real_ip = rspamd_mempool_strdup (task->task_pool,
- rspamd_inet_address_to_string (addr));
- rh->from_ip = rh->real_ip;
- }
- }
- }
-
- if (!seen_ip_in_data) {
- if (rh->real_ip) {
- /* Get anounced hostname (usually helo) */
- rspamd_smtp_received_process_rdns (task,
- rpart->data,
- rpart->dlen,
- &rh->from_hostname);
- }
- else {
- rspamd_smtp_received_process_host_tcpinfo (task,
- rh, rpart->data, rpart->dlen);
- }
- }
- }
- else {
- /* rpart->dlen = 0 */
-
- if (rpart->head_comment && rpart->head_comment->dlen > 0) {
- rspamd_smtp_received_process_host_tcpinfo (task,
- rh,
- rpart->head_comment->data,
- rpart->head_comment->dlen);
- }
- }
-}
-
-int
-rspamd_smtp_received_parse (struct rspamd_task *task,
- const char *data,
- size_t len,
- struct rspamd_received_header *rh)
-{
- goffset date_pos = -1;
- struct rspamd_received_part *head, *cur;
- rspamd_ftok_t t1, t2;
-
- head = rspamd_smtp_received_spill (task, data, len, &date_pos);
-
- if (head == NULL) {
- return -1;
- }
-
- rh->flags = RSPAMD_RECEIVED_UNKNOWN;
-
- DL_FOREACH (head, cur) {
- switch (cur->type) {
- case RSPAMD_RECEIVED_PART_FROM:
- rspamd_smtp_received_process_from (task, cur, rh);
- break;
- case RSPAMD_RECEIVED_PART_BY:
- rspamd_smtp_received_process_rdns (task,
- cur->data,
- cur->dlen,
- &rh->by_hostname);
- break;
- case RSPAMD_RECEIVED_PART_WITH:
- t1.begin = cur->data;
- t1.len = cur->dlen;
-
- if (t1.len > 0) {
- RSPAMD_FTOK_ASSIGN (&t2, "smtp");
-
- if (rspamd_ftok_cmp (&t1, &t2) == 0) {
- rh->flags = RSPAMD_RECEIVED_SMTP;
- }
-
- RSPAMD_FTOK_ASSIGN (&t2, "esmtp");
-
- if (rspamd_ftok_starts_with (&t1, &t2)) {
- /*
- * esmtp, esmtps, esmtpsa
- */
- if (t1.len == t2.len + 1) {
- if (t1.begin[t2.len] == 'a') {
- rh->flags = RSPAMD_RECEIVED_ESMTPA;
- rh->flags |= RSPAMD_RECEIVED_FLAG_AUTHENTICATED;
- }
- else if (t1.begin[t2.len] == 's') {
- rh->flags = RSPAMD_RECEIVED_ESMTPS;
- rh->flags |= RSPAMD_RECEIVED_FLAG_SSL;
- }
- continue;
- }
- else if (t1.len == t2.len + 2) {
- if (t1.begin[t2.len] == 's' &&
- t1.begin[t2.len + 1] == 'a') {
- rh->flags = RSPAMD_RECEIVED_ESMTPSA;
- rh->flags |= RSPAMD_RECEIVED_FLAG_AUTHENTICATED;
- rh->flags |= RSPAMD_RECEIVED_FLAG_SSL;
- }
- continue;
- }
- else if (t1.len == t2.len) {
- rh->flags = RSPAMD_RECEIVED_ESMTP;
- continue;
- }
- }
-
- RSPAMD_FTOK_ASSIGN (&t2, "lmtp");
-
- if (rspamd_ftok_cmp (&t1, &t2) == 0) {
- rh->flags = RSPAMD_RECEIVED_LMTP;
- continue;
- }
-
- RSPAMD_FTOK_ASSIGN (&t2, "imap");
-
- if (rspamd_ftok_cmp (&t1, &t2) == 0) {
- rh->flags = RSPAMD_RECEIVED_IMAP;
- continue;
- }
-
- RSPAMD_FTOK_ASSIGN (&t2, "local");
-
- if (rspamd_ftok_cmp (&t1, &t2) == 0) {
- rh->flags = RSPAMD_RECEIVED_LOCAL;
- continue;
- }
-
- RSPAMD_FTOK_ASSIGN (&t2, "http");
-
- if (rspamd_ftok_starts_with (&t1, &t2)) {
- if (t1.len == t2.len + 1) {
- if (t1.begin[t2.len] == 's') {
- rh->flags = RSPAMD_RECEIVED_HTTP;
- rh->flags |= RSPAMD_RECEIVED_FLAG_SSL;
- }
- }
- else if (t1.len == t2.len) {
- rh->flags = RSPAMD_RECEIVED_HTTP;
- }
-
- continue;
- }
- }
-
- break;
- case RSPAMD_RECEIVED_PART_FOR:
- rh->for_addr = rspamd_email_address_from_smtp (cur->data, cur->dlen);
-
- if (rh->for_addr) {
- if (rh->for_addr->addr_len > 0) {
- t1.begin = rh->for_addr->addr;
- t1.len = rh->for_addr->addr_len;
- rh->for_mbox = rspamd_mempool_ftokdup (task->task_pool,
- &t1);
- }
-
- rspamd_mempool_add_destructor (task->task_pool,
- (rspamd_mempool_destruct_t)rspamd_email_address_free,
- rh->for_addr);
- }
- break;
- default:
- /* Do nothing */
- break;
- }
- }
-
- if (rh->real_ip && !rh->from_ip) {
- rh->from_ip = rh->real_ip;
- }
-
- if (rh->real_hostname && !rh->from_hostname) {
- rh->from_hostname = rh->real_hostname;
- }
-
- if (date_pos > 0 && date_pos < len) {
- rh->timestamp = rspamd_parse_smtp_date (data + date_pos,
- len - date_pos, NULL);
- }
-
- return 0;
-}
-
struct rspamd_mime_header *
rspamd_message_get_header_from_hash (struct rspamd_mime_headers_table *hdrs,
const gchar *field,
diff --git a/src/libmime/mime_headers.h b/src/libmime/mime_headers.h
index f24b0d6c6..07a64c31e 100644
--- a/src/libmime/mime_headers.h
+++ b/src/libmime/mime_headers.h
@@ -72,52 +72,6 @@ struct rspamd_mime_header {
struct rspamd_mime_headers_table;
-enum rspamd_received_type {
- RSPAMD_RECEIVED_SMTP = 1u << 0u,
- RSPAMD_RECEIVED_ESMTP = 1u << 1u,
- RSPAMD_RECEIVED_ESMTPA = 1u << 2u,
- RSPAMD_RECEIVED_ESMTPS = 1u << 3u,
- RSPAMD_RECEIVED_ESMTPSA = 1u << 4u,
- RSPAMD_RECEIVED_LMTP = 1u << 5u,
- RSPAMD_RECEIVED_IMAP = 1u << 6u,
- RSPAMD_RECEIVED_LOCAL = 1u << 7u,
- RSPAMD_RECEIVED_HTTP = 1u << 8u,
- RSPAMD_RECEIVED_MAPI = 1u << 9u,
- RSPAMD_RECEIVED_UNKNOWN = 1u << 10u,
- RSPAMD_RECEIVED_FLAG_ARTIFICIAL = (1u << 11u),
- RSPAMD_RECEIVED_FLAG_SSL = (1u << 12u),
- RSPAMD_RECEIVED_FLAG_AUTHENTICATED = (1u << 13u),
-};
-
-#define RSPAMD_RECEIVED_FLAG_TYPE_MASK (RSPAMD_RECEIVED_SMTP| \
- RSPAMD_RECEIVED_ESMTP| \
- RSPAMD_RECEIVED_ESMTPA| \
- RSPAMD_RECEIVED_ESMTPS| \
- RSPAMD_RECEIVED_ESMTPSA| \
- RSPAMD_RECEIVED_LMTP| \
- RSPAMD_RECEIVED_IMAP| \
- RSPAMD_RECEIVED_LOCAL| \
- RSPAMD_RECEIVED_HTTP| \
- RSPAMD_RECEIVED_MAPI| \
- RSPAMD_RECEIVED_UNKNOWN)
-
-struct rspamd_email_address;
-
-struct rspamd_received_header {
- const gchar *from_hostname;
- const gchar *from_ip;
- const gchar *real_hostname;
- const gchar *real_ip;
- const gchar *by_hostname;
- const gchar *for_mbox;
- struct rspamd_email_address *for_addr;
- rspamd_inet_addr_t *addr;
- struct rspamd_mime_header *hdr;
- time_t timestamp;
- gint flags; /* See enum rspamd_received_type */
- struct rspamd_received_header *prev, *next;
-};
-
/**
* Process headers and store them in `target`
* @param task
diff --git a/src/libmime/received.cxx b/src/libmime/received.cxx
new file mode 100644
index 000000000..78c9f1841
--- /dev/null
+++ b/src/libmime/received.cxx
@@ -0,0 +1,745 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "config.h"
+#include "received.h"
+#include "libserver/task.h"
+#include "libserver/url.h"
+#include "mime_string.hxx"
+#include "smtp_parsers.h"
+#include "message.h"
+
+#include <vector>
+#include <string_view>
+#include <utility>
+#include "frozen/string.h"
+#include "frozen/unordered_map.h"
+
+namespace rspamd::mime {
+
+/**
+ * Kind of a clause found inside a single Received header.
+ * received_spill() tags every parsed part with one of these values.
+ */
+enum class received_part_type {
+ /* Clause following the leading `from` keyword */
+ RSPAMD_RECEIVED_PART_FROM,
+ /* Clause following the `by` keyword */
+ RSPAMD_RECEIVED_PART_BY,
+ /* Clause following the `for` keyword */
+ RSPAMD_RECEIVED_PART_FOR,
+ /* Clause following the `with` keyword */
+ RSPAMD_RECEIVED_PART_WITH,
+ /* Clause following the `id` keyword */
+ RSPAMD_RECEIVED_PART_ID,
+ /* Anything else; received_process_part() stores no data/comments for it */
+ RSPAMD_RECEIVED_PART_UNKNOWN,
+};
+
+/**
+ * Character filter handed to the mime_string members used by the received
+ * parser: printable code points are returned lowercased, every other code
+ * point is mapped to 0.
+ * @param uc input code point
+ * @return lowercased code point or 0 for non-printable input
+ */
+static inline auto
+received_char_filter(UChar32 uc) -> UChar32
+{
+	return u_isprint(uc) ? u_tolower(uc) : 0;
+}
+
+
+/**
+ * Parsed representation of a single Received header.
+ *
+ * Ownership: `for_addr` is owned by this object and released with
+ * rspamd_email_address_free() in the destructor. Because of that raw owning
+ * pointer the type is move-only: an implicit copy (for example the one made
+ * when std::vector<received_header> reallocates) would leave two objects
+ * freeing the same address.
+ */
+struct received_header {
+	mime_string from_hostname;
+	/* Non-owning view; the parser points it at `real_ip` via as_view() */
+	std::string_view from_ip;
+	mime_string real_hostname;
+	mime_string real_ip;
+	mime_string by_hostname;
+	/* Non-owning view, empty by default */
+	std::string_view for_mbox;
+	struct rspamd_email_address *for_addr = nullptr;
+	rspamd_inet_addr_t *addr = nullptr;
+	struct rspamd_mime_header *hdr = nullptr;
+	time_t timestamp = 0;
+	int flags = 0; /* See enum rspamd_received_type */
+
+	/*
+	 * Only the mime_string members take the character filter; the
+	 * std::string_view members cannot be constructed from a function and are
+	 * simply default-initialised (empty).
+	 */
+	received_header() noexcept
+			: from_hostname(received_char_filter),
+			  real_hostname(received_char_filter),
+			  real_ip(received_char_filter),
+			  by_hostname(received_char_filter) {}
+
+	/* We hold a raw owning C pointer, so copying is explicitly disabled */
+	received_header(const received_header &) = delete;
+	received_header &operator=(const received_header &) = delete;
+
+	/*
+	 * Move transfers ownership of `for_addr` and leaves the source with a
+	 * null pointer so that its destructor becomes a no-op.
+	 *
+	 * NOTE(review): `from_ip`/`for_mbox` are copied as plain views. If a view
+	 * refers to storage held inside the moved-from object (e.g. a short
+	 * `real_ip` kept in an in-object buffer), it may dangle after the move -
+	 * TODO confirm against mime_string's storage and re-point if needed.
+	 */
+	received_header(received_header &&other) noexcept
+			: from_hostname(std::move(other.from_hostname)),
+			  from_ip(other.from_ip),
+			  real_hostname(std::move(other.real_hostname)),
+			  real_ip(std::move(other.real_ip)),
+			  by_hostname(std::move(other.by_hostname)),
+			  for_mbox(other.for_mbox),
+			  for_addr(std::exchange(other.for_addr, nullptr)),
+			  addr(other.addr),
+			  hdr(other.hdr),
+			  timestamp(other.timestamp),
+			  flags(other.flags) {}
+
+	~received_header() {
+		if (for_addr) {
+			rspamd_email_address_free(for_addr);
+		}
+	}
+};
+
+/**
+ * Container of all received headers parsed for one task.
+ *
+ * The constructor registers a destructor on the task memory pool that calls
+ * `delete` on this object, so instances must be created with `new` and must
+ * not be deleted manually.
+ */
+class received_header_chain {
+public:
+	explicit received_header_chain(struct rspamd_task *_task) : task(_task) {
+		/* Pre-size for a couple of headers to avoid an early reallocation */
+		headers.reserve(2);
+		rspamd_mempool_add_destructor(task->task_pool,
+				received_header_chain::received_header_chain_pool_dtor, this);
+	}
+
+	/**
+	 * Appends a default-constructed header and returns a reference to it.
+	 * The reference may be invalidated by a subsequent call, as the
+	 * underlying std::vector is allowed to reallocate.
+	 */
+	auto new_received() -> received_header & {
+		return headers.emplace_back();
+	}
+private:
+	/* Memory pool callback: destroys the chain registered in the constructor */
+	static auto received_header_chain_pool_dtor(void *ptr) -> void {
+		auto *chain = static_cast<received_header_chain *>(ptr);
+		delete chain;
+	}
+	std::vector<received_header> headers;
+	struct rspamd_task *task;
+};
+
+/**
+ * One clause of a Received header as produced by received_process_part():
+ * the clause kind, its accumulated data and the parenthesised comments that
+ * were found inside it.
+ */
+struct received_part {
+ received_part_type type;
+ /* Clause payload; filtered through received_char_filter */
+ mime_string data;
+ /* Contents of `( ... )` comments, in the order they were closed */
+ std::vector<mime_string> comments;
+
+ explicit received_part(received_part_type t)
+ : type(t),
+ data(received_char_filter) {}
+};
+
+/**
+ * Appends `len` bytes starting at `begin` to `dest` and then trims spaces and
+ * tabs from `dest`. Does nothing when `len` is zero.
+ * @param task current task (not used by this helper)
+ * @param begin start of the chunk to append
+ * @param len chunk length in bytes
+ * @param dest string that accumulates the data
+ */
+static inline auto
+received_part_set_or_append(struct rspamd_task *task,
+		const gchar *begin,
+		gsize len,
+		mime_string &dest) -> void
+{
+	if (len > 0) {
+		dest.append(begin, len);
+		dest.trim(" \t");
+	}
+}
+
+/**
+ * Parses one clause of a Received header (the text that follows a keyword
+ * such as `from` or `by`) into `npart`.
+ *
+ * Plain tokens are appended to `npart.data`, the contents of parenthesised
+ * comments are pushed to `npart.comments` (nested parentheses are balanced by
+ * counting). For RSPAMD_RECEIVED_PART_UNKNOWN nothing is stored, the input is
+ * only skipped. Parsing of the clause stops at `;` or when a new token starts
+ * after some data has already been collected; for `from` clauses a single
+ * extra `[...]` token is still accepted as part of the data.
+ *
+ * @param task current task, forwarded to received_part_set_or_append()
+ * @param data input chunk beginning right after the keyword
+ * @param type kind of the clause being parsed
+ * @param last set to the offset inside `data` where parsing stopped
+ * (only written when true is returned)
+ * @param npart destination part
+ * @return true if the clause was processed, false if nothing was consumed or
+ * the input ended inside a comment or inside a `[...]` token
+ */
+static auto
+received_process_part(struct rspamd_task *task,
+ const std::string_view &data,
+ received_part_type type,
+ std::ptrdiff_t &last,
+ received_part &npart) -> bool
+{
+ auto obraces = 0, ebraces = 0;
+ auto seen_tcpinfo = false;
+ enum _parse_state {
+ skip_spaces,
+ in_comment,
+ read_data,
+ read_tcpinfo,
+ all_done
+ } state, next_state;
+
+ /* In this function, we just process comments and data separately */
+ /* p is the cursor, c marks the start of the current token/comment */
+ const auto *p = data.data();
+ const auto *end = p + data.size();
+ const auto *c = p;
+
+ state = skip_spaces;
+ next_state = read_data;
+
+ while (p < end) {
+ switch (state) {
+ case skip_spaces:
+ if (!g_ascii_isspace(*p)) {
+ /* Do not advance: the new state re-examines this character */
+ c = p;
+ state = next_state;
+ }
+ else {
+ p++;
+ }
+ break;
+ case in_comment:
+ if (*p == '(') {
+ obraces++;
+ }
+ else if (*p == ')') {
+ ebraces++;
+
+ /* Comment is closed once closing braces catch up with opening ones */
+ if (ebraces >= obraces) {
+ if (type != received_part_type::RSPAMD_RECEIVED_PART_UNKNOWN) {
+ if (p > c) {
+ npart.comments.emplace_back(received_char_filter);
+ auto &comment = npart.comments.back();
+ received_part_set_or_append(task,
+ c, p - c,
+ comment);
+ }
+ }
+
+ p++;
+ c = p;
+ state = skip_spaces;
+ next_state = read_data;
+
+ continue;
+ }
+ }
+
+ p++;
+ break;
+ case read_data:
+ if (*p == '(') {
+ /* Flush the token collected so far and enter the comment */
+ if (p > c) {
+ if (type != received_part_type::RSPAMD_RECEIVED_PART_UNKNOWN) {
+ received_part_set_or_append(task,
+ c, p - c,
+ npart.data);
+ }
+ }
+
+ state = in_comment;
+ obraces = 1;
+ ebraces = 0;
+ p++;
+ c = p;
+ }
+ else if (g_ascii_isspace (*p)) {
+ /* Token ended at whitespace */
+ if (p > c) {
+ if (type != received_part_type::RSPAMD_RECEIVED_PART_UNKNOWN) {
+ received_part_set_or_append(task,
+ c, p - c,
+ npart.data);
+ }
+ }
+
+ state = skip_spaces;
+ next_state = read_data;
+ c = p;
+ }
+ else if (*p == ';') {
+ /* It is actually delimiter of date part if not in the comments */
+ if (p > c) {
+ if (type != received_part_type::RSPAMD_RECEIVED_PART_UNKNOWN) {
+ received_part_set_or_append(task,
+ c, p - c,
+ npart.data);
+ }
+ }
+
+ /* The `;` itself is not consumed, so the caller sees it at `last` */
+ state = all_done;
+ continue;
+ }
+ else if (npart.data.size() > 0) {
+ /* We have already received data and find something with no ( */
+ if (!seen_tcpinfo && type == received_part_type::RSPAMD_RECEIVED_PART_FROM) {
+ /* Check if we have something special here, such as TCPinfo */
+ if (*c == '[') {
+ state = read_tcpinfo;
+ p++;
+ }
+ else {
+ state = all_done;
+ continue;
+ }
+ }
+ else {
+ state = all_done;
+ continue;
+ }
+ }
+ else {
+ p++;
+ }
+ break;
+ case read_tcpinfo:
+ if (*p == ']') {
+ /* Append the whole `[...]` token, closing bracket included */
+ received_part_set_or_append(task,
+ c, p - c + 1,
+ npart.data);
+ seen_tcpinfo = TRUE;
+ state = skip_spaces;
+ next_state = read_data;
+ c = p;
+ }
+ p++;
+ break;
+ case all_done:
+ if (p > data.data()) {
+ last = p - data.data();
+ return true;
+ }
+ else {
+ /* Empty element */
+ return false;
+ }
+ break;
+ }
+ }
+
+ /* Leftover */
+ /* Input exhausted: only a pending token or trailing spaces count as success */
+ switch (state) {
+ case read_data:
+ if (p > c) {
+ if (type != received_part_type::RSPAMD_RECEIVED_PART_UNKNOWN) {
+ received_part_set_or_append(task,
+ c, p - c,
+ npart.data);
+ }
+
+ last = p - data.data();
+
+ return true;
+ }
+ break;
+ case skip_spaces:
+ if (p > data.data()) {
+ last = p - data.data();
+
+ return true;
+ }
+ /* Otherwise falls through to default (no-op) */
+ default:
+ break;
+ }
+
+ return false;
+}
+
+/**
+ * Compares the first N characters of `in`, lowercased through `lc_map`,
+ * with the first N characters of `lit`.
+ * The caller must guarantee that `in` has at least N readable characters.
+ * @tparam N number of characters to compare
+ * @param lit lowercase literal to compare against
+ * @param in input characters
+ * @return true if all N characters match
+ */
+template <std::size_t N>
+constexpr auto lit_compare_lowercase(const char lit[N], const char *in) -> bool
+{
+	std::size_t idx = 0;
+
+	while (idx < N) {
+		if (lc_map[(unsigned char)in[idx]] != lit[idx]) {
+			return false;
+		}
+
+		idx++;
+	}
+
+	return true;
+}
+
+/**
+ * Splits a raw Received header value into typed parts.
+ *
+ * The value must start (after optional whitespace) with the `from` keyword,
+ * compared case-insensitively; otherwise an empty vector is returned. An
+ * optional `by` clause is expected right after the `from` clause, then any
+ * sequence of `with`, `for`, `id` or unknown clauses is read until `;`.
+ *
+ * @param task current task, forwarded to received_process_part()
+ * @param in raw header value
+ * @param date_pos set to the offset just after the `;` date separator when
+ * one is found; left untouched otherwise
+ * @return parsed parts; empty if the header does not start with `from`, if
+ * the `from`/`by` clause cannot be parsed, or if the input ends inside an
+ * unrecognised token
+ */
+static auto
+received_spill(struct rspamd_task *task,
+ const std::string_view &in,
+ std::ptrdiff_t &date_pos) -> std::vector<received_part>
+{
+ std::vector<received_part> parts;
+ /* Offset consumed by the last successful received_process_part() call */
+ std::ptrdiff_t pos = 0;
+
+ const auto *p = in.data();
+ const auto *end = p + in.size();
+
+ while (p < end && g_ascii_isspace (*p)) {
+ p++;
+ }
+
+ auto len = end - p;
+
+ /* Ignore all received but those started from from part */
+ if (len <= 4 || !lit_compare_lowercase<4>("from", p)) {
+ return {};
+ }
+
+ p += sizeof("from") - 1;
+
+ /*
+ * Parses a clause of kind `what` starting at the current `p`; on success
+ * the part is kept in `parts` and `pos` holds the consumed length, on
+ * failure the tentative part is dropped again.
+ */
+ auto maybe_process_part = [&](received_part_type what) -> bool {
+ parts.emplace_back(what);
+ auto &rcvd_part = parts.back();
+ auto chunk = std::string_view{p, (std::size_t)(end - p)};
+
+ if (!received_process_part(task, chunk, what, pos, rcvd_part)) {
+ parts.pop_back();
+
+ return false;
+ }
+
+ return true;
+ };
+
+ /* We can now store from part */
+ if (!maybe_process_part(received_part_type::RSPAMD_RECEIVED_PART_FROM)) {
+ return {};
+ }
+
+ g_assert (pos != 0);
+ p += pos;
+ len = end > p ? end - p : 0;
+
+ if (len > 2 && lit_compare_lowercase<2>("by", p)) {
+ p += sizeof("by") - 1;
+
+ if (!maybe_process_part(received_part_type::RSPAMD_RECEIVED_PART_BY)) {
+ return {};
+ }
+
+ g_assert (pos != 0);
+ p += pos;
+ len = end > p ? end - p : 0;
+ }
+
+ while (p < end) {
+ bool got_part = false;
+ if (*p == ';') {
+ /* We are at the date separator, stop here */
+ date_pos = p - in.data() + 1;
+ break;
+ }
+ else {
+ /*
+ * sizeof() of a literal counts the terminating NUL, so each check
+ * requires at least two characters after the keyword.
+ */
+ if (len > sizeof("with") && lit_compare_lowercase<4>("with", p)) {
+ p += sizeof("with") - 1;
+
+ got_part = maybe_process_part(received_part_type::RSPAMD_RECEIVED_PART_WITH);
+ }
+ else if (len > sizeof("for") && lit_compare_lowercase<3>("for", p)) {
+ p += sizeof("for") - 1;
+ got_part = maybe_process_part(received_part_type::RSPAMD_RECEIVED_PART_FOR);
+ }
+ else if (len > sizeof("id") && lit_compare_lowercase<2>("id", p)) {
+ p += sizeof("id") - 1;
+ got_part = maybe_process_part(received_part_type::RSPAMD_RECEIVED_PART_ID);
+ }
+ else {
+ /* Unknown keyword: skip it up to a space, a comment or `;` */
+ while (p < end) {
+ if (!(g_ascii_isspace (*p) || *p == '(' || *p == ';')) {
+ p++;
+ }
+ else {
+ break;
+ }
+ }
+
+ if (p == end) {
+ return {};
+ }
+ else if (*p == ';') {
+ date_pos = p - in.data() + 1;
+ break;
+ }
+ else {
+ got_part = maybe_process_part(received_part_type::RSPAMD_RECEIVED_PART_UNKNOWN);
+ }
+ }
+
+ if (!got_part) {
+ /* Nothing was parsed here: step one character and retry */
+ p++;
+ len = end > p ? end - p : 0;
+ }
+ else {
+ g_assert (pos != 0);
+ p += pos;
+ len = end > p ? end - p : 0;
+ }
+ }
+ }
+
+ return parts;
+}
+
+#define RSPAMD_INET_ADDRESS_PARSE_RECEIVED \
+ (rspamd_inet_address_parse_flags)(RSPAMD_INET_ADDRESS_PARSE_REMOTE|RSPAMD_INET_ADDRESS_PARSE_NO_UNIX)
+
+/**
+ * Extracts a host identification from `in` into `dest`.
+ *
+ * Two forms are recognised:
+ * - `[address]`: the bracketed text is parsed as an inet address and its
+ *   string form is stored (the "pretty" form when the port is non-zero);
+ * - a leading run of non-space characters accepted by rspamd_url_is_domain():
+ *   stored when it covers the whole input, or when it contains a dot and is
+ *   followed by whitespace, `[` or `(`.
+ *
+ * @param task current task, its memory pool is used for address parsing
+ * @param in text to examine
+ * @param dest receives a copy of the address/hostname on success
+ * @return true if `dest` has been assigned, false otherwise
+ */
+static auto
+received_process_rdns(struct rspamd_task *task,
+		const std::string_view &in,
+		mime_string &dest) -> bool
+{
+	if (in.empty()) {
+		return false;
+	}
+
+	const auto *begin = in.data();
+	const auto *end = begin + in.size();
+
+	if (in.front() == '[' && in.back() == ']' && in.size() > 2) {
+		/* We have enclosed ip address */
+		auto *addr = rspamd_parse_inet_address_pool(begin + 1,
+				in.size() - 2,
+				task->task_pool,
+				RSPAMD_INET_ADDRESS_PARSE_RECEIVED);
+
+		if (addr) {
+			const gchar *addr_str = rspamd_inet_address_get_port(addr) != 0
+					? rspamd_inet_address_to_string_pretty(addr)
+					: rspamd_inet_address_to_string(addr);
+
+			dest.assign_copy(std::string_view{addr_str});
+
+			return true;
+		}
+	}
+
+	/* Measure the leading run of hostname-like characters */
+	auto seen_dot = false;
+	const auto *cur = begin;
+
+	for (; cur < end; cur++) {
+		if (g_ascii_isspace(*cur) || !rspamd_url_is_domain(*cur)) {
+			break;
+		}
+
+		if (*cur == '.') {
+			seen_dot = true;
+		}
+	}
+
+	const auto hlen = static_cast<unsigned>(cur - begin);
+
+	if (hlen == 0) {
+		return false;
+	}
+
+	if (cur == end || (seen_dot && (g_ascii_isspace(*cur) || *cur == '[' || *cur == '('))) {
+		/* All data looks like a hostname */
+		dest.assign_copy(std::string_view{in.data(), hlen});
+
+		return true;
+	}
+
+	return false;
+}
+
+/**
+ * Extract connection information (IP address and, where present, the rDNS
+ * hostname) from a `from` clause of a Received header or from its comment.
+ *
+ * Recognised shapes:
+ *  - `[ip]...`   (Exim style): address between the leading '[' and the first ']'
+ *  - `ip`        : bare address, tried only when the first char is a hex digit
+ *  - `rdns [ip]` (Postfix style): hostname followed by a bracketed address
+ *  - `hostname`  : no brackets at all
+ *
+ * Whenever an address is parsed, `rh.addr`, `rh.real_ip` and `rh.from_ip`
+ * are filled in.
+ *
+ * @param task task whose memory pool is used for address parsing
+ * @param rh received header record being filled
+ * @param in text to examine
+ * @return true only when a hostname has been stored in `rh.real_hostname`;
+ *         an address-only match updates `rh` but still returns false
+ */
+static auto
+received_process_host_tcpinfo(struct rspamd_task *task,
+							  received_header &rh,
+							  const std::string_view &in) -> bool
+{
+	if (in.empty()) {
+		return false;
+	}
+
+	/* Parse `what` as an address; on success store it in `rh` */
+	auto try_set_addr = [&](const std::string_view &what) -> bool {
+		auto *addr = rspamd_parse_inet_address_pool(what.data(),
+				what.size(),
+				task->task_pool,
+				RSPAMD_INET_ADDRESS_PARSE_RECEIVED);
+
+		if (addr == nullptr) {
+			return false;
+		}
+
+		rh.addr = addr;
+		rh.real_ip.assign_copy(std::string_view(rspamd_inet_address_to_string(addr)));
+		rh.from_ip = rh.real_ip.as_view();
+
+		return true;
+	};
+
+	if (in[0] == '[') {
+		/* Likely Exim version */
+		auto brace_pos = in.find(']');
+
+		if (brace_pos != std::string_view::npos) {
+			try_set_addr(in.substr(1, brace_pos - 1));
+		}
+
+		/* No hostname can be obtained from this form */
+		return false;
+	}
+
+	if (g_ascii_isxdigit(in[0]) && try_set_addr(in)) {
+		/* The whole input is a bare IP address, no hostname here either */
+		return false;
+	}
+
+	/* Try canonical Postfix version: rdns [ip] */
+	auto obrace_pos = in.find('[');
+
+	if (obrace_pos == std::string_view::npos) {
+		/* Hostname or some crap, sigh... */
+		return received_process_rdns(task, in, rh.real_hostname);
+	}
+
+	/*
+	 * The closing bracket must be searched for *after* the opening one.
+	 * Searching backwards from the opening bracket never finds the matching
+	 * ']' and can even yield a position before '[', which makes the length
+	 * computation below underflow.
+	 */
+	auto ebrace_pos = in.find(']', obrace_pos + 1);
+
+	if (ebrace_pos == std::string_view::npos) {
+		return false;
+	}
+
+	if (!try_set_addr(in.substr(obrace_pos + 1, ebrace_pos - obrace_pos - 1))) {
+		return false;
+	}
+
+	/* Process with rDNS: everything before the opening bracket */
+	return received_process_rdns(task, in.substr(0, obrace_pos), rh.real_hostname);
+}
+
+/**
+ * Fill the sender related fields of `rh` from the `from` part of a
+ * Received header.
+ *
+ * Layouts seen in the wild:
+ *  - [ip] (hostname/unknown [real_ip])
+ *  - helo (hostname/unknown [real_ip])
+ *  - [ip]
+ *  - hostname
+ *  - hostname ([ip]:port helo=xxx)
+ *  and probably more.
+ *
+ * @param task task being processed
+ * @param rpart parsed `from` part (data plus optional comments)
+ * @param rh received header record being filled
+ */
+static void
+received_process_from(struct rspamd_task *task,
+					  const received_part &rpart,
+					  received_header &rh)
+{
+	/* The first comment, if any, is always examined first: it may carry the tcp info */
+	if (!rpart.comments.empty()) {
+		received_process_host_tcpinfo(task, rh, rpart.comments[0].as_view());
+	}
+
+	if (rpart.data.size() == 0) {
+		/* Nothing but the comment to look at */
+		return;
+	}
+
+	const auto data_view = rpart.data.as_view();
+
+	/* If the comment gave no IP, try to do the same with data */
+	if (rh.real_ip.size() == 0 &&
+		received_process_host_tcpinfo(task, rh, data_view)) {
+		return;
+	}
+
+	if (rh.real_ip.size() != 0) {
+		/* Get announced hostname (usually helo) */
+		received_process_rdns(task, data_view, rh.from_hostname);
+	}
+	else {
+		received_process_host_tcpinfo(task, rh, data_view);
+	}
+}
+
+/**
+ * Parse the value of one Received header and append the result to the
+ * received headers chain of the task (the chain is created on first use).
+ *
+ * @param task task being processed
+ * @param in raw header value
+ * @param hdr mime header the value belongs to, stored in the new record
+ * @return false if the value could not be split into parts, true otherwise
+ */
+auto
+received_header_parse(struct rspamd_task *task, const std::string_view &in,
+					  struct rspamd_mime_header *hdr) -> bool
+{
+	/* Protocol token of the `with` clause -> type bit plus modifier flags */
+	static constexpr const auto protos_map = frozen::make_unordered_map<frozen::string, int>({
+			{"smtp", RSPAMD_RECEIVED_SMTP},
+			{"esmtp", RSPAMD_RECEIVED_ESMTP},
+			{"esmtpa", RSPAMD_RECEIVED_ESMTPA | RSPAMD_RECEIVED_FLAG_AUTHENTICATED},
+			{"esmtpsa", RSPAMD_RECEIVED_ESMTPSA | RSPAMD_RECEIVED_FLAG_SSL | RSPAMD_RECEIVED_FLAG_AUTHENTICATED},
+			{"esmtps", RSPAMD_RECEIVED_ESMTPS | RSPAMD_RECEIVED_FLAG_SSL},
+			{"lmtp", RSPAMD_RECEIVED_LMTP},
+			{"imap", RSPAMD_RECEIVED_IMAP},
+			{"imaps", RSPAMD_RECEIVED_IMAP | RSPAMD_RECEIVED_FLAG_SSL},
+			{"http", RSPAMD_RECEIVED_HTTP},
+			{"https", RSPAMD_RECEIVED_HTTP | RSPAMD_RECEIVED_FLAG_SSL},
+			{"local", RSPAMD_RECEIVED_LOCAL}
+	});
+
+	/* Set by received_spill to the offset just past the ';' that precedes the date */
+	std::ptrdiff_t date_pos = -1;
+	auto parts = received_spill(task, in, date_pos);
+
+	if (parts.empty()) {
+		return false;
+	}
+
+	auto *chain = static_cast<received_header_chain *>(MESSAGE_FIELD(task, received_headers));
+
+	if (chain == nullptr) {
+		/* This constructor automatically registers dtor in mempool */
+		chain = new received_header_chain(task);
+		MESSAGE_FIELD(task, received_headers) = (void *)chain;
+	}
+
+	auto &rh = chain->new_received();
+
+	rh.flags = RSPAMD_RECEIVED_UNKNOWN;
+	rh.hdr = hdr;
+
+	for (const auto &part : parts) {
+		const auto kind = part.type;
+
+		if (kind == received_part_type::RSPAMD_RECEIVED_PART_FROM) {
+			received_process_from(task, part, rh);
+		}
+		else if (kind == received_part_type::RSPAMD_RECEIVED_PART_BY) {
+			received_process_rdns(task,
+					part.data.as_view(),
+					rh.by_hostname);
+		}
+		else if (kind == received_part_type::RSPAMD_RECEIVED_PART_WITH) {
+			if (part.data.size() > 0) {
+				auto found = protos_map.find(part.data.as_view());
+
+				if (found != protos_map.end()) {
+					rh.flags = found->second;
+				}
+			}
+		}
+		else if (kind == received_part_type::RSPAMD_RECEIVED_PART_FOR) {
+			rh.for_addr = rspamd_email_address_from_smtp(part.data.data(),
+					part.data.size());
+
+			if (rh.for_addr && rh.for_addr->addr_len > 0) {
+				rh.for_mbox = std::string_view{rh.for_addr->addr,
+						rh.for_addr->addr_len};
+			}
+		}
+		/* Any other part type: do nothing */
+	}
+
+	/* Fall back to the "real" values where the announced ones are missing */
+	if (rh.from_ip.empty() && !rh.real_ip.empty()) {
+		rh.from_ip = rh.real_ip.as_view();
+	}
+
+	if (rh.from_hostname.empty() && !rh.real_hostname.empty()) {
+		rh.from_hostname.assign_copy(rh.real_hostname);
+	}
+
+	if (date_pos > 0 && date_pos < in.size()) {
+		auto date_str = in.substr(date_pos);
+
+		rh.timestamp = rspamd_parse_smtp_date((const unsigned char*)date_str.data(),
+				date_str.size(), nullptr);
+	}
+
+	return true;
+}
+
+} // namespace rspamd::mime
+
+/*
+ * C-callable wrapper: views the `sz` bytes at `data` as a string_view and
+ * forwards to rspamd::mime::received_header_parse, returning its result.
+ */
+bool
+rspamd_received_header_parse(struct rspamd_task *task,
+							 const char *data, size_t sz,
+							 struct rspamd_mime_header *hdr)
+{
+	const std::string_view raw{data, sz};
+
+	return rspamd::mime::received_header_parse(task, raw, hdr);
+}
diff --git a/src/libmime/received.h b/src/libmime/received.h
new file mode 100644
index 000000000..bc3c31e0d
--- /dev/null
+++ b/src/libmime/received.h
@@ -0,0 +1,69 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#ifndef RSPAMD_RECEIVED_H
+#define RSPAMD_RECEIVED_H
+
+#include "config.h"
+#include "libutil/addr.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+enum rspamd_received_type { /* protocol type bits and modifier flags stored in a received header's `flags` */
+ RSPAMD_RECEIVED_SMTP = 1u << 0u, /* `with smtp` */
+ RSPAMD_RECEIVED_ESMTP = 1u << 1u, /* `with esmtp` */
+ RSPAMD_RECEIVED_ESMTPA = 1u << 2u, /* `with esmtpa`; the parser also sets FLAG_AUTHENTICATED */
+ RSPAMD_RECEIVED_ESMTPS = 1u << 3u, /* `with esmtps`; the parser also sets FLAG_SSL */
+ RSPAMD_RECEIVED_ESMTPSA = 1u << 4u, /* `with esmtpsa`; the parser also sets FLAG_SSL and FLAG_AUTHENTICATED */
+ RSPAMD_RECEIVED_LMTP = 1u << 5u, /* `with lmtp` */
+ RSPAMD_RECEIVED_IMAP = 1u << 6u, /* `with imap`, and `with imaps` together with FLAG_SSL */
+ RSPAMD_RECEIVED_LOCAL = 1u << 7u, /* `with local` */
+ RSPAMD_RECEIVED_HTTP = 1u << 8u, /* `with http`, and `with https` together with FLAG_SSL */
+ RSPAMD_RECEIVED_MAPI = 1u << 9u, /* NOTE(review): no `with` token maps to this in received_header_parse - confirm where it is set */
+ RSPAMD_RECEIVED_UNKNOWN = 1u << 10u, /* initial value of `flags`, kept when no known `with` token is found */
+ RSPAMD_RECEIVED_FLAG_ARTIFICIAL = (1u << 11u), /* NOTE(review): not set by the parser itself; presumably marks synthesized headers - confirm */
+ RSPAMD_RECEIVED_FLAG_SSL = (1u << 12u), /* modifier OR-ed in for esmtps, esmtpsa, imaps and https */
+ RSPAMD_RECEIVED_FLAG_AUTHENTICATED = (1u << 13u), /* modifier OR-ed in for esmtpa and esmtpsa */
+};
+
+#define RSPAMD_RECEIVED_FLAG_TYPE_MASK (RSPAMD_RECEIVED_SMTP| \
+ RSPAMD_RECEIVED_ESMTP| \
+ RSPAMD_RECEIVED_ESMTPA| \
+ RSPAMD_RECEIVED_ESMTPS| \
+ RSPAMD_RECEIVED_ESMTPSA| \
+ RSPAMD_RECEIVED_LMTP| \
+ RSPAMD_RECEIVED_IMAP| \
+ RSPAMD_RECEIVED_LOCAL| \
+ RSPAMD_RECEIVED_HTTP| \
+ RSPAMD_RECEIVED_MAPI| \
+ RSPAMD_RECEIVED_UNKNOWN) /* every protocol type bit (1u << 0u .. 1u << 10u); the RSPAMD_RECEIVED_FLAG_* modifier bits are not included */
+
+struct rspamd_email_address;
+struct rspamd_received_header_chain;
+struct rspamd_mime_header;
+/*
+ * Forward-declared so that the prototype below refers to the file-scope type
+ * even when this header is the first one included by a C translation unit.
+ */
+struct rspamd_task;
+
+/**
+ * Parse the value of a single Received header and append the parsed record
+ * to the received headers chain of the task.
+ * @param task task being processed
+ * @param data start of the header value
+ * @param sz length of the header value in bytes
+ * @param hdr mime header the value belongs to
+ * @return false if the value could not be split into parts, true otherwise
+ */
+bool rspamd_received_header_parse(struct rspamd_task *task,
+		const char *data, size_t sz, struct rspamd_mime_header *hdr);
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif //RSPAMD_RECEIVED_H
diff --git a/src/libmime/smtp_parsers.h b/src/libmime/smtp_parsers.h
index 7eff6bf71..0d2c4044d 100644
--- a/src/libmime/smtp_parsers.h
+++ b/src/libmime/smtp_parsers.h
@@ -27,10 +27,6 @@
extern "C" {
#endif
-int rspamd_smtp_received_parse (struct rspamd_task *task,
- const char *data, size_t len,
- struct rspamd_received_header *rh);
-
int rspamd_smtp_addr_parse (const char *data, size_t len,
struct rspamd_email_address *addr);