From: Vsevolod Stakhov
Date: Wed, 6 Feb 2019 12:36:10 +0000 (+0000)
Subject: [Project] Add spilling machine for received headers
X-Git-Tag: 1.9.0~211
X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=d525194397181456bba6edea4680a10403c3415c;p=rspamd.git

[Project] Add spilling machine for received headers
---
Review fixups folded into this patch (it differs from the commit above):
 * rspamd_smtp_received_spill: the `from` test rejected a header only when
   all four letters mismatched; it now requires all four to match.
 * rspamd_smtp_received_spill: the main loop was guarded by `p > end` and
   never ran; it is now `p < end`.
 * rspamd_smtp_received_process_part: `*last` is now also set when the input
   is exhausted, so the caller no longer advances by a stale offset.
 * Repeated copy-and-lowercase code moved to a static helper; unused locals
   dropped from rspamd_smtp_received_parse; mime_headers.c ends with a
   newline.
 * The <...> include in the first mime_headers.c hunk was lost in extraction
   and is restored as <unicode/utf8.h>; confirm against the tree. The
   post-image blob id of mime_headers.c predates these fixups.

diff --git a/src/libmime/message.h b/src/libmime/message.h
index 19e8b40b5..eb260cd77 100644
--- a/src/libmime/message.h
+++ b/src/libmime/message.h
@@ -125,36 +125,6 @@ struct rspamd_mime_text_part {
 	guint unicode_scripts;
 };
 
-enum rspamd_received_type {
-	RSPAMD_RECEIVED_SMTP = 0,
-	RSPAMD_RECEIVED_ESMTP,
-	RSPAMD_RECEIVED_ESMTPA,
-	RSPAMD_RECEIVED_ESMTPS,
-	RSPAMD_RECEIVED_ESMTPSA,
-	RSPAMD_RECEIVED_LMTP,
-	RSPAMD_RECEIVED_IMAP,
-	RSPAMD_RECEIVED_UNKNOWN
-};
-
-#define RSPAMD_RECEIVED_FLAG_ARTIFICIAL (1 << 0)
-#define RSPAMD_RECEIVED_FLAG_SSL (1 << 1)
-#define RSPAMD_RECEIVED_FLAG_AUTHENTICATED (1 << 2)
-
-struct received_header {
-	gchar *from_hostname;
-	gchar *from_ip;
-	gchar *real_hostname;
-	gchar *real_ip;
-	gchar *by_hostname;
-	gchar *for_mbox;
-	gchar *comment_ip;
-	rspamd_inet_addr_t *addr;
-	struct rspamd_mime_header *hdr;
-	time_t timestamp;
-	enum rspamd_received_type type;
-	gint flags;
-};
-
 /**
  * Parse and pre-process mime message
  * @param task worker_task object
diff --git a/src/libmime/mime_headers.c b/src/libmime/mime_headers.c
index 2769ae633..19ad3262e 100644
--- a/src/libmime/mime_headers.c
+++ b/src/libmime/mime_headers.c
@@ -17,6 +17,7 @@
 #include "mime_headers.h"
 #include "smtp_parsers.h"
 #include "mime_encoding.h"
+#include "contrib/uthash/utlist.h"
 #include "libserver/mempool_vars_internal.h"
 #include <unicode/utf8.h>
 
@@ -848,3 +849,344 @@ rspamd_mime_message_id_generate (const gchar *fqdn)
 
 	return g_string_free (out, FALSE);
 }
+
+/* Kinds of clauses recognised inside a Received header */
+enum rspamd_received_part_type {
+	RSPAMD_RECEIVED_PART_FROM,
+	RSPAMD_RECEIVED_PART_BY,
+	RSPAMD_RECEIVED_PART_FOR,
+	RSPAMD_RECEIVED_PART_WITH,
+	RSPAMD_RECEIVED_PART_UNKNOWN,
+};
+
+/* Lowercased comment text; comments of one part are chained via `prev` */
+struct rspamd_received_comment {
+	gchar *data;
+	gsize dlen;
+	struct rspamd_received_comment *prev;
+};
+
+/* One clause of a Received header: lowercased data plus its comments */
+struct rspamd_received_part {
+	enum rspamd_received_part_type type;
+	gchar *data;
+	gsize dlen;
+	struct rspamd_received_comment *tail_comment;
+	struct rspamd_received_comment *head_comment;
+	struct rspamd_received_part *prev, *next;
+};
+
+/*
+ * Store a lowercased copy of [begin, end) as the data of `npart`.
+ * Nothing is stored for empty ranges or for parts of unknown type.
+ */
+static void
+rspamd_smtp_received_part_set_data (struct rspamd_task *task,
+		struct rspamd_received_part *npart,
+		const guchar *begin,
+		const guchar *end)
+{
+	if (end > begin && npart->type != RSPAMD_RECEIVED_PART_UNKNOWN) {
+		npart->dlen = end - begin;
+		npart->data = rspamd_mempool_alloc (task->task_pool, npart->dlen);
+		memcpy (npart->data, begin, npart->dlen);
+		rspamd_str_lc (npart->data, npart->dlen);
+	}
+}
+
+/*
+ * Parse a single clause of a Received header, separating its data token
+ * from any parenthesised comments.
+ *
+ * @param task task whose memory pool owns all allocations
+ * @param data input positioned right after the clause keyword
+ * @param len number of bytes available in `data`
+ * @param type clause type; nothing is stored for RSPAMD_RECEIVED_PART_UNKNOWN
+ * @param last on success, set to the offset in `data` where parsing stopped
+ * @return new part or NULL if the input ends inside an unterminated comment
+ */
+static struct rspamd_received_part *
+rspamd_smtp_received_process_part (struct rspamd_task *task,
+		const char *data,
+		size_t len,
+		enum rspamd_received_part_type type,
+		goffset *last)
+{
+	struct rspamd_received_part *npart;
+	const guchar *p, *c, *end;
+	guint obraces = 0, ebraces = 0;
+	enum _parse_state {
+		skip_spaces,
+		in_comment,
+		read_data,
+		all_done
+	} state, next_state;
+
+	npart = rspamd_mempool_alloc0 (task->task_pool, sizeof (*npart));
+	npart->type = type;
+
+	/* In this function, we just process comments and data separately */
+	p = (const guchar *)data;
+	end = p + len;
+	c = p;
+	state = skip_spaces;
+	next_state = read_data;
+
+	while (p < end) {
+		switch (state) {
+		case skip_spaces:
+			if (!g_ascii_isspace (*p)) {
+				c = p;
+				state = next_state;
+			}
+			else {
+				p ++;
+			}
+			break;
+		case in_comment:
+			if (*p == '(') {
+				obraces ++;
+			}
+			else if (*p == ')') {
+				ebraces ++;
+
+				if (ebraces >= obraces) {
+					/* Outermost brace closed: store the comment body */
+					if (type != RSPAMD_RECEIVED_PART_UNKNOWN && p > c) {
+						struct rspamd_received_comment *comment;
+
+						comment = rspamd_mempool_alloc (task->task_pool,
+								sizeof (*comment));
+						comment->dlen = p - c;
+						comment->data = rspamd_mempool_alloc (task->task_pool,
+								comment->dlen);
+						memcpy (comment->data, c, comment->dlen);
+						rspamd_str_lc (comment->data, comment->dlen);
+						comment->prev = npart->tail_comment;
+						npart->tail_comment = comment;
+
+						if (!npart->head_comment) {
+							npart->head_comment = comment;
+						}
+					}
+
+					p ++;
+					c = p;
+					state = skip_spaces;
+					next_state = read_data;
+
+					continue;
+				}
+			}
+
+			p ++;
+			break;
+		case read_data:
+			if (*p == '(') {
+				rspamd_smtp_received_part_set_data (task, npart, c, p);
+				state = in_comment;
+				obraces = 1;
+				ebraces = 0;
+				p ++;
+				c = p;
+			}
+			else if (g_ascii_isspace (*p)) {
+				rspamd_smtp_received_part_set_data (task, npart, c, p);
+				state = skip_spaces;
+				next_state = read_data;
+				c = p;
+			}
+			else if (*p == ';') {
+				/* It is actually delimiter of date part if not in the comments */
+				rspamd_smtp_received_part_set_data (task, npart, c, p);
+				state = all_done;
+				continue;
+			}
+			else if (npart->dlen > 0) {
+				/* We have already received data and find something with no ( */
+				state = all_done;
+				continue;
+			}
+			else {
+				p ++;
+			}
+			break;
+		case all_done:
+			*last = p - (const guchar *)data;
+			return npart;
+		}
+	}
+
+	/* Input exhausted: report how far we got so that the caller can advance */
+	switch (state) {
+	case read_data:
+		if (p > c) {
+			rspamd_smtp_received_part_set_data (task, npart, c, p);
+			*last = p - (const guchar *)data;
+
+			return npart;
+		}
+		break;
+	case skip_spaces:
+		*last = p - (const guchar *)data;
+
+		return npart;
+	default:
+		break;
+	}
+
+	return NULL;
+}
+
+/*
+ * Split a Received header into a list of parts (from, by, with, for, ...).
+ *
+ * @param task task whose memory pool owns all allocations
+ * @param data raw header value
+ * @param len length of `data`
+ * @param date_pos set to the offset just past the ';' date separator when
+ * one is found, left untouched otherwise
+ * @return head of the parts list or NULL if the header does not start with
+ * "from" or cannot be split
+ */
+static struct rspamd_received_part *
+rspamd_smtp_received_spill (struct rspamd_task *task,
+		const char *data,
+		size_t len,
+		goffset *date_pos)
+{
+	const guchar *p, *end;
+	struct rspamd_received_part *cur_part, *head = NULL;
+	goffset pos = 0;
+
+	p = (const guchar *)data;
+	end = p + len;
+
+	while (p < end && g_ascii_isspace (*p)) {
+		p ++;
+	}
+
+	len = end - p;
+
+	/* Ignore all received but those starting with the `from` part */
+	if (len <= 4 || !(lc_map[p[0]] == 'f' &&
+			lc_map[p[1]] == 'r' &&
+			lc_map[p[2]] == 'o' &&
+			lc_map[p[3]] == 'm')) {
+		return NULL;
+	}
+
+	p += sizeof ("from") - 1;
+
+	/* We can now store from part */
+	cur_part = rspamd_smtp_received_process_part (task, p, end - p,
+			RSPAMD_RECEIVED_PART_FROM, &pos);
+
+	if (!cur_part) {
+		return NULL;
+	}
+
+	p += pos;
+	len = end > p ? end - p : 0;
+	DL_APPEND (head, cur_part);
+
+	if (len > 2 && (lc_map[p[0]] == 'b' &&
+			lc_map[p[1]] == 'y')) {
+		p += sizeof ("by") - 1;
+
+		cur_part = rspamd_smtp_received_process_part (task, p, end - p,
+				RSPAMD_RECEIVED_PART_BY, &pos);
+
+		if (!cur_part) {
+			return NULL;
+		}
+
+		p += pos;
+		len = end > p ? end - p : 0;
+		DL_APPEND (head, cur_part);
+	}
+
+	while (p < end) {
+		if (*p == ';') {
+			/* We are at the date separator, stop here */
+			*date_pos = p - (const guchar *)data + 1;
+			break;
+		}
+		else {
+			if (len > sizeof ("with") && (lc_map[p[0]] == 'w' &&
+					lc_map[p[1]] == 'i' &&
+					lc_map[p[2]] == 't' &&
+					lc_map[p[3]] == 'h')) {
+				p += sizeof ("with") - 1;
+
+				cur_part = rspamd_smtp_received_process_part (task, p, end - p,
+						RSPAMD_RECEIVED_PART_WITH, &pos);
+			}
+			else if (len > sizeof ("for") && (lc_map[p[0]] == 'f' &&
+					lc_map[p[1]] == 'o' &&
+					lc_map[p[2]] == 'r')) {
+				p += sizeof ("for") - 1;
+				cur_part = rspamd_smtp_received_process_part (task, p, end - p,
+						RSPAMD_RECEIVED_PART_FOR, &pos);
+			}
+			else {
+				while (p < end) {
+					if (!(g_ascii_isspace (*p) || *p == '(' || *p == ';')) {
+						p ++;
+					}
+					else {
+						break;
+					}
+				}
+
+				if (p == end) {
+					return NULL;
+				}
+				else if (*p == ';') {
+					*date_pos = p - (const guchar *)data + 1;
+					break;
+				}
+				else {
+					cur_part = rspamd_smtp_received_process_part (task, p, end - p,
+							RSPAMD_RECEIVED_PART_UNKNOWN, &pos);
+				}
+			}
+
+			if (!cur_part) {
+				return NULL;
+			}
+			else {
+				p += pos;
+				len = end > p ? end - p : 0;
+				DL_APPEND (head, cur_part);
+			}
+		}
+	}
+
+	return head;
+}
+
+/*
+ * Parse a Received header.
+ *
+ * Only the split into parts is performed so far; `rh` is not filled yet.
+ *
+ * @return 0 if the header was split into parts, -1 otherwise
+ */
+int
+rspamd_smtp_received_parse (struct rspamd_task *task,
+		const char *data,
+		size_t len,
+		struct received_header *rh)
+{
+	goffset date_pos = 0;
+	struct rspamd_received_part *head;
+
+	head = rspamd_smtp_received_spill (task, data, len, &date_pos);
+
+	if (head == NULL) {
+		return -1;
+	}
+
+	return 0;
+}
diff --git a/src/libmime/mime_headers.h b/src/libmime/mime_headers.h
index 3c0c23a36..ceed5ab06 100644
--- a/src/libmime/mime_headers.h
+++ b/src/libmime/mime_headers.h
@@ -18,6 +18,7 @@
 
 #include "config.h"
 #include "libutil/mem_pool.h"
+#include "libutil/addr.h"
 
 struct rspamd_task;
 
@@ -55,6 +56,36 @@ struct rspamd_mime_header {
 	gchar *decoded;
 };
 
+enum rspamd_received_type {
+	RSPAMD_RECEIVED_SMTP = 0,
+	RSPAMD_RECEIVED_ESMTP,
+	RSPAMD_RECEIVED_ESMTPA,
+	RSPAMD_RECEIVED_ESMTPS,
+	RSPAMD_RECEIVED_ESMTPSA,
+	RSPAMD_RECEIVED_LMTP,
+	RSPAMD_RECEIVED_IMAP,
+	RSPAMD_RECEIVED_UNKNOWN
+};
+
+#define RSPAMD_RECEIVED_FLAG_ARTIFICIAL (1 << 0)
+#define RSPAMD_RECEIVED_FLAG_SSL (1 << 1)
+#define RSPAMD_RECEIVED_FLAG_AUTHENTICATED (1 << 2)
+
+struct received_header {
+	gchar *from_hostname;
+	gchar *from_ip;
+	gchar *real_hostname;
+	gchar *real_ip;
+	gchar *by_hostname;
+	gchar *for_mbox;
+	gchar *comment_ip;
+	rspamd_inet_addr_t *addr;
+	struct rspamd_mime_header *hdr;
+	time_t timestamp;
+	enum rspamd_received_type type;
+	gint flags;
+};
+
 /**
  * Process headers and store them in `target`
  * @param task
diff --git a/src/libmime/smtp_parsers.h b/src/libmime/smtp_parsers.h
index fdd390f22..6904bece0 100644
--- a/src/libmime/smtp_parsers.h
+++ b/src/libmime/smtp_parsers.h
@@ -34,6 +34,9 @@ rspamd_rfc2047_parser (const gchar *in, gsize len, gint *pencoding,
 	const gchar **charset, gsize *charset_len,
 	const gchar **encoded, gsize *encoded_len);
 
+rspamd_inet_addr_t* rspamd_parse_smtp_ip (const char *data, size_t len,
+		rspamd_mempool_t *pool);
+
 guint64 rspamd_parse_smtp_date (const char *data, size_t len);
 
 #endif /* SRC_LIBMIME_SMTP_PARSERS_H_ */