diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2019-02-06 12:36:10 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2019-02-06 12:36:10 +0000 |
commit | d525194397181456bba6edea4680a10403c3415c (patch) | |
tree | 563b036764abad3be68cf6912613e6c78addf9a0 /src/libmime/mime_headers.c | |
parent | 659ec2d02a354f9132f3a02d27b0b60aa156c551 (diff) | |
download | rspamd-d525194397181456bba6edea4680a10403c3415c.tar.gz rspamd-d525194397181456bba6edea4680a10403c3415c.zip |
[Project] Add spilling machine for received headers
Diffstat (limited to 'src/libmime/mime_headers.c')
-rw-r--r-- | src/libmime/mime_headers.c | 331 |
1 files changed, 331 insertions, 0 deletions
diff --git a/src/libmime/mime_headers.c b/src/libmime/mime_headers.c index 2769ae633..19ad3262e 100644 --- a/src/libmime/mime_headers.c +++ b/src/libmime/mime_headers.c @@ -17,6 +17,7 @@ #include "mime_headers.h" #include "smtp_parsers.h" #include "mime_encoding.h" +#include "contrib/uthash/utlist.h" #include "libserver/mempool_vars_internal.h" #include <unicode/utf8.h> @@ -848,3 +849,333 @@ rspamd_mime_message_id_generate (const gchar *fqdn) return g_string_free (out, FALSE); } + +enum rspamd_received_part_type { + RSPAMD_RECEIVED_PART_FROM, + RSPAMD_RECEIVED_PART_BY, + RSPAMD_RECEIVED_PART_FOR, + RSPAMD_RECEIVED_PART_WITH, + RSPAMD_RECEIVED_PART_UNKNOWN, +}; + +struct rspamd_received_comment { + gchar *data; + gsize dlen; + struct rspamd_received_comment *prev; +}; + +struct rspamd_received_part { + enum rspamd_received_part_type type; + gchar *data; + gsize dlen; + struct rspamd_received_comment *tail_comment; + struct rspamd_received_comment *head_comment; + struct rspamd_received_part *prev, *next; +}; + +static struct rspamd_received_part * +rspamd_smtp_received_process_part (struct rspamd_task *task, + const char *data, + size_t len, + enum rspamd_received_part_type type, + goffset *last) +{ + struct rspamd_received_part *npart; + const guchar *p, *c, *end; + guint obraces = 0, ebraces = 0; + enum _parse_state { + skip_spaces, + in_comment, + read_data, + all_done + } state, next_state; + + npart = rspamd_mempool_alloc0 (task->task_pool, sizeof (*npart)); + npart->type = type; + + /* In this function, we just process comments and data separately */ + p = data; + end = data + len; + c = data; + state = skip_spaces; + next_state = read_data; + + while (p < end) { + switch (state) { + case skip_spaces: + if (!g_ascii_isspace (*p)) { + c = p; + state = next_state; + } + else { + p ++; + } + break; + case in_comment: + if (*p == '(') { + obraces ++; + } + else if (*p == ')') { + ebraces ++; + + if (ebraces >= obraces) { + if (type != RSPAMD_RECEIVED_PART_UNKNOWN) { + if (p > c) { + struct rspamd_received_comment *comment; + + comment = rspamd_mempool_alloc (task->task_pool, + sizeof (*comment)); + + comment->data = rspamd_mempool_alloc (task->task_pool, + p - c); + memcpy (comment->data, c, p - c); + rspamd_str_lc (comment->data, p - c); + comment->dlen = p - c; + + if (!npart->head_comment) { + comment->prev = NULL; + npart->head_comment = comment; + npart->tail_comment = comment; + } + else { + comment->prev = npart->tail_comment; + npart->tail_comment = comment; + } + } + } + + p ++; + c = p; + state = skip_spaces; + next_state = read_data; + + continue; + } + } + + p ++; + break; + case read_data: + if (*p == '(') { + if (p > c) { + if (type != RSPAMD_RECEIVED_PART_UNKNOWN) { + npart->data = rspamd_mempool_alloc (task->task_pool, + p - c); + memcpy (npart->data, c, p - c); + rspamd_str_lc (npart->data, p - c); + npart->dlen = p - c; + } + } + + state = in_comment; + obraces = 1; + ebraces = 0; + p ++; + c = p; + } + else if (g_ascii_isspace (*p)) { + if (p > c) { + if (type != RSPAMD_RECEIVED_PART_UNKNOWN) { + npart->data = rspamd_mempool_alloc (task->task_pool, + p - c); + memcpy (npart->data, c, p - c); + rspamd_str_lc (npart->data, p - c); + npart->dlen = p - c; + } + } + + state = skip_spaces; + next_state = read_data; + c = p; + } + else if (*p == ';') { + /* It is actually delimiter of date part if not in the comments */ + if (p > c) { + if (type != RSPAMD_RECEIVED_PART_UNKNOWN) { + npart->data = rspamd_mempool_alloc (task->task_pool, + p - c); + memcpy (npart->data, c, p - c); + rspamd_str_lc (npart->data, p - c); + npart->dlen = p - c; + } + } + + state = all_done; + continue; + } + else if (npart->dlen > 0) { + /* We have already received data and find something with no ( */ + state = all_done; + continue; + } + else { + p ++; + } + break; + case all_done: + *last = p - (const guchar *)data; + return npart; + break; + } + } + + /* Leftover */ + switch (state) { + case read_data: + if (p > c) { + if (type != RSPAMD_RECEIVED_PART_UNKNOWN) { + npart->data = rspamd_mempool_alloc (task->task_pool, + p - c); + memcpy (npart->data, c, p - c); + rspamd_str_lc (npart->data, p - c); + npart->dlen = p - c; + } + + return npart; + } + break; + case skip_spaces: + return npart; + default: + break; + } + + return NULL; +} + +static struct rspamd_received_part * +rspamd_smtp_received_spill (struct rspamd_task *task, + const char *data, + size_t len, + goffset *date_pos) +{ + const guchar *p, *end; + struct rspamd_received_part *cur_part, *head = NULL; + goffset pos = 0; + + p = data; + end = data + len; + + while (p < end && g_ascii_isspace (*p)) { + p ++; + } + + len = end - p; + + /* Ignore all received but those started from from part */ + if (len <= 4 || (lc_map[p[0]] != 'f' && + lc_map[p[1]] != 'r' && + lc_map[p[2]] != 'o' && + lc_map[p[3]] != 'm')) { + return NULL; + } + + p += sizeof ("from") - 1; + + /* We can now store from part */ + cur_part = rspamd_smtp_received_process_part (task, p, end - p, + RSPAMD_RECEIVED_PART_FROM, &pos); + + if (!cur_part) { + return NULL; + } + + p += pos; + len = end > p ? end - p : 0; + DL_APPEND (head, cur_part); + + + if (len > 2 && (lc_map[p[0]] == 'b' && + lc_map[p[1]] == 'y')) { + p += sizeof ("by") - 1; + + cur_part = rspamd_smtp_received_process_part (task, p, end - p, + RSPAMD_RECEIVED_PART_BY, &pos); + + if (!cur_part) { + return NULL; + } + + p += pos; + len = end > p ? end - p : 0; + DL_APPEND (head, cur_part); + } + + while (p > end) { + if (*p == ';') { + /* We are at the date separator, stop here */ + *date_pos = p - (const guchar *)data + 1; + break; + } + else { + if (len > sizeof ("with") && (lc_map[p[0]] == 'w' && + lc_map[p[1]] == 'i' && + lc_map[p[2]] == 't' && + lc_map[p[3]] == 'h')) { + p += sizeof ("with") - 1; + + cur_part = rspamd_smtp_received_process_part (task, p, end - p, + RSPAMD_RECEIVED_PART_WITH, &pos); + } + else if (len > sizeof ("for") && (lc_map[p[0]] == 'f' && + lc_map[p[1]] == 'o' && + lc_map[p[2]] == 'r')) { + p += sizeof ("for") - 1; + cur_part = rspamd_smtp_received_process_part (task, p, end - p, + RSPAMD_RECEIVED_PART_FOR, &pos); + } + else { + while (p < end) { + if (!(g_ascii_isspace (*p) || *p == '(' || *p == ';')) { + p ++; + } + else { + break; + } + } + + if (p == end) { + return NULL; + } + else if (*p == ';') { + *date_pos = p - (const guchar *)data + 1; + break; + } + else { + cur_part = rspamd_smtp_received_process_part (task, p, end - p, + RSPAMD_RECEIVED_PART_UNKNOWN, &pos); + } + } + + if (!cur_part) { + return NULL; + } + else { + p += pos; + len = end > p ? end - p : 0; + DL_APPEND (head, cur_part); + } + } + } + + return head; +} + +int +rspamd_smtp_received_parse (struct rspamd_task *task, + const char *data, + size_t len, + struct received_header *rh) +{ + const gchar *p, *c, *end; + goffset date_pos = 0; + struct rspamd_received_part *head, *cur; + + head = rspamd_smtp_received_spill (task, data, len, &date_pos); + + if (head == NULL) { + return -1; + } + + return 0; +}
\ No newline at end of file |