[Project] Add spilling machine for received headers

author: Vsevolod Stakhov <vsevolod@highsecure.ru> 2019-02-06 12:36:10 +0000
committer: Vsevolod Stakhov <vsevolod@highsecure.ru> 2019-02-06 12:36:10 +0000
commit: d525194397181456bba6edea4680a10403c3415c (patch)
tree: 563b036764abad3be68cf6912613e6c78addf9a0 /src/libmime/mime_headers.c
parent: 659ec2d02a354f9132f3a02d27b0b60aa156c551 (diff)
download: rspamd-d525194397181456bba6edea4680a10403c3415c.tar.gz
rspamd-d525194397181456bba6edea4680a10403c3415c.zip
1 files changed, 331 insertions, 0 deletions
diff --git a/src/libmime/mime_headers.c b/src/libmime/mime_headers.c
index 2769ae633..19ad3262e 100644
--- a/src/libmime/mime_headers.c
+++ b/src/libmime/mime_headers.c
@@ -17,6 +17,7 @@
 #include "mime_headers.h"
 #include "smtp_parsers.h"
 #include "mime_encoding.h"
+#include "contrib/uthash/utlist.h"
 #include "libserver/mempool_vars_internal.h"
 #include <unicode/utf8.h>
 
@@ -848,3 +849,333 @@ rspamd_mime_message_id_generate (const gchar *fqdn)
 
 	return g_string_free (out, FALSE);
 }
+
+enum rspamd_received_part_type {
+	RSPAMD_RECEIVED_PART_FROM,
+	RSPAMD_RECEIVED_PART_BY,
+	RSPAMD_RECEIVED_PART_FOR,
+	RSPAMD_RECEIVED_PART_WITH,
+	RSPAMD_RECEIVED_PART_UNKNOWN,
+};
+
+struct rspamd_received_comment {
+	gchar *data;
+	gsize dlen;
+	struct rspamd_received_comment *prev;
+};
+
+struct rspamd_received_part {
+	enum rspamd_received_part_type type;
+	gchar *data;
+	gsize dlen;
+	struct rspamd_received_comment *tail_comment;
+	struct rspamd_received_comment *head_comment;
+	struct rspamd_received_part *prev, *next;
+};
+
+static struct rspamd_received_part *
+rspamd_smtp_received_process_part (struct rspamd_task *task,
+								   const char *data,
+								   size_t len,
+								   enum rspamd_received_part_type type,
+								   goffset *last)
+{
+	struct rspamd_received_part *npart;
+	const guchar *p, *c, *end;
+	guint obraces = 0, ebraces = 0;
+	enum _parse_state {
+		skip_spaces,
+		in_comment,
+		read_data,
+		all_done
+	} state, next_state;
+
+	npart = rspamd_mempool_alloc0 (task->task_pool, sizeof (*npart));
+	npart->type = type;
+
+	/* In this function, we just process comments and data separately */
+	p = data;
+	end = data + len;
+	c = data;
+	state = skip_spaces;
+	next_state = read_data;
+
+	while (p < end) {
+		switch (state) {
+		case skip_spaces:
+			if (!g_ascii_isspace (*p)) {
+				c = p;
+				state = next_state;
+			}
+			else {
+				p ++;
+			}
+			break;
+		case in_comment:
+			if (*p == '(') {
+				obraces ++;
+			}
+			else if (*p == ')') {
+				ebraces ++;
+
+				if (ebraces >= obraces) {
+					if (type != RSPAMD_RECEIVED_PART_UNKNOWN) {
+						if (p > c) {
+							struct rspamd_received_comment *comment;
+
+							comment = rspamd_mempool_alloc (task->task_pool,
+									sizeof (*comment));
+
+							comment->data = rspamd_mempool_alloc (task->task_pool,
+									p - c);
+							memcpy (comment->data, c, p - c);
+							rspamd_str_lc (comment->data, p - c);
+							comment->dlen = p - c;
+
+							if (!npart->head_comment) {
+								comment->prev = NULL;
+								npart->head_comment = comment;
+								npart->tail_comment = comment;
+							}
+							else {
+								comment->prev = npart->tail_comment;
+								npart->tail_comment = comment;
+							}
+						}
+					}
+
+					p ++;
+					c = p;
+					state = skip_spaces;
+					next_state = read_data;
+
+					continue;
+				}
+			}
+
+			p ++;
+			break;
+		case read_data:
+			if (*p == '(') {
+				if (p > c) {
+					if (type != RSPAMD_RECEIVED_PART_UNKNOWN) {
+						npart->data = rspamd_mempool_alloc (task->task_pool,
+								p - c);
+						memcpy (npart->data, c, p - c);
+						rspamd_str_lc (npart->data, p - c);
+						npart->dlen = p - c;
+					}
+				}
+
+				state = in_comment;
+				obraces = 1;
+				ebraces = 0;
+				p ++;
+				c = p;
+			}
+			else if (g_ascii_isspace (*p)) {
+				if (p > c) {
+					if (type != RSPAMD_RECEIVED_PART_UNKNOWN) {
+						npart->data = rspamd_mempool_alloc (task->task_pool,
+								p - c);
+						memcpy (npart->data, c, p - c);
+						rspamd_str_lc (npart->data, p - c);
+						npart->dlen = p - c;
+					}
+				}
+
+				state = skip_spaces;
+				next_state = read_data;
+				c = p;
+			}
+			else if (*p == ';') {
+				/* It is actually delimiter of date part if not in the comments */
+				if (p > c) {
+					if (type != RSPAMD_RECEIVED_PART_UNKNOWN) {
+						npart->data = rspamd_mempool_alloc (task->task_pool,
+								p - c);
+						memcpy (npart->data, c, p - c);
+						rspamd_str_lc (npart->data, p - c);
+						npart->dlen = p - c;
+					}
+				}
+
+				state = all_done;
+				continue;
+			}
+			else if (npart->dlen > 0) {
+				/* We have already received data and find something with no ( */
+				state = all_done;
+				continue;
+			}
+			else {
+				p ++;
+			}
+			break;
+		case all_done:
+			*last = p - (const guchar *)data;
+			return npart;
+			break;
+		}
+	}
+
+	/* Leftover */
+	switch (state) {
+	case read_data:
+		if (p > c) {
+			if (type != RSPAMD_RECEIVED_PART_UNKNOWN) {
+				npart->data = rspamd_mempool_alloc (task->task_pool,
+						p - c);
+				memcpy (npart->data, c, p - c);
+				rspamd_str_lc (npart->data, p - c);
+				npart->dlen = p - c;
+			}
+
+			return npart;
+		}
+		break;
+	case skip_spaces:
+		return npart;
+	default:
+		break;
+	}
+
+	return NULL;
+}
+
+static struct rspamd_received_part *
+rspamd_smtp_received_spill (struct rspamd_task *task,
+							const char *data,
+							size_t len,
+							goffset *date_pos)
+{
+	const guchar *p, *end;
+	struct rspamd_received_part *cur_part, *head = NULL;
+	goffset pos = 0;
+
+	p = data;
+	end = data + len;
+
+	while (p < end && g_ascii_isspace (*p)) {
+		p ++;
+	}
+
+	len = end - p;
+
+	/* Ignore all received but those started from from part */
+	if (len <= 4 || (lc_map[p[0]] != 'f' &&
+					 lc_map[p[1]] != 'r' &&
+					 lc_map[p[2]] != 'o' &&
+					 lc_map[p[3]] != 'm')) {
+		return NULL;
+	}
+
+	p += sizeof ("from") - 1;
+
+	/* We can now store from part */
+	cur_part = rspamd_smtp_received_process_part (task, p, end - p,
+			RSPAMD_RECEIVED_PART_FROM, &pos);
+
+	if (!cur_part) {
+		return NULL;
+	}
+
+	p += pos;
+	len = end > p ? end - p : 0;
+	DL_APPEND (head, cur_part);
+
+
+	if (len > 2 && (lc_map[p[0]] == 'b' &&
+					lc_map[p[1]] == 'y')) {
+		p += sizeof ("by") - 1;
+
+		cur_part = rspamd_smtp_received_process_part (task, p, end - p,
+				RSPAMD_RECEIVED_PART_BY, &pos);
+
+		if (!cur_part) {
+			return NULL;
+		}
+
+		p += pos;
+		len = end > p ? end - p : 0;
+		DL_APPEND (head, cur_part);
+	}
+
+	while (p > end) {
+		if (*p == ';') {
+			/* We are at the date separator, stop here */
+			*date_pos = p - (const guchar *)data + 1;
+			break;
+		}
+		else {
+			if (len > sizeof ("with") && (lc_map[p[0]] == 'w' &&
+										  lc_map[p[1]] == 'i' &&
+										  lc_map[p[2]] == 't' &&
+										  lc_map[p[3]] == 'h')) {
+				p += sizeof ("with") - 1;
+
+				cur_part = rspamd_smtp_received_process_part (task, p, end - p,
+						RSPAMD_RECEIVED_PART_WITH, &pos);
+			}
+			else if (len > sizeof ("for") && (lc_map[p[0]] == 'f' &&
+											  lc_map[p[1]] == 'o' &&
+											  lc_map[p[2]] == 'r')) {
+				p += sizeof ("for") - 1;
+				cur_part = rspamd_smtp_received_process_part (task, p, end - p,
+						RSPAMD_RECEIVED_PART_FOR, &pos);
+			}
+			else {
+				while (p < end) {
+					if (!(g_ascii_isspace (*p) || *p == '(' || *p == ';')) {
+						p ++;
+					}
+					else {
+						break;
+					}
+				}
+
+				if (p == end) {
+					return NULL;
+				}
+				else if (*p == ';') {
+					*date_pos = p - (const guchar *)data + 1;
+					break;
+				}
+				else {
+					cur_part = rspamd_smtp_received_process_part (task, p, end - p,
+							RSPAMD_RECEIVED_PART_UNKNOWN, &pos);
+				}
+			}
+
+			if (!cur_part) {
+				return NULL;
+			}
+			else {
+				p += pos;
+				len = end > p ? end - p : 0;
+				DL_APPEND (head, cur_part);
+			}
+		}
+	}
+
+	return head;
+}
+
+int
+rspamd_smtp_received_parse (struct rspamd_task *task,
+							const char *data,
+							size_t len,
+							struct received_header *rh)
+{
+	const gchar *p, *c, *end;
+	goffset date_pos = 0;
+	struct rspamd_received_part *head, *cur;
+
+	head = rspamd_smtp_received_spill (task, data, len, &date_pos);
+
+	if (head == NULL) {
+		return -1;
+	}
+
+	return 0;
+}
+\ No newline at end of file
author	Vsevolod Stakhov <vsevolod@highsecure.ru>	2019-02-06 12:36:10 +0000
committer	Vsevolod Stakhov <vsevolod@highsecure.ru>	2019-02-06 12:36:10 +0000
commit	d525194397181456bba6edea4680a10403c3415c (patch)
tree	563b036764abad3be68cf6912613e6c78addf9a0 /src/libmime/mime_headers.c
parent	659ec2d02a354f9132f3a02d27b0b60aa156c551 (diff)
download	rspamd-d525194397181456bba6edea4680a10403c3415c.tar.gz rspamd-d525194397181456bba6edea4680a10403c3415c.zip