Rework project structure, remove trash files.

author: Vsevolod Stakhov <vsevolod@highsecure.ru> 2014-04-21 16:25:51 +0100
committer: Vsevolod Stakhov <vsevolod@highsecure.ru> 2014-04-21 16:25:51 +0100
commit: 61555065f3d1c8badcc9573691232f1b6e42988c (patch)
tree: 563d5b7cb8c468530f7e79c4da0a75267b1184e1 /src/libmime
parent: ad5bf825b7f33bc10311673991f0cc888e69c0b1 (diff)
download: rspamd-61555065f3d1c8badcc9573691232f1b6e42988c.tar.gz
rspamd-61555065f3d1c8badcc9573691232f1b6e42988c.zip
16 files changed, 7363 insertions, 0 deletions
diff --git a/src/libmime/CMakeLists.txt b/src/libmime/CMakeLists.txt
new file mode 100644
index 000000000..303b7a088
--- /dev/null
+++ b/src/libmime/CMakeLists.txt
@@ -0,0 +1,29 @@
+# Librspamd mime
+SET(LIBRSPAMDMIMESRC
+				expressions.c
+				filter.c
+				images.c
+				message.c
+				protocol.c
+				smtp_utils.c
+				smtp_proto.c
+				worker_util.c)
+
+# Librspamdmime
+ADD_LIBRARY(rspamd-mime ${LINK_TYPE} ${LIBRSPAMDMIMESRC})
+IF(NOT DEBIAN_BUILD)
+SET_TARGET_PROPERTIES(rspamd-mime PROPERTIES VERSION ${RSPAMD_VERSION})
+ENDIF(NOT DEBIAN_BUILD)
+SET_TARGET_PROPERTIES(rspamd-mime PROPERTIES LINKER_LANGUAGE C)
+SET_TARGET_PROPERTIES(rspamd-mime PROPERTIES COMPILE_FLAGS "-DRSPAMD_LIB")
+TARGET_LINK_LIBRARIES(rspamd-mime rspamd-server)
+TARGET_LINK_LIBRARIES(rspamd-mime rspamd-util)      
+IF(CMAKE_COMPILER_IS_GNUCC)
+SET_TARGET_PROPERTIES(rspamd-mime PROPERTIES COMPILE_FLAGS "-DRSPAMD_LIB -fno-strict-aliasing")
+ENDIF(CMAKE_COMPILER_IS_GNUCC)
+
+IF(NO_SHARED MATCHES "OFF")
+	INSTALL(TARGETS rspamd-mime 
+   	 	LIBRARY DESTINATION ${LIBDIR}  
+    	PUBLIC_HEADER DESTINATION ${INCLUDEDIR})
+ENDIF(NO_SHARED MATCHES "OFF")
+\ No newline at end of file
diff --git a/src/libmime/expressions.c b/src/libmime/expressions.c
new file mode 100644
index 000000000..5d19626bb
--- /dev/null
+++ b/src/libmime/expressions.c
@@ -0,0 +1,1452 @@
+/*
+ * Copyright (c) 2009-2012, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "util.h"
+#include "cfg_file.h"
+#include "main.h"
+#include "message.h"
+#include "fuzzy.h"
+#include "expressions.h"
+#include "html.h"
+#include "lua/lua_common.h"
+#include "diff.h"
+
+gboolean                        rspamd_compare_encoding (struct rspamd_task *task, GList * args, void *unused);
+gboolean                        rspamd_header_exists (struct rspamd_task *task, GList * args, void *unused);
+gboolean                        rspamd_parts_distance (struct rspamd_task *task, GList * args, void *unused);
+gboolean                        rspamd_recipients_distance (struct rspamd_task *task, GList * args, void *unused);
+gboolean                        rspamd_has_only_html_part (struct rspamd_task *task, GList * args, void *unused);
+gboolean                        rspamd_is_recipients_sorted (struct rspamd_task *task, GList * args, void *unused);
+gboolean                        rspamd_compare_transfer_encoding (struct rspamd_task *task, GList * args, void *unused);
+gboolean                        rspamd_is_html_balanced (struct rspamd_task *task, GList * args, void *unused);
+gboolean                        rspamd_has_html_tag (struct rspamd_task *task, GList * args, void *unused);
+gboolean                        rspamd_has_fake_html (struct rspamd_task *task, GList * args, void *unused);
+
+/*
+ * List of internal functions of rspamd
+ * Sorted by name to use bsearch
+ */
+static struct _fl {
+	const gchar                     *name;
+	rspamd_internal_func_t          func;
+	void                           *user_data;
+} rspamd_functions_list[] = {
+	{"compare_encoding", rspamd_compare_encoding, NULL},
+	{"compare_parts_distance", rspamd_parts_distance, NULL},
+	{"compare_recipients_distance", rspamd_recipients_distance, NULL},
+	{"compare_transfer_encoding", rspamd_compare_transfer_encoding, NULL},
+	{"has_fake_html", rspamd_has_fake_html, NULL},
+	{"has_html_tag", rspamd_has_html_tag, NULL},
+	{"has_only_html_part", rspamd_has_only_html_part, NULL},
+	{"header_exists", rspamd_header_exists, NULL},
+	{"is_html_balanced", rspamd_is_html_balanced, NULL},
+	{"is_recipients_sorted", rspamd_is_recipients_sorted, NULL}
+};
+
+static struct _fl              *list_ptr = &rspamd_functions_list[0];
+static guint32                  functions_number = sizeof (rspamd_functions_list) / sizeof (struct _fl);
+static gboolean                 list_allocated = FALSE;
+
+/* Bsearch routine */
+static gint
+fl_cmp (const void *s1, const void *s2)
+{
+	struct _fl                     *fl1 = (struct _fl *)s1;
+	struct _fl                     *fl2 = (struct _fl *)s2;
+	return strcmp (fl1->name, fl2->name);
+}
+
+/* Cache for regular expressions that are used in functions */
+void                           *
+re_cache_check (const gchar *line, rspamd_mempool_t *pool)
+{
+	GHashTable              *re_cache;
+	
+	re_cache = rspamd_mempool_get_variable (pool, "re_cache");
+
+	if (re_cache == NULL) {
+		re_cache = g_hash_table_new (rspamd_str_hash, rspamd_str_equal);
+		rspamd_mempool_set_variable (pool, "re_cache", re_cache, (rspamd_mempool_destruct_t)g_hash_table_destroy);
+		return NULL;
+	}
+	return g_hash_table_lookup (re_cache, line);
+}
+
+void
+re_cache_add (const gchar *line, void *pointer, rspamd_mempool_t *pool)
+{
+	GHashTable              *re_cache;
+	
+	re_cache = rspamd_mempool_get_variable (pool, "re_cache");
+
+	if (re_cache == NULL) {
+		re_cache = g_hash_table_new (rspamd_str_hash, rspamd_str_equal);
+		rspamd_mempool_set_variable (pool, "re_cache", re_cache, (rspamd_mempool_destruct_t)g_hash_table_destroy);
+	}
+
+	g_hash_table_insert (re_cache, (gpointer)line, pointer);
+}
+
+void
+re_cache_del (const gchar *line, rspamd_mempool_t *pool)
+{
+	GHashTable              *re_cache;
+
+	re_cache = rspamd_mempool_get_variable (pool, "re_cache");
+
+	if (re_cache != NULL) {
+		g_hash_table_remove (re_cache, line);
+	}
+
+}
+
+/*
+ * Functions for parsing expressions
+ */
+struct expression_stack {
+	gchar                           op;
+	struct expression_stack        *next;
+};
+
+/*
+ * Push operand or operator to stack  
+ */
+static struct expression_stack *
+push_expression_stack (rspamd_mempool_t * pool, struct expression_stack *head, gchar op)
+{
+	struct expression_stack        *new;
+	new = rspamd_mempool_alloc (pool, sizeof (struct expression_stack));
+	new->op = op;
+	new->next = head;
+	return new;
+}
+
+/*
+ * Delete symbol from stack, return pointer to operand or operator (casted to void* )
+ */
+static gchar
+delete_expression_stack (struct expression_stack **head)
+{
+	struct expression_stack        *cur;
+	gchar                           res;
+
+	if (*head == NULL)
+		return 0;
+
+	cur = *head;
+	res = cur->op;
+
+	*head = cur->next;
+	return res;
+}
+
+/*
+ * Return operation priority
+ */
+static gint
+logic_priority (gchar a)
+{
+	switch (a) {
+	case '!':
+		return 3;
+	case '|':
+	case '&':
+		return 2;
+	case '(':
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+/*
+ * Return FALSE if symbol is not operation symbol (operand)
+ * Return TRUE if symbol is operation symbol
+ */
+static                          gboolean
+is_operation_symbol (gchar *a)
+{
+	switch (*a) {
+	case '!':
+	case '&':
+	case '|':
+	case '(':
+	case ')':
+		return TRUE;
+	case 'O':
+	case 'o':
+		if (g_ascii_strncasecmp (a, "or", sizeof ("or") - 1) == 0&& g_ascii_isspace (a[2])) {
+			return TRUE;
+		}
+		break;
+	case 'A':
+	case 'a':
+		if (g_ascii_strncasecmp (a, "and", sizeof ("and") - 1) == 0&& g_ascii_isspace (a[3])) {
+			return TRUE;
+		}
+		break;
+	case 'N':
+	case 'n':
+		if (g_ascii_strncasecmp (a, "not", sizeof ("not") - 1) == 0 && g_ascii_isspace (a[3])) {
+			return TRUE;
+		}
+		break;
+	}
+
+	return FALSE;
+}
+
+/* Return character representation of operation */
+static gchar
+op_to_char (gchar *a, gchar **next)
+{
+	switch (*a) {
+	case '!':
+	case '&':
+	case '|':
+	case '(':
+	case ')':
+		*next = a + 1;
+		return *a;
+	case 'O':
+	case 'o':
+		if (g_ascii_strncasecmp (a, "or", sizeof ("or") - 1) == 0) {
+			*next = a + sizeof ("or") - 1;
+			return '|';
+		}
+		break;
+	case 'A':
+	case 'a':
+		if (g_ascii_strncasecmp (a, "and", sizeof ("and") - 1) == 0) {
+			*next = a + sizeof ("and") - 1;
+			return '&';
+		}
+		break;
+	case 'N':
+	case 'n':
+		if (g_ascii_strncasecmp (a, "not", sizeof ("not") - 1) == 0) {
+			*next = a + sizeof ("not") - 1;
+			return '!';
+		}
+		break;
+	}
+
+	return '\0';
+}
+
+/*
+ * Return TRUE if symbol can be regexp flag
+ */
+static                          gboolean
+is_regexp_flag (gchar a)
+{
+	switch (a) {
+	case 'i':
+	case 'm':
+	case 'x':
+	case 's':
+	case 'u':
+	case 'o':
+	case 'r':
+	case 'H':
+	case 'M':
+	case 'P':
+	case 'U':
+	case 'X':
+	case 'T':
+	case 'S':
+		return TRUE;
+	default:
+		return FALSE;
+	}
+}
+
+static void
+insert_expression (rspamd_mempool_t * pool, struct expression **head, gint type, gchar op, void *operand, const gchar *orig)
+{
+	struct expression              *new, *cur;
+
+	new = rspamd_mempool_alloc (pool, sizeof (struct expression));
+	new->type = type;
+	new->orig = orig;
+	if (new->type != EXPR_OPERATION) {
+		new->content.operand = operand;
+	}
+	else {
+		new->content.operation = op;
+	}
+	new->next = NULL;
+
+	if (!*head) {
+		*head = new;
+	}
+	else {
+		cur = *head;
+		while (cur->next) {
+			cur = cur->next;
+		}
+		cur->next = new;
+	}
+}
+
+static struct expression       *
+maybe_parse_expression (rspamd_mempool_t * pool, gchar *line)
+{
+	struct expression              *expr;
+	gchar                           *p = line;
+
+	while (*p) {
+		if (is_operation_symbol (p)) {
+			return parse_expression (pool, line);
+		}
+		p++;
+	}
+
+	expr = rspamd_mempool_alloc (pool, sizeof (struct expression));
+	expr->type = EXPR_STR;
+	expr->content.operand = rspamd_mempool_strdup (pool, line);
+	expr->next = NULL;
+
+	return expr;
+}
+
+/*
+ * Make inverse polish record for specified expression
+ * Memory is allocated from given pool
+ */
+struct expression              *
+parse_expression (rspamd_mempool_t * pool, gchar *line)
+{
+	struct expression              *expr = NULL;
+	struct expression_stack        *stack = NULL;
+	struct expression_function     *func = NULL;
+	struct expression              *arg;
+	GQueue                         *function_stack;
+	gchar                           *p, *c, *str, op, newop, *copy, *next;
+	gboolean                        in_regexp = FALSE;
+	gint                            brackets = 0;
+
+	enum {
+		SKIP_SPACES,
+		READ_OPERATOR,
+		READ_REGEXP,
+		READ_REGEXP_FLAGS,
+		READ_FUNCTION,
+		READ_FUNCTION_ARGUMENT,
+	} state = SKIP_SPACES;
+
+	if (line == NULL || pool == NULL) {
+		return NULL;
+	}
+
+	msg_debug ("parsing expression {{ %s }}", line);
+
+	function_stack = g_queue_new ();
+	copy = rspamd_mempool_strdup (pool, line);
+	p = line;
+	c = p;
+	while (*p) {
+		switch (state) {
+		case SKIP_SPACES:
+			if (!g_ascii_isspace (*p)) {
+				if (is_operation_symbol (p)) {
+					state = READ_OPERATOR;
+				}
+				else if (*p == '/') {
+					c = ++p;
+					state = READ_REGEXP;
+				}
+				else {
+					c = p;
+					state = READ_FUNCTION;
+				}
+			}
+			else {
+				p++;
+			}
+			break;
+		case READ_OPERATOR:
+			if (*p == ')') {
+				if (stack == NULL) {
+					return NULL;
+				}
+				/* Pop all operators from stack to nearest '(' or to head */
+				while (stack && stack->op != '(') {
+					op = delete_expression_stack (&stack);
+					if (op != '(') {
+						insert_expression (pool, &expr, EXPR_OPERATION, op, NULL, copy);
+					}
+				}
+				if (stack) {
+					op = delete_expression_stack (&stack);
+				}
+			}
+			else if (*p == '(') {
+				/* Push it to stack */
+				stack = push_expression_stack (pool, stack, *p);
+			}
+			else {
+				if (stack == NULL) {
+					newop = op_to_char (p, &next);
+					if (newop != '\0') {
+						stack = push_expression_stack (pool, stack, newop);
+						p = next;
+						state = SKIP_SPACES;
+						continue;
+					}
+				}
+				/* Check priority of logic operation */
+				else {
+					newop = op_to_char (p, &next);
+					if (newop != '\0') {
+						if (logic_priority (stack->op) < logic_priority (newop)) {
+							stack = push_expression_stack (pool, stack, newop);
+						}
+						else {
+							/* Pop all operations that have higher priority than this one */
+							while ((stack != NULL) && (logic_priority (stack->op) >= logic_priority (newop))) {
+								op = delete_expression_stack (&stack);
+								if (op != '(') {
+									insert_expression (pool, &expr, EXPR_OPERATION, op, NULL, copy);
+								}
+							}
+							stack = push_expression_stack (pool, stack, newop);
+						}
+					}
+					p = next;
+					state = SKIP_SPACES;
+					continue;
+				}
+			}
+			p++;
+			state = SKIP_SPACES;
+			break;
+
+		case READ_REGEXP:
+			if (*p == '/' && *(p - 1) != '\\') {
+				if (*(p + 1)) {
+					p++;
+				}
+				state = READ_REGEXP_FLAGS;
+			}
+			else {
+				p++;
+			}
+			break;
+
+		case READ_REGEXP_FLAGS:
+			if (!is_regexp_flag (*p) || *(p + 1) == '\0') {
+				if (c != p) {
+					if ((is_regexp_flag (*p) || *p == '/') && *(p + 1) == '\0') {
+						p++;
+					}
+					str = rspamd_mempool_alloc (pool, p - c + 2);
+					rspamd_strlcpy (str, c - 1, (p - c + 2));
+					g_strstrip (str);
+					msg_debug ("found regexp: %s", str);
+					if (strlen (str) > 0) {
+						insert_expression (pool, &expr, EXPR_REGEXP, 0, str, copy);
+					}
+				}
+				c = p;
+				state = SKIP_SPACES;
+			}
+			else {
+				p++;
+			}
+			break;
+
+		case READ_FUNCTION:
+			if (*p == '/') {
+				/* In fact it is regexp */
+				state = READ_REGEXP;
+				c++;
+				p++;
+			}
+			else if (*p == '(') {
+				func = rspamd_mempool_alloc (pool, sizeof (struct expression_function));
+				func->name = rspamd_mempool_alloc (pool, p - c + 1);
+				func->args = NULL;
+				rspamd_strlcpy (func->name, c, (p - c + 1));
+				g_strstrip (func->name);
+				state = READ_FUNCTION_ARGUMENT;
+				g_queue_push_tail (function_stack, func);
+				insert_expression (pool, &expr, EXPR_FUNCTION, 0, func, copy);
+				c = ++p;
+			}
+			else if (is_operation_symbol (p)) {
+				/* In fact it is not function, but symbol */
+				if (c != p) {
+					str = rspamd_mempool_alloc (pool, p - c + 1);
+					rspamd_strlcpy (str, c, (p - c + 1));
+					g_strstrip (str);
+					if (strlen (str) > 0) {
+						insert_expression (pool, &expr, EXPR_STR, 0, str, copy);
+					}
+				}
+				state = READ_OPERATOR;
+			}
+			else if (*(p + 1) == '\0') {
+				/* In fact it is not function, but symbol */
+				p++;
+				if (c != p) {
+					str = rspamd_mempool_alloc (pool, p - c + 1);
+					rspamd_strlcpy (str, c, (p - c + 1));
+					g_strstrip (str);
+					if (strlen (str) > 0) {
+						insert_expression (pool, &expr, EXPR_STR, 0, str, copy);
+					}
+				}
+				state = SKIP_SPACES;
+			}
+			else {
+				p++;
+			}
+			break;
+
+		case READ_FUNCTION_ARGUMENT:
+			if (*p == '/' && !in_regexp) {
+				in_regexp = TRUE;
+				p++;
+			}
+			if (!in_regexp) {
+				/* Append argument to list */
+				if (*p == ',' || (*p == ')' && brackets == 0)) {
+					arg = NULL;
+					str = rspamd_mempool_alloc (pool, p - c + 1);
+					rspamd_strlcpy (str, c, (p - c + 1));
+					g_strstrip (str);
+					/* Recursive call */
+					arg = maybe_parse_expression (pool, str);
+					func->args = g_list_append (func->args, arg);
+					/* Pop function */
+					if (*p == ')') {
+						/* Last function in chain, goto skipping spaces state */
+						func = g_queue_pop_tail (function_stack);
+						if (g_queue_get_length (function_stack) == 0) {
+							state = SKIP_SPACES;
+						}
+					}
+					c = p + 1;
+				}
+				else if (*p == '(') {
+					brackets++;
+				}
+				else if (*p == ')') {
+					brackets--;
+				}
+			}
+			else if (*p == '/' && *(p - 1) != '\\') {
+				in_regexp = FALSE;
+			}
+			p++;
+			break;
+		}
+	}
+
+	g_queue_free (function_stack);
+	if (state != SKIP_SPACES) {
+		/* In fact we got bad expression */
+		msg_warn ("expression \"%s\" is invalid", line);
+		return NULL;
+	}
+	/* Pop everything from stack */
+	while (stack != NULL) {
+		op = delete_expression_stack (&stack);
+		if (op != '(') {
+			insert_expression (pool, &expr, EXPR_OPERATION, op, NULL, copy);
+		}
+	}
+
+	return expr;
+}
+
+/*
+ * Rspamd regexp utility functions
+ */
+struct rspamd_regexp           *
+parse_regexp (rspamd_mempool_t * pool, const gchar *line, gboolean raw_mode)
+{
+	const gchar                    *begin, *end, *p, *src, *start;
+	gchar                          *dbegin, *dend;
+	struct rspamd_regexp           *result, *check;
+	gint                            regexp_flags = G_REGEX_OPTIMIZE | G_REGEX_NO_AUTO_CAPTURE;
+	GError                         *err = NULL;
+
+	if (line == NULL) {
+		msg_err ("cannot parse NULL line");
+		return NULL;
+	}
+
+	src = line;
+	result = rspamd_mempool_alloc0 (pool, sizeof (struct rspamd_regexp));
+	/* Skip whitespaces */
+	while (g_ascii_isspace (*line)) {
+		line++;
+	}
+	if (*line == '\0') {
+		msg_warn ("got empty regexp");
+		return NULL;
+	}
+	start = line;
+	/* First try to find header name */
+	begin = strchr (line, '/');
+	if (begin != NULL) {
+		p = begin;
+		end = NULL;
+		while (p != line) {
+			if (*p == '=') {
+				end = p;
+				break;
+			}
+			p --;
+		}
+		if (end) {
+			result->header = rspamd_mempool_alloc (pool, end - line + 1);
+			rspamd_strlcpy (result->header, line, end - line + 1);
+			result->type = REGEXP_HEADER;
+			line = end;
+		}
+	}
+	else {
+		result->header = rspamd_mempool_strdup (pool, line);
+		result->type = REGEXP_HEADER;
+		line = start;
+	}
+	/* Find begin of regexp */
+	while (*line && *line != '/') {
+		line++;
+	}
+	if (*line != '\0') {
+		begin = line + 1;
+	}
+	else if (result->header == NULL) {
+		/* Assume that line without // is just a header name */
+		result->header = rspamd_mempool_strdup (pool, line);
+		result->type = REGEXP_HEADER;
+		return result;
+	}
+	else {
+		/* We got header name earlier but have not found // expression, so it is invalid regexp */
+		msg_warn ("got no header name (eg. header=) but without corresponding regexp, %s", src);
+		return NULL;
+	}
+	/* Find end */
+	end = begin;
+	while (*end && (*end != '/' || *(end - 1) == '\\')) {
+		end++;
+	}
+	if (end == begin || *end != '/') {
+		msg_warn ("no trailing / in regexp %s", src);
+		return NULL;
+	}
+	/* Parse flags */
+	p = end + 1;
+	while (p != NULL) {
+		switch (*p) {
+		case 'i':
+			regexp_flags |= G_REGEX_CASELESS;
+			p++;
+			break;
+		case 'm':
+			regexp_flags |= G_REGEX_MULTILINE;
+			p++;
+			break;
+		case 's':
+			regexp_flags |= G_REGEX_DOTALL;
+			p++;
+			break;
+		case 'x':
+			regexp_flags |= G_REGEX_EXTENDED;
+			p++;
+			break;
+		case 'u':
+			regexp_flags |= G_REGEX_UNGREEDY;
+			p++;
+			break;
+		case 'o':
+			regexp_flags |= G_REGEX_OPTIMIZE;
+			p++;
+			break;
+		case 'r':
+			regexp_flags |= G_REGEX_RAW;
+			result->is_raw = TRUE;
+			p++;
+			break;
+			/* Type flags */
+		case 'H':
+			if (result->type == REGEXP_NONE) {
+				result->type = REGEXP_HEADER;
+			}
+			p++;
+			break;
+		case 'M':
+			if (result->type == REGEXP_NONE) {
+				result->type = REGEXP_MESSAGE;
+			}
+			p++;
+			break;
+		case 'P':
+			if (result->type == REGEXP_NONE) {
+				result->type = REGEXP_MIME;
+			}
+			p++;
+			break;
+		case 'U':
+			if (result->type == REGEXP_NONE) {
+				result->type = REGEXP_URL;
+			}
+			p++;
+			break;
+		case 'X':
+			if (result->type == REGEXP_NONE || result->type == REGEXP_HEADER) {
+				result->type = REGEXP_RAW_HEADER;
+			}
+			p++;
+			break;
+		case 'T':
+			result->is_test = TRUE;
+			p ++;
+			break;
+		case 'S':
+			result->is_strong = TRUE;
+			p ++;
+			break;
+			/* Stop flags parsing */
+		default:
+			p = NULL;
+			break;
+		}
+	}
+
+	result->regexp_text = rspamd_mempool_strdup (pool, start);
+	dbegin = result->regexp_text + (begin - start);
+	dend = result->regexp_text + (end - start);
+	*dend = '\0';
+
+	if (raw_mode) {
+		regexp_flags |= G_REGEX_RAW;
+	}
+
+	/* Avoid multiply regexp structures for similar regexps */
+	if ((check = (struct rspamd_regexp *)re_cache_check (result->regexp_text, pool)) != NULL) {
+		/* Additional check for headers */
+		if (result->type == REGEXP_HEADER || result->type == REGEXP_RAW_HEADER) {
+			if (result->header && check->header) {
+				if (strcmp (result->header, check->header) == 0) {
+					return check;
+				}
+			}
+		}
+		else {
+			return check;
+		}
+	}
+	result->regexp = g_regex_new (dbegin, regexp_flags, 0, &err);
+	if ((regexp_flags & G_REGEX_RAW) != 0) {
+		result->raw_regexp = result->regexp;
+	}
+	else {
+		result->raw_regexp = g_regex_new (dbegin, regexp_flags | G_REGEX_RAW, 0, &err);
+		rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t) g_regex_unref, (void *)result->raw_regexp);
+	}
+	rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t) g_regex_unref, (void *)result->regexp);
+
+	*dend = '/';
+
+	if (result->regexp == NULL || err != NULL) {
+		msg_warn ("could not read regexp: %s while reading regexp %s", err->message, src);
+		return NULL;
+	}
+
+	if (result->raw_regexp == NULL || err != NULL) {
+		msg_warn ("could not read raw regexp: %s while reading regexp %s", err->message, src);
+		return NULL;
+	}
+
+	/* Add to cache for further usage */
+	re_cache_add (result->regexp_text, result, pool);
+	return result;
+}
+
+gboolean
+call_expression_function (struct expression_function * func, struct rspamd_task * task, lua_State *L)
+{
+	struct _fl                     *selected, key;
+
+	key.name = func->name;
+
+	selected = bsearch (&key, list_ptr, functions_number, sizeof (struct _fl), fl_cmp);
+	if (selected == NULL) {
+		/* Try to check lua function */
+		return FALSE;
+	}
+
+	return selected->func (task, func->args, selected->user_data);
+}
+
+struct expression_argument     *
+get_function_arg (struct expression *expr, struct rspamd_task *task, gboolean want_string)
+{
+	GQueue                         *stack;
+	gsize                           cur, op1, op2;
+	struct expression_argument     *res;
+	struct expression              *it;
+
+	if (expr == NULL) {
+		msg_warn ("NULL expression passed");
+		return NULL;
+	}
+	if (expr->next == NULL) {
+		res = rspamd_mempool_alloc (task->task_pool, sizeof (struct expression_argument));
+		if (expr->type == EXPR_REGEXP || expr->type == EXPR_STR || expr->type == EXPR_REGEXP_PARSED) {
+			res->type = EXPRESSION_ARGUMENT_NORMAL;
+			res->data = expr->content.operand;
+		}
+		else if (expr->type == EXPR_FUNCTION && !want_string) {
+			res->type = EXPRESSION_ARGUMENT_BOOL;
+			cur = call_expression_function (expr->content.operand, task, NULL);
+			res->data = GSIZE_TO_POINTER (cur);
+		}
+		else {
+			msg_warn ("cannot parse argument: it contains operator or bool expression that is not wanted");
+			return NULL;
+		}
+		return res;
+	}
+	else if (!want_string) {
+		res = rspamd_mempool_alloc (task->task_pool, sizeof (struct expression_argument));
+		res->type = EXPRESSION_ARGUMENT_BOOL;
+		stack = g_queue_new ();
+		it = expr;
+
+		while (it) {
+			if (it->type == EXPR_REGEXP || it->type == EXPR_REGEXP_PARSED || it->type == EXPR_STR) {
+				g_queue_free (stack);
+				res->type = EXPRESSION_ARGUMENT_EXPR;
+				res->data = expr;
+				return res;
+			}
+			else if (it->type == EXPR_FUNCTION) {
+				cur = (gsize) call_expression_function ((struct expression_function *)it->content.operand, task, NULL);
+				debug_task ("function %s returned %s", ((struct expression_function *)it->content.operand)->name, cur ? "true" : "false");
+			}
+			else if (it->type == EXPR_OPERATION) {
+				if (g_queue_is_empty (stack)) {
+					/* Queue has no operands for operation, exiting */
+					debug_task ("invalid expression");
+					g_queue_free (stack);
+					return NULL;
+				}
+				switch (it->content.operation) {
+				case '!':
+					op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
+					op1 = !op1;
+					g_queue_push_head (stack, GSIZE_TO_POINTER (op1));
+					break;
+				case '&':
+					op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
+					op2 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
+					g_queue_push_head (stack, GSIZE_TO_POINTER (op1 && op2));
+					break;
+				case '|':
+					op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
+					op2 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
+					g_queue_push_head (stack, GSIZE_TO_POINTER (op1 || op2));
+					break;
+				default:
+					it = it->next;
+					continue;
+				}
+			}
+			if (it) {
+				it = it->next;
+			}
+		}
+		if (!g_queue_is_empty (stack)) {
+			res->data = g_queue_pop_head (stack);
+		}
+		else {
+			res->data = GSIZE_TO_POINTER (FALSE);
+		}
+
+		return res;
+	}
+
+	msg_warn ("invalid expression argument");
+
+	return NULL;
+}
+
+void
+register_expression_function (const gchar *name, rspamd_internal_func_t func, void *user_data)
+{
+	static struct _fl              *new;
+
+	functions_number++;
+
+	new = g_new (struct _fl, functions_number);
+	memcpy (new, list_ptr, (functions_number - 1) * sizeof (struct _fl));
+	if (list_allocated) {
+		g_free (list_ptr);
+	}
+
+	list_allocated = TRUE;
+	new[functions_number - 1].name = name;
+	new[functions_number - 1].func = func;
+	new[functions_number - 1].user_data = user_data;
+	qsort (new, functions_number, sizeof (struct _fl), fl_cmp);
+	list_ptr = new;
+}
+
+gboolean
+rspamd_compare_encoding (struct rspamd_task *task, GList * args, void *unused)
+{
+	struct expression_argument     *arg;
+
+	if (args == NULL || task == NULL) {
+		return FALSE;
+	}
+
+	arg = get_function_arg (args->data, task, TRUE);
+	if (arg->type == EXPRESSION_ARGUMENT_BOOL) {
+		msg_warn ("invalid argument to function is passed");
+		return FALSE;
+	}
+
+	/* XXX: really write this function */
+	return TRUE;
+}
+
+gboolean
+rspamd_header_exists (struct rspamd_task * task, GList * args, void *unused)
+{
+	struct expression_argument     *arg;
+	GList                          *headerlist;
+
+	if (args == NULL || task == NULL) {
+		return FALSE;
+	}
+
+	arg = get_function_arg (args->data, task, TRUE);
+	if (!arg || arg->type == EXPRESSION_ARGUMENT_BOOL) {
+		msg_warn ("invalid argument to function is passed");
+		return FALSE;
+	}
+
+	debug_task ("try to get header %s", (gchar *)arg->data);
+	headerlist = message_get_header (task->task_pool, task->message, (gchar *)arg->data, FALSE);
+	if (headerlist) {
+		g_list_free (headerlist);
+		return TRUE;
+	}
+	return FALSE;
+}
+
+/*
+ * This function is designed to find difference between text/html and text/plain parts
+ * It takes one argument: difference threshold, if we have two text parts, compare 
+ * its hashes and check for threshold, if value is greater than threshold, return TRUE
+ * and return FALSE otherwise.
+ */
+gboolean
+rspamd_parts_distance (struct rspamd_task * task, GList * args, void *unused)
+{
+	gint                            threshold, threshold2 = -1, diff;
+	struct mime_text_part          *p1, *p2;
+	GList                          *cur;
+	struct expression_argument     *arg;
+	GMimeObject                    *parent;
+	const GMimeContentType         *ct;
+	gint                           *pdiff;
+
+	if (args == NULL) {
+		debug_task ("no threshold is specified, assume it 100");
+		threshold = 100;
+	}
+	else {
+		errno = 0;
+		arg = get_function_arg (args->data, task, TRUE);
+		threshold = strtoul ((gchar *)arg->data, NULL, 10);
+		if (errno != 0) {
+			msg_info ("bad numeric value for threshold \"%s\", assume it 100", (gchar *)args->data);
+			threshold = 100;
+		}
+		if (args->next) {
+			arg = get_function_arg (args->next->data, task, TRUE);
+			errno = 0;
+			threshold2 = strtoul ((gchar *)arg->data, NULL, 10);
+			if (errno != 0) {
+				msg_info ("bad numeric value for threshold \"%s\", ignore it", (gchar *)arg->data);
+				threshold2 = -1;
+			}
+		}
+	}
+
+	if ((pdiff = rspamd_mempool_get_variable (task->task_pool, "parts_distance")) != NULL) {
+		diff = *pdiff;
+		if (diff != -1) {
+			if (threshold2 > 0) {
+				if (diff >= MIN (threshold, threshold2) && diff < MAX (threshold, threshold2)) {
+					return TRUE;
+				}
+			}
+			else {
+				if (diff <= threshold) {
+					return TRUE;
+				}
+			}
+			return FALSE;
+		}
+		else {
+			return FALSE;
+		}
+	}
+
+	if (g_list_length (task->text_parts) == 2) {
+		cur = g_list_first (task->text_parts);
+		p1 = cur->data;
+		cur = g_list_next (cur);
+		pdiff = rspamd_mempool_alloc (task->task_pool, sizeof (gint));
+		*pdiff = -1;
+
+		if (cur == NULL) {
+			msg_info ("bad parts list");
+			return FALSE;
+		}
+		p2 = cur->data;
+		/* First of all check parent object */
+		if (p1->parent && p1->parent == p2->parent) {
+			parent = p1->parent;
+			ct = g_mime_object_get_content_type (parent);
+#ifndef GMIME24
+			if (ct == NULL || ! g_mime_content_type_is_type (ct, "multipart", "alternative")) {
+#else
+			if (ct == NULL || ! g_mime_content_type_is_type ((GMimeContentType *)ct, "multipart", "alternative")) {
+#endif
+				debug_task ("two parts are not belong to multipart/alternative container, skip check");
+				rspamd_mempool_set_variable (task->task_pool, "parts_distance", pdiff, NULL);
+				return FALSE;
+			}
+		}
+		else {
+			debug_task ("message contains two parts but they are in different multi-parts");
+			rspamd_mempool_set_variable (task->task_pool, "parts_distance", pdiff, NULL);
+			return FALSE;
+		}
+		if (!p1->is_empty && !p2->is_empty) {
+			if (p1->diff_str != NULL && p2->diff_str != NULL) {
+				diff = compare_diff_distance_normalized (p1->diff_str, p2->diff_str);
+			}
+			else {
+				diff = fuzzy_compare_parts (p1, p2);
+			}
+			debug_task ("got likeliness between parts of %d%%, threshold is %d%%", diff, threshold);
+			*pdiff = diff;
+			rspamd_mempool_set_variable (task->task_pool, "parts_distance", pdiff, NULL);
+			if (threshold2 > 0) {
+				if (diff >= MIN (threshold, threshold2) && diff < MAX (threshold, threshold2)) {
+					return TRUE;
+				}
+			}
+			else {
+				if (diff <= threshold) {
+					return TRUE;
+				}
+			}
+		}
+		else if ((p1->is_empty && !p2->is_empty) || (!p1->is_empty && p2->is_empty)) {
+			/* Empty and non empty parts are different */
+			*pdiff = 0;
+			rspamd_mempool_set_variable (task->task_pool, "parts_distance", pdiff, NULL);
+			return TRUE;
+		}
+	}
+	else {
+		debug_task ("message has too many text parts, so do not try to compare them with each other");
+		rspamd_mempool_set_variable (task->task_pool, "parts_distance", pdiff, NULL);
+		return FALSE;
+	}
+
+	rspamd_mempool_set_variable (task->task_pool, "parts_distance", pdiff, NULL);
+	return FALSE;
+}
+
+struct addr_list {
+	const gchar                     *name;
+	const gchar                     *addr;
+};
+
+#define COMPARE_RCPT_LEN 3
+#define MIN_RCPT_TO_COMPARE 7
+
+gboolean
+rspamd_recipients_distance (struct rspamd_task *task, GList * args, void *unused)
+{
+	struct expression_argument     *arg;
+	InternetAddressList            *cur;
+	InternetAddress                *addr;
+	double                          threshold;
+	struct addr_list               *ar;
+	gchar                           *c;
+	gint                            num, i, j, hits = 0, total = 0;
+
+	if (args == NULL) {
+		msg_warn ("no parameters to function");
+		return FALSE;
+	}
+
+	arg = get_function_arg (args->data, task, TRUE);
+	errno = 0;
+	threshold = strtod ((gchar *)arg->data, NULL);
+	if (errno != 0) {
+		msg_warn ("invalid numeric value '%s': %s", (gchar *)arg->data, strerror (errno));
+		return FALSE;
+	}
+
+	if (!task->rcpts) {
+		return FALSE;
+	}
+	num = internet_address_list_length (task->rcpts);
+	if (num < MIN_RCPT_TO_COMPARE) {
+		return FALSE;
+	}
+	ar = rspamd_mempool_alloc0 (task->task_pool, num * sizeof (struct addr_list));
+
+	/* Fill array */
+	cur = task->rcpts;
+#ifdef GMIME24
+	for (i = 0; i < num; i ++) {
+		addr = internet_address_list_get_address (cur, i);
+		ar[i].name = rspamd_mempool_strdup (task->task_pool, internet_address_get_name (addr));
+		if (ar[i].name != NULL && (c = strchr (ar[i].name, '@')) != NULL) {
+			*c = '\0';
+			ar[i].addr = c + 1;
+		}
+	}
+#else
+	i = 0;
+	while (cur) {
+		addr = internet_address_list_get_address (cur);
+		if (addr && internet_address_get_type (addr) == INTERNET_ADDRESS_NAME) {
+			ar[i].name = rspamd_mempool_strdup (task->task_pool, internet_address_get_addr (addr));
+			if (ar[i].name != NULL && (c = strchr (ar[i].name, '@')) != NULL) {
+				*c = '\0';
+				ar[i].addr = c + 1;
+			}
+			cur = internet_address_list_next (cur);
+			i++;
+		}
+		else {
+			cur = internet_address_list_next (cur);
+		}
+	}
+#endif
+
+	/* Cycle all elements in array */
+	for (i = 0; i < num; i++) {
+		for (j = i + 1; j < num; j++) {
+			if (ar[i].name && ar[j].name && g_ascii_strncasecmp (ar[i].name, ar[j].name, COMPARE_RCPT_LEN) == 0) {
+				/* Common name part */
+				hits++;
+			}
+			else if (ar[i].addr && ar[j].addr && g_ascii_strcasecmp (ar[i].addr, ar[j].addr) == 0) {
+				/* Common address part, but different name */
+				hits++;
+			}
+			total++;
+		}
+	}
+
+	if ((double)(hits * num / 2.) / (double)total >= threshold) {
+		return TRUE;
+	}
+
+	return FALSE;
+}
+
+gboolean
+rspamd_has_only_html_part (struct rspamd_task * task, GList * args, void *unused)
+{
+	struct mime_text_part          *p;
+	GList                          *cur;
+	gboolean                        res = FALSE;
+
+	cur = g_list_first (task->text_parts);
+	while (cur) {
+		p = cur->data;
+		if (p->is_html) {
+			res = TRUE;
+		}
+		else {
+			res = FALSE;
+			break;
+		}
+		cur = g_list_next (cur);
+	}
+
+	return res;
+}
+
+static                          gboolean
+is_recipient_list_sorted (const InternetAddressList * ia)
+{
+	const InternetAddressList      *cur;
+	InternetAddress                *addr;
+	gboolean                        res = TRUE;
+	struct addr_list                current = { NULL, NULL }, previous = {
+	NULL, NULL};
+#ifdef GMIME24
+	gint                            num, i;
+#endif
+
+	/* Do not check to short address lists */
+	if (internet_address_list_length ((InternetAddressList *)ia) < MIN_RCPT_TO_COMPARE) {
+		return FALSE;
+	}
+#ifdef GMIME24
+	num = internet_address_list_length ((InternetAddressList *)ia);
+	cur = ia;
+	for (i = 0; i < num; i ++) {
+		addr = internet_address_list_get_address ((InternetAddressList *)cur, i);
+		current.addr = (gchar *)internet_address_get_name (addr);
+		if (previous.addr != NULL) {
+			if (current.addr && g_ascii_strcasecmp (current.addr, previous.addr) < 0) {
+				res = FALSE;
+				break;
+			}
+		}
+		previous.addr = current.addr;
+	}
+#else
+	cur = ia;
+	while (cur) {
+		addr = internet_address_list_get_address (cur);
+		if (internet_address_get_type (addr) == INTERNET_ADDRESS_NAME) {
+			current.addr = internet_address_get_addr (addr);
+			if (previous.addr != NULL) {
+				if (current.addr && g_ascii_strcasecmp (current.addr, previous.addr) < 0) {
+					res = FALSE;
+					break;
+				}
+			}
+			previous.addr = current.addr;
+		}
+		cur = internet_address_list_next (cur);
+	}
+#endif
+
+	return res;
+}
+
+gboolean
+rspamd_is_recipients_sorted (struct rspamd_task * task, GList * args, void *unused)
+{
+	/* Check all types of addresses */
+	if (is_recipient_list_sorted (g_mime_message_get_recipients (task->message, GMIME_RECIPIENT_TYPE_TO)) == TRUE) {
+		return TRUE;
+	}
+	if (is_recipient_list_sorted (g_mime_message_get_recipients (task->message, GMIME_RECIPIENT_TYPE_BCC)) == TRUE) {
+		return TRUE;
+	}
+	if (is_recipient_list_sorted (g_mime_message_get_recipients (task->message, GMIME_RECIPIENT_TYPE_CC)) == TRUE) {
+		return TRUE;
+	}
+
+	return FALSE;
+}
+
+gboolean
+rspamd_compare_transfer_encoding (struct rspamd_task * task, GList * args, void *unused)
+{
+	GMimeObject                    *part;
+#ifndef GMIME24
+	GMimePartEncodingType           enc_req, part_enc;
+#else
+	GMimeContentEncoding            enc_req, part_enc;
+#endif
+	struct expression_argument     *arg;
+
+	if (args == NULL) {
+		msg_warn ("no parameters to function");
+		return FALSE;
+	}
+
+	arg = get_function_arg (args->data, task, TRUE);
+#ifndef GMIME24
+	enc_req = g_mime_part_encoding_from_string (arg->data);
+	if (enc_req == GMIME_PART_ENCODING_DEFAULT) {
+#else
+	enc_req = g_mime_content_encoding_from_string (arg->data);
+	if (enc_req == GMIME_CONTENT_ENCODING_DEFAULT) {
+#endif
+		msg_warn ("bad encoding type: %s", (gchar *)arg->data);
+		return FALSE;
+	}
+
+	part = g_mime_message_get_mime_part (task->message);
+	if (part) {
+		if (GMIME_IS_PART (part)) {
+#ifndef GMIME24
+			part_enc = g_mime_part_get_encoding (GMIME_PART (part));
+			if (part_enc == GMIME_PART_ENCODING_DEFAULT) {
+				/* Assume 7bit as default transfer encoding */
+				part_enc = GMIME_PART_ENCODING_7BIT;
+			}
+#else
+			part_enc = g_mime_part_get_content_encoding (GMIME_PART (part));
+			if (part_enc == GMIME_CONTENT_ENCODING_DEFAULT) {
+				/* Assume 7bit as default transfer encoding */
+				part_enc = GMIME_CONTENT_ENCODING_7BIT;
+			}
+#endif
+
+
+			debug_task ("got encoding in part: %d and compare with %d", (gint)part_enc, (gint)enc_req);
+#ifndef GMIME24
+			g_object_unref (part);
+#endif
+
+			return part_enc == enc_req;
+		}
+#ifndef GMIME24
+		g_object_unref (part);
+#endif
+	}
+
+	return FALSE;
+}
+
+gboolean
+rspamd_is_html_balanced (struct rspamd_task * task, GList * args, void *unused)
+{
+	struct mime_text_part          *p;
+	GList                          *cur;
+	gboolean                        res = TRUE;
+
+	cur = g_list_first (task->text_parts);
+	while (cur) {
+		p = cur->data;
+		if (!p->is_empty && p->is_html) {
+			if (p->is_balanced) {
+				res = TRUE;
+			}
+			else {
+				res = FALSE;
+				break;
+			}
+		}
+		cur = g_list_next (cur);
+	}
+
+	return res;
+
+}
+
+struct html_callback_data {
+	struct html_tag                *tag;
+	gboolean                       *res;
+};
+
+static                          gboolean
+search_html_node_callback (GNode * node, gpointer data)
+{
+	struct html_callback_data      *cd = data;
+	struct html_node               *nd;
+
+	nd = node->data;
+	if (nd) {
+		if (nd->tag == cd->tag) {
+			*cd->res = TRUE;
+			return TRUE;
+		}
+	}
+
+	return FALSE;
+}
+
+gboolean
+rspamd_has_html_tag (struct rspamd_task * task, GList * args, void *unused)
+{
+	struct mime_text_part          *p;
+	GList                          *cur;
+	struct expression_argument     *arg;
+	struct html_tag                *tag;
+	gboolean                        res = FALSE;
+	struct html_callback_data       cd;
+
+	if (args == NULL) {
+		msg_warn ("no parameters to function");
+		return FALSE;
+	}
+
+	arg = get_function_arg (args->data, task, TRUE);
+	tag = get_tag_by_name (arg->data);
+	if (tag == NULL) {
+		msg_warn ("unknown tag type passed as argument: %s", (gchar *)arg->data);
+		return FALSE;
+	}
+
+	cur = g_list_first (task->text_parts);
+	cd.res = &res;
+	cd.tag = tag;
+
+	while (cur && res == FALSE) {
+		p = cur->data;
+		if (!p->is_empty && p->is_html && p->html_nodes) {
+			g_node_traverse (p->html_nodes, G_PRE_ORDER, G_TRAVERSE_ALL, -1, search_html_node_callback, &cd);
+		}
+		cur = g_list_next (cur);
+	}
+
+	return res;
+
+}
+
+gboolean
+rspamd_has_fake_html (struct rspamd_task * task, GList * args, void *unused)
+{
+	struct mime_text_part          *p;
+	GList                          *cur;
+	gboolean                        res = FALSE;
+
+	cur = g_list_first (task->text_parts);
+
+	while (cur && res == FALSE) {
+		p = cur->data;
+		if (!p->is_empty && p->is_html && p->html_nodes == NULL) {
+			res = TRUE;
+		}
+		cur = g_list_next (cur);
+	}
+
+	return res;
+
+}
+
+
+/*
+ * vi:ts=4
+ */
diff --git a/src/libmime/expressions.h b/src/libmime/expressions.h
new file mode 100644
index 000000000..954cc74f7
--- /dev/null
+++ b/src/libmime/expressions.h
@@ -0,0 +1,133 @@
+/**
+ * @file expressions.h
+ * Rspamd expressions API
+ */
+
+#ifndef RSPAMD_EXPRESSIONS_H
+#define RSPAMD_EXPRESSIONS_H
+
+#include "config.h"
+#include <lua.h>
+
+struct rspamd_task;
+struct rspamd_regexp;
+
+/**
+ * Rspamd expression function
+ */
+struct expression_function {
+	gchar *name;													/**< name of function								*/
+	GList *args;												/**< its args										*/
+};
+
+/**
+ * Function's argument
+ */
+struct expression_argument {
+	enum {
+		EXPRESSION_ARGUMENT_NORMAL,
+		EXPRESSION_ARGUMENT_BOOL,
+		EXPRESSION_ARGUMENT_EXPR,
+	} type;														/**< type of argument (text or other function)		*/
+	void *data;													/**< pointer to its data							*/
+};
+
+/** 
+ * Logic expression 
+ */
+struct expression {
+	enum { 
+		EXPR_REGEXP,
+		EXPR_OPERATION, 
+		EXPR_FUNCTION, 
+		EXPR_STR, 
+		EXPR_REGEXP_PARSED,
+	} type;														/**< expression type								*/
+	union {
+		void *operand;
+		gchar operation;
+	} content;													/**< union for storing operand or operation code 	*/
+	const gchar *orig;											/**< original line									*/
+	struct expression *next;									/**< chain link										*/
+};
+
+typedef gboolean (*rspamd_internal_func_t)(struct rspamd_task *, GList *args, void *user_data);
+
+/**
+ * Parse regexp line to regexp structure
+ * @param pool memory pool to use
+ * @param line incoming line
+ * @return regexp structure or NULL in case of error
+ */
+struct rspamd_regexp* parse_regexp (rspamd_mempool_t *pool, const gchar *line, gboolean raw_mode);
+
+/**
+ * Parse composites line to composites structure (eg. "SYMBOL1&SYMBOL2|!SYMBOL3")
+ * @param pool memory pool to use
+ * @param line incoming line
+ * @return expression structure or NULL in case of error
+ */
+struct expression* parse_expression (rspamd_mempool_t *pool, gchar *line);
+
+/**
+ * Call specified fucntion and return boolean result
+ * @param func function to call
+ * @param task task object
+ * @param L lua specific state
+ * @return TRUE or FALSE depending on function result
+ */
+gboolean call_expression_function (struct expression_function *func, struct rspamd_task *task, lua_State *L);
+
+/**
+ * Register specified function to rspamd internal functions list
+ * @param name name of function
+ * @param func pointer to function
+ */
+void register_expression_function (const gchar *name, rspamd_internal_func_t func, void *user_data);
+
+/**
+ * Add regexp to regexp cache
+ * @param line symbolic representation
+ * @param pointer regexp data
+ */
+void re_cache_add (const gchar *line, void *pointer, rspamd_mempool_t *pool);
+
+/**
+ * Check regexp in cache
+ * @param line symbolic representation
+ * @return pointer to regexp data or NULL if regexp is not found
+ */
+void * re_cache_check (const gchar *line, rspamd_mempool_t *pool);
+
+/**
+ * Remove regexp from regexp cache
+ * @param line symbolic representation
+ */
+void re_cache_del (const gchar *line, rspamd_mempool_t *pool);
+
+/**
+ * Add regexp to regexp task cache
+ * @param task task object
+ * @param pointer regexp data
+ * @param result numeric result of this regexp
+ */
+void task_cache_add (struct rspamd_task *task, struct rspamd_regexp *re, gint32 result);
+
+/**
+ * Check regexp in cache
+ * @param task task object
+ * @param pointer regexp data
+ * @return numeric result if value exists or -1 if not
+ */
+gint32 task_cache_check (struct rspamd_task *task, struct rspamd_regexp *re);
+
+/**
+ * Parse and return a single function argument for a function (may recurse)
+ * @param expr expression structure that represents function's argument
+ * @param task task object
+ * @param want_string return NULL if argument is not a string
+ * @return expression argument structure or NULL if failed
+ */
+struct expression_argument *get_function_arg (struct expression *expr, struct rspamd_task *task, gboolean want_string);
+
+#endif
diff --git a/src/libmime/filter.c b/src/libmime/filter.c
new file mode 100644
index 000000000..cb0630d9d
--- /dev/null
+++ b/src/libmime/filter.c
@@ -0,0 +1,1096 @@
+/*
+ * Copyright (c) 2009-2012, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "mem_pool.h"
+#include "filter.h"
+#include "main.h"
+#include "message.h"
+#include "cfg_file.h"
+#include "util.h"
+#include "expressions.h"
+#include "settings.h"
+#include "binlog.h"
+#include "diff.h"
+#include "classifiers/classifiers.h"
+#include "tokenizers/tokenizers.h"
+
+#ifdef WITH_LUA
+#   include "lua/lua_common.h"
+#endif
+
+#define COMMON_PART_FACTOR 95
+
+#ifndef PARAM_H_HAS_BITSET
+/* Bit map related macros. */
+#define NBBY    8               /* number of bits in a byte */
+#define setbit(a,i)     (((unsigned char *)(a))[(i)/NBBY] |= 1<<((i)%NBBY))
+#define clrbit(a,i)     (((unsigned char *)(a))[(i)/NBBY] &= ~(1<<((i)%NBBY)))
+#define isset(a,i)                                                      \
+        (((const unsigned char *)(a))[(i)/NBBY] & (1<<((i)%NBBY)))
+#define isclr(a,i)                                                      \
+        ((((const unsigned char *)(a))[(i)/NBBY] & (1<<((i)%NBBY))) == 0)
+#endif
+#define BITSPERBYTE	(8*sizeof (gchar))
+#define NBYTES(nbits)	(((nbits) + BITSPERBYTE - 1) / BITSPERBYTE)
+
+static inline                   GQuark
+filter_error_quark (void)
+{
+	return g_quark_from_static_string ("g-filter-error-quark");
+}
+
+static void
+insert_metric_result (struct rspamd_task *task, struct metric *metric, const gchar *symbol,
+		double flag, GList * opts, gboolean single)
+{
+	struct metric_result           *metric_res;
+	struct symbol                  *s;
+	gdouble                        *weight, w;
+
+	metric_res = g_hash_table_lookup (task->results, metric->name);
+
+	if (metric_res == NULL) {
+		/* Create new metric chain */
+		metric_res = rspamd_mempool_alloc (task->task_pool, sizeof (struct metric_result));
+		metric_res->symbols = g_hash_table_new (rspamd_str_hash, rspamd_str_equal);
+		metric_res->checked = FALSE;
+		rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_hash_table_unref, metric_res->symbols);
+		metric_res->metric = metric;
+		metric_res->grow_factor = 0;
+		metric_res->score = 0;
+		metric_res->domain_settings = NULL;
+		metric_res->user_settings = NULL;
+		apply_metric_settings (task, metric, metric_res);
+		g_hash_table_insert (task->results, (gpointer) metric->name, metric_res);
+	}
+	
+	weight = g_hash_table_lookup (metric->symbols, symbol);
+	if (weight == NULL) {
+		w = 0.0;
+	}
+	else {
+		w = (*weight) * flag;
+	}
+
+
+	/* Add metric score */
+	if ((s = g_hash_table_lookup (metric_res->symbols, symbol)) != NULL) {
+		if (s->options && opts && opts != s->options) {
+			/* Append new options */
+			s->options = g_list_concat (s->options, g_list_copy(opts));
+			/*
+			 * Note that there is no need to add new destructor of GList as elements of appended
+			 * GList are used directly, so just free initial GList
+			 */
+		}
+		else if (opts) {
+			s->options = g_list_copy (opts);
+			rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_list_free, s->options);
+		}
+		if (!single) {
+			/* Handle grow factor */
+			if (metric_res->grow_factor && w > 0) {
+				w *= metric_res->grow_factor;
+				metric_res->grow_factor *= metric->grow_factor;
+			}
+			s->score += w;
+			metric_res->score += w;
+		}
+		else {
+			if (fabs (s->score) < fabs (w)) {
+				/* Replace less weight with a bigger one */
+				metric_res->score = metric_res->score - s->score + w;
+				s->score = w;
+			}
+		}
+	}
+	else {
+		s = rspamd_mempool_alloc (task->task_pool, sizeof (struct symbol));
+
+		/* Handle grow factor */
+		if (metric_res->grow_factor && w > 0) {
+			w *= metric_res->grow_factor;
+			metric_res->grow_factor *= metric->grow_factor;
+		}
+		else if (w > 0) {
+			metric_res->grow_factor = metric->grow_factor;
+		}
+
+		s->score = w;
+		s->name = symbol;
+		metric_res->score += w;
+
+		if (opts) {
+			s->options = g_list_copy (opts);
+			rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_list_free, s->options);
+		}
+		else {
+			s->options = NULL;
+		}
+
+		g_hash_table_insert (metric_res->symbols, (gpointer) symbol, s);
+	}
+	debug_task ("symbol %s, score %.2f, metric %s, factor: %f", symbol, s->score, metric->name, w);
+	
+}
+
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30))
+static GStaticMutex result_mtx =  G_STATIC_MUTEX_INIT;
+#else
+G_LOCK_DEFINE (result_mtx);
+#endif
+
+static void
+insert_result_common (struct rspamd_task *task, const gchar *symbol, double flag, GList * opts, gboolean single)
+{
+	struct metric                  *metric;
+	struct cache_item              *item;
+	GList                          *cur, *metric_list;
+
+	/* Avoid concurrenting inserting of results */
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30))
+	g_static_mutex_lock (&result_mtx);
+#else
+	G_LOCK (result_mtx);
+#endif
+	metric_list = g_hash_table_lookup (task->cfg->metrics_symbols, symbol);
+	if (metric_list) {
+		cur = metric_list;
+		
+		while (cur) {
+			metric = cur->data;
+			insert_metric_result (task, metric, symbol, flag, opts, single);
+			cur = g_list_next (cur);
+		}
+	}
+	else {
+		/* Insert symbol to default metric */
+		insert_metric_result (task, task->cfg->default_metric, symbol, flag, opts, single);
+	}
+
+	/* Process cache item */
+	if (task->cfg->cache) {
+		item = g_hash_table_lookup (task->cfg->cache->items_by_symbol, symbol);
+		if (item != NULL) {
+			item->s->frequency++;
+		}
+	}
+
+	if (opts != NULL) {
+		/* XXX: it is not wise to destroy them here */
+		g_list_free (opts);
+	}
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30))
+	g_static_mutex_unlock (&result_mtx);
+#else
+	G_UNLOCK (result_mtx);
+#endif
+}
+
+/* Insert result that may be increased on next insertions */
+void
+insert_result (struct rspamd_task *task, const gchar *symbol, double flag, GList * opts)
+{
+	insert_result_common (task, symbol, flag, opts, task->cfg->one_shot_mode);
+}
+
+/* Insert result as a single option */
+void
+insert_result_single (struct rspamd_task *task, const gchar *symbol, double flag, GList * opts)
+{
+	insert_result_common (task, symbol, flag, opts, TRUE);
+}
+
+/* Return true if metric has score that is more than spam score for it */
+static                          gboolean
+check_metric_is_spam (struct rspamd_task *task, struct metric *metric)
+{
+	struct metric_result           *res;
+	double                          ms, rs;
+
+	/* Avoid concurrency while checking results */
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30))
+	g_static_mutex_lock (&result_mtx);
+#else
+	G_LOCK (result_mtx);
+#endif
+	res = g_hash_table_lookup (task->results, metric->name);
+	if (res) {
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30))
+		g_static_mutex_unlock (&result_mtx);
+#else
+		G_UNLOCK (result_mtx);
+#endif
+		if (!check_metric_settings (res, &ms, &rs)) {
+			ms = metric->actions[METRIC_ACTION_REJECT].score;
+		}
+		return (ms > 0 && res->score >= ms);
+	}
+
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30))
+	g_static_mutex_unlock (&result_mtx);
+#else
+	G_UNLOCK (result_mtx);
+#endif
+
+	return FALSE;
+}
+
+gint
+process_filters (struct rspamd_task *task)
+{
+	GList                          *cur;
+	struct metric                  *metric;
+	gpointer                        item = NULL;
+
+	/* Process metrics symbols */
+	while (call_symbol_callback (task, task->cfg->cache, &item)) {
+		/* Check reject actions */
+		cur = task->cfg->metrics_list;
+		while (cur) {
+			metric = cur->data;
+			if (!task->pass_all_filters &&
+						metric->actions[METRIC_ACTION_REJECT].score > 0 &&
+						check_metric_is_spam (task, metric)) {
+				task->state = WRITE_REPLY;
+				return 1;
+			}
+			cur = g_list_next (cur);
+		}
+	}
+
+	task->state = WAIT_FILTER;
+
+	return 1;
+}
+
+
+struct composites_data {
+	struct rspamd_task             *task;
+	struct metric_result           *metric_res;
+	GTree                          *symbols_to_remove;
+	guint8						   *checked;
+};
+
+struct symbol_remove_data {
+	struct symbol                  *ms;
+	gboolean                        remove_weight;
+	gboolean                        remove_symbol;
+};
+
+static gint
+remove_compare_data (gconstpointer a, gconstpointer b)
+{
+	const gchar                    *ca = a, *cb = b;
+
+	return strcmp (ca, cb);
+}
+
+static void
+composites_foreach_callback (gpointer key, gpointer value, void *data)
+{
+	struct composites_data         *cd = (struct composites_data *)data;
+	struct rspamd_composite        *composite = value, *ncomp;
+	struct expression              *expr;
+	GQueue                         *stack;
+	GList                          *symbols = NULL, *s;
+	gsize                           cur, op1, op2;
+	gchar                           logbuf[256], *sym, *check_sym;
+	gint                            r;
+	struct symbol                  *ms;
+	struct symbol_remove_data      *rd;
+
+
+	expr = composite->expr;
+	if (isset (cd->checked, composite->id)) {
+		/* Symbol was already checked */
+		return;
+	}
+
+	stack = g_queue_new ();
+
+	while (expr) {
+		if (expr->type == EXPR_STR) {
+			/* Find corresponding symbol */
+			sym = expr->content.operand;
+			if (*sym == '~' || *sym == '-') {
+				sym ++;
+			}
+			if (g_hash_table_lookup (cd->metric_res->symbols, sym) == NULL) {
+				cur = 0;
+				if ((ncomp = g_hash_table_lookup (cd->task->cfg->composite_symbols, sym)) != NULL) {
+					/* Set checked for this symbol to avoid cyclic references */
+					if (isclr (cd->checked, ncomp->id)) {
+						setbit (cd->checked, composite->id);
+						composites_foreach_callback (sym, ncomp, cd);
+						if (g_hash_table_lookup (cd->metric_res->symbols, sym) != NULL) {
+							cur = 1;
+						}
+					}
+				}
+			}
+			else {
+				cur = 1;
+				symbols = g_list_prepend (symbols, expr->content.operand);
+			}
+			g_queue_push_head (stack, GSIZE_TO_POINTER (cur));
+		}
+		else {
+			if (g_queue_is_empty (stack)) {
+				/* Queue has no operands for operation, exiting */
+				g_list_free (symbols);
+				g_queue_free (stack);
+				setbit (cd->checked, composite->id);
+				return;
+			}
+			switch (expr->content.operation) {
+			case '!':
+				op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
+				op1 = !op1;
+				g_queue_push_head (stack, GSIZE_TO_POINTER (op1));
+				break;
+			case '&':
+				op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
+				op2 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
+				g_queue_push_head (stack, GSIZE_TO_POINTER (op1 && op2));
+				break;
+			case '|':
+				op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
+				op2 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
+				g_queue_push_head (stack, GSIZE_TO_POINTER (op1 || op2));
+				break;
+			default:
+				expr = expr->next;
+				continue;
+			}
+		}
+		expr = expr->next;
+	}
+	if (!g_queue_is_empty (stack)) {
+		op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
+		if (op1) {
+			/* Remove all symbols that are in composite symbol */
+			s = g_list_first (symbols);
+			r = rspamd_snprintf (logbuf, sizeof (logbuf), "<%s>, insert symbol %s instead of symbols: ", cd->task->message_id, key);
+			while (s) {
+				sym = s->data;
+				if (*sym == '~' || *sym == '-') {
+					check_sym = sym + 1;
+				}
+				else {
+					check_sym = sym;
+				}
+				ms = g_hash_table_lookup (cd->metric_res->symbols, check_sym);
+
+				if (ms == NULL) {
+					/* Try to process other composites */
+					if ((ncomp = g_hash_table_lookup (cd->task->cfg->composite_symbols, check_sym)) != NULL) {
+						/* Set checked for this symbol to avoid cyclic references */
+						if (isclr (cd->checked, ncomp->id)) {
+							setbit (cd->checked, composite->id);
+							composites_foreach_callback (check_sym, ncomp, cd);
+							ms = g_hash_table_lookup (cd->metric_res->symbols, check_sym);
+						}
+					}
+				}
+
+				if (ms != NULL) {
+					rd = rspamd_mempool_alloc (cd->task->task_pool, sizeof (struct symbol_remove_data));
+					rd->ms = ms;
+					if (G_UNLIKELY (*sym == '~')) {
+						rd->remove_weight = FALSE;
+						rd->remove_symbol = TRUE;
+					}
+					else if (G_UNLIKELY (*sym == '-')) {
+						rd->remove_symbol = FALSE;
+						rd->remove_weight = FALSE;
+					}
+					else {
+						rd->remove_symbol = TRUE;
+						rd->remove_weight = TRUE;
+					}
+					if (!g_tree_lookup (cd->symbols_to_remove, rd)) {
+						g_tree_insert (cd->symbols_to_remove, (gpointer)ms->name, rd);
+					}
+				}
+				else {
+
+				}
+
+				if (s->next) {
+					r += rspamd_snprintf (logbuf + r, sizeof (logbuf) -r, "%s, ", s->data);
+				}
+				else {
+					r += rspamd_snprintf (logbuf + r, sizeof (logbuf) -r, "%s", s->data);
+				}
+				s = g_list_next (s);
+			}
+			/* Add new symbol */
+			insert_result_single (cd->task, key, 1.0, NULL);
+			msg_info ("%s", logbuf);
+		}
+	}
+
+	setbit (cd->checked, composite->id);
+	g_queue_free (stack);
+	g_list_free (symbols);
+
+	return;
+}
+
+static                          gboolean
+check_autolearn (struct statfile_autolearn_params *params, struct rspamd_task *task)
+{
+	gchar                          *metric_name = DEFAULT_METRIC;
+	struct metric_result           *metric_res;
+	GList                          *cur;
+
+	if (params->metric != NULL) {
+		metric_name = (gchar *)params->metric;
+	}
+
+	/* First check threshold */
+	metric_res = g_hash_table_lookup (task->results, metric_name);
+	if (metric_res == NULL) {
+		if (params->symbols == NULL && params->threshold_max > 0) {
+			/* For ham messages */
+			return TRUE;
+		}
+		debug_task ("metric %s has no results", metric_name);
+		return FALSE;
+	}
+	else {
+		/* Process score of metric */
+		if ((params->threshold_min != 0 && metric_res->score > params->threshold_min) || (params->threshold_max != 0 && metric_res->score < params->threshold_max)) {
+			/* Now check for specific symbols */
+			if (params->symbols) {
+				cur = params->symbols;
+				while (cur) {
+					if (g_hash_table_lookup (metric_res->symbols, cur->data) == NULL) {
+						return FALSE;
+					}
+					cur = g_list_next (cur);
+				}
+			}
+			/* Now allow processing of actual autolearn */
+			return TRUE;
+		}
+	}
+
+	return FALSE;
+}
+
+void
+process_autolearn (struct statfile *st, struct rspamd_task *task, GTree * tokens, struct classifier *classifier, gchar *filename, struct classifier_ctx *ctx)
+{
+	stat_file_t                    *statfile;
+	struct statfile                *unused;
+
+	if (check_autolearn (st->autolearn, task)) {
+		if (tokens) {
+			/* Take care of subject */
+			tokenize_subject (task, &tokens);
+			msg_info ("message with id <%s> autolearned statfile '%s'", task->message_id, filename);
+			
+			/* Get or create statfile */
+			statfile = get_statfile_by_symbol (task->worker->srv->statfile_pool, ctx->cfg,
+						st->symbol, &unused, TRUE);
+			
+			if (statfile == NULL) {
+				return;
+			}
+
+			classifier->learn_func (ctx, task->worker->srv->statfile_pool, st->symbol, tokens, TRUE, NULL, 1., NULL);
+			maybe_write_binlog (ctx->cfg, st, statfile, tokens);
+			statfile_pool_plan_invalidate (task->worker->srv->statfile_pool, DEFAULT_STATFILE_INVALIDATE_TIME, DEFAULT_STATFILE_INVALIDATE_JITTER);
+		}
+	}
+}
+
+static gboolean
+composites_remove_symbols (gpointer key, gpointer value, gpointer data)
+{
+	struct composites_data         *cd = data;
+	struct symbol_remove_data      *rd = value;
+
+	if (rd->remove_symbol) {
+		g_hash_table_remove (cd->metric_res->symbols, key);
+	}
+	if (rd->remove_weight) {
+		cd->metric_res->score -= rd->ms->score;
+	}
+
+	return FALSE;
+}
+
+static void
+composites_metric_callback (gpointer key, gpointer value, gpointer data)
+{
+	struct rspamd_task             *task = (struct rspamd_task *)data;
+	struct composites_data         *cd = rspamd_mempool_alloc (task->task_pool, sizeof (struct composites_data));
+	struct metric_result           *metric_res = (struct metric_result *)value;
+
+	cd->task = task;
+	cd->metric_res = (struct metric_result *)metric_res;
+	cd->symbols_to_remove = g_tree_new (remove_compare_data);
+	cd->checked = rspamd_mempool_alloc0 (task->task_pool, NBYTES (g_hash_table_size (task->cfg->composite_symbols)));
+
+	/* Process hash table */
+	g_hash_table_foreach (task->cfg->composite_symbols, composites_foreach_callback, cd);
+
+	/* Remove symbols that are in composites */
+	g_tree_foreach (cd->symbols_to_remove, composites_remove_symbols, cd);
+	/* Free list */
+	g_tree_destroy (cd->symbols_to_remove);
+}
+
+void
+make_composites (struct rspamd_task *task)
+{
+	g_hash_table_foreach (task->results, composites_metric_callback, task);
+}
+
+struct classifiers_cbdata {
+	struct rspamd_task *task;
+	struct lua_locked_state *nL;
+};
+
+static void
+classifiers_callback (gpointer value, void *arg)
+{
+	struct classifiers_cbdata	   *cbdata = arg;
+	struct rspamd_task             *task;
+	struct classifier_config       *cl = value;
+	struct classifier_ctx          *ctx;
+	struct mime_text_part          *text_part, *p1, *p2;
+	struct statfile                *st;
+	GTree                          *tokens = NULL;
+	GList                          *cur;
+	f_str_t                         c;
+	gchar                          *header = NULL;
+	gint                           *dist = NULL, diff;
+	gboolean                        is_twopart = FALSE;
+	
+	task = cbdata->task;
+
+	if ((header = g_hash_table_lookup (cl->opts, "header")) != NULL) {
+		cur = message_get_header (task->task_pool, task->message, header, FALSE);
+		if (cur) {
+			rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t)g_list_free, cur);
+		}
+	}
+	else {
+		cur = g_list_first (task->text_parts);
+		dist =  rspamd_mempool_get_variable (task->task_pool, "parts_distance");
+		if (cur != NULL && cur->next != NULL && cur->next->next == NULL) {
+			is_twopart = TRUE;
+		}
+	}
+	ctx = cl->classifier->init_func (task->task_pool, cl);
+
+	if ((tokens = g_hash_table_lookup (task->tokens, cl->tokenizer)) == NULL) {
+		while (cur != NULL) {
+			if (header) {
+				c.len = strlen (cur->data);
+				if (c.len > 0) {
+					c.begin = cur->data;
+					if (!cl->tokenizer->tokenize_func (cl->tokenizer, task->task_pool, &c, &tokens, FALSE, FALSE, NULL)) {
+						msg_info ("cannot tokenize input");
+						return;
+					}
+				}
+			}
+			else {
+				text_part = (struct mime_text_part *)cur->data;
+				if (text_part->is_empty) {
+					cur = g_list_next (cur);
+					continue;
+				}
+				if (dist != NULL && cur->next == NULL) {
+					/* Compare part's content */
+
+					if (*dist >= COMMON_PART_FACTOR) {
+						msg_info ("message <%s> has two common text parts, ignore the last one", task->message_id);
+						break;
+					}
+				}
+				else if (cur->next == NULL && is_twopart) {
+					p1 = cur->prev->data;
+					p2 = text_part;
+					if (p1->diff_str != NULL && p2->diff_str != NULL) {
+						diff = compare_diff_distance (p1->diff_str, p2->diff_str);
+					}
+					else {
+						diff = fuzzy_compare_parts (p1, p2);
+					}
+					if (diff >= COMMON_PART_FACTOR) {
+						msg_info ("message <%s> has two common text parts, ignore the last one", task->message_id);
+						break;
+					}
+				}
+				c.begin = (gchar *)text_part->content->data;
+				c.len = text_part->content->len;
+				/* Tree would be freed at task pool freeing */
+				if (!cl->tokenizer->tokenize_func (cl->tokenizer, task->task_pool, &c, &tokens,
+						FALSE, text_part->is_utf, text_part->urls_offset)) {
+					msg_info ("cannot tokenize input");
+					return;
+				}
+			}
+			cur = g_list_next (cur);
+		}
+		g_hash_table_insert (task->tokens, cl->tokenizer, tokens);
+	}
+
+	/* Take care of subject */
+	tokenize_subject (task, &tokens);
+
+	if (tokens == NULL) {
+		return;
+	}
+
+	if (cbdata->nL != NULL) {
+		rspamd_mutex_lock (cbdata->nL->m);
+		cl->classifier->classify_func (ctx, task->worker->srv->statfile_pool, tokens, task, cbdata->nL->L);
+		rspamd_mutex_unlock (cbdata->nL->m);
+	}
+	else {
+		/* Non-threaded case */
+		cl->classifier->classify_func (ctx, task->worker->srv->statfile_pool, tokens, task, task->cfg->lua_state);
+	}
+
+	/* Autolearning */
+	cur = g_list_first (cl->statfiles);
+	while (cur) {
+		st = cur->data;
+		if (st->autolearn) {
+			if (check_autolearn (st->autolearn, task)) {
+				/* Process autolearn */
+				process_autolearn (st, task, tokens, cl->classifier, st->path, ctx);
+			}
+		}
+		cur = g_list_next (cur);
+	}
+}
+
+
+void
+process_statfiles (struct rspamd_task *task)
+{
+	struct classifiers_cbdata		cbdata;
+
+	if (task->is_skipped) {
+		return;
+	}
+
+	if (task->tokens == NULL) {
+		task->tokens = g_hash_table_new (g_direct_hash, g_direct_equal);
+		rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t)g_hash_table_unref, task->tokens);
+	}
+	cbdata.task = task;
+	cbdata.nL = NULL;
+	g_list_foreach (task->cfg->classifiers, classifiers_callback, &cbdata);
+
+	/* Process results */
+	make_composites (task);
+}
+
+void
+process_statfiles_threaded (gpointer data, gpointer user_data)
+{
+	struct rspamd_task             *task = (struct rspamd_task *)data;
+	struct lua_locked_state 	   *nL = user_data;
+	struct classifiers_cbdata		cbdata;
+
+	if (task->is_skipped) {
+		remove_async_thread (task->s);
+		return;
+	}
+
+	if (task->tokens == NULL) {
+		task->tokens = g_hash_table_new (g_direct_hash, g_direct_equal);
+		rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t)g_hash_table_unref, task->tokens);
+	}
+
+	cbdata.task = task;
+	cbdata.nL = nL;
+	g_list_foreach (task->cfg->classifiers, classifiers_callback, &cbdata);
+	remove_async_thread (task->s);
+}
+
+static void
+insert_metric_header (gpointer metric_name, gpointer metric_value, gpointer data)
+{
+#ifndef GLIB_HASH_COMPAT
+	struct rspamd_task             *task = (struct rspamd_task *)data;
+	gint                            r = 0;
+	/* Try to be rfc2822 compatible and avoid long headers with folding */
+	gchar                           header_name[128], outbuf[1000];
+	GList                          *symbols = NULL, *cur;
+	struct metric_result           *metric_res = (struct metric_result *)metric_value;
+	double                          ms, rs;
+
+	rspamd_snprintf (header_name, sizeof (header_name), "X-Spam-%s", metric_res->metric->name);
+
+	if (!check_metric_settings (metric_res, &ms, &rs)) {
+		ms = metric_res->metric->actions[METRIC_ACTION_REJECT].score;
+	}
+	if (ms > 0 && metric_res->score >= ms) {
+		r += rspamd_snprintf (outbuf + r, sizeof (outbuf) - r, "yes; %.2f/%.2f/%.2f; ", metric_res->score, ms, rs);
+	}
+	else {
+		r += rspamd_snprintf (outbuf + r, sizeof (outbuf) - r, "no; %.2f/%.2f/%.2f; ", metric_res->score, ms, rs);
+	}
+
+	symbols = g_hash_table_get_keys (metric_res->symbols);
+	cur = symbols;
+	while (cur) {
+		if (g_list_next (cur) != NULL) {
+			r += rspamd_snprintf (outbuf + r, sizeof (outbuf) - r, "%s,", (gchar *)cur->data);
+		}
+		else {
+			r += rspamd_snprintf (outbuf + r, sizeof (outbuf) - r, "%s", (gchar *)cur->data);
+		}
+		cur = g_list_next (cur);
+	}
+	g_list_free (symbols);
+#ifdef GMIME24
+	g_mime_object_append_header (GMIME_OBJECT (task->message), header_name, outbuf);
+#else
+	g_mime_message_add_header (task->message, header_name, outbuf);
+#endif
+
+#endif /* GLIB_COMPAT */
+}
+
+void
+insert_headers (struct rspamd_task *task)
+{
+	g_hash_table_foreach (task->results, insert_metric_header, task);
+}
+
+gboolean
+check_action_str (const gchar *data, gint *result)
+{
+	if (g_ascii_strncasecmp (data, "reject", sizeof ("reject") - 1) == 0) {
+		*result = METRIC_ACTION_REJECT;
+	}
+	else if (g_ascii_strncasecmp (data, "greylist", sizeof ("greylist") - 1) == 0) {
+		*result = METRIC_ACTION_GREYLIST;
+	}
+	else if (g_ascii_strncasecmp (data, "add_header", sizeof ("add_header") - 1) == 0) {
+		*result = METRIC_ACTION_ADD_HEADER;
+	}
+	else if (g_ascii_strncasecmp (data, "rewrite_subject", sizeof ("rewrite_subject") - 1) == 0) {
+		*result = METRIC_ACTION_REWRITE_SUBJECT;
+	}
+	else {
+		return FALSE;
+	}
+	return TRUE;
+}
+
+const gchar *
+str_action_metric (enum rspamd_metric_action action)
+{
+	switch (action) {
+	case METRIC_ACTION_REJECT:
+		return "reject";
+	case METRIC_ACTION_SOFT_REJECT:
+		return "soft_reject";
+	case METRIC_ACTION_REWRITE_SUBJECT:
+		return "rewrite_subject";
+	case METRIC_ACTION_ADD_HEADER:
+		return "add_header";
+	case METRIC_ACTION_GREYLIST:
+		return "greylist";
+	case METRIC_ACTION_NOACTION:
+		return "no_action";
+	case METRIC_ACTION_MAX:
+		return "invalid max action";
+	}
+
+	return "unknown action";
+}
+
+gint
+check_metric_action (double score, double required_score, struct metric *metric)
+{
+	struct metric_action           *action, *selected_action = NULL;
+	double                          max_score = 0;
+	int                             i;
+
+	if (score >= required_score) {
+		return METRIC_ACTION_REJECT;
+	}
+	else if (metric->actions == NULL) {
+		return METRIC_ACTION_NOACTION;
+	}
+	else {
+		for (i = METRIC_ACTION_REJECT; i < METRIC_ACTION_MAX; i ++) {
+			action = &metric->actions[i];
+			if (action->score < 0) {
+				continue;
+			}
+			if (score >= action->score && action->score > max_score) {
+				selected_action = action;
+				max_score = action->score;
+			}
+		}
+		if (selected_action) {
+			return selected_action->action;
+		}
+		else {
+			return METRIC_ACTION_NOACTION;
+		}
+	}
+}
+
+gboolean
+learn_task (const gchar *statfile, struct rspamd_task *task, GError **err)
+{
+	GList                          *cur, *ex;
+	struct classifier_config       *cl;
+	struct classifier_ctx          *cls_ctx;
+	gchar                          *s;
+	f_str_t                         c;
+	GTree                          *tokens = NULL;
+	struct statfile                *st;
+	stat_file_t                    *stf;
+	gdouble                         sum;
+	struct mime_text_part          *part, *p1, *p2;
+	gboolean                        is_utf = FALSE, is_twopart = FALSE;
+	gint                            diff;
+
+
+	/* Load classifier by symbol */
+	cl = g_hash_table_lookup (task->cfg->classifiers_symbols, statfile);
+	if (cl == NULL) {
+		g_set_error (err, filter_error_quark(), 1, "Statfile %s is not configured in any classifier", statfile);
+		return FALSE;
+	}
+
+	/* If classifier has 'header' option just classify header of this type */
+	if ((s = g_hash_table_lookup (cl->opts, "header")) != NULL) {
+		cur = message_get_header (task->task_pool, task->message, s, FALSE);
+		if (cur) {
+			rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t)g_list_free, cur);
+		}
+	}
+	else {
+		/* Classify message otherwise */
+		cur = g_list_first (task->text_parts);
+		if (cur != NULL && cur->next != NULL && cur->next->next == NULL) {
+			is_twopart = TRUE;
+		}
+	}
+
+	/* Get tokens from each element */
+	while (cur) {
+		if (s != NULL) {
+			c.len = strlen (cur->data);
+			c.begin = cur->data;
+			ex = NULL;
+		}
+		else {
+			part = cur->data;
+			/* Skip empty parts */
+			if (part->is_empty) {
+				cur = g_list_next (cur);
+				continue;
+			}
+			c.begin = (gchar *)part->content->data;
+			c.len = part->content->len;
+			is_utf = part->is_utf;
+			ex = part->urls_offset;
+			if (is_twopart && cur->next == NULL) {
+				/* Compare part's content */
+				p1 = cur->prev->data;
+				p2 = part;
+				if (p1->diff_str != NULL && p2->diff_str != NULL) {
+					diff = compare_diff_distance (p1->diff_str, p2->diff_str);
+				}
+				else {
+					diff = fuzzy_compare_parts (p1, p2);
+				}
+				if (diff >= COMMON_PART_FACTOR) {
+					msg_info ("message <%s> has two common text parts, ignore the last one", task->message_id);
+					break;
+				}
+			}
+		}
+		/* Get tokens */
+		if (!cl->tokenizer->tokenize_func (
+				cl->tokenizer, task->task_pool,
+				&c, &tokens, FALSE, is_utf, ex)) {
+			g_set_error (err, filter_error_quark(), 2, "Cannot tokenize message");
+			return FALSE;
+		}
+		cur = g_list_next (cur);
+	}
+
+	/* Handle messages without text */
+	if (tokens == NULL) {
+		g_set_error (err, filter_error_quark(), 3, "Cannot tokenize message, no text data");
+		msg_info ("learn failed for message <%s>, no tokens to extract", task->message_id);
+		return FALSE;
+	}
+
+	/* Take care of subject */
+	tokenize_subject (task, &tokens);
+
+	/* Init classifier */
+	cls_ctx = cl->classifier->init_func (
+			task->task_pool, cl);
+	/* Get or create statfile */
+	stf = get_statfile_by_symbol (task->worker->srv->statfile_pool,
+			cl, statfile, &st, TRUE);
+
+	/* Learn */
+	if (stf== NULL || !cl->classifier->learn_func (
+			cls_ctx, task->worker->srv->statfile_pool,
+			statfile, tokens, TRUE, &sum,
+			1.0, err)) {
+		if (*err) {
+			msg_info ("learn failed for message <%s>, learn error: %s", task->message_id, (*err)->message);
+			return FALSE;
+		}
+		else {
+			g_set_error (err, filter_error_quark(), 4, "Learn failed, unknown learn classifier error");
+			msg_info ("learn failed for message <%s>, unknown learn error", task->message_id);
+			return FALSE;
+		}
+	}
+	/* Increase statistics */
+	task->worker->srv->stat->messages_learned++;
+
+	maybe_write_binlog (cl, st, stf, tokens);
+	msg_info ("learn success for message <%s>, for statfile: %s, sum weight: %.2f",
+			task->message_id, statfile, sum);
+	statfile_pool_plan_invalidate (task->worker->srv->statfile_pool,
+			DEFAULT_STATFILE_INVALIDATE_TIME,
+			DEFAULT_STATFILE_INVALIDATE_JITTER);
+
+	return TRUE;
+}
+
+gboolean
+learn_task_spam (struct classifier_config *cl, struct rspamd_task *task, gboolean is_spam, GError **err)
+{
+	GList                          *cur, *ex;
+	struct classifier_ctx          *cls_ctx;
+	f_str_t                         c;
+	GTree                          *tokens = NULL;
+	struct mime_text_part          *part, *p1, *p2;
+	gboolean                        is_utf = FALSE, is_twopart = FALSE;
+	gint                            diff;
+
+	cur = g_list_first (task->text_parts);
+	if (cur != NULL && cur->next != NULL && cur->next->next == NULL) {
+		is_twopart = TRUE;
+	}
+
+	/* Get tokens from each element */
+	while (cur) {
+		part = cur->data;
+		/* Skip empty parts */
+		if (part->is_empty) {
+			cur = g_list_next (cur);
+			continue;
+		}
+		c.begin = (gchar *)part->content->data;
+		c.len = part->content->len;
+		is_utf = part->is_utf;
+		ex = part->urls_offset;
+		if (is_twopart && cur->next == NULL) {
+			/*
+			 * Compare part's content
+			 * Note: here we don't have filters proceeded this message, so using pool variable is a bad idea
+			 */
+			p1 = cur->prev->data;
+			p2 = part;
+			if (p1->diff_str != NULL && p2->diff_str != NULL) {
+				diff = compare_diff_distance (p1->diff_str, p2->diff_str);
+			}
+			else {
+				diff = fuzzy_compare_parts (p1, p2);
+			}
+			if (diff >= COMMON_PART_FACTOR) {
+				msg_info ("message <%s> has two common text parts, ignore the last one", task->message_id);
+				break;
+			}
+		}
+		/* Get tokens */
+		if (!cl->tokenizer->tokenize_func (
+				cl->tokenizer, task->task_pool,
+				&c, &tokens, FALSE, is_utf, ex)) {
+			g_set_error (err, filter_error_quark(), 2, "Cannot tokenize message");
+			return FALSE;
+		}
+		cur = g_list_next (cur);
+	}
+
+	/* Handle messages without text */
+	if (tokens == NULL) {
+		g_set_error (err, filter_error_quark(), 3, "Cannot tokenize message, no text data");
+		msg_info ("learn failed for message <%s>, no tokens to extract", task->message_id);
+		return FALSE;
+	}
+
+	/* Take care of subject */
+	tokenize_subject (task, &tokens);
+
+	/* Init classifier */
+	cls_ctx = cl->classifier->init_func (
+			task->task_pool, cl);
+	/* Learn */
+	if (!cl->classifier->learn_spam_func (
+			cls_ctx, task->worker->srv->statfile_pool,
+			tokens, task, is_spam, task->cfg->lua_state, err)) {
+		if (*err) {
+			msg_info ("learn failed for message <%s>, learn error: %s", task->message_id, (*err)->message);
+			return FALSE;
+		}
+		else {
+			g_set_error (err, filter_error_quark(), 4, "Learn failed, unknown learn classifier error");
+			msg_info ("learn failed for message <%s>, unknown learn error", task->message_id);
+			return FALSE;
+		}
+	}
+	/* Increase statistics */
+	task->worker->srv->stat->messages_learned++;
+
+	msg_info ("learn success for message <%s>",
+			task->message_id);
+	statfile_pool_plan_invalidate (task->worker->srv->statfile_pool,
+			DEFAULT_STATFILE_INVALIDATE_TIME,
+			DEFAULT_STATFILE_INVALIDATE_JITTER);
+
+	return TRUE;
+}
+
+/* 
+ * vi:ts=4 
+ */
diff --git a/src/libmime/filter.h b/src/libmime/filter.h
new file mode 100644
index 000000000..258bd9447
--- /dev/null
+++ b/src/libmime/filter.h
@@ -0,0 +1,167 @@
+/**
+ * @file filter.h
+ * Filters logic implemetation
+ */
+
+#ifndef RSPAMD_FILTER_H
+#define RSPAMD_FILTER_H
+
+#include "config.h"
+#include "symbols_cache.h"
+#include "task.h"
+
+struct rspamd_task;
+struct rspamd_settings;
+struct classifier_config;
+
+typedef double (*metric_cons_func)(struct rspamd_task *task, const gchar *metric_name, const gchar *func_name);
+typedef void (*filter_func)(struct rspamd_task *task);
+
+enum filter_type { C_FILTER, PERL_FILTER };
+
+/**
+ * Filter structure
+ */
+struct filter {
+	gchar *func_name;								/**< function name							*/
+	enum filter_type type;							/**< filter type (c or perl)				*/
+    module_t *module;
+};
+
+/**
+ * Rspamd symbol
+ */
+struct symbol {
+	double score;									/**< symbol's score							*/
+	GList *options;									/**< list of symbol's options				*/
+	const gchar *name;
+};
+
+struct metric_action {
+	enum rspamd_metric_action action;
+	gdouble score;
+};
+
+/**
+ * Common definition of metric
+ */
+struct metric {
+	const gchar *name;								/**< name of metric									*/
+	gchar *func_name;								/**< name of consolidation function					*/
+	metric_cons_func func;							/**< c consolidation function						*/
+	double grow_factor;								/**< grow factor for metric							*/
+	GHashTable *symbols;							/**< weights of symbols in metric					*/
+	GHashTable *descriptions;						/**< descriptions of symbols in metric				*/
+	struct metric_action actions[METRIC_ACTION_MAX]; /**< all actions of the metric					*/
+	gchar *subject;									/**< subject rewrite string							*/
+};
+
+/**
+ * Result of metric processing
+ */
+struct metric_result {
+	struct metric *metric;							/**< pointer to metric structure			*/
+	double score;									/**< total score							*/
+	GHashTable *symbols;							/**< symbols of metric						*/
+	gboolean checked;								/**< whether metric result is consolidated  */
+	double grow_factor;								/**< current grow factor					*/
+	struct rspamd_settings *user_settings;			/**< settings for metric					*/
+	struct rspamd_settings *domain_settings;		/**< settings for metric					*/
+};
+
+/**
+ * Composite structure
+ */
+struct rspamd_composite {
+	struct expression *expr;
+	gint id;
+};
+
+/**
+ * Process all filters
+ * @param task worker's task that present message from user
+ * @return 0 - if there is non-finished tasks and 1 if processing is completed
+ */
+gint process_filters (struct rspamd_task *task);
+
+/**
+ * Process message with statfiles
+ * @param task worker's task that present message from user
+ */
+void process_statfiles (struct rspamd_task *task);
+
+/**
+ * Process message with statfiles threaded
+ * @param data worker's task that present message from user
+ */
+void process_statfiles_threaded (gpointer data, gpointer user_data);
+
+/**
+ * Insert a result to task
+ * @param task worker's task that present message from user
+ * @param metric_name metric's name to which we need to insert result
+ * @param symbol symbol to insert
+ * @param flag numeric weight for symbol
+ * @param opts list of symbol's options
+ */
+void insert_result (struct rspamd_task *task, const gchar *symbol, double flag, GList *opts);
+
+/**
+ * Insert a single result to task
+ * @param task worker's task that present message from user
+ * @param metric_name metric's name to which we need to insert result
+ * @param symbol symbol to insert
+ * @param flag numeric weight for symbol
+ * @param opts list of symbol's options
+ */
+void insert_result_single (struct rspamd_task *task, const gchar *symbol, double flag, GList *opts);
+
+/**
+ * Process all results and form composite metrics from existent metrics as it is defined in config
+ * @param task worker's task that present message from user
+ */
+void make_composites (struct rspamd_task *task);
+
+/**
+ * Default consolidation function for metric, it get all symbols and multiply symbol 
+ * weight by some factor that is specified in config. Default factor is 1.
+ * @param task worker's task that present message from user
+ * @param metric_name name of metric
+ * @return result metric weight
+ */
+double factor_consolidation_func (struct rspamd_task *task, const gchar *metric_name, const gchar *unused);
+
+/*
+ * Learn specified statfile with message in a task
+ * @param statfile symbol of statfile
+ * @param task worker's task object
+ * @param err pointer to GError
+ * @return true if learn succeed
+ */
+gboolean learn_task (const gchar *statfile, struct rspamd_task *task, GError **err);
+
+/*
+ * Learn specified statfile with message in a task
+ * @param statfile symbol of statfile
+ * @param task worker's task object
+ * @param err pointer to GError
+ * @return true if learn succeed
+ */
+gboolean learn_task_spam (struct classifier_config *cl, struct rspamd_task *task, gboolean is_spam, GError **err);
+
+/*
+ * Get action from a string
+ */
+gboolean check_action_str (const gchar *data, gint *result);
+
+/*
+ * Return textual representation of action enumeration
+ */
+const gchar *str_action_metric (enum rspamd_metric_action action);
+
+/*
+ * Get action for specific metric
+ */
+gint check_metric_action (double score, double required_score, struct metric *metric);
+
+#endif
diff --git a/src/libmime/images.c b/src/libmime/images.c
new file mode 100644
index 000000000..ff07bbd72
--- /dev/null
+++ b/src/libmime/images.c
@@ -0,0 +1,255 @@
+/* Copyright (c) 2010, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *       * Redistributions of source code must retain the above copyright
+ *         notice, this list of conditions and the following disclaimer.
+ *       * Redistributions in binary form must reproduce the above copyright
+ *         notice, this list of conditions and the following disclaimer in the
+ *         documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "images.h"
+#include "main.h"
+#include "message.h"
+
+static const guint8 png_signature[] = {137, 80, 78, 71, 13, 10, 26, 10};
+static const guint8 jpg_sig1[] = {0xff, 0xd8};
+static const guint8 jpg_sig2[] = {'J', 'F', 'I', 'F'};
+static const guint8 gif_signature[] = {'G', 'I', 'F', '8'};
+static const guint8 bmp_signature[] = {'B', 'M'};
+
+static void                     process_image (struct rspamd_task *task, struct mime_part *part);
+
+
+void
+process_images (struct rspamd_task *task)
+{
+	GList                          *cur;
+	struct mime_part			   *part;
+
+	cur = task->parts;
+	while (cur) {
+		part = cur->data;
+		if (g_mime_content_type_is_type (part->type, "image", "*") && part->content->len > 0) {
+			process_image (task, part);
+		}
+		cur = g_list_next (cur);
+	}
+
+}
+
+static enum known_image_types
+detect_image_type (GByteArray *data)
+{
+	if (data->len > sizeof (png_signature) / sizeof (png_signature[0])) {
+		if (memcmp (data->data, png_signature, sizeof (png_signature)) == 0) {
+			return IMAGE_TYPE_PNG;
+		}
+	}
+	if (data->len > 10) {
+		if (memcmp (data->data, jpg_sig1, sizeof (jpg_sig1)) == 0) {
+			if (memcmp (data->data + 6, jpg_sig2, sizeof (jpg_sig2)) == 0) {
+				return IMAGE_TYPE_JPG;
+			}
+		}
+	}
+	if (data->len > sizeof (gif_signature) / sizeof (gif_signature[0])) {
+		if (memcmp (data->data, gif_signature, sizeof (gif_signature)) == 0) {
+			return IMAGE_TYPE_GIF;
+		}
+	}
+	if (data->len > sizeof (bmp_signature) / sizeof (bmp_signature[0])) {
+		if (memcmp (data->data, bmp_signature, sizeof (bmp_signature)) == 0) {
+			return IMAGE_TYPE_BMP;
+		}
+	}
+
+	return IMAGE_TYPE_UNKNOWN;
+}
+
+
+static struct rspamd_image *
+process_png_image (struct rspamd_task *task, GByteArray *data)
+{
+	struct rspamd_image             *img;
+	guint32                          t;
+	guint8                          *p;
+
+	if (data->len < 24) {
+		msg_info ("bad png detected (maybe striped): <%s>", task->message_id);
+		return NULL;
+	}
+
+	/* In png we should find iHDR section and get data from it */
+	/* Skip signature and read header section */
+	p = data->data + 12;
+	if (memcmp (p, "IHDR", 4) != 0) {
+		msg_info ("png doesn't begins with IHDR section", task->message_id);
+		return NULL;
+	}
+
+	img = rspamd_mempool_alloc (task->task_pool, sizeof (struct rspamd_image));
+	img->type = IMAGE_TYPE_PNG;
+	img->data = data;
+
+	p += 4;
+	memcpy (&t, p, sizeof (guint32));
+	img->width = ntohl (t);
+	p += 4;
+	memcpy (&t, p, sizeof (guint32));
+	img->height = ntohl (t);
+
+	return img;
+}
+
+static struct rspamd_image *
+process_jpg_image (struct rspamd_task *task, GByteArray *data)
+{
+	guint8                          *p;
+	guint16                          t;
+	gsize                            remain;
+	struct rspamd_image             *img;
+
+	img = rspamd_mempool_alloc (task->task_pool, sizeof (struct rspamd_image));
+	img->type = IMAGE_TYPE_JPG;
+	img->data = data;
+
+	p = data->data;
+	remain = data->len;
+	/* In jpeg we should find any data stream (ff c0 .. ff c3) and extract its height and width */
+	while (remain --) {
+		if (*p == 0xFF && remain > 8 && (*(p + 1) >= 0xC0 && *(p + 1) <= 0xC3)) {
+			memcpy (&t, p + 5, sizeof (guint16));
+			img->height = ntohs (t);
+			memcpy (&t, p + 7, sizeof (guint16));
+			img->width = ntohs (t);
+			return img;
+		}
+		p ++;
+	}
+
+	return NULL;
+}
+
+static struct rspamd_image *
+process_gif_image (struct rspamd_task *task, GByteArray *data)
+{
+	struct rspamd_image             *img;
+	guint8                          *p;
+	guint16                          t;
+
+	if (data->len < 10) {
+		msg_info ("bad gif detected (maybe striped): <%s>", task->message_id);
+		return NULL;
+	}
+
+	img = rspamd_mempool_alloc (task->task_pool, sizeof (struct rspamd_image));
+	img->type = IMAGE_TYPE_GIF;
+	img->data = data;
+
+	p = data->data + 6;
+	memcpy (&t, p, sizeof (guint16));
+	img->width = GUINT16_FROM_LE (t);
+	memcpy (&t, p + 2, sizeof (guint16));
+	img->height = GUINT16_FROM_LE (t);
+
+	return img;
+}
+
+static struct rspamd_image *
+process_bmp_image (struct rspamd_task *task, GByteArray *data)
+{
+	struct rspamd_image             *img;
+	gint32                           t;
+	guint8                          *p;
+
+
+
+	if (data->len < 28) {
+		msg_info ("bad bmp detected (maybe striped): <%s>", task->message_id);
+		return NULL;
+	}
+
+	img = rspamd_mempool_alloc (task->task_pool, sizeof (struct rspamd_image));
+	img->type = IMAGE_TYPE_BMP;
+	img->data = data;
+	p = data->data + 18;
+	memcpy (&t, p, sizeof (gint32));
+	img->width = abs (GINT32_FROM_LE (t));
+	memcpy (&t, p + 4, sizeof (gint32));
+	img->height = abs (GINT32_FROM_LE (t));
+
+	return img;
+}
+
+static void
+process_image (struct rspamd_task *task, struct mime_part *part)
+{
+	enum known_image_types          type;
+	struct rspamd_image            *img = NULL;
+	if ((type = detect_image_type (part->content)) != IMAGE_TYPE_UNKNOWN) {
+		switch (type) {
+		case IMAGE_TYPE_PNG:
+			img = process_png_image (task, part->content);
+			break;
+		case IMAGE_TYPE_JPG:
+			img = process_jpg_image (task, part->content);
+			break;
+		case IMAGE_TYPE_GIF:
+			img = process_gif_image (task, part->content);
+			break;
+		case IMAGE_TYPE_BMP:
+			img = process_bmp_image (task, part->content);
+			break;
+		default:
+			img = NULL;
+			break;
+		}
+	}
+
+	if (img != NULL) {
+		debug_task ("detected %s image of size %ud x %ud in message <%s>",
+				image_type_str (img->type),
+				img->width, img->height,
+				task->message_id);
+		img->filename = part->filename;
+		task->images = g_list_prepend (task->images, img);
+	}
+}
+
+const gchar *
+image_type_str (enum known_image_types type)
+{
+	switch (type) {
+	case IMAGE_TYPE_PNG:
+		return "PNG";
+		break;
+	case IMAGE_TYPE_JPG:
+		return "JPEG";
+		break;
+	case IMAGE_TYPE_GIF:
+		return "GIF";
+		break;
+	case IMAGE_TYPE_BMP:
+		return "BMP";
+		break;
+	default:
+		return "unknown";
+	}
+
+	return "unknown";
+}
diff --git a/src/libmime/images.h b/src/libmime/images.h
new file mode 100644
index 000000000..c43941ebc
--- /dev/null
+++ b/src/libmime/images.h
@@ -0,0 +1,33 @@
+#ifndef IMAGES_H_
+#define IMAGES_H_
+
+#include "config.h"
+#include "main.h"
+
+enum known_image_types {
+	IMAGE_TYPE_PNG,
+	IMAGE_TYPE_JPG,
+	IMAGE_TYPE_GIF,
+	IMAGE_TYPE_BMP,
+	IMAGE_TYPE_UNKNOWN = 9000
+};
+
+struct rspamd_image {
+	enum known_image_types type;
+	GByteArray *data;
+	guint32 width;
+	guint32 height;
+	const gchar *filename;
+};
+
+/*
+ * Process images from a worker task
+ */
+void process_images (struct rspamd_task *task);
+
+/*
+ * Get textual representation of an image's type
+ */
+const gchar *image_type_str (enum known_image_types type);
+
+#endif /* IMAGES_H_ */
diff --git a/src/libmime/message.c b/src/libmime/message.c
new file mode 100644
index 000000000..4567869e9
--- /dev/null
+++ b/src/libmime/message.c
@@ -0,0 +1,1764 @@
+/*
+ * Copyright (c) 2009-2012, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "util.h"
+#include "main.h"
+#include "message.h"
+#include "cfg_file.h"
+#include "html.h"
+#include "images.h"
+
+#define RECURSION_LIMIT 30
+#define UTF8_CHARSET "UTF-8"
+
+GByteArray                     *
+strip_html_tags (struct rspamd_task *task, rspamd_mempool_t * pool, struct mime_text_part *part, GByteArray * src, gint *stateptr)
+{
+	uint8_t                        *p, *rp, *tbegin = NULL, *end, c, lc;
+	gint                            br, i = 0, depth = 0, in_q = 0;
+	gint                            state = 0;
+	GByteArray                     *buf;
+	GNode                          *level_ptr = NULL;
+	gboolean                        erase = FALSE;
+
+	if (stateptr)
+		state = *stateptr;
+
+	buf = g_byte_array_sized_new (src->len);
+	g_byte_array_append (buf, src->data, src->len);
+
+	c = *src->data;
+	lc = '\0';
+	p = src->data;
+	rp = buf->data;
+	end = src->data + src->len;
+	br = 0;
+
+	while (i < (gint)src->len) {
+		switch (c) {
+		case '\0':
+			break;
+		case '<':
+			if (g_ascii_isspace (*(p + 1))) {
+				goto reg_char;
+			}
+			if (state == 0) {
+				lc = '<';
+				tbegin = p + 1;
+				state = 1;
+			}
+			else if (state == 1) {
+				/* Opening bracket without closing one */
+				p --;
+				while (g_ascii_isspace (*p) && p > src->data) {
+					p --;
+				}
+				p ++;
+				goto unbreak_tag;
+			}
+			break;
+
+		case '(':
+			if (state == 2) {
+				if (lc != '"' && lc != '\'') {
+					lc = '(';
+					br++;
+				}
+			}
+			else if (state == 0 && !erase) {
+				*(rp++) = c;
+			}
+			break;
+
+		case ')':
+			if (state == 2) {
+				if (lc != '"' && lc != '\'') {
+					lc = ')';
+					br--;
+				}
+			}
+			else if (state == 0 && !erase) {
+				*(rp++) = c;
+			}
+			break;
+
+		case '>':
+			if (depth) {
+				depth--;
+				break;
+			}
+
+			if (in_q) {
+				break;
+			}
+unbreak_tag:
+			switch (state) {
+			case 1:			/* HTML/XML */
+				lc = '>';
+				in_q = state = 0;
+				erase = !add_html_node (task, pool, part, tbegin, p - tbegin, end - tbegin, &level_ptr);
+				break;
+
+			case 2:			/* PHP */
+				if (!br && lc != '\"' && *(p - 1) == '?') {
+					in_q = state = 0;
+				}
+				break;
+
+			case 3:
+				in_q = state = 0;
+				break;
+
+			case 4:			/* JavaScript/CSS/etc... */
+				if (p >= src->data + 2 && *(p - 1) == '-' && *(p - 2) == '-') {
+					in_q = state = 0;
+				}
+				break;
+
+			default:
+				if (!erase) {
+					*(rp++) = c;
+				}
+				break;
+			}
+			break;
+
+		case '"':
+		case '\'':
+			if (state == 2 && *(p - 1) != '\\') {
+				if (lc == c) {
+					lc = '\0';
+				}
+				else if (lc != '\\') {
+					lc = c;
+				}
+			}
+			else if (state == 0 && !erase) {
+				*(rp++) = c;
+			}
+			if (state && p != src->data && *(p - 1) != '\\' && (!in_q || *p == in_q)) {
+				if (in_q) {
+					in_q = 0;
+				}
+				else {
+					in_q = *p;
+				}
+			}
+			break;
+
+		case '!':
+			/* JavaScript & Other HTML scripting languages */
+			if (state == 1 && *(p - 1) == '<') {
+				state = 3;
+				lc = c;
+			}
+			else {
+				if (state == 0 && !erase) {
+					*(rp++) = c;
+				}
+			}
+			break;
+
+		case '-':
+			if (state == 3 && p >= src->data + 2 && *(p - 1) == '-' && *(p - 2) == '!') {
+				state = 4;
+			}
+			else {
+				goto reg_char;
+			}
+			break;
+
+		case '?':
+
+			if (state == 1 && *(p - 1) == '<') {
+				br = 0;
+				state = 2;
+				break;
+			}
+
+		case 'E':
+		case 'e':
+			/* !DOCTYPE exception */
+			if (state == 3 && p > src->data + 6
+				&& g_ascii_tolower (*(p - 1)) == 'p'
+				&& g_ascii_tolower (*(p - 2)) == 'y'
+				&& g_ascii_tolower (*(p - 3)) == 't' && g_ascii_tolower (*(p - 4)) == 'c' && g_ascii_tolower (*(p - 5)) == 'o' && g_ascii_tolower (*(p - 6)) == 'd') {
+				state = 1;
+				break;
+			}
+			/* fall-through */
+
+		case 'l':
+
+			/* swm: If we encounter '<?xml' then we shouldn't be in
+			 * state == 2 (PHP). Switch back to HTML.
+			 */
+
+			if (state == 2 && p > src->data + 2 && *(p - 1) == 'm' && *(p - 2) == 'x') {
+				state = 1;
+				break;
+			}
+
+			/* fall-through */
+		default:
+		  reg_char:
+			if (state == 0 && !erase) {
+				*(rp++) = c;
+			}
+			break;
+		}
+		i++;
+		if (i < (gint)src->len) {
+			c = *(++p);
+		}
+	}
+	if (rp < buf->data + src->len) {
+		*rp = '\0';
+		g_byte_array_set_size (buf, rp - buf->data);
+	}
+
+	/* Check tag balancing */
+	if (level_ptr && level_ptr->data != NULL) {
+		part->is_balanced = FALSE;
+	}
+
+	if (stateptr) {
+		*stateptr = state;
+	}
+
+	return buf;
+}
+
+static void
+parse_qmail_recv (rspamd_mempool_t * pool, gchar *line, struct received_header *r)
+{
+	gchar                           *s, *p, t;
+
+	/* We are interested only with received from network headers */
+	if ((p = strstr (line, "from network")) == NULL) {
+		r->is_error = 2;
+		return;
+	}
+
+	p += sizeof ("from network") - 1;
+	while (g_ascii_isspace (*p) || *p == '[') {
+		p++;
+	}
+	/* format is ip/host */
+	s = p;
+	if (*p) {
+		while (g_ascii_isdigit (*++p) || *p == '.');
+		if (*p != '/') {
+			r->is_error = 1;
+			return;
+		}
+		else {
+			*p = '\0';
+			r->real_ip = rspamd_mempool_strdup (pool, s);
+			*p = '/';
+			/* Now try to parse hostname */
+			s = ++p;
+			while (g_ascii_isalnum (*p) || *p == '.' || *p == '-' || *p == '_') {
+				p++;
+			}
+			t = *p;
+			*p = '\0';
+			r->real_hostname = rspamd_mempool_strdup (pool, s);
+			*p = t;
+		}
+	}
+}
+
+static void
+parse_recv_header (rspamd_mempool_t * pool, gchar *line, struct received_header *r)
+{
+	gchar                           *p, *s, t, **res = NULL;
+	enum {
+		RSPAMD_RECV_STATE_INIT = 0,
+		RSPAMD_RECV_STATE_FROM,
+		RSPAMD_RECV_STATE_IP_BLOCK,
+		RSPAMD_RECV_STATE_BRACES_BLOCK,
+		RSPAMD_RECV_STATE_BY_BLOCK,
+		RSPAMD_RECV_STATE_PARSE_IP,
+		RSPAMD_RECV_STATE_SKIP_SPACES,
+		RSPAMD_RECV_STATE_ERROR
+	}                               state = RSPAMD_RECV_STATE_INIT,
+	                                next_state = RSPAMD_RECV_STATE_INIT;
+	gboolean                        is_exim = FALSE;
+
+	g_strstrip (line);
+	p = line;
+	s = line;
+
+	while (*p) {
+		switch (state) {
+			/* Initial state, search for from */
+		case RSPAMD_RECV_STATE_INIT:
+			if (*p == 'f' || *p == 'F') {
+				if (g_ascii_tolower (*++p) == 'r' && g_ascii_tolower (*++p) == 'o' && g_ascii_tolower (*++p) == 'm') {
+					p++;
+					state = RSPAMD_RECV_STATE_SKIP_SPACES;
+					next_state = RSPAMD_RECV_STATE_FROM;
+				}
+			}
+			else if (g_ascii_tolower (*p) == 'b' && g_ascii_tolower (*(p + 1)) == 'y') {
+				state = RSPAMD_RECV_STATE_IP_BLOCK;
+			}
+			else {
+				/* This can be qmail header, parse it separately */
+				parse_qmail_recv (pool, line, r);
+				return;
+			}
+			break;
+			/* Read hostname */
+		case RSPAMD_RECV_STATE_FROM:
+			if (*p == '[') {
+				/* This should be IP address */
+				res = &r->from_ip;
+				state = RSPAMD_RECV_STATE_PARSE_IP;
+				next_state = RSPAMD_RECV_STATE_IP_BLOCK;
+				s = ++p;
+			}
+			else if (g_ascii_isalnum (*p) || *p == '.' || *p == '-' || *p == '_') {
+				p++;
+			}
+			else {
+				t = *p;
+				*p = '\0';
+				r->from_hostname = rspamd_mempool_strdup (pool, s);
+				*p = t;
+				state = RSPAMD_RECV_STATE_SKIP_SPACES;
+				next_state = RSPAMD_RECV_STATE_IP_BLOCK;
+			}
+			break;
+			/* Try to extract additional info */
+		case RSPAMD_RECV_STATE_IP_BLOCK:
+			/* Try to extract ip or () info or by */
+			if (g_ascii_tolower (*p) == 'b' && g_ascii_tolower (*(p + 1)) == 'y') {
+				p += 2;
+				/* Skip spaces after by */
+				state = RSPAMD_RECV_STATE_SKIP_SPACES;
+				next_state = RSPAMD_RECV_STATE_BY_BLOCK;
+			}
+			else if (*p == '(') {
+				state = RSPAMD_RECV_STATE_SKIP_SPACES;
+				next_state = RSPAMD_RECV_STATE_BRACES_BLOCK;
+				p++;
+			}
+			else if (*p == '[') {
+				/* Got ip before '(' so extract it */
+				s = ++p;
+				res = &r->from_ip;
+				state = RSPAMD_RECV_STATE_PARSE_IP;
+				next_state = RSPAMD_RECV_STATE_IP_BLOCK;
+			}
+			else {
+				p++;
+			}
+			break;
+			/* We are in () block. Here can be found real hostname and real ip, this is written by some MTA */
+		case RSPAMD_RECV_STATE_BRACES_BLOCK:
+			/* End of block */
+			if (g_ascii_isalnum (*p) || *p == '.' || *p == '-' ||
+					*p == '_' || *p == ':') {
+				p++;
+			}
+			else if (*p == '[') {
+				s = ++p;
+				state = RSPAMD_RECV_STATE_PARSE_IP;
+				res = &r->real_ip;
+				next_state = RSPAMD_RECV_STATE_BRACES_BLOCK;
+			}
+			else {
+				if (p > s) {
+					/* Got some real hostname */
+					/* check whether it is helo or p is not space symbol */
+					if (!g_ascii_isspace (*p) || *(p + 1) != '[') {
+						/* Exim style ([ip]:port helo=hostname) */
+						if (*s == ':' && (g_ascii_isspace (*p) || *p == ')')) {
+							/* Ip ending */
+							is_exim = TRUE;
+							state = RSPAMD_RECV_STATE_SKIP_SPACES;
+							next_state = RSPAMD_RECV_STATE_BRACES_BLOCK;
+						}
+						else if (p - s == 4 && memcmp (s, "helo=", 5) == 0) {
+							p ++;
+							is_exim = TRUE;
+							if (r->real_hostname == NULL && r->from_hostname != NULL) {
+								r->real_hostname = r->from_hostname;
+							}
+							s = p;
+							while (*p != ')' && !g_ascii_isspace (*p) && *p != '\0') {
+								p ++;
+							}
+							if (p > s) {
+								r->from_hostname = rspamd_mempool_alloc (pool, p - s + 1);
+								rspamd_strlcpy (r->from_hostname, s, p - s + 1);
+							}
+						}
+						else if (p - s == 4 && memcmp (s, "port=", 5) == 0) {
+							p ++;
+							is_exim = TRUE;
+							while (g_ascii_isdigit (*p)) {
+								p ++;
+							}
+							state = RSPAMD_RECV_STATE_SKIP_SPACES;
+							next_state = RSPAMD_RECV_STATE_BRACES_BLOCK;
+						}
+						else if (*p == '=' && is_exim) {
+							/* Just skip unknown pairs */
+							p ++;
+							while (!g_ascii_isspace (*p) && *p != ')' && *p != '\0') {
+								p ++;
+							}
+							state = RSPAMD_RECV_STATE_SKIP_SPACES;
+							next_state = RSPAMD_RECV_STATE_BRACES_BLOCK;
+						}
+						else {
+							/* skip all  */
+							while (*p++ != ')' && *p != '\0');
+							state = RSPAMD_RECV_STATE_IP_BLOCK;
+						}
+					}
+					else {
+						/* Postfix style (hostname [ip]) */
+						t = *p;
+						*p = '\0';
+						r->real_hostname = rspamd_mempool_strdup (pool, s);
+						*p = t;
+						/* Now parse ip */
+						p += 2;
+						s = p;
+						res = &r->real_ip;
+						state = RSPAMD_RECV_STATE_PARSE_IP;
+						next_state = RSPAMD_RECV_STATE_BRACES_BLOCK;
+						continue;
+					}
+					if (*p == ')') {
+						p ++;
+						state = RSPAMD_RECV_STATE_SKIP_SPACES;
+						next_state = RSPAMD_RECV_STATE_IP_BLOCK;
+					}
+				}
+				else if (*p == ')') {
+					p ++;
+					state = RSPAMD_RECV_STATE_SKIP_SPACES;
+					next_state = RSPAMD_RECV_STATE_IP_BLOCK;
+				}
+				else {
+					r->is_error = 1;
+					return;
+				}
+			}
+			break;
+			/* Got by word */
+		case RSPAMD_RECV_STATE_BY_BLOCK:
+			/* Here can be only hostname */
+			if ((g_ascii_isalnum (*p) || *p == '.' || *p == '-'
+					|| *p == '_') && p[1] != '\0') {
+				p++;
+			}
+			else {
+				/* We got something like hostname */
+				if (p[1] != '\0') {
+					t = *p;
+					*p = '\0';
+					r->by_hostname = rspamd_mempool_strdup (pool, s);
+					*p = t;
+				}
+				else {
+					r->by_hostname = rspamd_mempool_strdup (pool, s);
+				}
+				/* Now end of parsing */
+				if (is_exim) {
+					/* Adjust for exim received */
+					if (r->real_ip == NULL && r->from_ip != NULL) {
+						r->real_ip = r->from_ip;
+					}
+					else if (r->from_ip == NULL && r->real_ip != NULL) {
+						r->from_ip = r->real_ip;
+						if (r->real_hostname == NULL && r->from_hostname != NULL) {
+							r->real_hostname = r->from_hostname;
+						}
+					}
+				}
+				return;
+			}
+			break;
+
+			/* Extract ip */
+		case RSPAMD_RECV_STATE_PARSE_IP:
+			while (g_ascii_isxdigit (*p) || *p == '.' || *p == ':') {
+				p ++;
+			}
+			if (*p != ']') {
+				/* Not an ip in fact */
+				state = RSPAMD_RECV_STATE_SKIP_SPACES;
+				p++;
+			}
+			else {
+				*p = '\0';
+				*res = rspamd_mempool_strdup (pool, s);
+				*p = ']';
+				p++;
+				state = RSPAMD_RECV_STATE_SKIP_SPACES;
+			}
+			break;
+
+			/* Skip spaces */
+		case RSPAMD_RECV_STATE_SKIP_SPACES:
+			if (!g_ascii_isspace (*p)) {
+				state = next_state;
+				s = p;
+			}
+			else {
+				p++;
+			}
+			break;
+		default:
+			r->is_error = 1;
+			return;
+			break;
+		}
+	}
+
+	r->is_error = 1;
+	return;
+}
+
+/* Convert raw headers to a list of struct raw_header * */
+static void
+process_raw_headers (struct rspamd_task *task)
+{
+	struct raw_header              *new = NULL, *lp;
+	gchar                          *p, *c, *tmp, *tp;
+	gint                            state = 0, l, next_state = 100, err_state = 100, t_state;
+	gboolean                        valid_folding = FALSE;
+
+	p = task->raw_headers_str;
+	c = p;
+	while (*p) {
+		/* FSM for processing headers */
+		switch (state) {
+		case 0:
+			/* Begin processing headers */
+			if (!g_ascii_isalpha (*p)) {
+				/* We have some garbage at the beginning of headers, skip this line */
+				state = 100;
+				next_state = 0;
+			}
+			else {
+				state = 1;
+				c = p;
+			}
+			break;
+		case 1:
+			/* We got something like header's name */
+			if (*p == ':') {
+				new = rspamd_mempool_alloc0 (task->task_pool, sizeof (struct raw_header));
+				l = p - c;
+				tmp = rspamd_mempool_alloc (task->task_pool, l + 1);
+				rspamd_strlcpy (tmp, c, l + 1);
+				new->name = tmp;
+				new->empty_separator = TRUE;
+				p ++;
+				state = 2;
+				c = p;
+			}
+			else if (g_ascii_isspace (*p)) {
+				/* Not header but some garbage */
+				state = 100;
+				next_state = 0;
+			}
+			else {
+				p ++;
+			}
+			break;
+		case 2:
+			/* We got header's name, so skip any \t or spaces */
+			if (*p == '\t') {
+				new->tab_separated = TRUE;
+				new->empty_separator = FALSE;
+				p ++;
+			}
+			else if (*p == ' ') {
+				new->empty_separator = FALSE;
+				p ++;
+			}
+			else if (*p == '\n' || *p == '\r') {
+				/* Process folding */
+				state = 99;
+				l = p - c;
+				if (l > 0) {
+					tmp = rspamd_mempool_alloc (task->task_pool, l + 1);
+					rspamd_strlcpy (tmp, c, l + 1);
+					new->separator = tmp;
+				}
+				next_state = 3;
+				err_state = 5;
+				c = p;
+			}
+			else {
+				/* Process value */
+				l = p - c;
+				if (l >= 0) {
+					tmp = rspamd_mempool_alloc (task->task_pool, l + 1);
+					rspamd_strlcpy (tmp, c, l + 1);
+					new->separator = tmp;
+				}
+				c = p;
+				state = 3;
+			}
+			break;
+		case 3:
+			if (*p == '\r' || *p == '\n') {
+				/* Hold folding */
+				state = 99;
+				next_state = 3;
+				err_state = 4;
+			}
+			else if (*(p + 1) == '\0') {
+				state = 4;
+			}
+			else {
+				p ++;
+			}
+			break;
+		case 4:
+			/* Copy header's value */
+			l = p - c;
+			tmp = rspamd_mempool_alloc (task->task_pool, l + 1);
+			tp = tmp;
+			t_state = 0;
+			while (l --) {
+				if (t_state == 0) {
+					/* Before folding */
+					if (*c == '\n' || *c == '\r') {
+						t_state = 1;
+						c ++;
+						*tp ++ = ' ';
+					}
+					else {
+						*tp ++ = *c ++;
+					}
+				}
+				else if (t_state == 1) {
+					/* Inside folding */
+					if (g_ascii_isspace (*c)) {
+						c++;
+					}
+					else {
+						t_state = 0;
+						*tp ++ = *c ++;
+					}
+				}
+			}
+			/* Strip last space that can be added by \r\n parsing */
+			if (*(tp - 1) == ' ') {
+				tp --;
+			}
+			*tp = '\0';
+			new->value = tmp;
+			new->next = NULL;
+			if ((lp = g_hash_table_lookup (task->raw_headers, new->name)) != NULL) {
+				while (lp->next != NULL) {
+					lp = lp->next;
+				}
+				lp->next = new;
+			}
+			else {
+				g_hash_table_insert (task->raw_headers, new->name, new);
+			}
+			debug_task ("add raw header %s: %s", new->name, new->value);
+			state = 0;
+			break;
+		case 5:
+			/* Header has only name, no value */
+			new->next = NULL;
+			new->value = "";
+			if ((lp = g_hash_table_lookup (task->raw_headers, new->name)) != NULL) {
+				while (lp->next != NULL) {
+					lp = lp->next;
+				}
+				lp->next = new;
+			}
+			else {
+				g_hash_table_insert (task->raw_headers, new->name, new);
+			}
+			state = 0;
+			debug_task ("add raw header %s: %s", new->name, new->value);
+			break;
+		case 99:
+			/* Folding state */
+			if (*(p + 1) == '\0') {
+				state = err_state;
+			}
+			else {
+				if (*p == '\r' || *p == '\n') {
+					p ++;
+					valid_folding = FALSE;
+				}
+				else if (*p == '\t' || *p == ' ') {
+					/* Valid folding */
+					p ++;
+					valid_folding = TRUE;
+				}
+				else {
+					if (valid_folding) {
+						debug_task ("go to state: %d->%d", state, next_state);
+						state = next_state;
+					}
+					else {
+						/* Fall back */
+						debug_task ("go to state: %d->%d", state, err_state);
+						state = err_state;
+					}
+				}
+			}
+			break;
+		case 100:
+			/* Fail state, skip line */
+			if (*p == '\r') {
+				if (*(p + 1) == '\n') {
+					p ++;
+				}
+				p ++;
+				state = next_state;
+			}
+			else if (*p == '\n') {
+				if (*(p + 1) == '\r') {
+					p ++;
+				}
+				p ++;
+				state = next_state;
+			}
+			else if (*(p + 1) == '\0') {
+				state = next_state;
+				p ++;
+			}
+			else {
+				p ++;
+			}
+			break;
+		}
+	}
+}
+
+static void
+free_byte_array_callback (void *pointer)
+{
+	GByteArray                     *arr = (GByteArray *) pointer;
+	g_byte_array_free (arr, TRUE);
+}
+
+static GByteArray              *
+convert_text_to_utf (struct rspamd_task *task, GByteArray * part_content, GMimeContentType * type, struct mime_text_part *text_part)
+{
+	GError                         *err = NULL;
+	gsize                           read_bytes, write_bytes;
+	const gchar                     *charset;
+	gchar                          *res_str;
+	GByteArray                     *result_array;
+
+	if (task->cfg->raw_mode) {
+		text_part->is_raw = TRUE;
+		return part_content;
+	}
+
+	if ((charset = g_mime_content_type_get_parameter (type, "charset")) == NULL) {
+		text_part->is_raw = TRUE;
+		return part_content;
+	}
+
+	if (g_ascii_strcasecmp (charset, "utf-8") == 0 || g_ascii_strcasecmp (charset, "utf8") == 0) {
+		if (g_utf8_validate (part_content->data, part_content->len, NULL)) {
+			text_part->is_raw = FALSE;
+			text_part->is_utf = TRUE;
+			return part_content;
+		}
+		else {
+			msg_info ("<%s>: contains invalid utf8 characters, assume it as raw", task->message_id);
+			text_part->is_raw = TRUE;
+			return part_content;
+		}
+	}
+
+	res_str = g_convert_with_fallback (part_content->data, part_content->len, UTF8_CHARSET, charset, NULL, &read_bytes, &write_bytes, &err);
+	if (res_str == NULL) {
+		msg_warn ("<%s>: cannot convert from %s to utf8: %s", task->message_id, charset, err ? err->message : "unknown problem");
+		text_part->is_raw = TRUE;
+		return part_content;
+	}
+
+	result_array = rspamd_mempool_alloc (task->task_pool, sizeof (GByteArray));
+	result_array->data = res_str;
+	result_array->len = write_bytes;
+	rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_free, res_str);
+	text_part->is_raw = FALSE;
+	text_part->is_utf = TRUE;
+
+	return result_array;
+}
+
+static void
+process_text_part (struct rspamd_task *task, GByteArray *part_content, GMimeContentType *type,
+		GMimeObject *part, GMimeObject *parent, gboolean is_empty)
+{
+	struct mime_text_part          *text_part;
+	const gchar                     *cd;
+
+	/* Skip attachements */
+#ifndef GMIME24
+	cd = g_mime_part_get_content_disposition (GMIME_PART (part));
+	if (cd && g_ascii_strcasecmp (cd, "attachment") == 0 && !task->cfg->check_text_attachements) {
+		debug_task ("skip attachments for checking as text parts");
+		return;
+	}
+#else
+	cd = g_mime_object_get_disposition (GMIME_OBJECT (part));
+	if (cd && g_ascii_strcasecmp (cd, GMIME_DISPOSITION_ATTACHMENT) == 0 && !task->cfg->check_text_attachements) {
+		debug_task ("skip attachments for checking as text parts");
+		return;
+	}
+#endif
+
+	if (g_mime_content_type_is_type (type, "text", "html") || g_mime_content_type_is_type (type, "text", "xhtml")) {
+		debug_task ("got urls from text/html part");
+
+		text_part = rspamd_mempool_alloc0 (task->task_pool, sizeof (struct mime_text_part));
+		text_part->is_html = TRUE;
+		if (is_empty) {
+			text_part->is_empty = TRUE;
+			text_part->orig = NULL;
+			text_part->content = NULL;
+			task->text_parts = g_list_prepend (task->text_parts, text_part);
+			return;
+		}
+		text_part->orig = convert_text_to_utf (task, part_content, type, text_part);
+		text_part->is_balanced = TRUE;
+		text_part->html_nodes = NULL;
+		text_part->parent = parent;
+
+		text_part->content = strip_html_tags (task, task->task_pool, text_part, text_part->orig, NULL);
+
+		if (text_part->html_nodes == NULL) {
+			url_parse_text (task->task_pool, task, text_part, FALSE);
+		}
+		else {
+			decode_entitles (text_part->content->data, &text_part->content->len);
+			url_parse_text (task->task_pool, task, text_part, FALSE);
+#if 0
+			url_parse_text (task->task_pool, task, text_part, TRUE);
+#endif
+		}
+
+		fuzzy_init_part (text_part, task->task_pool, task->cfg->max_diff);
+		rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) free_byte_array_callback, text_part->content);
+		task->text_parts = g_list_prepend (task->text_parts, text_part);
+	}
+	else if (g_mime_content_type_is_type (type, "text", "*")) {
+		debug_task ("got urls from text/plain part");
+
+		text_part = rspamd_mempool_alloc0 (task->task_pool, sizeof (struct mime_text_part));
+		text_part->is_html = FALSE;
+		text_part->parent = parent;
+		if (is_empty) {
+			text_part->is_empty = TRUE;
+			text_part->orig = NULL;
+			text_part->content = NULL;
+			task->text_parts = g_list_prepend (task->text_parts, text_part);
+			return;
+		}
+		text_part->orig = convert_text_to_utf (task, part_content, type, text_part);
+		text_part->content = text_part->orig;
+		url_parse_text (task->task_pool, task, text_part, FALSE);
+		fuzzy_init_part (text_part, task->task_pool, task->cfg->max_diff);
+		task->text_parts = g_list_prepend (task->text_parts, text_part);
+	}
+}
+
+#ifdef GMIME24
+static void
+mime_foreach_callback (GMimeObject * parent, GMimeObject * part, gpointer user_data)
+#else
+static void
+mime_foreach_callback (GMimeObject * part, gpointer user_data)
+#endif
+{
+	struct rspamd_task             *task = (struct rspamd_task *)user_data;
+	struct mime_part               *mime_part;
+	GMimeContentType               *type;
+	GMimeDataWrapper               *wrapper;
+	GMimeStream                    *part_stream;
+	GByteArray                     *part_content;
+
+	task->parts_count++;
+
+	/* 'part' points to the current part node that g_mime_message_foreach_part() is iterating over */
+
+	/* find out what class 'part' is... */
+	if (GMIME_IS_MESSAGE_PART (part)) {
+		/* message/rfc822 or message/news */
+		GMimeMessage                   *message;
+
+		/* g_mime_message_foreach_part() won't descend into
+		   child message parts, so if we want to count any
+		   subparts of this child message, we'll have to call
+		   g_mime_message_foreach_part() again here. */
+
+		message = g_mime_message_part_get_message ((GMimeMessagePart *) part);
+		if (task->parser_recursion++ < RECURSION_LIMIT) {
+#ifdef GMIME24
+			g_mime_message_foreach (message, mime_foreach_callback, task);
+#else
+			g_mime_message_foreach_part (message, mime_foreach_callback, task);
+#endif
+		}
+		else {
+			msg_err ("endless recursion detected: %d", task->parser_recursion);
+			return;
+		}
+#ifndef GMIME24
+		g_object_unref (message);
+#endif
+	}
+	else if (GMIME_IS_MESSAGE_PARTIAL (part)) {
+		/* message/partial */
+
+		/* this is an incomplete message part, probably a
+		   large message that the sender has broken into
+		   smaller parts and is sending us bit by bit. we
+		   could save some info about it so that we could
+		   piece this back together again once we get all the
+		   parts? */
+	}
+	else if (GMIME_IS_MULTIPART (part)) {
+		/* multipart/mixed, multipart/alternative, multipart/related, multipart/signed, multipart/encrypted, etc... */
+		task->parser_parent_part = part;
+#ifndef GMIME24	
+		debug_task ("detected multipart part");
+		/* we'll get to finding out if this is a signed/encrypted multipart later... */
+		if (task->parser_recursion++ < RECURSION_LIMIT) {
+			g_mime_multipart_foreach ((GMimeMultipart *) part, mime_foreach_callback, task);
+		}
+		else {
+			msg_err ("endless recursion detected: %d", task->parser_recursion);
+			return;
+		}
+#endif
+	}
+	else if (GMIME_IS_PART (part)) {
+		/* a normal leaf part, could be text/plain or image/jpeg etc */
+#ifdef GMIME24
+		type = (GMimeContentType *) g_mime_object_get_content_type (GMIME_OBJECT (part));
+#else
+		type = (GMimeContentType *) g_mime_part_get_content_type (GMIME_PART (part));
+#endif
+		if (type == NULL) {
+			msg_warn ("type of part is unknown, assume text/plain");
+			type = g_mime_content_type_new ("text", "plain");
+#ifdef GMIME24
+			rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_object_unref, type);
+#else
+			rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_mime_content_type_destroy, type);
+#endif
+		}
+		wrapper = g_mime_part_get_content_object (GMIME_PART (part));
+#ifdef GMIME24
+		if (wrapper != NULL && GMIME_IS_DATA_WRAPPER (wrapper)) {
+#else
+		if (wrapper != NULL) {
+#endif
+			part_stream = g_mime_stream_mem_new ();
+			if (g_mime_data_wrapper_write_to_stream (wrapper, part_stream) != -1) {
+				g_mime_stream_mem_set_owner (GMIME_STREAM_MEM (part_stream), FALSE);
+				part_content = g_mime_stream_mem_get_byte_array (GMIME_STREAM_MEM (part_stream));
+				g_object_unref (part_stream);
+				mime_part = rspamd_mempool_alloc (task->task_pool, sizeof (struct mime_part));
+				mime_part->type = type;
+				mime_part->content = part_content;
+				mime_part->parent = task->parser_parent_part;
+				mime_part->filename = g_mime_part_get_filename (GMIME_PART (part));
+				debug_task ("found part with content-type: %s/%s", type->type, type->subtype);
+				task->parts = g_list_prepend (task->parts, mime_part);
+				/* Skip empty parts */
+				process_text_part (task, part_content, type, part, task->parser_parent_part, (part_content->len <= 0));
+			}
+			else {
+				msg_warn ("write to stream failed: %d, %s", errno, strerror (errno));
+			}
+#ifndef GMIME24
+			g_object_unref (wrapper);
+#endif
+		}
+		else {
+			msg_warn ("cannot get wrapper for mime part, type of part: %s/%s", type->type, type->subtype);
+		}
+	}
+	else {
+		g_assert_not_reached ();
+	}
+}
+
+static void
+destroy_message (void *pointer)
+{
+	GMimeMessage                   *msg = pointer;
+
+	msg_debug ("freeing pointer %p", msg);
+	g_object_unref (msg);
+}
+
+gint
+process_message (struct rspamd_task *task)
+{
+	GMimeMessage                   *message;
+	GMimeParser                    *parser;
+	GMimeStream                    *stream;
+	GByteArray                     *tmp;
+	GList                          *first, *cur;
+	GMimePart                      *part;
+	GMimeDataWrapper               *wrapper;
+	struct received_header         *recv;
+	gchar                          *mid, *url_str, *p, *end, *url_end;
+	struct uri                     *subject_url;
+	gsize                           len;
+	gint                            rc;
+
+	tmp = rspamd_mempool_alloc (task->task_pool, sizeof (GByteArray));
+	tmp->data = task->msg->str;
+	tmp->len = task->msg->len;
+
+	stream = g_mime_stream_mem_new_with_byte_array (tmp);
+	/* 
+	* This causes g_mime_stream not to free memory by itself as it is memory allocated by
+	* pool allocator
+	*/
+	g_mime_stream_mem_set_owner (GMIME_STREAM_MEM (stream), FALSE);
+
+	if (task->is_mime) {
+
+		debug_task ("construct mime parser from string length %d", (gint)task->msg->len);
+		/* create a new parser object to parse the stream */
+		parser = g_mime_parser_new_with_stream (stream);
+		g_object_unref (stream);
+
+		/* parse the message from the stream */
+		message = g_mime_parser_construct_message (parser);
+
+		if (message == NULL) {
+			msg_warn ("cannot construct mime from stream");
+			return -1;
+		}
+
+		task->message = message;
+		rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) destroy_message, task->message);
+
+		/* Save message id for future use */
+		task->message_id = g_mime_message_get_message_id (task->message);
+		if (task->message_id == NULL) {
+			task->message_id = "undef";
+		}
+
+		task->parser_recursion = 0;
+#ifdef GMIME24
+		g_mime_message_foreach (message, mime_foreach_callback, task);
+#else
+		/*
+		 * This is rather strange, but gmime 2.2 do NOT pass top-level part to foreach callback
+		 * so we need to set up parent part by hands
+		 */
+		task->parser_parent_part = g_mime_message_get_mime_part (message);
+		g_object_unref (task->parser_parent_part);
+		g_mime_message_foreach_part (message, mime_foreach_callback, task);
+#endif
+
+		debug_task ("found %d parts in message", task->parts_count);
+		if (task->queue_id == NULL) {
+			task->queue_id = "undef";
+		}
+
+#ifdef GMIME24
+		task->raw_headers_str = g_mime_object_get_headers (GMIME_OBJECT (task->message));
+#else
+		task->raw_headers_str = g_mime_message_get_headers (task->message);
+#endif
+
+		process_images (task);
+
+		/* Parse received headers */
+		first = message_get_header (task->task_pool, message, "Received", FALSE);
+		cur = first;
+		while (cur) {
+			recv = rspamd_mempool_alloc0 (task->task_pool, sizeof (struct received_header));
+			parse_recv_header (task->task_pool, cur->data, recv);
+			task->received = g_list_prepend (task->received, recv);
+			cur = g_list_next (cur);
+		}
+		if (first) {
+			g_list_free (first);
+		}
+
+		if (task->raw_headers_str) {
+			rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_free, task->raw_headers_str);
+			process_raw_headers (task);
+		}
+
+		task->rcpts = g_mime_message_get_all_recipients (message);
+		if (task->rcpts) {
+#ifdef GMIME24
+			rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_object_unref, task->rcpts);
+#else
+			rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) internet_address_list_destroy, task->rcpts);
+#endif
+		}
+
+
+		/* free the parser (and the stream) */
+		g_object_unref (parser);
+	}
+	else {
+		/* We got only message, no mime headers or anything like this */
+		/* Construct fake message for it */
+		task->message = g_mime_message_new (TRUE);
+		if (task->from) {
+			g_mime_message_set_sender (task->message, task->from);
+		}
+		/* Construct part for it */
+		part = g_mime_part_new_with_type ("text", "html");
+#ifdef GMIME24
+		wrapper = g_mime_data_wrapper_new_with_stream (stream, GMIME_CONTENT_ENCODING_8BIT);
+#else
+		wrapper = g_mime_data_wrapper_new_with_stream (stream, GMIME_PART_ENCODING_8BIT);
+#endif
+		g_mime_part_set_content_object (part, wrapper);
+		g_mime_message_set_mime_part (task->message, GMIME_OBJECT (part));
+		/* Register destructors */
+		rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_object_unref, wrapper);
+		rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_object_unref, part);
+		rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) destroy_message, task->message);
+		/* Now parse in a normal way */
+		task->parser_recursion = 0;
+#ifdef GMIME24
+		g_mime_message_foreach (task->message, mime_foreach_callback, task);
+#else
+		g_mime_message_foreach_part (task->message, mime_foreach_callback, task);
+#endif
+		/* Generate message ID */
+		mid = g_mime_utils_generate_message_id ("localhost.localdomain");
+		rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_free, mid);
+		g_mime_message_set_message_id (task->message, mid);
+		task->message_id = mid;
+		task->queue_id = mid;
+		/* Set headers for message */
+		if (task->subject) {
+			g_mime_message_set_subject (task->message, task->subject);
+		}
+
+		/* Add recipients */
+#ifndef GMIME24
+		if (task->rcpt) {
+			cur = task->rcpt;
+			while (cur) {
+				g_mime_message_add_recipient (task->message, GMIME_RECIPIENT_TYPE_TO, NULL, (gchar *)cur->data);
+				cur = g_list_next (cur);
+			}
+		}
+#endif
+	}
+
+	/* Parse urls inside Subject header */
+	cur = message_get_header (task->task_pool, task->message, "Subject", FALSE);
+	if (cur) {
+		p = cur->data;
+		len = strlen (p);
+		end = p + len;
+
+		while (p < end) {
+			/* Search to the end of url */
+			if (url_try_text (task->task_pool, p, end - p, NULL, &url_end, &url_str, FALSE)) {
+				if (url_str != NULL) {
+					subject_url = rspamd_mempool_alloc0 (task->task_pool, sizeof (struct uri));
+					if (subject_url != NULL) {
+						/* Try to parse url */
+						rc = parse_uri (subject_url, url_str, task->task_pool);
+						if ((rc == URI_ERRNO_OK || rc == URI_ERRNO_NO_SLASHES || rc == URI_ERRNO_NO_HOST_SLASH) &&
+								subject_url->hostlen > 0) {
+							if (subject_url->protocol != PROTOCOL_MAILTO) {
+								if (!g_tree_lookup (task->urls, subject_url)) {
+									g_tree_insert (task->urls, subject_url, subject_url);
+								}
+							}
+						}
+						else if (rc != URI_ERRNO_OK) {
+							msg_info ("extract of url '%s' failed: %s", url_str, url_strerror (rc));
+						}
+					}
+				}
+			}
+			else {
+				break;
+			}
+			p = url_end + 1;
+		}
+		/* Free header's list */
+		g_list_free (cur);
+	}
+
+	return 0;
+}
+
+struct gmime_raw_header {
+	struct raw_header              *next;
+	gchar                           *name;
+	gchar                           *value;
+};
+
+typedef struct _GMimeHeader {
+	GHashTable                     *hash;
+	GHashTable                     *writers;
+	struct raw_header              *headers;
+} local_GMimeHeader;
+
+
+/* known header field types */
+enum {
+	HEADER_FROM = 0,
+	HEADER_REPLY_TO,
+	HEADER_TO,
+	HEADER_CC,
+	HEADER_BCC,
+	HEADER_SUBJECT,
+	HEADER_DATE,
+	HEADER_MESSAGE_ID,
+	HEADER_UNKNOWN
+};
+
+/*
+ * Iterate throught all headers and make a list
+ */
+#ifndef GMIME24
+static void
+header_iterate (rspamd_mempool_t * pool, struct gmime_raw_header *h, GList ** ret, const gchar *field, gboolean strong)
+{
+	while (h) {
+		if (G_LIKELY (!strong)) {
+			if (h->value && !g_ascii_strncasecmp (field, h->name, strlen (field))) {
+				if (pool != NULL) {
+					*ret = g_list_prepend (*ret, rspamd_mempool_strdup (pool, h->value));
+				}
+				else {
+					*ret = g_list_prepend (*ret, g_strdup (h->value));
+				}
+			}
+		}
+		else {
+			if (h->value && !strncmp (field, h->name, strlen (field))) {
+				if (pool != NULL) {
+					*ret = g_list_prepend (*ret, rspamd_mempool_strdup (pool, h->value));
+				}
+				else {
+					*ret = g_list_prepend (*ret, g_strdup (h->value));
+				}
+			}
+		}
+		h = (struct gmime_raw_header *)h->next;
+	}
+}
+#else
+static void
+header_iterate (rspamd_mempool_t * pool, GMimeHeaderList * ls, GList ** ret, const gchar *field, gboolean strong)
+{
+	/* Use iterator in case of gmime 2.4 */
+	GMimeHeaderIter                *iter;
+	const gchar                     *name;
+
+	if (ls == NULL) {
+		*ret = NULL;
+		return;
+	}
+
+	iter = g_mime_header_iter_new ();
+	if (g_mime_header_list_get_iter (ls, iter) && g_mime_header_iter_first (iter)) {
+		/* Iterate throught headers */
+		while (g_mime_header_iter_is_valid (iter)) {
+			name = g_mime_header_iter_get_name (iter);
+			if (G_LIKELY (!strong)) {
+				if (!g_ascii_strncasecmp (field, name, strlen (name))) {
+					if (pool != NULL) {
+						*ret = g_list_prepend (*ret, rspamd_mempool_strdup (pool, g_mime_header_iter_get_value (iter)));
+					}
+					else {
+						*ret = g_list_prepend (*ret, g_strdup (g_mime_header_iter_get_value (iter)));
+					}
+				}
+			}
+			else {
+				if (!strncmp (field, name, strlen (name))) {
+					if (pool != NULL) {
+						*ret = g_list_prepend (*ret, rspamd_mempool_strdup (pool, g_mime_header_iter_get_value (iter)));
+					}
+					else {
+						*ret = g_list_prepend (*ret, g_strdup (g_mime_header_iter_get_value (iter)));
+					}
+				}
+			}
+			if (!g_mime_header_iter_next (iter)) {
+				break;
+			}
+		}
+	}
+	g_mime_header_iter_free (iter);
+}
+#endif
+
+
+struct multipart_cb_data {
+	GList                          *ret;
+	rspamd_mempool_t                  *pool;
+	const gchar                     *field;
+	gboolean                        try_search;
+	gboolean                        strong;
+	gint                            rec;
+};
+
+#define MAX_REC 10
+
+static void
+#ifdef GMIME24
+multipart_iterate (GMimeObject * parent, GMimeObject * part, gpointer user_data)
+#else
+multipart_iterate (GMimeObject * part, gpointer user_data)
+#endif
+{
+	struct multipart_cb_data       *data = user_data;
+#ifndef GMIME24
+	struct gmime_raw_header              *h;
+#endif
+	GList                          *l = NULL;
+
+	if (data->try_search && part != NULL && GMIME_IS_PART (part)) {
+#ifdef GMIME24
+		GMimeHeaderList                *ls;
+
+		ls = g_mime_object_get_header_list (GMIME_OBJECT (part));
+		header_iterate (data->pool, ls, &l, data->field, data->strong);
+#else
+		h = (struct gmime_raw_header *)part->headers->headers;
+		header_iterate (data->pool, h, &l, data->field, data->strong);
+#endif
+		if (l == NULL) {
+			/* Header not found, abandon search results */
+			data->try_search = FALSE;
+			g_list_free (data->ret);
+			data->ret = NULL;
+		}
+		else {
+			data->ret = g_list_concat (l, data->ret);
+		}
+	}
+	else if (data->try_search && GMIME_IS_MULTIPART (part)) {
+		/* Maybe endless recursion here ? */
+		if (data->rec++ < MAX_REC) {
+			g_mime_multipart_foreach (GMIME_MULTIPART (part), multipart_iterate, data);
+		}
+		else {
+			msg_info ("maximum recurse limit is over, stop recursing, %d", data->rec);
+			data->try_search = FALSE;
+		}
+	}
+}
+
+static GList                   *
+local_message_get_header (rspamd_mempool_t * pool, GMimeMessage * message, const gchar *field, gboolean strong)
+{
+	GList                          *gret = NULL;
+	GMimeObject                    *part;
+	struct multipart_cb_data        cb = {
+		.try_search = TRUE,
+		.rec = 0,
+		.ret = NULL,
+	};
+	cb.pool = pool;
+	cb.field = field;
+	cb.strong = strong;
+
+#ifndef GMIME24
+	struct gmime_raw_header       *h;
+
+	if (field == NULL) {
+		return NULL;
+	}
+
+	msg_debug ("iterate over headers to find header %s", field);
+	h = (struct gmime_raw_header *) (GMIME_OBJECT (message)->headers->headers);
+	header_iterate (pool, h, &gret, field, strong);
+
+	if (gret == NULL) {
+		/* Try to iterate with mime part headers */
+		msg_debug ("iterate over headers of mime part to find header %s", field);
+		part = g_mime_message_get_mime_part (message);
+		if (part) {
+			h = (struct gmime_raw_header *)part->headers->headers;
+			header_iterate (pool, h, &gret, field, strong);
+			if (gret == NULL && GMIME_IS_MULTIPART (part)) {
+				msg_debug ("iterate over headers of each multipart's subparts %s", field);
+				g_mime_multipart_foreach (GMIME_MULTIPART (part), multipart_iterate, &cb);
+				if (cb.ret != NULL) {
+					gret = cb.ret;
+				}
+			}
+#ifndef GMIME24
+			g_object_unref (part);
+#endif
+		}
+	}
+
+	return gret;
+#else
+	GMimeHeaderList                *ls;
+
+	ls = g_mime_object_get_header_list (GMIME_OBJECT (message));
+	header_iterate (pool, ls, &gret, field, strong);
+	if (gret == NULL) {
+		/* Try to iterate with mime part headers */
+		part = g_mime_message_get_mime_part (message);
+		if (part) {
+			ls = g_mime_object_get_header_list (GMIME_OBJECT (part));
+			header_iterate (pool, ls, &gret, field, strong);
+			if (gret == NULL && GMIME_IS_MULTIPART (part)) {
+				g_mime_multipart_foreach (GMIME_MULTIPART (part), multipart_iterate, &cb);
+				if (cb.ret != NULL) {
+					gret = cb.ret;
+				}
+			}
+#ifndef GMIME24
+			g_object_unref (part);
+#endif
+		}
+	}
+
+
+	return gret;
+#endif
+}
+
+/**
+* g_mime_message_set_date_from_string: Set the message sent-date
+* @message: MIME Message
+* @string: A string of date
+* 
+* Set the sent-date on a MIME Message.
+**/
+void
+local_mime_message_set_date_from_string (GMimeMessage * message, const gchar * string)
+{
+	time_t                          date;
+	gint                            offset = 0;
+
+	date = g_mime_utils_header_decode_date (string, &offset);
+	g_mime_message_set_date (message, date, offset);
+}
+
+/*
+ * Replacements for standart gmime functions but converting adresses to IA
+ */
+static const gchar              *
+local_message_get_sender (GMimeMessage * message)
+{
+	gchar                           *res;
+	const gchar                     *from = g_mime_message_get_sender (message);
+	InternetAddressList            *ia;
+
+#ifndef	GMIME24
+	ia = internet_address_parse_string (from);
+#else
+	ia = internet_address_list_parse_string (from);
+#endif
+	if (!ia) {
+		return NULL;
+	}
+	res = internet_address_list_to_string (ia, FALSE);
+#ifndef	GMIME24
+	internet_address_list_destroy (ia);
+#else
+	g_object_unref (ia);
+#endif
+
+	return res;
+}
+
+static const gchar              *
+local_message_get_reply_to (GMimeMessage * message)
+{
+	gchar                           *res;
+	const gchar                     *from = g_mime_message_get_reply_to (message);
+	InternetAddressList            *ia;
+
+#ifndef	GMIME24
+	ia = internet_address_parse_string (from);
+#else
+	ia = internet_address_list_parse_string (from);
+#endif
+	if (!ia) {
+		return NULL;
+	}
+	res = internet_address_list_to_string (ia, FALSE);
+#ifndef	GMIME24
+	internet_address_list_destroy (ia);
+#else
+	g_object_unref (ia);
+#endif
+
+	return res;
+}
+
+#ifdef GMIME24
+
+#   define ADD_RECIPIENT_TEMPLATE(type,def)														\
+static void																						\
+local_message_add_recipients_from_string_##type (GMimeMessage *message, const gchar *string, const gchar *value)	\
+{																								\
+	InternetAddressList *il, *new;																\
+																								\
+	il = g_mime_message_get_recipients (message, (def));										\
+	new = internet_address_list_parse_string (string);											\
+	internet_address_list_append (il, new);														\
+}																								\
+
+ADD_RECIPIENT_TEMPLATE (to, GMIME_RECIPIENT_TYPE_TO)
+	ADD_RECIPIENT_TEMPLATE (cc, GMIME_RECIPIENT_TYPE_CC)
+	ADD_RECIPIENT_TEMPLATE (bcc, GMIME_RECIPIENT_TYPE_BCC)
+#   define GET_RECIPIENT_TEMPLATE(type,def)														\
+static InternetAddressList*																		\
+local_message_get_recipients_##type (GMimeMessage *message, const gchar *unused)					\
+{																								\
+	return g_mime_message_get_recipients (message, (def));										\
+}
+	GET_RECIPIENT_TEMPLATE (to, GMIME_RECIPIENT_TYPE_TO)
+	GET_RECIPIENT_TEMPLATE (cc, GMIME_RECIPIENT_TYPE_CC)
+	GET_RECIPIENT_TEMPLATE (bcc, GMIME_RECIPIENT_TYPE_BCC)
+#endif
+/* different declarations for different types of set and get functions */
+  typedef const gchar             *(*GetFunc) (GMimeMessage * message);
+  typedef InternetAddressList    *(*GetRcptFunc) (GMimeMessage * message, const gchar *type);
+  typedef GList                  *(*GetListFunc) (rspamd_mempool_t * pool, GMimeMessage * message, const gchar *type, gboolean strong);
+  typedef void                    (*SetFunc) (GMimeMessage * message, const gchar *value);
+  typedef void                    (*SetListFunc) (GMimeMessage * message, const gchar *field, const gchar *value);
+
+/** different types of functions
+*
+* FUNC_CHARPTR
+*	- function with no arguments
+*	- get returns gchar*
+*
+* FUNC_IA (from Internet Address)
+*	- function with additional "field" argument from the fieldfunc table,
+*	- get returns Glist*
+*
+* FUNC_LIST
+*	- function with additional "field" argument (given arbitrary header field name)
+*	- get returns Glist*
+**/
+  enum {
+	  FUNC_CHARPTR = 0,
+	  FUNC_CHARFREEPTR,
+	  FUNC_IA,
+	  FUNC_LIST
+  };
+
+/**
+* fieldfunc struct: structure of MIME fields and corresponding get and set
+* functions.
+**/
+  static struct {
+	  gchar                           *name;
+	  GetFunc                         func;
+	  GetRcptFunc                     rcptfunc;
+	  GetListFunc                     getlistfunc;
+	  SetFunc                         setfunc;
+	  SetListFunc                     setlfunc;
+	  gint                            functype;
+  } fieldfunc[] =
+{
+	{
+	"From", local_message_get_sender, NULL, NULL, g_mime_message_set_sender, NULL, FUNC_CHARFREEPTR}, {
+	"Reply-To", local_message_get_reply_to, NULL, NULL, g_mime_message_set_reply_to, NULL, FUNC_CHARFREEPTR},
+#ifndef GMIME24
+	{
+	"To", NULL, (GetRcptFunc) g_mime_message_get_recipients, NULL, NULL, (SetListFunc) g_mime_message_add_recipients_from_string, FUNC_IA}, {
+	"Cc", NULL, (GetRcptFunc) g_mime_message_get_recipients, NULL, NULL, (SetListFunc) g_mime_message_add_recipients_from_string, FUNC_IA}, {
+	"Bcc", NULL, (GetRcptFunc) g_mime_message_get_recipients, NULL, NULL, (SetListFunc) g_mime_message_add_recipients_from_string, FUNC_IA}, {
+	"Date", (GetFunc) g_mime_message_get_date_string, NULL, NULL, local_mime_message_set_date_from_string, NULL, FUNC_CHARFREEPTR},
+#else
+	{
+	"To", NULL, local_message_get_recipients_to, NULL, NULL, local_message_add_recipients_from_string_to, FUNC_IA}, {
+	"Cc", NULL, local_message_get_recipients_cc, NULL, NULL, local_message_add_recipients_from_string_cc, FUNC_IA}, {
+	"Bcc", NULL, local_message_get_recipients_bcc, NULL, NULL, local_message_add_recipients_from_string_bcc, FUNC_IA}, {
+	"Date", (GetFunc)g_mime_message_get_date_as_string, NULL, NULL, local_mime_message_set_date_from_string, NULL, FUNC_CHARFREEPTR},
+#endif
+	{
+	"Subject", g_mime_message_get_subject, NULL, NULL, g_mime_message_set_subject, NULL, FUNC_CHARPTR}, {
+	"Message-Id", g_mime_message_get_message_id, NULL, NULL, g_mime_message_set_message_id, NULL, FUNC_CHARPTR},
+#ifndef GMIME24
+	{
+	NULL, NULL, NULL, local_message_get_header, NULL, g_mime_message_add_header, FUNC_LIST}
+#else
+	{
+	NULL, NULL, NULL, local_message_get_header, NULL, (SetListFunc)g_mime_object_append_header, FUNC_LIST}
+#endif
+};
+
+/**
+* message_set_header: set header of any type excluding special (Content- and MIME-Version:)
+**/
+void
+message_set_header (GMimeMessage * message, const gchar *field, const gchar *value)
+{
+	gint                            i;
+
+	if (!g_ascii_strcasecmp (field, "MIME-Version:") || !g_ascii_strncasecmp (field, "Content-", 8)) {
+		return;
+	}
+	for (i = 0; i <= HEADER_UNKNOWN; ++i) {
+		if (!fieldfunc[i].name || !g_ascii_strncasecmp (field, fieldfunc[i].name, strlen (fieldfunc[i].name))) {
+			switch (fieldfunc[i].functype) {
+			case FUNC_CHARPTR:
+				(*(fieldfunc[i].setfunc)) (message, value);
+				break;
+			case FUNC_IA:
+				(*(fieldfunc[i].setlfunc)) (message, fieldfunc[i].name, value);
+				break;
+			case FUNC_LIST:
+				(*(fieldfunc[i].setlfunc)) (message, field, value);
+				break;
+			}
+			break;
+		}
+	}
+}
+
+
+/**
+* message_get_header: returns the list of 'any header' values
+* (except of unsupported yet Content- and MIME-Version special headers)
+*
+* You should free the GList list by yourself.
+**/
+GList                          *
+message_get_header (rspamd_mempool_t * pool, GMimeMessage * message, const gchar *field, gboolean strong)
+{
+	gint                            i;
+	gchar                           *ret = NULL, *ia_string;
+	GList                          *gret = NULL;
+	InternetAddressList            *ia_list = NULL, *ia;
+
+	for (i = 0; i <= HEADER_UNKNOWN; ++i) {
+		if (!fieldfunc[i].name || !g_ascii_strncasecmp (field, fieldfunc[i].name, strlen (fieldfunc[i].name))) {
+			switch (fieldfunc[i].functype) {
+			case FUNC_CHARFREEPTR:
+				ret = (gchar *)(*(fieldfunc[i].func)) (message);
+				break;
+			case FUNC_CHARPTR:
+				ret = (gchar *)(*(fieldfunc[i].func)) (message);
+				break;
+			case FUNC_IA:
+				ia_list = (*(fieldfunc[i].rcptfunc)) (message, field);
+				ia = ia_list;
+#ifndef GMIME24
+				while (ia && ia->address) {
+
+					ia_string = internet_address_to_string ((InternetAddress *) ia->address, FALSE);
+					if (pool != NULL) {
+						rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t) g_free, ia_string);
+					}
+					gret = g_list_prepend (gret, ia_string);
+					ia = ia->next;
+				}
+#else
+				i = internet_address_list_length (ia);
+				while (--i >= 0) {
+					ia_string = internet_address_to_string (internet_address_list_get_address (ia, i), FALSE);
+					if (pool != NULL) {
+						rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t) g_free, ia_string);
+					}
+					gret = g_list_prepend (gret, ia_string);
+				}
+#endif
+				break;
+			case FUNC_LIST:
+				gret = (*(fieldfunc[i].getlistfunc)) (pool, message, field, strong);
+				break;
+			}
+			break;
+		}
+	}
+	if (gret == NULL && ret != NULL) {
+		if (pool != NULL) {
+			gret = g_list_prepend (gret, rspamd_mempool_strdup (pool, ret));
+		}
+		else {
+			gret = g_list_prepend (gret, g_strdup (ret));
+		}
+	}
+	if (fieldfunc[i].functype == FUNC_CHARFREEPTR && ret) {
+		g_free (ret);
+	}
+
+	return gret;
+}
+
+GList*
+message_get_raw_header (struct rspamd_task *task, const gchar *field, gboolean strong)
+{
+	GList                               *gret = NULL;
+	struct raw_header                   *rh;
+
+	rh = g_hash_table_lookup (task->raw_headers, field);
+
+	if (rh == NULL) {
+		return NULL;
+	}
+
+	while (rh) {
+		if (strong) {
+			if (strcmp (rh->name, field) == 0) {
+				gret = g_list_prepend (gret, rh);
+			}
+		}
+		else {
+			if (g_ascii_strcasecmp (rh->name, field) == 0) {
+				gret = g_list_prepend (gret, rh);
+			}
+		}
+		rh = rh->next;
+	}
+
+	if (gret != NULL) {
+		rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t)g_list_free, gret);
+	}
+
+	return gret;
+}
diff --git a/src/libmime/message.h b/src/libmime/message.h
new file mode 100644
index 000000000..5e27579d1
--- /dev/null
+++ b/src/libmime/message.h
@@ -0,0 +1,91 @@
+/**
+ * @file message.h
+ * Message processing functions and structures
+ */
+
+#ifndef RSPAMD_MESSAGE_H
+#define RSPAMD_MESSAGE_H
+
+#include "config.h"
+#include "fuzzy.h"
+
+struct rspamd_task;
+struct controller_session;
+
+struct mime_part {
+	GMimeContentType *type;
+	GByteArray *content;
+	GMimeObject *parent;
+	gchar *checksum;
+	const gchar *filename;
+};
+
+struct mime_text_part {
+	gboolean is_html;
+	gboolean is_raw;
+	gboolean is_balanced;
+	gboolean is_empty;
+	gboolean is_utf;
+	const gchar *real_charset;
+	GByteArray *orig;
+	GByteArray *content;
+	GNode *html_nodes;
+	GList *urls_offset;										    /**< list of offsets of urls						*/
+	fuzzy_hash_t *fuzzy;
+	fuzzy_hash_t *double_fuzzy;
+	GMimeObject *parent;
+	GUnicodeScript script;
+	f_str_t *diff_str;
+};
+
+struct received_header {
+	gchar *from_hostname;
+	gchar *from_ip;
+	gchar *real_hostname;
+	gchar *real_ip;
+	gchar *by_hostname;
+	gint is_error;
+};
+
+struct raw_header {
+	gchar *name;
+	gchar *value;
+	gboolean tab_separated;
+	gboolean empty_separator;
+	gchar *separator;
+	struct raw_header *next;
+};
+
+/**
+ * Process message with all filters/statfiles, extract mime parts, urls and 
+ * call metrics consolidation functions
+ * @param task worker_task object
+ * @return 0 if we have delayed filters to process and 1 if we have finished with processing
+ */
+gint process_message (struct rspamd_task *task);
+
+/*
+ * Set header with specified name and value
+ */
+void message_set_header (GMimeMessage *message, const gchar *field, const gchar *value);
+
+/*
+ * Get a list of header's values with specified header's name
+ * @param pool if not NULL this pool would be used for storing header's values
+ * @param message g_mime_message object
+ * @param field header's name
+ * @param strong if this flag is TRUE header's name is case sensitive, otherwise it is not
+ * @return A list of header's values or NULL. If list is not NULL it MUST be freed. If pool is NULL elements must be freed as well.
+ */
+GList* message_get_header (rspamd_mempool_t *pool, GMimeMessage *message, const gchar *field, gboolean strong);
+
+/*
+ * Get a list of header's values with specified header's name using raw headers
+ * @param task worker task structure
+ * @param field header's name
+ * @param strong if this flag is TRUE header's name is case sensitive, otherwise it is not
+ * @return A list of header's values or NULL. Unlike previous function it is NOT required to free list or values. I should rework one of these functions some time.
+ */
+GList* message_get_raw_header (struct rspamd_task *task, const gchar *field, gboolean strong);
+
+#endif
diff --git a/src/libmime/protocol.c b/src/libmime/protocol.c
new file mode 100644
index 000000000..8a5c3f0df
--- /dev/null
+++ b/src/libmime/protocol.c
@@ -0,0 +1,821 @@
+/*
+ * Copyright (c) 2009-2012, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "main.h"
+#include "util.h"
+#include "cfg_file.h"
+#include "settings.h"
+#include "message.h"
+
+/* Max line size */
+#define OUTBUFSIZ BUFSIZ
+/*
+ * Just check if the passed message is spam or not and reply as
+ * described below
+ */
+#define MSG_CMD_CHECK "check"
+/* 
+ * Check if message is spam or not, and return score plus list
+ * of symbols hit
+ */
+#define MSG_CMD_SYMBOLS "symbols"
+/*
+ * Check if message is spam or not, and return score plus report
+ */
+#define MSG_CMD_REPORT "report"
+/*
+ * Check if message is spam or not, and return score plus report
+ * if the message is spam
+ */
+#define MSG_CMD_REPORT_IFSPAM "report_ifspam"
+/*
+ * Ignore this message -- client opened connection then changed
+ */
+#define MSG_CMD_SKIP "skip"
+/*
+ * Return a confirmation that spamd is alive
+ */
+#define MSG_CMD_PING "ping"
+/*
+ * Process this message as described above and return modified message
+ */
+#define MSG_CMD_PROCESS "process"
+
+/*
+ * Learn specified statfile using message
+ */
+#define MSG_CMD_LEARN "learn"
+
+/*
+ * spamassassin greeting:
+ */
+#define SPAMC_GREETING "SPAMC"
+/*
+ * rspamd greeting:
+ */
+#define RSPAMC_GREETING "RSPAMC"
+/*
+ * Headers
+ */
+#define CONTENT_LENGTH_HEADER "Content-length"
+#define HELO_HEADER "Helo"
+#define FROM_HEADER "From"
+#define IP_ADDR_HEADER "IP"
+#define NRCPT_HEADER "Recipient-Number"
+#define RCPT_HEADER "Rcpt"
+#define SUBJECT_HEADER "Subject"
+#define STATFILE_HEADER "Statfile"
+#define QUEUE_ID_HEADER "Queue-ID"
+#define ERROR_HEADER "Error"
+#define USER_HEADER "User"
+#define PASS_HEADER "Pass"
+#define JSON_HEADER "Json"
+#define HOSTNAME_HEADER "Hostname"
+#define DELIVER_TO_HEADER "Deliver-To"
+#define NO_LOG_HEADER "Log"
+
+static GList                   *custom_commands = NULL;
+
+
+/*
+ * Remove <> from the fixed string and copy it to the pool
+ */
+static gchar *
+rspamd_protocol_escape_braces (GString *in)
+{
+	gint                          len = 0;
+	gchar                        *orig, *p;
+
+	orig = in->str;
+	while ((g_ascii_isspace (*orig) || *orig == '<') && orig - in->str < (gint)in->len) {
+		orig ++;
+	}
+
+	g_string_erase (in, 0, orig - in->str);
+
+	p = orig;
+	while ((!g_ascii_isspace (*p) && *p != '>') && p - in->str < (gint)in->len) {
+		p ++;
+		len ++;
+	}
+
+	g_string_truncate (in, len);
+
+	return in->str;
+}
+
+static gboolean
+rspamd_protocol_handle_url (struct rspamd_task *task, struct rspamd_http_message *msg)
+{
+	GList                          *cur;
+	struct custom_command          *cmd;
+	const gchar *p;
+
+	if (msg->url == NULL || msg->url->len == 0) {
+		task->last_error = "command is absent";
+		task->error_code = 400;
+		return FALSE;
+	}
+
+	if (msg->url->str[0] == '/') {
+		p = &msg->url->str[1];
+	}
+	else {
+		p = msg->url->str;
+	}
+
+	switch (*p) {
+	case 'c':
+	case 'C':
+		/* check */
+		if (g_ascii_strcasecmp (p + 1, MSG_CMD_CHECK + 1) == 0) {
+			task->cmd = CMD_CHECK;
+		}
+		else {
+			goto err;
+		}
+		break;
+	case 's':
+	case 'S':
+		/* symbols, skip */
+		if (g_ascii_strcasecmp (p + 1, MSG_CMD_SYMBOLS + 1) == 0) {
+			task->cmd = CMD_SYMBOLS;
+		}
+		else if (g_ascii_strcasecmp (p + 1, MSG_CMD_SKIP + 1) == 0) {
+			task->cmd = CMD_SKIP;
+		}
+		else {
+			goto err;
+		}
+		break;
+	case 'p':
+	case 'P':
+		/* ping, process */
+		if (g_ascii_strcasecmp (p + 1, MSG_CMD_PING + 1) == 0) {
+			task->cmd = CMD_PING;
+		}
+		else if (g_ascii_strcasecmp (p + 1, MSG_CMD_PROCESS + 1) == 0) {
+			task->cmd = CMD_PROCESS;
+		}
+		else {
+			goto err;
+		}
+		break;
+	case 'r':
+	case 'R':
+		/* report, report_ifspam */
+		if (g_ascii_strcasecmp (p + 1, MSG_CMD_REPORT + 1) == 0) {
+			task->cmd = CMD_REPORT;
+		}
+		else if (g_ascii_strcasecmp (p + 1, MSG_CMD_REPORT_IFSPAM + 1) == 0) {
+			task->cmd = CMD_REPORT_IFSPAM;
+		}
+		else {
+			goto err;
+		}
+		break;
+	default:
+		cur = custom_commands;
+		while (cur) {
+			cmd = cur->data;
+			if (g_ascii_strcasecmp (p, cmd->name) == 0) {
+				task->cmd = CMD_OTHER;
+				task->custom_cmd = cmd;
+				break;
+			}
+			cur = g_list_next (cur);
+		}
+
+		if (cur == NULL) {
+			goto err;
+		}
+		break;
+	}
+
+	return TRUE;
+
+err:
+	debug_task ("bad command: %s", p);
+	task->last_error = "invalid command";
+	task->error_code = 400;
+	return FALSE;
+}
+
+static gboolean
+rspamd_protocol_handle_headers (struct rspamd_task *task, struct rspamd_http_message *msg)
+{
+	gchar                           *headern, *err, *tmp;
+	gboolean                         res = TRUE;
+	struct rspamd_http_header      *h;
+
+	LL_FOREACH (msg->headers, h) {
+		headern = h->name->str;
+
+		switch (headern[0]) {
+		case 'd':
+		case 'D':
+			if (g_ascii_strcasecmp (headern, DELIVER_TO_HEADER) == 0) {
+				task->deliver_to = rspamd_protocol_escape_braces (h->value);
+				debug_task ("read deliver-to header, value: %s", task->deliver_to);
+			}
+			else {
+				debug_task ("wrong header: %s", headern);
+				res = FALSE;
+			}
+			break;
+		case 'h':
+		case 'H':
+			if (g_ascii_strcasecmp (headern, HELO_HEADER) == 0) {
+				task->helo = h->value->str;
+				debug_task ("read helo header, value: %s", task->helo);
+			}
+			else if (g_ascii_strcasecmp (headern, HOSTNAME_HEADER) == 0) {
+				task->hostname = h->value->str;
+				debug_task ("read hostname header, value: %s", task->hostname);
+			}
+			else {
+				debug_task ("wrong header: %s", headern);
+				res = FALSE;
+			}
+			break;
+		case 'f':
+		case 'F':
+			if (g_ascii_strcasecmp (headern, FROM_HEADER) == 0) {
+				task->from = rspamd_protocol_escape_braces (h->value);
+				debug_task ("read from header, value: %s", task->from);
+			}
+			else {
+				debug_task ("wrong header: %s", headern);
+				res = FALSE;
+			}
+			break;
+		case 'j':
+		case 'J':
+			if (g_ascii_strcasecmp (headern, JSON_HEADER) == 0) {
+				task->is_json = parse_flag (h->value->str);
+			}
+			else {
+				debug_task ("wrong header: %s", headern);
+				res = FALSE;
+			}
+			break;
+		case 'q':
+		case 'Q':
+			if (g_ascii_strcasecmp (headern, QUEUE_ID_HEADER) == 0) {
+				task->queue_id = h->value->str;
+				debug_task ("read queue_id header, value: %s", task->queue_id);
+			}
+			else {
+				debug_task ("wrong header: %s", headern);
+				res = FALSE;
+			}
+			break;
+		case 'r':
+		case 'R':
+			if (g_ascii_strcasecmp (headern, RCPT_HEADER) == 0) {
+				tmp = rspamd_protocol_escape_braces (h->value);
+				task->rcpt = g_list_prepend (task->rcpt, tmp);
+				debug_task ("read rcpt header, value: %s", tmp);
+			}
+			else if (g_ascii_strcasecmp (headern, NRCPT_HEADER) == 0) {
+				task->nrcpt = strtoul (h->value->str, &err, 10);
+				debug_task ("read rcpt header, value: %d", (gint)task->nrcpt);
+			}
+			else {
+				msg_info ("wrong header: %s", headern);
+				res = FALSE;
+			}
+			break;
+		case 'i':
+		case 'I':
+			if (g_ascii_strcasecmp (headern, IP_ADDR_HEADER) == 0) {
+				tmp = h->value->str;
+				if (!rspamd_parse_inet_address (&task->from_addr, tmp)) {
+					msg_err ("bad ip header: '%s'", tmp);
+					return FALSE;
+				}
+				debug_task ("read IP header, value: %s", tmp);
+			}
+			else {
+				debug_task ("wrong header: %s", headern);
+				res = FALSE;
+			}
+			break;
+		case 'p':
+		case 'P':
+			if (g_ascii_strcasecmp (headern, PASS_HEADER) == 0) {
+				if (h->value->len == sizeof ("all") - 1 &&
+						g_ascii_strcasecmp (h->value->str, "all") == 0) {
+					task->pass_all_filters = TRUE;
+					debug_task ("pass all filters");
+				}
+			}
+			else {
+				res = FALSE;
+			}
+			break;
+		case 's':
+		case 'S':
+			if (g_ascii_strcasecmp (headern, SUBJECT_HEADER) == 0) {
+				task->subject = h->value->str;
+			}
+			else {
+				res = FALSE;
+			}
+			break;
+		case 'u':
+		case 'U':
+			if (g_ascii_strcasecmp (headern, USER_HEADER) == 0) {
+				task->user = h->value->str;
+			}
+			else {
+				res = FALSE;
+			}
+			break;
+		case 'l':
+		case 'L':
+			if (g_ascii_strcasecmp (headern, NO_LOG_HEADER) == 0) {
+				if (g_ascii_strcasecmp (h->value->str, "no") == 0) {
+					task->no_log = TRUE;
+				}
+			}
+			else {
+				res = FALSE;
+			}
+			break;
+		default:
+			debug_task ("wrong header: %s", headern);
+			res = FALSE;
+			break;
+		}
+	}
+
+	if (!res && task->cfg->strict_protocol_headers) {
+		msg_err ("deny processing of a request with incorrect or unknown headers");
+		task->last_error = "invalid header";
+		task->error_code = 400;
+		return FALSE;
+	}
+
+	return TRUE;
+}
+
+gboolean
+rspamd_protocol_handle_request (struct rspamd_task *task,
+		struct rspamd_http_message *msg)
+{
+	gboolean ret = TRUE;
+
+	if (msg->method == HTTP_SYMBOLS) {
+		task->cmd = CMD_SYMBOLS;
+		task->is_json = FALSE;
+	}
+	else if (msg->method == HTTP_CHECK) {
+		task->cmd = CMD_CHECK;
+		task->is_json = FALSE;
+	}
+	else {
+		task->is_json = TRUE;
+		ret = rspamd_protocol_handle_url (task, msg);
+	}
+
+	if (ret) {
+		ret = rspamd_protocol_handle_headers (task, msg);
+	}
+
+	return ret;
+}
+
+static void
+write_hashes_to_log (struct rspamd_task *task, GString *logbuf)
+{
+	GList                          *cur;
+	struct mime_text_part          *text_part;
+	
+	cur = task->text_parts;
+
+	while (cur) {
+		text_part = cur->data;
+		if (text_part->fuzzy) {
+			if (cur->next != NULL) {
+				rspamd_printf_gstring (logbuf, " part: %Xd,", text_part->fuzzy->h);
+			}
+			else {
+				rspamd_printf_gstring (logbuf, " part: %Xd", text_part->fuzzy->h);
+			}
+		}
+		cur = g_list_next (cur);
+	}
+}
+
+
+/* Structure for writing tree data */
+struct tree_cb_data {
+	ucl_object_t *top;
+	struct rspamd_task *task;
+};
+
+/*
+ * Callback for writing urls
+ */
+static gboolean
+urls_protocol_cb (gpointer key, gpointer value, gpointer ud)
+{
+	struct tree_cb_data             *cb = ud;
+	struct uri                      *url = value;
+	ucl_object_t                     *obj;
+
+	obj = ucl_object_fromlstring (url->host, url->hostlen);
+	DL_APPEND (cb->top->value.av, obj);
+
+	if (cb->task->cfg->log_urls) {
+		msg_info ("<%s> URL: %s - %s: %s", cb->task->message_id, cb->task->user ?
+				cb->task->user : (cb->task->from ? cb->task->from : "unknown"),
+				rspamd_inet_address_to_string (&cb->task->from_addr),
+				struri (url));
+	}
+
+	return FALSE;
+}
+
+static ucl_object_t *
+rspamd_urls_tree_ucl (GTree *input, struct rspamd_task *task)
+{
+	struct tree_cb_data             cb;
+	ucl_object_t                    *obj;
+
+	obj = ucl_object_typed_new (UCL_ARRAY);
+	cb.top = obj;
+	cb.task = task;
+
+	g_tree_foreach (input, urls_protocol_cb, &cb);
+
+	return obj;
+}
+
+static gboolean
+emails_protocol_cb (gpointer key, gpointer value, gpointer ud)
+{
+	struct tree_cb_data             *cb = ud;
+	struct uri                      *url = value;
+	ucl_object_t                     *obj;
+
+	obj = ucl_object_fromlstring (url->user, url->userlen + url->hostlen + 1);
+	DL_APPEND (cb->top->value.av, obj);
+
+	return FALSE;
+}
+
+static ucl_object_t *
+rspamd_emails_tree_ucl (GTree *input, struct rspamd_task *task)
+{
+	struct tree_cb_data             cb;
+	ucl_object_t                    *obj;
+
+	obj = ucl_object_typed_new (UCL_ARRAY);
+	cb.top = obj;
+	cb.task = task;
+
+	g_tree_foreach (input, emails_protocol_cb, &cb);
+
+	return obj;
+}
+
+
+/* Write new subject */
+static const gchar *
+make_rewritten_subject (struct metric *metric, struct rspamd_task *task)
+{
+	static gchar                    subj_buf[1024];
+	gchar                          *p = subj_buf, *end, *c, *res;
+	const gchar                    *s;
+
+	end = p + sizeof(subj_buf);
+	c = metric->subject;
+	s = g_mime_message_get_subject (task->message);
+
+	while (p < end) {
+		if (*c == '\0') {
+			*p = '\0';
+			break;
+		}
+		else if (*c == '%' && *(c + 1) == 's') {
+			p += rspamd_strlcpy (p, (s != NULL) ? s : "", end - p);
+			c += 2;
+		}
+		else {
+			*p = *c ++;
+		}
+		p ++;
+	}
+	res = g_mime_utils_header_encode_text (subj_buf);
+
+	rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t)g_free, res);
+
+	return res;
+}
+
+static ucl_object_t *
+rspamd_str_list_ucl (GList *str_list)
+{
+	ucl_object_t                    *top = NULL, *obj;
+	GList                           *cur;
+
+	top = ucl_object_typed_new (UCL_ARRAY);
+	cur = str_list;
+	while (cur) {
+		obj = ucl_object_fromstring (cur->data);
+		DL_APPEND (top->value.av, obj);
+		cur = g_list_next (cur);
+	}
+
+	return top;
+}
+
+static ucl_object_t *
+rspamd_metric_symbol_ucl (struct rspamd_task *task, struct metric *m,
+		struct symbol *sym, GString *logbuf)
+{
+	ucl_object_t                    *obj = NULL;
+	const gchar                     *description = NULL;
+
+	rspamd_printf_gstring (logbuf, "%s,", sym->name);
+	description = g_hash_table_lookup (m->descriptions, sym->name);
+
+	obj = ucl_object_typed_new (UCL_OBJECT);
+	ucl_object_insert_key (obj, ucl_object_fromstring (sym->name), "name", 0, false);
+	ucl_object_insert_key (obj, ucl_object_fromdouble (sym->score), "score", 0, false);
+	if (description) {
+		ucl_object_insert_key (obj, ucl_object_fromstring (description), "description", 0, false);
+	}
+	if (sym->options != NULL) {
+		ucl_object_insert_key (obj, rspamd_str_list_ucl (sym->options), "options", 0, false);
+	}
+
+	return obj;
+}
+
+static ucl_object_t *
+rspamd_metric_result_ucl (struct rspamd_task *task, struct metric_result *mres, GString *logbuf)
+{
+	GHashTableIter                   hiter;
+	struct symbol                  *sym;
+	struct metric                  *m;
+	gboolean                         is_spam;
+	enum rspamd_metric_action        action = METRIC_ACTION_NOACTION;
+	ucl_object_t                    *obj = NULL, *sobj;
+	gdouble                          required_score;
+	gpointer                         h, v;
+	const gchar                     *subject;
+	gchar                            action_char;
+
+	m = mres->metric;
+
+	/* XXX: handle settings */
+	required_score = m->actions[METRIC_ACTION_REJECT].score;
+	is_spam = (mres->score >= required_score);
+	action = check_metric_action (mres->score, required_score, m);
+	if (task->is_skipped) {
+		action_char = 'S';
+	}
+	else if (is_spam) {
+		action_char = 'T';
+	}
+	else {
+		action_char = 'F';
+	}
+	rspamd_printf_gstring (logbuf, "(%s: %c (%s): [%.2f/%.2f] [",
+			m->name, action_char,
+			str_action_metric (action),
+			mres->score, required_score);
+
+	obj = ucl_object_typed_new (UCL_OBJECT);
+	ucl_object_insert_key (obj, ucl_object_frombool (is_spam),
+			"is_spam", 0, false);
+	ucl_object_insert_key (obj, ucl_object_frombool (task->is_skipped),
+			"is_skipped", 0, false);
+	ucl_object_insert_key (obj, ucl_object_fromdouble (mres->score),
+			"score", 0, false);
+	ucl_object_insert_key (obj, ucl_object_fromdouble (required_score),
+			"required_score", 0, false);
+	ucl_object_insert_key (obj, ucl_object_fromstring (str_action_metric (action)),
+			"action", 0, false);
+
+	if (action == METRIC_ACTION_REWRITE_SUBJECT) {
+		subject = make_rewritten_subject (m, task);
+		ucl_object_insert_key (obj, ucl_object_fromstring (subject),
+					"subject", 0, false);
+	}
+	/* Now handle symbols */
+	g_hash_table_iter_init (&hiter, mres->symbols);
+	while (g_hash_table_iter_next (&hiter, &h, &v)) {
+		sym = (struct symbol *)v;
+		sobj = rspamd_metric_symbol_ucl (task, m, sym, logbuf);
+		ucl_object_insert_key (obj, sobj, h, 0, false);
+	}
+
+	/* Cut the trailing comma if needed */
+	if (logbuf->str[logbuf->len - 1] == ',') {
+		logbuf->len --;
+	}
+
+#ifdef HAVE_CLOCK_GETTIME
+	rspamd_printf_gstring (logbuf, "]), len: %z, time: %s, dns req: %d,",
+			task->msg->len, calculate_check_time (&task->tv, &task->ts,
+			task->cfg->clock_res, &task->scan_milliseconds), task->dns_requests);
+#else
+	rspamd_printf_gstring (logbuf, "]), len: %z, time: %s, dns req: %d,",
+			task->msg->len,
+			calculate_check_time (&task->tv, task->cfg->clock_res, &task->scan_milliseconds),
+			task->dns_requests);
+#endif
+
+	return obj;
+}
+
+static void
+rspamd_ucl_tolegacy_output (struct rspamd_task *task, ucl_object_t *top, GString *out)
+{
+	const ucl_object_t *metric, *score,
+		*required_score, *is_spam, *elt, *symbols;
+	ucl_object_iter_t iter = NULL;
+
+	metric = ucl_object_find_key (top, DEFAULT_METRIC);
+	if (metric != NULL) {
+		score = ucl_object_find_key (metric, "score");
+		required_score = ucl_object_find_key (metric, "required_score");
+		is_spam = ucl_object_find_key (metric, "is_spam");
+		g_string_append_printf (out, "Metric: default; %s; %.2f / %.2f / 0.0\r\n",
+				ucl_object_toboolean (is_spam) ? "True" : "False",
+				ucl_object_todouble (score),
+				ucl_object_todouble (required_score));
+		elt = ucl_object_find_key (metric, "action");
+		if (elt != NULL) {
+			g_string_append_printf (out, "Action: %s\r\n",
+				ucl_object_tostring (elt));
+		}
+
+		symbols = ucl_object_find_key (metric, "symbols");
+		while ((elt = ucl_iterate_object (symbols, &iter, true)) != NULL) {
+			const ucl_object_t *sym_score;
+			sym_score = ucl_object_find_key (elt, "score");
+			g_string_append_printf (out, "Symbol: %s; %.2f\r\n",
+					ucl_object_key (elt),
+					ucl_object_todouble (sym_score));
+		}
+
+		elt = ucl_object_find_key (metric, "subject");
+		if (elt != NULL) {
+			g_string_append_printf (out, "Subject: %s\r\n",
+					ucl_object_tostring (elt));
+		}
+	}
+	g_string_append_printf (out, "Message-ID: %s\r\n", task->message_id);
+}
+
+static void
+write_check_reply (struct rspamd_http_message *msg, struct rspamd_task *task)
+{
+	GString                         *logbuf;
+	struct metric_result           *metric_res;
+	GHashTableIter                   hiter;
+	gpointer                         h, v;
+	ucl_object_t                    *top = NULL, *obj;
+
+	/* Output the first line - check status */
+	logbuf = g_string_sized_new (BUFSIZ);
+	rspamd_printf_gstring (logbuf, "id: <%s>, qid: <%s>, ", task->message_id, task->queue_id);
+
+	if (task->user) {
+		rspamd_printf_gstring (logbuf, "user: %s, ", task->user);
+	}
+
+	if (!task->no_log) {
+		rspamd_roll_history_update (task->worker->srv->history, task);
+	}
+	g_hash_table_iter_init (&hiter, task->results);
+
+	top = ucl_object_typed_new (UCL_OBJECT);
+	/* Convert results to an ucl object */
+	while (g_hash_table_iter_next (&hiter, &h, &v)) {
+		metric_res = (struct metric_result *)v;
+		obj = rspamd_metric_result_ucl (task, metric_res, logbuf);
+		ucl_object_insert_key (top, obj, h, 0, false);
+	}
+
+	if (task->messages != NULL) {
+		ucl_object_insert_key (top, rspamd_str_list_ucl (task->messages), "messages", 0, false);
+	}
+	if (g_tree_nnodes (task->urls) > 0) {
+		ucl_object_insert_key (top, rspamd_urls_tree_ucl (task->urls, task), "urls", 0, false);
+	}
+	if (g_tree_nnodes (task->emails) > 0) {
+		ucl_object_insert_key (top, rspamd_emails_tree_ucl (task->emails, task),
+				"emails", 0, false);
+	}
+	
+	ucl_object_insert_key (top, ucl_object_fromstring (task->message_id),
+			"message-id", 0, false);
+
+	write_hashes_to_log (task, logbuf);
+	if (!task->no_log) {
+		msg_info ("%v", logbuf);
+	}
+	g_string_free (logbuf, TRUE);
+
+	msg->body = g_string_sized_new (BUFSIZ);
+
+	if (msg->method < HTTP_SYMBOLS) {
+		rspamd_ucl_emit_gstring (top, UCL_EMIT_JSON_COMPACT, msg->body);
+	}
+	else {
+		rspamd_ucl_tolegacy_output (task, top, msg->body);
+	}
+	ucl_object_unref (top);
+
+	/* Increase counters */
+	task->worker->srv->stat->messages_scanned++;
+}
+
+void
+rspamd_protocol_write_reply (struct rspamd_task *task)
+{
+	struct rspamd_http_message    *msg;
+	const gchar                   *ctype = "application/json";
+	ucl_object_t                   *top = NULL;
+
+	msg = rspamd_http_new_message (HTTP_RESPONSE);
+	if (!task->is_json) {
+		/* Turn compatibility on */
+		msg->method = HTTP_SYMBOLS;
+	}
+	msg->date = time (NULL);
+
+	task->state = CLOSING_CONNECTION;
+
+	top = ucl_object_typed_new (UCL_OBJECT);
+	debug_task ("writing reply to client");
+	if (task->error_code != 0) {
+		msg->code = task->error_code;
+		ucl_object_insert_key (top, ucl_object_fromstring (task->last_error), "error", 0, false);
+		msg->body = g_string_sized_new (256);
+		rspamd_ucl_emit_gstring (top, UCL_EMIT_JSON_COMPACT, msg->body);
+		ucl_object_unref (top);
+	}
+	else {
+		switch (task->cmd) {
+		case CMD_REPORT_IFSPAM:
+		case CMD_REPORT:
+		case CMD_CHECK:
+		case CMD_SYMBOLS:
+		case CMD_PROCESS:
+		case CMD_SKIP:
+			write_check_reply (msg, task);
+			break;
+		case CMD_PING:
+			msg->body = g_string_new ("pong");
+			break;
+		case CMD_OTHER:
+			msg_err ("BROKEN");
+			break;
+		}
+	}
+
+	rspamd_http_connection_reset (task->http_conn);
+	rspamd_http_connection_write_message (task->http_conn, msg, NULL,
+					ctype, task, task->sock, &task->tv, task->ev_base);
+}
+
+void
+register_protocol_command (const gchar *name, protocol_reply_func func)
+{
+	struct custom_command          *cmd;
+
+	cmd = g_malloc (sizeof (struct custom_command));
+	cmd->name = name;
+	cmd->func = func;
+
+	custom_commands = g_list_prepend (custom_commands, cmd);
+}
diff --git a/src/libmime/protocol.h b/src/libmime/protocol.h
new file mode 100644
index 000000000..8d2efe118
--- /dev/null
+++ b/src/libmime/protocol.h
@@ -0,0 +1,46 @@
+/**
+ * @file protocol.h
+ * Rspamd protocol definition
+ */
+
+#ifndef RSPAMD_PROTOCOL_H
+#define RSPAMD_PROTOCOL_H
+
+#include "config.h"
+#include "filter.h"
+#include "http.h"
+#include "task.h"
+
+#define RSPAMD_BASE_ERROR 500
+#define RSPAMD_FILTER_ERROR RSPAMD_BASE_ERROR + 1
+#define RSPAMD_NETWORK_ERROR RSPAMD_BASE_ERROR + 2
+#define RSPAMD_PROTOCOL_ERROR RSPAMD_BASE_ERROR + 3
+#define RSPAMD_LENGTH_ERROR RSPAMD_BASE_ERROR + 4
+#define RSPAMD_STATFILE_ERROR RSPAMD_BASE_ERROR + 5
+
+struct metric;
+
+/**
+ * Process HTTP request to the task structure
+ * @param task
+ * @param msg
+ * @return
+ */
+gboolean rspamd_protocol_handle_request (struct rspamd_task *task, struct rspamd_http_message *msg);
+
+/**
+ * Write reply for specified task command
+ * @param task task object
+ * @return 0 if we wrote reply and -1 if there was some error
+ */
+void rspamd_protocol_write_reply (struct rspamd_task *task);
+
+
+/**
+ * Register custom fucntion to extend protocol
+ * @param name symbolic name of custom function
+ * @param func callback function for writing reply
+ */
+void register_protocol_command (const gchar *name, protocol_reply_func func);
+
+#endif
diff --git a/src/libmime/smtp_proto.c b/src/libmime/smtp_proto.c
new file mode 100644
index 000000000..3af1c3910
--- /dev/null
+++ b/src/libmime/smtp_proto.c
@@ -0,0 +1,701 @@
+/*
+ * Copyright (c) 2009-2012, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "main.h"
+#include "cfg_file.h"
+#include "util.h"
+#include "smtp.h"
+#include "smtp_proto.h"
+#include "smtp_utils.h"
+
+gchar                           *
+make_smtp_error (rspamd_mempool_t *pool, gint error_code, const gchar *format, ...)
+{
+	va_list                         vp;
+	gchar                           *result = NULL, *p;
+	size_t                          len;
+	
+	va_start (vp, format);
+	len = g_printf_string_upper_bound (format, vp);
+	va_end (vp);
+	va_start (vp, format);
+	len += sizeof ("65535 ") + sizeof (CRLF) - 1;
+	result = rspamd_mempool_alloc (pool, len);
+	p = result + rspamd_snprintf (result, len, "%d ", error_code);
+	p = rspamd_vsnprintf (p, len - (p - result), format, vp);
+	*p++ = CR; *p++ = LF; *p = '\0';
+	va_end (vp);
+
+	return result;
+}
+
+
+gboolean
+parse_smtp_command (struct smtp_session *session, f_str_t *line, struct smtp_command **cmd)
+{
+	enum {
+		SMTP_PARSE_START = 0,
+		SMTP_PARSE_SPACES,
+		SMTP_PARSE_ARGUMENT,
+		SMTP_PARSE_DONE
+	}                              state;
+	gchar                         *p, *c, ch, cmd_buf[4];
+	guint                          i;
+	f_str_t                       *arg = NULL;
+	struct smtp_command           *pcmd;
+	
+	if (line->len == 0) {
+		return FALSE;
+	}
+
+	state = SMTP_PARSE_START;
+	c = line->begin;
+	p = c;
+	*cmd = rspamd_mempool_alloc0 (session->pool, sizeof (struct smtp_command));
+	pcmd = *cmd;
+
+	for (i = 0; i < line->len; i ++, p ++) {
+		ch = *p;
+		switch (state) {
+			case SMTP_PARSE_START:
+				if (ch == ' ' || ch == ':' || ch == CR || ch == LF || i == line->len - 1) {
+					if (i == line->len - 1) {
+						p ++;
+					}
+					if (p - c == 4) {
+						cmd_buf[0] = g_ascii_toupper (c[0]);
+						cmd_buf[1] = g_ascii_toupper (c[1]);
+						cmd_buf[2] = g_ascii_toupper (c[2]);
+						cmd_buf[3] = g_ascii_toupper (c[3]);
+
+						if (memcmp (cmd_buf, "HELO", 4) == 0) {
+							pcmd->command = SMTP_COMMAND_HELO;
+						}
+						else if (memcmp (cmd_buf, "EHLO", 4) == 0) {
+							pcmd->command = SMTP_COMMAND_EHLO;
+						}
+						else if (memcmp (cmd_buf, "MAIL", 4) == 0) {
+							pcmd->command = SMTP_COMMAND_MAIL;
+						}
+						else if (memcmp (cmd_buf, "RCPT", 4) == 0) {
+							pcmd->command = SMTP_COMMAND_RCPT;
+						}
+						else if (memcmp (cmd_buf, "DATA", 4) == 0) {
+							pcmd->command = SMTP_COMMAND_DATA;
+						}
+						else if (memcmp (cmd_buf, "QUIT", 4) == 0) {
+							pcmd->command = SMTP_COMMAND_QUIT;
+						}
+						else if (memcmp (cmd_buf, "NOOP", 4) == 0) {
+							pcmd->command = SMTP_COMMAND_NOOP;
+						}
+						else if (memcmp (cmd_buf, "EXPN", 4) == 0) {
+							pcmd->command = SMTP_COMMAND_EXPN;
+						}
+						else if (memcmp (cmd_buf, "RSET", 4) == 0) {
+							pcmd->command = SMTP_COMMAND_RSET;
+						}
+						else if (memcmp (cmd_buf, "HELP", 4) == 0) {
+							pcmd->command = SMTP_COMMAND_HELP;
+						}
+						else if (memcmp (cmd_buf, "VRFY", 4) == 0) {
+							pcmd->command = SMTP_COMMAND_VRFY;
+						}
+						else {
+							msg_info ("invalid command: %*s", 4, cmd_buf);
+							return FALSE;
+						}
+					}
+					else {
+						/* Invalid command */
+						msg_info ("invalid command: %*s", 4, c);
+						return FALSE;
+					}
+					/* Now check what we have */
+					if (ch == ' ' || ch == ':') {
+						state = SMTP_PARSE_SPACES;
+					}
+					else if (ch == CR) {
+						state = SMTP_PARSE_DONE;
+					}
+					else if (ch == LF) {
+						return TRUE;
+					}
+				}
+				else if ((ch < 'A' || ch > 'Z') && (ch < 'a' || ch > 'z')) {
+					msg_info ("invalid letter code in SMTP command: %d", (gint)ch);
+					return FALSE;
+				}
+				break;
+			case SMTP_PARSE_SPACES:
+				if (ch == CR) {
+					state = SMTP_PARSE_DONE;
+				}
+				else if (ch == LF) {
+					goto end;
+				}
+				else if (ch != ' ' && ch != ':') {
+					state = SMTP_PARSE_ARGUMENT;
+					arg = rspamd_mempool_alloc (session->pool, sizeof (f_str_t));
+					c = p;
+				}
+				break;
+			case SMTP_PARSE_ARGUMENT:
+				if (ch == ' ' || ch == ':' || ch == CR || ch == LF || i == line->len - 1) {
+					if (i == line->len - 1 && (ch != ' ' && ch != CR && ch != LF)) {
+						p ++;
+					}
+					arg->len = p - c;
+					arg->begin = rspamd_mempool_alloc (session->pool, arg->len);
+					memcpy (arg->begin, c, arg->len);
+					pcmd->args = g_list_prepend (pcmd->args, arg);
+					if (ch == ' ' || ch == ':') {
+						state = SMTP_PARSE_SPACES;
+					}
+					else if (ch == CR) {
+						state = SMTP_PARSE_DONE;
+					}
+					else {
+						goto end;
+					}
+				}
+				break;
+			case SMTP_PARSE_DONE:
+				if (ch == LF) {
+					goto end;
+				}
+				msg_info ("CR without LF in SMTP command");
+				return FALSE;
+		}
+	}
+
+end:
+	if (pcmd->args) {
+		pcmd->args = g_list_reverse (pcmd->args);
+		rspamd_mempool_add_destructor (session->pool, (rspamd_mempool_destruct_t)g_list_free, pcmd->args);
+	}
+	return TRUE;
+}
+
+static gboolean
+check_smtp_path (f_str_t *path)
+{
+	guint                            i;
+	gchar                           *p;
+
+	p = path->begin;
+	if (*p != '<' || path->len < 2) {
+		return FALSE;
+	}
+	for (i = 0; i < path->len; i++, p ++) {
+		if (*p == '>' && i != path->len - 1) {
+			return FALSE;
+		}
+	}
+
+	return *(p - 1) == '>';
+}
+
+gboolean
+parse_smtp_helo (struct smtp_session *session, struct smtp_command *cmd)
+{
+	f_str_t                       *arg;
+
+	if (cmd->args == NULL) {
+		session->error = SMTP_ERROR_BAD_ARGUMENTS;
+		return FALSE;
+	}
+	arg = cmd->args->data;
+	session->helo = rspamd_mempool_alloc (session->pool, arg->len + 1);
+	rspamd_strlcpy (session->helo, arg->begin, arg->len + 1);
+	/* Now try to write reply */
+	if (cmd->command == SMTP_COMMAND_HELO) {
+		/* No ESMTP */
+		session->error = SMTP_ERROR_OK;
+		session->esmtp = FALSE;
+		return TRUE;
+	}
+	else {
+		/* Try to write all capabilities */
+		session->esmtp = TRUE;
+		if (session->ctx->smtp_capabilities == NULL) {
+			session->error = SMTP_ERROR_OK;
+			return TRUE;
+		}
+		else {
+			session->error = session->ctx->smtp_capabilities;
+			return TRUE;
+		}
+	}
+
+	return FALSE;
+}
+
+gboolean
+parse_smtp_from (struct smtp_session *session, struct smtp_command *cmd)
+{
+	f_str_t                       *arg;
+	GList                         *cur = cmd->args;
+
+	if (cmd->args == NULL) {
+		session->error = SMTP_ERROR_BAD_ARGUMENTS;
+		return FALSE;
+	}
+	arg = cur->data;
+	/* First argument MUST be FROM */
+	if (arg->len != 4 || (
+		g_ascii_toupper (arg->begin[0]) != 'F' ||
+		g_ascii_toupper (arg->begin[1]) != 'R' ||
+		g_ascii_toupper (arg->begin[2]) != 'O' ||
+		g_ascii_toupper (arg->begin[3]) != 'M')) {
+		session->error = SMTP_ERROR_BAD_ARGUMENTS;
+		return FALSE;
+	}
+	/* Next one is from address */
+	cur = g_list_next (cur);
+	if (cur == NULL) {
+		session->error = SMTP_ERROR_BAD_ARGUMENTS;
+		return FALSE;
+	}
+	arg = cur->data;
+	if (check_smtp_path (arg)) {
+		session->from = cur;
+	}
+	else {
+		session->error = SMTP_ERROR_BAD_ARGUMENTS;
+		return FALSE;
+	}
+
+	return TRUE;
+}
+
+gboolean
+parse_smtp_rcpt (struct smtp_session *session, struct smtp_command *cmd)
+{
+	f_str_t                       *arg;
+	GList                         *cur = cmd->args;
+
+	if (cmd->args == NULL) {
+		session->error = SMTP_ERROR_BAD_ARGUMENTS;
+		return FALSE;
+	}
+	arg = cur->data;
+	/* First argument MUST be FROM */
+	if (arg->len != 2 || (
+		g_ascii_toupper (arg->begin[0]) != 'T' ||
+		g_ascii_toupper (arg->begin[1]) != 'O')) {
+		session->error = SMTP_ERROR_BAD_ARGUMENTS;
+		return FALSE;
+	}
+	/* Next one is from address */
+	cur = g_list_next (cur);
+	if (cur == NULL) {
+		session->error = SMTP_ERROR_BAD_ARGUMENTS;
+		return FALSE;
+	}
+	arg = cur->data;
+	if (check_smtp_path (arg)) {
+		session->rcpt = g_list_prepend (session->rcpt, cur);
+	}
+	else {
+		session->error = SMTP_ERROR_BAD_ARGUMENTS;
+		return FALSE;
+	}
+
+	return TRUE;
+
+}
+
+/* Return -1 if there are some error, 1 if all is ok and 0 in case of incomplete reply */
+static gint
+check_smtp_ustream_reply (f_str_t *in, gchar success_code)
+{
+	gchar                           *p;
+
+	/* Check for 250 at the begin of line */
+	if (in->len >= sizeof ("220 ") - 1) {
+		p = in->begin;
+		if (p[0] == success_code) {
+			/* Last reply line */
+			if (p[3] == ' ') {
+				return 1;
+			}
+			else {
+				return 0;
+			}
+		}
+		else {
+			return -1;
+		}
+	}
+
+	return -1;
+}
+
+size_t
+smtp_upstream_write_list (GList *args, gchar *buf, size_t buflen)
+{
+	GList                          *cur = args;
+	size_t                          r = 0;
+	f_str_t                        *arg;
+
+	while (cur && r < buflen - 3) {
+		arg = cur->data;
+		r += rspamd_snprintf (buf + r, buflen - r, " %V", arg); 
+		cur = g_list_next (cur);
+	}
+
+	buf[r++] = CR;
+	buf[r++] = LF;
+	buf[r] = '\0';
+
+	return r;
+}
+
+gboolean 
+smtp_upstream_write_socket (void *arg)
+{
+	struct smtp_session            *session = arg;
+	
+	if (session->upstream_state == SMTP_STATE_IN_SENDFILE) {
+		session->upstream_state = SMTP_STATE_AFTER_DATA;
+		return rspamd_dispatcher_write (session->upstream_dispatcher, CRLF DATA_END_TRAILER, sizeof (CRLF DATA_END_TRAILER) - 1, FALSE, TRUE);
+	}
+
+	return TRUE;
+}
+
+gboolean 
+smtp_upstream_read_socket (f_str_t * in, void *arg)
+{
+	struct smtp_session            *session = arg;
+	gchar                           outbuf[BUFSIZ];
+	gint                            r;
+	
+	msg_debug ("in: %V, state: %d", in, session->upstream_state);
+	switch (session->upstream_state) {
+		case SMTP_STATE_GREETING:
+			r = check_smtp_ustream_reply (in, '2');
+			if (r == -1) {
+				session->error = rspamd_mempool_alloc (session->pool, in->len + 1);
+				rspamd_strlcpy (session->error, in->begin, in->len + 1);
+				/* XXX: assume upstream errors as critical errors */
+				session->state = SMTP_STATE_CRITICAL_ERROR;
+				rspamd_dispatcher_restore (session->dispatcher);
+				if (! rspamd_dispatcher_write (session->dispatcher, session->error, in->len, FALSE, TRUE)) {
+					goto err;
+				}
+				if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) {
+					goto err;
+				}
+				destroy_session (session->s);
+				return FALSE;
+			}
+			else if (r == 1) {
+				if (session->ctx->use_xclient) {
+					r = rspamd_snprintf (outbuf, sizeof (outbuf), "XCLIENT NAME=%s ADDR=%s" CRLF, 
+							session->resolved ? session->hostname : "[UNDEFINED]",
+							inet_ntoa (session->client_addr));
+					session->upstream_state = SMTP_STATE_HELO;
+					return rspamd_dispatcher_write (session->upstream_dispatcher, outbuf, r, FALSE, FALSE);
+				}
+				else {
+					session->upstream_state = SMTP_STATE_FROM;
+					if (session->helo) {
+						r = rspamd_snprintf (outbuf, sizeof (outbuf), "%s %s" CRLF, 
+							session->esmtp ? "EHLO" : "HELO",
+							session->helo);
+					}
+					else {
+						return smtp_upstream_read_socket (in, arg);
+					}
+					return rspamd_dispatcher_write (session->upstream_dispatcher, outbuf, r, FALSE, FALSE);
+				}
+			}
+			break;
+		case SMTP_STATE_HELO:
+			r = check_smtp_ustream_reply (in, '2');
+			if (r == -1) {
+				session->error = rspamd_mempool_alloc (session->pool, in->len + 1);
+				rspamd_strlcpy (session->error, in->begin, in->len + 1);
+				/* XXX: assume upstream errors as critical errors */
+				session->state = SMTP_STATE_CRITICAL_ERROR;
+				rspamd_dispatcher_restore (session->dispatcher);
+				if (! rspamd_dispatcher_write (session->dispatcher, session->error, in->len, FALSE, TRUE)) {
+					goto err;
+				}
+				if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) {
+					goto err;
+				}
+				destroy_session (session->s);
+				return FALSE;
+			}
+			else if (r == 1) {
+				session->upstream_state = SMTP_STATE_FROM;
+				if (session->helo) {
+					r = rspamd_snprintf (outbuf, sizeof (outbuf), "%s %s" CRLF, 
+						session->esmtp ? "EHLO" : "HELO",
+						session->helo);
+				}
+				else {
+					return smtp_upstream_read_socket (in, arg);
+				}
+				return rspamd_dispatcher_write (session->upstream_dispatcher, outbuf, r, FALSE, FALSE);
+			}
+			break;
+		case SMTP_STATE_FROM:
+			r = check_smtp_ustream_reply (in, '2');
+			if (r == -1) {
+				session->error = rspamd_mempool_alloc (session->pool, in->len + 1);
+				rspamd_strlcpy (session->error, in->begin, in->len + 1);
+				/* XXX: assume upstream errors as critical errors */
+				session->state = SMTP_STATE_CRITICAL_ERROR;
+				rspamd_dispatcher_restore (session->dispatcher);
+				if (! rspamd_dispatcher_write (session->dispatcher, session->error, in->len, FALSE, TRUE)) {
+					goto err;
+				}
+				if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) {
+					goto err;
+				}
+				destroy_session (session->s);
+				return FALSE;
+			}
+			else if (r == 1) {
+				r = rspamd_snprintf (outbuf, sizeof (outbuf), "MAIL FROM: ");
+				r += smtp_upstream_write_list (session->from, outbuf + r, sizeof (outbuf) - r);
+				session->upstream_state = SMTP_STATE_RCPT;
+				return rspamd_dispatcher_write (session->upstream_dispatcher, outbuf, r, FALSE, FALSE);
+			}
+			break;
+		case SMTP_STATE_RCPT:
+			r = check_smtp_ustream_reply (in, '2');
+			if (r == -1) {
+				session->error = rspamd_mempool_alloc (session->pool, in->len + 1);
+				rspamd_strlcpy (session->error, in->begin, in->len + 1);
+				/* XXX: assume upstream errors as critical errors */
+				session->state = SMTP_STATE_CRITICAL_ERROR;
+				rspamd_dispatcher_restore (session->dispatcher);
+				if (! rspamd_dispatcher_write (session->dispatcher, session->error, in->len, FALSE, TRUE)) {
+					goto err;
+				}
+				if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) {
+					goto err;
+				}
+				destroy_session (session->s);
+				return FALSE;
+			}
+			else if (r == 1) {
+				r = rspamd_snprintf (outbuf, sizeof (outbuf), "RCPT TO: ");
+				session->cur_rcpt = g_list_first (session->rcpt);
+				r += smtp_upstream_write_list (session->cur_rcpt->data, outbuf + r, sizeof (outbuf) - r);
+				session->cur_rcpt = g_list_next (session->cur_rcpt);
+				session->upstream_state = SMTP_STATE_BEFORE_DATA;
+				return rspamd_dispatcher_write (session->upstream_dispatcher, outbuf, r, FALSE, FALSE);
+			}
+			break;
+		case SMTP_STATE_BEFORE_DATA:
+			r = check_smtp_ustream_reply (in, '2');
+			if (r == -1) {
+				session->error = rspamd_mempool_alloc (session->pool, in->len + 1);
+				rspamd_strlcpy (session->error, in->begin, in->len + 1);
+				rspamd_dispatcher_restore (session->dispatcher);
+				if (! rspamd_dispatcher_write (session->dispatcher, session->error, in->len, FALSE, TRUE)) {
+					goto err;
+				}
+				if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) {
+					goto err;
+				}
+				if (session->cur_rcpt) {
+					session->rcpt = g_list_delete_link (session->rcpt, session->cur_rcpt);
+				}
+				else {
+					session->rcpt = g_list_delete_link (session->rcpt, session->rcpt);
+				}
+				session->errors ++;
+				session->state = SMTP_STATE_RCPT;
+				return TRUE;
+			}
+			else if (r == 1) {
+				if (session->cur_rcpt != NULL) {
+					r = rspamd_snprintf (outbuf, sizeof (outbuf), "RCPT TO: ");
+					r += smtp_upstream_write_list (session->cur_rcpt, outbuf + r, sizeof (outbuf) - r);
+					session->cur_rcpt = g_list_next (session->cur_rcpt);
+					if (! rspamd_dispatcher_write (session->upstream_dispatcher, outbuf, r, FALSE, FALSE)) {
+						goto err;
+					}
+				}
+				else {
+					session->upstream_state = SMTP_STATE_DATA;
+					rspamd_dispatcher_pause (session->upstream_dispatcher);
+				}
+				session->error = rspamd_mempool_alloc (session->pool, in->len + 1);
+				rspamd_strlcpy (session->error, in->begin, in->len + 1);
+				/* Write to client */
+				if (! rspamd_dispatcher_write (session->dispatcher, session->error, in->len, FALSE, TRUE)) {
+					goto err;
+				} 
+				if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) {
+					goto err;
+				}
+				if (session->state == SMTP_STATE_WAIT_UPSTREAM) {
+					rspamd_dispatcher_restore (session->dispatcher);
+					session->state = SMTP_STATE_RCPT;
+				}
+			}
+			break;
+		case SMTP_STATE_DATA:
+			r = check_smtp_ustream_reply (in, '3');
+			if (r == -1) {
+				session->error = rspamd_mempool_alloc (session->pool, in->len + 1);
+				rspamd_strlcpy (session->error, in->begin, in->len + 1);
+				/* XXX: assume upstream errors as critical errors */
+				session->state = SMTP_STATE_CRITICAL_ERROR;
+				rspamd_dispatcher_restore (session->dispatcher);
+				if (! rspamd_dispatcher_write (session->dispatcher, session->error, 0, FALSE, TRUE)) {
+					goto err;
+				}
+				if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) {
+					goto err;
+				}
+				destroy_session (session->s);
+				return FALSE;
+			}
+			else if (r == 1) {
+				if (! make_smtp_tempfile (session)) {
+					session->error = SMTP_ERROR_FILE;
+					session->state = SMTP_STATE_CRITICAL_ERROR;
+					rspamd_dispatcher_restore (session->dispatcher);
+					if (! rspamd_dispatcher_write (session->dispatcher, session->error, 0, FALSE, TRUE)) {
+						goto err;
+					}
+					destroy_session (session->s);
+					return FALSE;
+				}
+				session->state = SMTP_STATE_AFTER_DATA;
+				session->error = SMTP_ERROR_DATA_OK;
+				rspamd_dispatcher_restore (session->dispatcher);
+				if (! rspamd_dispatcher_write (session->dispatcher, session->error, 0, FALSE, TRUE)) {
+					goto err;
+				}
+				rspamd_dispatcher_pause (session->upstream_dispatcher);
+				rspamd_set_dispatcher_policy (session->dispatcher, BUFFER_LINE, 0);
+				session->dispatcher->strip_eol = FALSE;
+				return TRUE;
+			}
+			break;
+		case SMTP_STATE_AFTER_DATA:
+			session->error = rspamd_mempool_alloc (session->pool, in->len + 1);
+			rspamd_strlcpy (session->error, in->begin, in->len + 1);
+			session->state = SMTP_STATE_DATA;
+			rspamd_dispatcher_restore (session->dispatcher);
+			if (! rspamd_dispatcher_write (session->dispatcher, session->error, 0, FALSE, TRUE)) {
+				goto err;
+			}
+			if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) {
+				goto err;
+			}
+			if (! rspamd_dispatcher_write (session->upstream_dispatcher, "QUIT" CRLF, sizeof ("QUIT" CRLF) - 1, FALSE, TRUE)) {
+				goto err;
+			}
+			session->upstream_state = SMTP_STATE_END;
+			return TRUE;
+			break;
+		case SMTP_STATE_END:
+			r = check_smtp_ustream_reply (in, '5');
+			if (r == -1) {
+				session->error = rspamd_mempool_alloc (session->pool, in->len + 1);
+				rspamd_strlcpy (session->error, in->begin, in->len + 1);
+				/* XXX: assume upstream errors as critical errors */
+				session->state = SMTP_STATE_CRITICAL_ERROR;
+				rspamd_dispatcher_restore (session->dispatcher);
+				if (!rspamd_dispatcher_write (session->dispatcher, session->error, 0, FALSE, TRUE)) {
+					goto err;
+				}
+				if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) {
+					goto err;
+				}
+				destroy_session (session->s);
+				return FALSE;
+			}
+			else {
+				remove_normal_event (session->s,  (event_finalizer_t)smtp_upstream_finalize_connection, session);
+			}
+			return FALSE;
+			break;
+		default:
+			msg_err ("got upstream reply at unexpected state: %d, reply: %V", session->upstream_state, in);
+			session->state = SMTP_STATE_CRITICAL_ERROR;
+			rspamd_dispatcher_restore (session->dispatcher);
+			if (! rspamd_dispatcher_write (session->dispatcher, session->error, 0, FALSE, TRUE)) {
+				goto err;
+			}
+			if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) {
+				goto err;
+			}
+			destroy_session (session->s);
+			return FALSE;
+	}
+
+	return TRUE;
+err:
+	msg_warn ("write error occured");
+	return FALSE;
+}
+
+void 
+smtp_upstream_err_socket (GError *err, void *arg)
+{
+	struct smtp_session            *session = arg;
+
+	msg_info ("abnormally closing connection with upstream %s, error: %s", session->upstream->name, err->message);
+	session->error = SMTP_ERROR_UPSTREAM;
+	session->state = SMTP_STATE_CRITICAL_ERROR;
+	/* XXX: assume upstream errors as critical errors */
+	rspamd_dispatcher_restore (session->dispatcher);
+	if (! rspamd_dispatcher_write (session->dispatcher, session->error, 0, FALSE, TRUE)) {
+		return;
+	}
+	if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) {
+		return;
+	}
+	upstream_fail (&session->upstream->up, session->session_time);
+	destroy_session (session->s);
+}
+
+void
+smtp_upstream_finalize_connection (gpointer data)
+{
+	struct smtp_session            *session = data;
+	
+	if (session->state != SMTP_STATE_CRITICAL_ERROR) {
+		if (! rspamd_dispatcher_write (session->upstream_dispatcher, "QUIT" CRLF, 0, FALSE, TRUE)) {
+			msg_warn ("cannot send correctly closing message to upstream");
+		}
+	}
+	rspamd_remove_dispatcher (session->upstream_dispatcher);
+	session->upstream_dispatcher = NULL;
+	close (session->upstream_sock);
+	session->upstream_sock = -1;
+}
diff --git a/src/libmime/smtp_proto.h b/src/libmime/smtp_proto.h
new file mode 100644
index 000000000..42fecd255
--- /dev/null
+++ b/src/libmime/smtp_proto.h
@@ -0,0 +1,95 @@
+#ifndef RSPAMD_SMTP_PROTO_H
+#define RSPAMD_SMTP_PROTO_H
+
+#include "config.h"
+#include "smtp.h"
+
+/* SMTP errors */
+#define SMTP_ERROR_BAD_COMMAND "500 Syntax error, command unrecognized" CRLF
+#define SMTP_ERROR_BAD_ARGUMENTS "501 Syntax error in parameters or arguments" CRLF
+#define SMTP_ERROR_SEQUENCE "503 Bad sequence of commands" CRLF
+#define SMTP_ERROR_RECIPIENTS "554 No valid recipients" CRLF
+#define SMTP_ERROR_UNIMPLIMENTED "502 Command not implemented" CRLF
+#define SMTP_ERROR_LIMIT "505 Too many errors. Aborting." CRLF
+#define SMTP_ERROR_UPSTREAM "421 Service not available, closing transmission channel" CRLF
+#define SMTP_ERROR_FILE "420 Service not available, filesystem error" CRLF
+#define SMTP_ERROR_OK "250 Requested mail action okay, completed" CRLF
+#define SMTP_ERROR_DATA_OK "354 Start mail input; end with <CRLF>.<CRLF>" CRLF
+
+#define DATA_END_TRAILER "." CRLF
+
+#define XCLIENT_HOST_UNAVAILABLE "[UNAVAILABLE]"
+#define XCLIENT_HOST_TEMPFAIL "[TEMPUNAVAIL]"
+
+#define MAX_SMTP_UPSTREAMS 128
+
+struct smtp_command {
+	enum {
+		SMTP_COMMAND_HELO,
+		SMTP_COMMAND_EHLO,
+		SMTP_COMMAND_QUIT,
+		SMTP_COMMAND_NOOP,
+		SMTP_COMMAND_MAIL,
+		SMTP_COMMAND_RCPT,
+		SMTP_COMMAND_RSET,
+		SMTP_COMMAND_DATA,
+		SMTP_COMMAND_VRFY,
+		SMTP_COMMAND_EXPN,
+		SMTP_COMMAND_HELP
+	} command;
+	GList *args;
+};
+
+/*
+ * Generate SMTP error message
+ */
+gchar * make_smtp_error (rspamd_mempool_t *pool, gint error_code, const gchar *format, ...);
+
+/*
+ * Parse a single SMTP command
+ */
+gboolean parse_smtp_command (struct smtp_session *session, f_str_t *line, struct smtp_command **cmd);
+
+/*
+ * Parse HELO command
+ */
+gboolean parse_smtp_helo (struct smtp_session *session, struct smtp_command *cmd);
+
+/*
+ * Parse MAIL command
+ */
+gboolean parse_smtp_from (struct smtp_session *session, struct smtp_command *cmd);
+
+/*
+ * Parse RCPT command
+ */
+gboolean parse_smtp_rcpt (struct smtp_session *session, struct smtp_command *cmd);
+
+/* Upstream SMTP */
+
+/*
+ * Read a line from SMTP upstream
+ */
+gboolean smtp_upstream_read_socket (f_str_t * in, void *arg);
+
+/*
+ * Write to SMTP upstream
+ */
+gboolean smtp_upstream_write_socket (void *arg);
+
+/*
+ * Error handler for SMTP upstream
+ */
+void smtp_upstream_err_socket (GError *err, void *arg);
+
+/*
+ * Terminate connection with upstream
+ */
+void smtp_upstream_finalize_connection (gpointer data);
+
+/*
+ * Write a list of strings to the upstream
+ */
+size_t smtp_upstream_write_list (GList *args, gchar *buf, size_t buflen);
+
+#endif
diff --git a/src/libmime/smtp_utils.c b/src/libmime/smtp_utils.c
new file mode 100644
index 000000000..5178de9dd
--- /dev/null
+++ b/src/libmime/smtp_utils.c
@@ -0,0 +1,362 @@
+/* Copyright (c) 2010, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *       * Redistributions of source code must retain the above copyright
+ *         notice, this list of conditions and the following disclaimer.
+ *       * Redistributions in binary form must reproduce the above copyright
+ *         notice, this list of conditions and the following disclaimer in the
+ *         documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "main.h"
+#include "filter.h"
+#include "settings.h"
+#include "smtp.h"
+#include "smtp_proto.h"
+
+void
+free_smtp_session (gpointer arg)
+{
+	struct smtp_session            *session = arg;
+
+	if (session) {
+		if (session->task) {
+			rspamd_task_free (session->task, FALSE);
+			if (session->task->msg->str) {
+				munmap (session->task->msg->str, session->task->msg->len);
+			}
+		}
+		if (session->rcpt) {
+			g_list_free (session->rcpt);
+		}
+		if (session->dispatcher) {
+			rspamd_remove_dispatcher (session->dispatcher);
+		}
+		close (session->sock);
+		if (session->temp_name != NULL) {
+			unlink (session->temp_name);
+		}
+		if (session->temp_fd != -1) {
+			close (session->temp_fd);
+		}
+		rspamd_mempool_delete (session->pool);
+		g_free (session);
+	}
+}
+
+gboolean
+create_smtp_upstream_connection (struct smtp_session *session)
+{
+	struct smtp_upstream              *selected;
+
+	/* Try to select upstream */
+	selected = (struct smtp_upstream *)get_upstream_round_robin (session->ctx->upstreams,
+			session->ctx->upstream_num, sizeof (struct smtp_upstream),
+			session->session_time, DEFAULT_UPSTREAM_ERROR_TIME, DEFAULT_UPSTREAM_DEAD_TIME, DEFAULT_UPSTREAM_MAXERRORS);
+	if (selected == NULL) {
+		msg_err ("no upstreams suitable found");
+		return FALSE;
+	}
+
+	session->upstream = selected;
+
+	/* Now try to create socket */
+	session->upstream_sock = make_universal_socket (selected->addr, selected->port, SOCK_STREAM, TRUE, FALSE, FALSE);
+	if (session->upstream_sock == -1) {
+		msg_err ("cannot make a connection to %s", selected->name);
+		upstream_fail (&selected->up, session->session_time);
+		return FALSE;
+	}
+	/* Create a dispatcher for upstream connection */
+	session->upstream_dispatcher = rspamd_create_dispatcher (session->ev_base, session->upstream_sock, BUFFER_LINE,
+							smtp_upstream_read_socket, smtp_upstream_write_socket, smtp_upstream_err_socket,
+							&session->ctx->smtp_timeout, session);
+	session->state = SMTP_STATE_WAIT_UPSTREAM;
+	session->upstream_state = SMTP_STATE_GREETING;
+	register_async_event (session->s, (event_finalizer_t)smtp_upstream_finalize_connection, session, g_quark_from_static_string ("smtp proxy"));
+	return TRUE;
+}
+
+gboolean
+smtp_send_upstream_message (struct smtp_session *session)
+{
+	rspamd_dispatcher_pause (session->dispatcher);
+	rspamd_dispatcher_restore (session->upstream_dispatcher);
+
+	session->upstream_state = SMTP_STATE_IN_SENDFILE;
+	session->state = SMTP_STATE_WAIT_UPSTREAM;
+	if (! rspamd_dispatcher_sendfile (session->upstream_dispatcher, session->temp_fd, session->temp_size)) {
+		msg_err ("sendfile failed: %s", strerror (errno));
+		goto err;
+	}
+	return TRUE;
+
+err:
+	session->error = SMTP_ERROR_FILE;
+	session->state = SMTP_STATE_CRITICAL_ERROR;
+	if (! rspamd_dispatcher_write (session->dispatcher, session->error, 0, FALSE, TRUE)) {
+		return FALSE;
+	}
+	destroy_session (session->s);
+	return FALSE;
+}
+
+struct smtp_metric_callback_data {
+	struct smtp_session            *session;
+	enum rspamd_metric_action       action;
+	struct metric_result           *res;
+	gchar                          *log_buf;
+	gint                            log_offset;
+	gint                            log_size;
+	gboolean                        alive;
+};
+
+static void
+smtp_metric_symbols_callback (gpointer key, gpointer value, void *user_data)
+{
+	struct smtp_metric_callback_data    *cd = user_data;
+
+	cd->log_offset += rspamd_snprintf (cd->log_buf + cd->log_offset, cd->log_size - cd->log_offset, "%s,", (gchar *)key);
+}
+
+static void
+smtp_metric_callback (gpointer key, gpointer value, gpointer ud)
+{
+	struct smtp_metric_callback_data *cd = ud;
+	struct metric_result           *metric_res = value;
+	enum rspamd_metric_action       action = METRIC_ACTION_NOACTION;
+	double                          ms = 0, rs = 0;
+	gboolean                        is_spam = FALSE;
+	struct rspamd_task             *task;
+
+	task = cd->session->task;
+
+	if (!check_metric_settings (metric_res, &ms, &rs)) {
+		ms = metric_res->metric->actions[METRIC_ACTION_REJECT].score;
+		rs = metric_res->metric->actions[METRIC_ACTION_REJECT].score;
+	}
+	if (! check_metric_action_settings (task, metric_res, metric_res->score, &action)) {
+		action = check_metric_action (metric_res->score, ms, metric_res->metric);
+	}
+	if (metric_res->score >= ms) {
+		is_spam = 1;
+	}
+	if (action < cd->action) {
+		cd->action = action;
+		cd->res = metric_res;
+	}
+
+	if (!task->is_skipped) {
+		cd->log_offset += rspamd_snprintf (cd->log_buf + cd->log_offset, cd->log_size - cd->log_offset, "(%s: %c (%s): [%.2f/%.2f/%.2f] [",
+				(gchar *)key, is_spam ? 'T' : 'F', str_action_metric (action), metric_res->score, ms, rs);
+	}
+	else {
+		cd->log_offset += rspamd_snprintf (cd->log_buf + cd->log_offset, cd->log_size - cd->log_offset, "(%s: %c (default): [%.2f/%.2f/%.2f] [",
+				(gchar *)key, 'S', metric_res->score, ms, rs);
+
+	}
+	g_hash_table_foreach (metric_res->symbols, smtp_metric_symbols_callback, cd);
+	/* Remove last , from log buf */
+	if (cd->log_buf[cd->log_offset - 1] == ',') {
+		cd->log_buf[--cd->log_offset] = '\0';
+	}
+
+#ifdef HAVE_CLOCK_GETTIME
+	cd->log_offset += rspamd_snprintf (cd->log_buf + cd->log_offset, cd->log_size - cd->log_offset, "]), len: %z, time: %s,",
+		task->msg->len, calculate_check_time (&task->tv, &task->ts, task->cfg->clock_res, &task->scan_milliseconds));
+#else
+	cd->log_offset += rspamd_snprintf (cd->log_buf + cd->log_offset, cd->log_size - cd->log_offset, "]), len: %z, time: %s,",
+		task->msg->len, calculate_check_time (&task->tv, task->cfg->clock_res, &task->scan_milliseconds));
+#endif
+}
+
+gboolean
+make_smtp_tempfile (struct smtp_session *session)
+{
+	gsize                            r;
+
+	r = strlen (session->cfg->temp_dir) + sizeof ("/rspamd-XXXXXX");
+	session->temp_name = rspamd_mempool_alloc (session->pool, r);
+	rspamd_snprintf (session->temp_name, r, "%s%crspamd-XXXXXX", session->cfg->temp_dir, G_DIR_SEPARATOR);
+#ifdef HAVE_MKSTEMP
+	/* Umask is set before */
+	session->temp_fd = mkstemp (session->temp_name);
+#else
+	session->temp_fd = g_mkstemp_full (session->temp_name, O_RDWR, S_IWUSR | S_IRUSR);
+#endif
+	if (session->temp_fd == -1) {
+		msg_err ("mkstemp error: %s", strerror (errno));
+
+		return FALSE;
+	}
+
+	return TRUE;
+}
+
+gboolean
+write_smtp_reply (struct smtp_session *session)
+{
+	gchar                           logbuf[1024], *new_subject;
+	const gchar                    *old_subject;
+	struct smtp_metric_callback_data cd;
+	GMimeStream                    *stream;
+	gint                            old_fd, sublen;
+
+	/* Check metrics */
+	cd.session = session;
+	cd.action = METRIC_ACTION_NOACTION;
+	cd.res = NULL;
+	cd.log_buf = logbuf;
+	cd.log_offset = rspamd_snprintf (logbuf, sizeof (logbuf), "id: <%s>, qid: <%s>, ",
+			session->task->message_id, session->task->queue_id);
+	cd.log_size = sizeof (logbuf);
+	if (session->task->user) {
+		cd.log_offset += rspamd_snprintf (logbuf + cd.log_offset, sizeof (logbuf) - cd.log_offset,
+				"user: %s, ", session->task->user);
+	}
+
+	g_hash_table_foreach (session->task->results, smtp_metric_callback, &cd);
+
+	msg_info ("%s", logbuf);
+
+	if (cd.action <= METRIC_ACTION_REJECT) {
+		if (! rspamd_dispatcher_write (session->dispatcher, session->ctx->reject_message, 0, FALSE, TRUE)) {
+			return FALSE;
+		}
+		if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) {
+			return FALSE;
+		}
+		destroy_session (session->s);
+		return FALSE;
+	}
+	else if (cd.action <= METRIC_ACTION_ADD_HEADER || cd.action <= METRIC_ACTION_REWRITE_SUBJECT) {
+		old_fd = session->temp_fd;
+		if (! make_smtp_tempfile (session)) {
+			session->error = SMTP_ERROR_FILE;
+			session->state = SMTP_STATE_CRITICAL_ERROR;
+			rspamd_dispatcher_restore (session->dispatcher);
+			if (! rspamd_dispatcher_write (session->dispatcher, session->error, 0, FALSE, TRUE)) {
+				goto err;
+			}
+			destroy_session (session->s);
+			return FALSE;
+		}
+
+		if (cd.action <= METRIC_ACTION_REWRITE_SUBJECT) {
+			/* XXX: add this action */
+			old_subject = g_mime_message_get_subject (session->task->message);
+			if (old_subject != NULL) {
+				sublen = strlen (old_subject) + sizeof (SPAM_SUBJECT);
+				new_subject = rspamd_mempool_alloc (session->pool, sublen);
+				rspamd_snprintf (new_subject, sublen, "%s%s", SPAM_SUBJECT, old_subject);
+			}
+			else {
+				new_subject = SPAM_SUBJECT;
+			}
+			g_mime_message_set_subject (session->task->message, new_subject);
+		}
+		else if (cd.action <= METRIC_ACTION_ADD_HEADER) {
+#ifndef GMIME24
+			g_mime_message_add_header (session->task->message, "X-Spam", "true");
+#else
+			g_mime_object_append_header (GMIME_OBJECT (session->task->message), "X-Spam", "true");
+#endif
+		}
+		stream = g_mime_stream_fs_new (session->temp_fd);
+		g_mime_stream_fs_set_owner (GMIME_STREAM_FS (stream), FALSE);
+		close (old_fd);
+
+		if (g_mime_object_write_to_stream (GMIME_OBJECT (session->task->message), stream) == -1) {
+			msg_err ("cannot write MIME object to stream: %s", strerror (errno));
+			session->error = SMTP_ERROR_FILE;
+			session->state = SMTP_STATE_CRITICAL_ERROR;
+			rspamd_dispatcher_restore (session->dispatcher);
+			if (! rspamd_dispatcher_write (session->dispatcher, session->error, 0, FALSE, TRUE)) {
+				goto err;
+			}
+			destroy_session (session->s);
+			return FALSE;
+		}
+		g_object_unref (stream);
+	}
+	/* XXX: Add other actions */
+	return smtp_send_upstream_message (session);
+err:
+	session->error = SMTP_ERROR_FILE;
+	session->state = SMTP_STATE_CRITICAL_ERROR;
+	if (! rspamd_dispatcher_write (session->dispatcher, session->error, 0, FALSE, TRUE)) {
+		return FALSE;
+	}
+	destroy_session (session->s);
+	return FALSE;
+}
+
+gboolean
+parse_upstreams_line (rspamd_mempool_t *pool, struct smtp_upstream *upstreams, const gchar *line, gsize *count)
+{
+	gchar                           **strv, *p, *t, *tt, *err_str;
+	guint32                         num, i;
+	struct smtp_upstream           *cur;
+	gchar                           resolved_path[PATH_MAX];
+
+	strv = g_strsplit_set (line, ",; ", -1);
+	num = g_strv_length (strv);
+
+	if (num >= MAX_SMTP_UPSTREAMS) {
+		msg_err ("cannot define %d upstreams %d is max", num, MAX_SMTP_UPSTREAMS);
+		return FALSE;
+	}
+	*count = 0;
+
+	for (i = 0; i < num; i ++) {
+		p = strv[i];
+		cur = &upstreams[*count];
+		if ((t = strrchr (p, ':')) != NULL && (tt = strchr (p, ':')) != t) {
+			/* Assume that after last `:' we have weigth */
+			*t = '\0';
+			t ++;
+			errno = 0;
+			cur->up.priority = strtoul (t, &err_str, 10);
+			if (errno != 0 || (err_str && *err_str != '\0')) {
+				msg_err ("cannot convert weight: %s, %s", t, strerror (errno));
+				g_strfreev (strv);
+				return FALSE;
+			}
+		}
+		if (*p == '/') {
+			cur->is_unix = TRUE;
+			if (realpath (p, resolved_path) == NULL) {
+				msg_err ("cannot resolve path: %s", resolved_path);
+				g_strfreev (strv);
+				return FALSE;
+			}
+			cur->name = rspamd_mempool_strdup (pool, resolved_path);
+			(*count) ++;
+		}
+		else {
+			if (! parse_host_port (pool, p, &cur->addr, &cur->port)) {
+				g_strfreev (strv);
+				return FALSE;
+			}
+			cur->name = rspamd_mempool_strdup (pool, p);
+			(*count) ++;
+		}
+	}
+
+	g_strfreev (strv);
+	return TRUE;
+}
diff --git a/src/libmime/smtp_utils.h b/src/libmime/smtp_utils.h
new file mode 100644
index 000000000..652b6759f
--- /dev/null
+++ b/src/libmime/smtp_utils.h
@@ -0,0 +1,63 @@
+#ifndef SMTP_UTILS_H_
+#define SMTP_UTILS_H_
+
+#include "config.h"
+#include "main.h"
+#include "smtp.h"
+
+/**
+ * @file smtp_utils.h
+ * Contains utilities for smtp protocol handling
+ */
+
+struct smtp_upstream {
+	struct upstream up;
+
+	const gchar *name;
+	gchar *addr;
+	guint16 port;
+	gboolean is_unix;
+};
+
+#define MAX_SMTP_UPSTREAMS 128
+
+struct smtp_session;
+
+/**
+ * Send message to upstream
+ * @param session session object
+ */
+gboolean smtp_send_upstream_message (struct smtp_session *session);
+
+/**
+ * Create connection to upstream
+ * @param session session object
+ */
+gboolean create_smtp_upstream_connection (struct smtp_session *session);
+
+/**
+ * Create temporary file for smtp session
+ */
+gboolean make_smtp_tempfile (struct smtp_session *session);
+
+/**
+ * Write reply to upstream
+ * @param session session object
+ */
+gboolean write_smtp_reply (struct smtp_session *session);
+
+/**
+ * Frees smtp session object
+ */
+void free_smtp_session (gpointer arg);
+
+/**
+ * Parse upstreams line
+ * @param upstreams pointer to the array of upstreams (must be at least MAX_SMTP_UPSTREAMS size)
+ * @param line description line
+ * @param count targeted count
+ * @return
+ */
+gboolean parse_upstreams_line (rspamd_mempool_t *pool, struct smtp_upstream *upstreams, const gchar *line, gsize *count);
+
+#endif /* SMTP_UTILS_H_ */
diff --git a/src/libmime/worker_util.c b/src/libmime/worker_util.c
new file mode 100644
index 000000000..d029f5dc4
--- /dev/null
+++ b/src/libmime/worker_util.c
@@ -0,0 +1,255 @@
+/* Copyright (c) 2010-2011, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *       * Redistributions of source code must retain the above copyright
+ *         notice, this list of conditions and the following disclaimer.
+ *       * Redistributions in binary form must reproduce the above copyright
+ *         notice, this list of conditions and the following disclaimer in the
+ *         documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "main.h"
+#include "message.h"
+#include "lua/lua_common.h"
+
+extern struct rspamd_main			*rspamd_main;
+
+/**
+ * Return worker's control structure by its type
+ * @param type
+ * @return worker's control structure or NULL
+ */
+worker_t*
+get_worker_by_type (GQuark type)
+{
+	worker_t						**cur;
+
+	cur = &workers[0];
+	while (*cur) {
+		if (g_quark_from_string ((*cur)->name) == type) {
+			return *cur;
+		}
+		cur ++;
+	}
+
+	return NULL;
+}
+
+double
+set_counter (const gchar *name, guint32 value)
+{
+	struct counter_data            *cd;
+	double                          alpha;
+	gchar                           *key;
+
+	cd = rspamd_hash_lookup (rspamd_main->counters, (gpointer) name);
+
+	if (cd == NULL) {
+		cd = rspamd_mempool_alloc_shared (rspamd_main->counters->pool, sizeof (struct counter_data));
+		cd->value = value;
+		cd->number = 0;
+		key = rspamd_mempool_strdup_shared (rspamd_main->counters->pool, name);
+		rspamd_hash_insert (rspamd_main->counters, (gpointer) key, (gpointer) cd);
+	}
+	else {
+		/* Calculate new value */
+		rspamd_mempool_wlock_rwlock (rspamd_main->counters->lock);
+
+		alpha = 2. / (++cd->number + 1);
+		cd->value = cd->value * (1. - alpha) + value * alpha;
+
+		rspamd_mempool_wunlock_rwlock (rspamd_main->counters->lock);
+	}
+
+	return cd->value;
+}
+
+struct event_base *
+prepare_worker (struct rspamd_worker *worker, const char *name,
+		rspamd_sig_handler_t sig_handler,
+		void (*accept_handler)(int, short, void *))
+{
+	struct event_base                *ev_base;
+	struct event                     *accept_event;
+	struct sigaction                  signals;
+	GList                             *cur;
+	gint                               listen_socket;
+
+#ifdef WITH_PROFILER
+	extern void                     _start (void), etext (void);
+	monstartup ((u_long) & _start, (u_long) & etext);
+#endif
+
+	gperf_profiler_init (worker->srv->cfg, name);
+
+	worker->srv->pid = getpid ();
+
+	ev_base = event_init ();
+
+	init_signals (&signals, sig_handler);
+	sigprocmask (SIG_UNBLOCK, &signals.sa_mask, NULL);
+
+	/* Accept all sockets */
+	cur = worker->cf->listen_socks;
+	while (cur) {
+		listen_socket = GPOINTER_TO_INT (cur->data);
+		if (listen_socket != -1) {
+			accept_event = g_slice_alloc0 (sizeof (struct event));
+			event_set (accept_event, listen_socket, EV_READ | EV_PERSIST,
+					accept_handler, worker);
+			event_base_set (ev_base, accept_event);
+			event_add (accept_event, NULL);
+			worker->accept_events = g_list_prepend (worker->accept_events, accept_event);
+		}
+		cur = g_list_next (cur);
+	}
+
+	return ev_base;
+}
+
+void
+worker_stop_accept (struct rspamd_worker *worker)
+{
+	GList                             *cur;
+	struct event                     *event;
+
+	/* Remove all events */
+	cur = worker->accept_events;
+	while (cur) {
+		event = cur->data;
+		event_del (event);
+		cur = g_list_next (cur);
+		g_slice_free1 (sizeof (struct event), event);
+	}
+
+	if (worker->accept_events != NULL) {
+		g_list_free (worker->accept_events);
+	}
+}
+
+/*
+ * Called if all filters are processed
+ * @return TRUE if session should be terminated
+ */
+gboolean
+rspamd_task_fin (void *arg)
+{
+	struct rspamd_task              *task = (struct rspamd_task *) arg;
+	gint r;
+	GError *err = NULL;
+
+	/* Task is already finished or skipped */
+	if (task->state == WRITE_REPLY) {
+		if (task->fin_callback) {
+			task->fin_callback (task->fin_arg);
+		}
+		else {
+			rspamd_protocol_write_reply (task);
+		}
+		return TRUE;
+	}
+
+	/* We processed all filters and want to process statfiles */
+	if (task->state != WAIT_POST_FILTER && task->state != WAIT_PRE_FILTER) {
+		/* Process all statfiles */
+		if (task->classify_pool == NULL) {
+			/* Non-threaded version */
+			process_statfiles (task);
+		}
+		else {
+			/* Just process composites */
+			make_composites (task);
+		}
+		if (task->cfg->post_filters) {
+			/* More to process */
+			/* Special state */
+			task->state = WAIT_POST_FILTER;
+			return FALSE;
+		}
+
+	}
+
+	/* We are on post-filter waiting state */
+	if (task->state != WAIT_PRE_FILTER) {
+		/* Check if we have all events finished */
+		task->state = WRITE_REPLY;
+		if (task->fin_callback) {
+			task->fin_callback (task->fin_arg);
+		}
+		else {
+			rspamd_protocol_write_reply (task);
+		}
+	}
+	else {
+		/* We were waiting for pre-filter */
+		if (task->pre_result.action != METRIC_ACTION_NOACTION) {
+			/* Write result based on pre filters */
+			task->state = WRITE_REPLY;
+			if (task->fin_callback) {
+				task->fin_callback (task->fin_arg);
+			}
+			else {
+				rspamd_protocol_write_reply (task);
+			}
+			return TRUE;
+		}
+		else {
+			task->state = WAIT_FILTER;
+			r = process_filters (task);
+			if (r == -1) {
+				task->last_error = "Filter processing error";
+				task->error_code = RSPAMD_FILTER_ERROR;
+				task->state = WRITE_REPLY;
+				rspamd_protocol_write_reply (task);
+				return TRUE;
+			}
+			/* Add task to classify to classify pool */
+			if (!task->is_skipped && task->classify_pool) {
+				register_async_thread (task->s);
+				g_thread_pool_push (task->classify_pool, task, &err);
+				if (err != NULL) {
+					msg_err ("cannot pull task to the pool: %s", err->message);
+					remove_async_thread (task->s);
+					g_error_free (err);
+				}
+			}
+			if (task->is_skipped) {
+				rspamd_protocol_write_reply (task);
+			}
+			else {
+				return FALSE;
+			}
+		}
+	}
+
+	return TRUE;
+}
+
+/*
+ * Called if session was restored inside fin callback
+ */
+void
+rspamd_task_restore (void *arg)
+{
+	struct rspamd_task             *task = (struct rspamd_task *) arg;
+
+	/* Call post filters */
+	if (task->state == WAIT_POST_FILTER) {
+		lua_call_post_filters (task);
+	}
+	task->s->wanna_die = TRUE;
+}
author	Vsevolod Stakhov <vsevolod@highsecure.ru>	2014-04-21 16:25:51 +0100
committer	Vsevolod Stakhov <vsevolod@highsecure.ru>	2014-04-21 16:25:51 +0100
commit	61555065f3d1c8badcc9573691232f1b6e42988c (patch)
tree	563d5b7cb8c468530f7e79c4da0a75267b1184e1 /src/libmime
parent	ad5bf825b7f33bc10311673991f0cc888e69c0b1 (diff)
download	rspamd-61555065f3d1c8badcc9573691232f1b6e42988c.tar.gz rspamd-61555065f3d1c8badcc9573691232f1b6e42988c.zip