Merge branch 'expr-rework'

author: Vsevolod Stakhov <vsevolod@highsecure.ru> 2015-03-23 00:38:04 +0000
committer: Vsevolod Stakhov <vsevolod@highsecure.ru> 2015-03-23 00:38:04 +0000
commit: ff86598cc6d61f81cca34b956a064a657ed07c16 (patch)
tree: 90439218952bf18bb44a55b3d6b1bb89a9b8e56f /src/libmime/mime_expressions.c
parent: 58bcfe99a3ea3e39f2e851c7d12b190e6b7f9e3e (diff)
parent: 2b584275d8aae7634204e047168e83f4b580ec42 (diff)
download: rspamd-ff86598cc6d61f81cca34b956a064a657ed07c16.tar.gz
rspamd-ff86598cc6d61f81cca34b956a064a657ed07c16.zip
1 files changed, 2234 insertions, 0 deletions
diff --git a/src/libmime/mime_expressions.c b/src/libmime/mime_expressions.c
new file mode 100644
index 000000000..841610480
--- /dev/null
+++ b/src/libmime/mime_expressions.c
@@ -0,0 +1,2234 @@
+/*
+ * Copyright (c) 2009-2012, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "util.h"
+#include "cfg_file.h"
+#include "main.h"
+#include "message.h"
+#include "fuzzy.h"
+#include "mime_expressions.h"
+#include "html.h"
+#include "lua/lua_common.h"
+#include "diff.h"
+
+gboolean rspamd_compare_encoding (struct rspamd_task *task,
+	GArray * args,
+	void *unused);
+gboolean rspamd_header_exists (struct rspamd_task *task,
+	GArray * args,
+	void *unused);
+gboolean rspamd_parts_distance (struct rspamd_task *task,
+	GArray * args,
+	void *unused);
+gboolean rspamd_recipients_distance (struct rspamd_task *task,
+	GArray * args,
+	void *unused);
+gboolean rspamd_has_only_html_part (struct rspamd_task *task,
+	GArray * args,
+	void *unused);
+gboolean rspamd_is_recipients_sorted (struct rspamd_task *task,
+	GArray * args,
+	void *unused);
+gboolean rspamd_compare_transfer_encoding (struct rspamd_task *task,
+	GArray * args,
+	void *unused);
+gboolean rspamd_is_html_balanced (struct rspamd_task *task,
+	GArray * args,
+	void *unused);
+gboolean rspamd_has_html_tag (struct rspamd_task *task,
+	GArray * args,
+	void *unused);
+gboolean rspamd_has_fake_html (struct rspamd_task *task,
+	GArray * args,
+	void *unused);
+static gboolean rspamd_raw_header_exists (struct rspamd_task *task,
+	GArray * args,
+	void *unused);
+static gboolean rspamd_check_smtp_data (struct rspamd_task *task,
+	GArray * args,
+	void *unused);
+static gboolean rspamd_content_type_is_type (struct rspamd_task * task,
+	GArray * args,
+	void *unused);
+static gboolean rspamd_content_type_is_subtype (struct rspamd_task *task,
+	GArray * args,
+	void *unused);
+static gboolean rspamd_content_type_has_param (struct rspamd_task * task,
+	GArray * args,
+	void *unused);
+static gboolean rspamd_content_type_compare_param (struct rspamd_task * task,
+	GArray * args,
+	void *unused);
+static gboolean rspamd_has_content_part (struct rspamd_task *task,
+	GArray * args,
+	void *unused);
+static gboolean rspamd_has_content_part_len (struct rspamd_task *task,
+	GArray * args,
+	void *unused);
+
+static rspamd_expression_atom_t * rspamd_mime_expr_parse (const gchar *line, gsize len,
+		rspamd_mempool_t *pool, gpointer ud, GError **err);
+static gint rspamd_mime_expr_process (gpointer input, rspamd_expression_atom_t *atom);
+static gint rspamd_mime_expr_priority (rspamd_expression_atom_t *atom);
+static void rspamd_mime_expr_destroy (rspamd_expression_atom_t *atom);
+
+/**
+ * Regexp type: /H - header, /M - mime, /U - url /X - raw header
+ */
+enum rspamd_regexp_type {
+	REGEXP_NONE = 0,
+	REGEXP_HEADER,
+	REGEXP_MIME,
+	REGEXP_MESSAGE,
+	REGEXP_URL,
+	REGEXP_RAW_HEADER
+};
+
+/**
+ * Regexp structure
+ */
+struct rspamd_regexp_atom {
+	enum rspamd_regexp_type type;                   /**< regexp type										*/
+	gchar *regexp_text;                             /**< regexp text representation							*/
+	rspamd_regexp_t *regexp;                        /**< regexp structure									*/
+	gchar *header;                                  /**< header name for header regexps						*/
+	gboolean is_test;                               /**< true if this expression must be tested				*/
+	gboolean is_strong;                             /**< true if headers search must be case sensitive		*/
+	gboolean is_multiple;                           /**< true if we need to match all inclusions of atom	*/
+};
+
+/**
+ * Rspamd expression function
+ */
+struct rspamd_function_atom {
+	gchar *name;	/**< name of function								*/
+	GArray *args;	/**< its args										*/
+};
+
+struct rspamd_mime_atom {
+	gchar *str;
+	union {
+		struct rspamd_regexp_atom *re;
+		struct rspamd_function_atom *func;
+	} d;
+	gboolean is_function;
+};
+
+/*
+ * List of internal functions of rspamd
+ * Sorted by name to use bsearch
+ */
+static struct _fl {
+	const gchar *name;
+	rspamd_internal_func_t func;
+	void *user_data;
+} rspamd_functions_list[] = {
+	{"check_smtp_data", rspamd_check_smtp_data, NULL},
+	{"compare_encoding", rspamd_compare_encoding, NULL},
+	{"compare_parts_distance", rspamd_parts_distance, NULL},
+	{"compare_recipients_distance", rspamd_recipients_distance, NULL},
+	{"compare_transfer_encoding", rspamd_compare_transfer_encoding, NULL},
+	{"content_type_compare_param", rspamd_content_type_compare_param, NULL},
+	{"content_type_has_param", rspamd_content_type_has_param, NULL},
+	{"content_type_is_subtype", rspamd_content_type_is_subtype, NULL},
+	{"content_type_is_type", rspamd_content_type_is_type, NULL},
+	{"has_content_part", rspamd_has_content_part, NULL},
+	{"has_content_part_len", rspamd_has_content_part_len, NULL},
+	{"has_fake_html", rspamd_has_fake_html, NULL},
+	{"has_html_tag", rspamd_has_html_tag, NULL},
+	{"has_only_html_part", rspamd_has_only_html_part, NULL},
+	{"header_exists", rspamd_header_exists, NULL},
+	{"is_html_balanced", rspamd_is_html_balanced, NULL},
+	{"is_recipients_sorted", rspamd_is_recipients_sorted, NULL},
+	{"raw_header_exists", rspamd_raw_header_exists, NULL}
+};
+
+const struct rspamd_atom_subr mime_expr_subr = {
+	.parse = rspamd_mime_expr_parse,
+	.process = rspamd_mime_expr_process,
+	.priority = rspamd_mime_expr_priority,
+	.destroy = rspamd_mime_expr_destroy
+};
+
+static struct _fl *list_ptr = &rspamd_functions_list[0];
+static guint32 functions_number = sizeof (rspamd_functions_list) /
+	sizeof (struct _fl);
+static gboolean list_allocated = FALSE;
+static guint max_re_data = 0;
+
+/* Bsearch routine */
+static gint
+fl_cmp (const void *s1, const void *s2)
+{
+	struct _fl *fl1 = (struct _fl *)s1;
+	struct _fl *fl2 = (struct _fl *)s2;
+	return strcmp (fl1->name, fl2->name);
+}
+
+static GQuark
+rspamd_mime_expr_quark (void)
+{
+	return g_quark_from_static_string ("mime-expressions");
+}
+
+/*
+ * Rspamd regexp utility functions
+ */
+static struct rspamd_regexp_atom *
+rspamd_mime_expr_parse_regexp_atom (rspamd_mempool_t * pool, const gchar *line)
+{
+	const gchar *begin, *end, *p, *src, *start;
+	gchar *dbegin, *dend;
+	struct rspamd_regexp_atom *result;
+	rspamd_regexp_t *re;
+	GError *err = NULL;
+	GString *re_flags;
+
+	if (line == NULL) {
+		msg_err ("cannot parse NULL line");
+		return NULL;
+	}
+
+	if ((re = rspamd_regexp_cache_query (NULL, line, NULL)) != NULL) {
+		return ((struct rspamd_regexp_atom *)rspamd_regexp_get_ud (re));
+	}
+
+	src = line;
+	result = rspamd_mempool_alloc0 (pool, sizeof (struct rspamd_regexp_atom));
+	/* Skip whitespaces */
+	while (g_ascii_isspace (*line)) {
+		line++;
+	}
+	if (*line == '\0') {
+		msg_warn ("got empty regexp");
+		return NULL;
+	}
+	start = line;
+	/* First try to find header name */
+	begin = strchr (line, '/');
+	if (begin != NULL) {
+		p = begin;
+		end = NULL;
+		while (p != line) {
+			if (*p == '=') {
+				end = p;
+				break;
+			}
+			p--;
+		}
+		if (end) {
+			result->header = rspamd_mempool_alloc (pool, end - line + 1);
+			rspamd_strlcpy (result->header, line, end - line + 1);
+			result->type = REGEXP_HEADER;
+			line = end;
+		}
+	}
+	else {
+		result->header = rspamd_mempool_strdup (pool, line);
+		result->type = REGEXP_HEADER;
+		line = start;
+	}
+	/* Find begin of regexp */
+	while (*line && *line != '/') {
+		line++;
+	}
+	if (*line != '\0') {
+		begin = line + 1;
+	}
+	else if (result->header == NULL) {
+		/* Assume that line without // is just a header name */
+		result->header = rspamd_mempool_strdup (pool, line);
+		result->type = REGEXP_HEADER;
+		return result;
+	}
+	else {
+		/* We got header name earlier but have not found // expression, so it is invalid regexp */
+		msg_warn (
+			"got no header name (eg. header=) but without corresponding regexp, %s",
+			src);
+		return NULL;
+	}
+	/* Find end */
+	end = begin;
+	while (*end && (*end != '/' || *(end - 1) == '\\')) {
+		end++;
+	}
+	if (end == begin || *end != '/') {
+		msg_warn ("no trailing / in regexp %s", src);
+		return NULL;
+	}
+	/* Parse flags */
+	p = end + 1;
+	re_flags = g_string_sized_new (32);
+	while (p != NULL) {
+		switch (*p) {
+		case 'i':
+		case 'm':
+		case 's':
+		case 'x':
+		case 'u':
+		case 'O':
+		case 'r':
+			g_string_append_c (re_flags, *p);
+			p++;
+			break;
+		case 'o':
+			p++;
+			break;
+		/* Type flags */
+		case 'H':
+			if (result->type == REGEXP_NONE) {
+				result->type = REGEXP_HEADER;
+			}
+			p++;
+			break;
+		case 'M':
+			if (result->type == REGEXP_NONE) {
+				result->type = REGEXP_MESSAGE;
+			}
+			p++;
+			break;
+		case 'P':
+			if (result->type == REGEXP_NONE) {
+				result->type = REGEXP_MIME;
+			}
+			p++;
+			break;
+		case 'U':
+			if (result->type == REGEXP_NONE) {
+				result->type = REGEXP_URL;
+			}
+			p++;
+			break;
+		case 'X':
+			if (result->type == REGEXP_NONE || result->type == REGEXP_HEADER) {
+				result->type = REGEXP_RAW_HEADER;
+			}
+			p++;
+			break;
+		case 'T':
+			result->is_test = TRUE;
+			p++;
+			break;
+		case 'S':
+			result->is_strong = TRUE;
+			p++;
+			break;
+		case 'A':
+			result->is_multiple = TRUE;
+			p++;
+			break;
+		/* Stop flags parsing */
+		default:
+			p = NULL;
+			break;
+		}
+	}
+
+	result->regexp_text = rspamd_mempool_strdup (pool, start);
+	dbegin = result->regexp_text + (begin - start);
+	dend = result->regexp_text + (end - start);
+	*dend = '\0';
+
+	result->regexp = rspamd_regexp_new (dbegin, re_flags->str,
+			&err);
+
+	g_string_free (re_flags, TRUE);
+
+	if (result->regexp == NULL || err != NULL) {
+		msg_warn ("could not read regexp: %s while reading regexp %s",
+				err ? err->message : "unknown error",
+						src);
+		return NULL;
+	}
+
+	rspamd_mempool_add_destructor (pool,
+		(rspamd_mempool_destruct_t) rspamd_regexp_unref,
+		(void *)result->regexp);
+
+	rspamd_regexp_set_ud (result->regexp, result);
+
+	rspamd_regexp_cache_insert (NULL, line, NULL, result->regexp);
+
+	*dend = '/';
+
+	return result;
+}
+
+struct rspamd_function_atom *
+rspamd_mime_expr_parse_function_atom (const gchar *input)
+{
+	const gchar *obrace, *ebrace, *p, *c;
+	gchar t, *databuf;
+	struct rspamd_function_atom *res;
+	struct expression_argument arg;
+	GError *err = NULL;
+	enum {
+		start_read_argument = 0,
+		in_string,
+		in_regexp,
+		got_backslash,
+		got_comma
+	} state, prev_state = 0;
+
+	obrace = strchr (input, '(');
+	ebrace = strrchr (input, ')');
+
+	g_assert (obrace != NULL && ebrace != NULL);
+
+	res = g_slice_alloc0 (sizeof (*res));
+	res->name = g_malloc (obrace - input + 1);
+	rspamd_strlcpy (res->name, input, obrace - input + 1);
+	res->args = g_array_new (FALSE, FALSE, sizeof (struct expression_argument));
+
+	p = obrace + 1;
+	c = p;
+	state = start_read_argument;
+
+	/* Read arguments */
+	while (p <= ebrace) {
+		t = *p;
+		switch (state) {
+		case start_read_argument:
+			if (t == '/') {
+				state = in_regexp;
+				c = p;
+			}
+			else if (!g_ascii_isspace (t)) {
+				state = in_string;
+				c = p;
+			}
+			p ++;
+			break;
+		case in_regexp:
+			if (t == '\\') {
+				state = got_backslash;
+				prev_state = in_regexp;
+			}
+			else if (t == ',' || p == ebrace) {
+				databuf = g_malloc (p - c + 1);
+				rspamd_strlcpy (databuf, c, p - c + 1);
+				arg.type = EXPRESSION_ARGUMENT_REGEXP;
+				arg.data = rspamd_regexp_cache_create (NULL, databuf, NULL, &err);
+
+				if (arg.data == NULL) {
+					/* Fallback to string */
+					msg_warn ("cannot parse slashed argument %s as regexp: %s",
+							databuf, err->message);
+					g_error_free (err);
+					arg.type = EXPRESSION_ARGUMENT_NORMAL;
+					arg.data = databuf;
+				}
+				else {
+					g_free (databuf);
+				}
+
+				g_array_append_val (res->args, arg);
+			}
+			p ++;
+			break;
+		case in_string:
+			if (t == '\\') {
+				state = got_backslash;
+				prev_state = in_string;
+			}
+			else if (t == ',' || p == ebrace) {
+				databuf = g_malloc (p - c + 1);
+				rspamd_strlcpy (databuf, c, p - c + 1);
+				arg.type = EXPRESSION_ARGUMENT_NORMAL;
+				arg.data = databuf;
+				g_array_append_val (res->args, arg);
+			}
+			p ++;
+			break;
+		case got_backslash:
+			state = prev_state;
+			p ++;
+			break;
+		case got_comma:
+			state = start_read_argument;
+			break;
+		}
+	}
+
+	return res;
+}
+
+static rspamd_expression_atom_t *
+rspamd_mime_expr_parse (const gchar *line, gsize len,
+		rspamd_mempool_t *pool, gpointer ud, GError **err)
+{
+	rspamd_expression_atom_t *a = NULL;
+	struct rspamd_mime_atom *mime_atom = NULL;
+	const gchar *p, *end;
+	gchar t;
+	gboolean is_function = FALSE;
+	enum {
+		in_header = 0,
+		got_slash,
+		in_regexp,
+		got_backslash,
+		got_second_slash,
+		in_flags,
+		got_obrace,
+		in_function,
+		got_ebrace,
+		end_atom,
+		bad_atom
+	} state = 0, prev_state = 0;
+
+	p = line;
+	end = p + len;
+
+	while (p < end) {
+		t = *p;
+
+		switch (state) {
+		case in_header:
+			if (t == '/') {
+				/* Regexp */
+				state = got_slash;
+			}
+			else if (t == '(') {
+				/* Function */
+				state = got_obrace;
+			}
+			else if (g_ascii_isspace (t)) {
+				state = bad_atom;
+			}
+			p ++;
+			break;
+		case got_slash:
+			state = in_regexp;
+			break;
+		case in_regexp:
+			if (t == '\\') {
+				state = got_backslash;
+				prev_state = in_regexp;
+			}
+			else if (t == '/') {
+				state = got_second_slash;
+			}
+			p ++;
+			break;
+		case got_second_slash:
+			state = in_flags;
+			break;
+		case in_flags:
+			if (!g_ascii_isalpha (t)) {
+				state = end_atom;
+			}
+			else {
+				p ++;
+			}
+			break;
+		case got_backslash:
+			state = prev_state;
+			p ++;
+			break;
+		case got_obrace:
+			state = in_function;
+			is_function = TRUE;
+			break;
+		case in_function:
+			if (t == '\\') {
+				state = got_backslash;
+				prev_state = in_function;
+			}
+			else if (t == ')') {
+				state = got_ebrace;
+			}
+			p ++;
+			break;
+		case got_ebrace:
+			state = end_atom;
+			break;
+		case bad_atom:
+			g_set_error (err, rspamd_mime_expr_quark(), 100, "cannot parse"
+					" mime atom '%*.s' when reading symbol '%c'", (gint)len, line, t);
+			return NULL;
+		case end_atom:
+			goto set;
+		}
+	}
+set:
+
+	if (p - line == 0 || (state != got_ebrace || state != got_second_slash ||
+			state != in_flags)) {
+		g_set_error (err, rspamd_mime_expr_quark(), 200, "inclomplete or empty"
+				" mime atom");
+		return NULL;
+	}
+
+	mime_atom = g_slice_alloc (sizeof (*mime_atom));
+	mime_atom->is_function = is_function;
+	mime_atom->str = g_malloc (p - line + 1);
+	rspamd_strlcpy (mime_atom->str, line, p - line + 1);
+
+	if (!is_function) {
+		mime_atom->d.re = rspamd_mime_expr_parse_regexp_atom (pool,
+				mime_atom->str);
+		if (mime_atom->d.re == NULL) {
+			g_set_error (err, rspamd_mime_expr_quark(), 200, "cannot parse regexp '%s'",
+					mime_atom->str);
+			goto err;
+		}
+	}
+	else {
+		mime_atom->d.func = rspamd_mime_expr_parse_function_atom (mime_atom->str);
+		if (mime_atom->d.func == NULL) {
+			g_set_error (err, rspamd_mime_expr_quark(), 200, "cannot parse function '%s'",
+					mime_atom->str);
+			goto err;
+		}
+	}
+
+	a = rspamd_mempool_alloc (pool, sizeof (*a));
+	a->len = p - line;
+	a->priority = 0;
+	a->data = mime_atom;
+
+	return a;
+
+err:
+	if (mime_atom != NULL) {
+		g_free (mime_atom->str);
+		g_slice_free1 (sizeof (*mime_atom), mime_atom);
+	}
+
+	return NULL;
+}
+
+static gint
+rspamd_mime_regexp_element_process (struct rspamd_task *task,
+		struct rspamd_regexp_atom *re, const gchar *data, gsize len,
+		gboolean raw)
+{
+	guint r = 0;
+	const gchar *start = NULL, *end = NULL;
+
+	if ((r = rspamd_task_re_cache_check (task, re->regexp_text)) !=
+			RSPAMD_TASK_CACHE_NO_VALUE) {
+		debug_task ("regexp /%s/ is found in cache, result: %d",
+				re->regexp_text, r);
+		return r;
+	}
+
+	if (len == 0) {
+		len = strlen (data);
+	}
+
+	if (max_re_data != 0 && len > max_re_data) {
+		msg_info ("<%s> skip data of size %Hud",
+							task->message_id,
+							len);
+
+		return 0;
+	}
+
+	while (rspamd_regexp_search (re->regexp, data, len, &start, &end, raw)) {
+		if (G_UNLIKELY (re->is_test)) {
+			msg_info (
+					"process test regexp %s for header %s with value '%s' returned TRUE",
+					re->regexp_text,
+					re->header,
+					data);
+		}
+		r++;
+
+		if (!re->is_multiple) {
+			break;
+		}
+	}
+
+	if (r > 0) {
+		rspamd_task_re_cache_add (task, re->regexp_text, r);
+	}
+
+	return r;
+}
+
+struct url_regexp_param {
+	struct rspamd_task *task;
+	rspamd_regexp_t *regexp;
+	struct rspamd_regexp_atom *re;
+	gboolean found;
+};
+
+static gboolean
+tree_url_callback (gpointer key, gpointer value, void *data)
+{
+	struct url_regexp_param *param = data;
+	struct rspamd_url *url = value;
+
+	if (rspamd_mime_regexp_element_process (param->task, param->re,
+			struri (url), 0, FALSE)) {
+		param->found = TRUE;
+		return TRUE;
+	}
+	else if (G_UNLIKELY (param->re->is_test)) {
+		msg_info ("process test regexp %s for url %s returned FALSE",
+			struri (url));
+	}
+
+	return FALSE;
+}
+
+static gint
+rspamd_mime_expr_process_regexp (struct rspamd_regexp_atom *re,
+		struct rspamd_task *task)
+{
+	guint8 *ct;
+	gsize clen;
+	gboolean raw = FALSE;
+	const gchar *in;
+
+	GList *cur, *headerlist;
+	rspamd_regexp_t *regexp;
+	struct url_regexp_param callback_param = {
+		.task = task,
+		.re = re,
+		.found = FALSE
+	};
+	struct mime_text_part *part;
+	struct raw_header *rh;
+
+	if (re == NULL) {
+		msg_info ("invalid regexp passed");
+		return 0;
+	}
+
+	callback_param.regexp = re->regexp;
+
+
+	switch (re->type) {
+	case REGEXP_NONE:
+		msg_warn ("bad error detected: %s has invalid regexp type",
+			re->regexp_text);
+		break;
+	case REGEXP_HEADER:
+	case REGEXP_RAW_HEADER:
+		/* Check header's name */
+		if (re->header == NULL) {
+			msg_info ("header regexp without header name: '%s'",
+				re->regexp_text);
+			rspamd_task_re_cache_add (task, re->regexp_text, 0);
+			return 0;
+		}
+		debug_task ("checking %s header regexp: %s = %s",
+			re->type == REGEXP_RAW_HEADER ? "raw" : "decoded",
+			re->header,
+			re->regexp_text);
+
+		/* Get list of specified headers */
+		headerlist = message_get_header (task,
+				re->header,
+				re->is_strong);
+		if (headerlist == NULL) {
+			/* Header is not found */
+			if (G_UNLIKELY (re->is_test)) {
+				msg_info (
+					"process test regexp %s for header %s returned FALSE: no header found",
+					re->regexp_text,
+					re->header);
+			}
+			rspamd_task_re_cache_add (task, re->regexp_text, 0);
+			return 0;
+		}
+		else {
+			/* Check whether we have regexp for it */
+			if (re->regexp == NULL) {
+				debug_task ("regexp contains only header and it is found %s",
+					re->header);
+				rspamd_task_re_cache_add (task, re->regexp_text, 1);
+				return 1;
+			}
+			/* Iterate through headers */
+			cur = headerlist;
+			while (cur) {
+				rh = cur->data;
+				debug_task ("found header \"%s\" with value \"%s\"",
+					re->header, rh->decoded);
+				regexp = re->regexp;
+
+				if (re->type == REGEXP_RAW_HEADER) {
+					in = rh->value;
+					raw = TRUE;
+				}
+				else {
+					in = rh->decoded;
+					/* Validate input */
+					if (!in || !g_utf8_validate (in, -1, NULL)) {
+						cur = g_list_next (cur);
+						continue;
+					}
+				}
+
+				/* Match re */
+				if (in && rspamd_mime_regexp_element_process (task, re, in,
+						strlen (in), raw)) {
+
+					return 1;
+				}
+
+				cur = g_list_next (cur);
+			}
+
+			rspamd_task_re_cache_add (task, re->regexp_text, 0);
+		}
+		break;
+	case REGEXP_MIME:
+		debug_task ("checking mime regexp: %s", re->regexp_text);
+		/* Iterate throught text parts */
+		cur = g_list_first (task->text_parts);
+		while (cur) {
+			part = (struct mime_text_part *)cur->data;
+			/* Skip empty parts */
+			if (part->is_empty) {
+				cur = g_list_next (cur);
+				continue;
+			}
+
+			/* Check raw flags */
+			if (part->is_raw) {
+				raw = TRUE;
+			}
+			/* Select data for regexp */
+			if (raw) {
+				ct = part->orig->data;
+				clen = part->orig->len;
+			}
+			else {
+				ct = part->content->data;
+				clen = part->content->len;
+			}
+			/* If we have limit, apply regexp so much times as we can */
+			if (rspamd_mime_regexp_element_process (task, re, ct, clen, raw)) {
+				return 1;
+			}
+			cur = g_list_next (cur);
+		}
+		rspamd_task_re_cache_add (task, re->regexp_text, 0);
+		break;
+	case REGEXP_MESSAGE:
+		debug_task ("checking message regexp: %s", re->regexp_text);
+		raw = TRUE;
+		ct = (guint8 *)task->msg.start;
+		clen = task->msg.len;
+
+		if (rspamd_mime_regexp_element_process (task, re, ct, clen, raw)) {
+			return 1;
+		}
+		rspamd_task_re_cache_add (task, re->regexp_text, 0);
+		break;
+	case REGEXP_URL:
+		debug_task ("checking url regexp: %s", re->regexp_text);
+		regexp = re->regexp;
+		callback_param.task = task;
+		callback_param.regexp = regexp;
+		callback_param.re = re;
+		callback_param.found = FALSE;
+		if (task->urls) {
+			g_tree_foreach (task->urls, tree_url_callback, &callback_param);
+		}
+		if (task->emails && callback_param.found == FALSE) {
+			g_tree_foreach (task->emails, tree_url_callback, &callback_param);
+		}
+		if (callback_param.found == FALSE) {
+			rspamd_task_re_cache_add (task, re->regexp_text, 0);
+		}
+		break;
+	default:
+		msg_warn ("bad error detected: %p is not a valid regexp object", re);
+		break;
+	}
+
+	return 0;
+}
+
+
+static gint
+rspamd_mime_expr_priority (rspamd_expression_atom_t *atom)
+{
+	/* TODO: implement priorities for mime expressions */
+	return 0;
+}
+
+static void
+rspamd_mime_expr_destroy (rspamd_expression_atom_t *atom)
+{
+	struct rspamd_mime_atom *mime_atom = atom->data;
+	guint i;
+	struct expression_argument *arg;
+
+	if (mime_atom) {
+		if (mime_atom->is_function) {
+			/* Need to cleanup arguments */
+			for (i = 0; i < mime_atom->d.func->args->len; i ++) {
+				arg = &g_array_index (mime_atom->d.func->args,
+						struct expression_argument, i);
+
+				if (arg->type == EXPRESSION_ARGUMENT_NORMAL) {
+					g_free (arg->data);
+				}
+			}
+			g_array_free (mime_atom->d.func->args, TRUE);
+		}
+		/* XXX: regexp shouldn't be special */
+		g_slice_free1 (sizeof (*mime_atom), mime_atom);
+	}
+}
+
+static gboolean
+rspamd_mime_expr_process_function (struct rspamd_function_atom * func,
+	struct rspamd_task * task,
+	lua_State *L)
+{
+	struct _fl *selected, key;
+
+	key.name = func->name;
+
+	selected = bsearch (&key,
+			list_ptr,
+			functions_number,
+			sizeof (struct _fl),
+			fl_cmp);
+	if (selected == NULL) {
+		/* Try to check lua function */
+		return FALSE;
+	}
+
+	return selected->func (task, func->args, selected->user_data);
+}
+
+static gint
+rspamd_mime_expr_process (gpointer input, rspamd_expression_atom_t *atom)
+{
+	struct rspamd_task *task = input;
+	struct rspamd_mime_atom *mime_atom;
+	gint ret = 0;
+
+	g_assert (task != NULL);
+	g_assert (atom != NULL);
+
+	mime_atom = atom->data;
+
+	if (!mime_atom->is_function) {
+		ret = rspamd_mime_expr_process_regexp (mime_atom->d.re, task);
+	}
+	else {
+		ret = rspamd_mime_expr_process_function (mime_atom->d.func, task,
+				task->cfg->lua_state);
+	}
+
+	return ret;
+}
+
+void
+register_expression_function (const gchar *name,
+	rspamd_internal_func_t func,
+	void *user_data)
+{
+	static struct _fl *new;
+
+	functions_number++;
+
+	new = g_new (struct _fl, functions_number);
+	memcpy (new, list_ptr, (functions_number - 1) * sizeof (struct _fl));
+	if (list_allocated) {
+		g_free (list_ptr);
+	}
+
+	list_allocated = TRUE;
+	new[functions_number - 1].name = name;
+	new[functions_number - 1].func = func;
+	new[functions_number - 1].user_data = user_data;
+	qsort (new, functions_number, sizeof (struct _fl), fl_cmp);
+	list_ptr = new;
+}
+
+gboolean
+rspamd_compare_encoding (struct rspamd_task *task, GArray * args, void *unused)
+{
+	struct expression_argument *arg;
+
+	if (args == NULL || task == NULL) {
+		return FALSE;
+	}
+
+	arg = &g_array_index (args, struct expression_argument, 0);
+	if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
+		msg_warn ("invalid argument to function is passed");
+		return FALSE;
+	}
+
+	/* XXX: really write this function */
+	return TRUE;
+}
+
+gboolean
+rspamd_header_exists (struct rspamd_task * task, GArray * args, void *unused)
+{
+	struct expression_argument *arg;
+	GList *headerlist;
+
+	if (args == NULL || task == NULL) {
+		return FALSE;
+	}
+
+	arg = &g_array_index (args, struct expression_argument, 0);
+	if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
+		msg_warn ("invalid argument to function is passed");
+		return FALSE;
+	}
+
+	debug_task ("try to get header %s", (gchar *)arg->data);
+	headerlist = message_get_header (task,
+			(gchar *)arg->data,
+			FALSE);
+	if (headerlist) {
+		return TRUE;
+	}
+	return FALSE;
+}
+
+/*
+ * This function is designed to find difference between text/html and text/plain parts
+ * It takes one argument: difference threshold, if we have two text parts, compare
+ * its hashes and check for threshold, if value is greater than threshold, return TRUE
+ * and return FALSE otherwise.
+ */
+gboolean
+rspamd_parts_distance (struct rspamd_task * task, GArray * args, void *unused)
+{
+	gint threshold, threshold2 = -1, diff;
+	struct mime_text_part *p1, *p2;
+	GList *cur;
+	struct expression_argument *arg;
+	GMimeObject *parent;
+	const GMimeContentType *ct;
+	gint *pdiff;
+
+	if (args == NULL || args->len == 0) {
+		debug_task ("no threshold is specified, assume it 100");
+		threshold = 100;
+	}
+	else {
+		errno = 0;
+		arg = &g_array_index (args, struct expression_argument, 0);
+		if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
+			msg_warn ("invalid argument to function is passed");
+			return FALSE;
+		}
+
+		threshold = strtoul ((gchar *)arg->data, NULL, 10);
+		if (errno != 0) {
+			msg_info ("bad numeric value for threshold \"%s\", assume it 100",
+				(gchar *)arg->data);
+			threshold = 100;
+		}
+		if (args->len == 1) {
+			arg = &g_array_index (args, struct expression_argument, 1);
+			if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
+				msg_warn ("invalid argument to function is passed");
+				return FALSE;
+			}
+
+			errno = 0;
+			threshold2 = strtoul ((gchar *)arg->data, NULL, 10);
+			if (errno != 0) {
+				msg_info ("bad numeric value for threshold \"%s\", ignore it",
+					(gchar *)arg->data);
+				threshold2 = -1;
+			}
+		}
+	}
+
+	if ((pdiff =
+		rspamd_mempool_get_variable (task->task_pool,
+		"parts_distance")) != NULL) {
+		diff = *pdiff;
+		if (diff != -1) {
+			if (threshold2 > 0) {
+				if (diff >=
+					MIN (threshold,
+					threshold2) && diff < MAX (threshold, threshold2)) {
+					return TRUE;
+				}
+			}
+			else {
+				if (diff <= threshold) {
+					return TRUE;
+				}
+			}
+			return FALSE;
+		}
+		else {
+			return FALSE;
+		}
+	}
+
+	if (g_list_length (task->text_parts) == 2) {
+		cur = g_list_first (task->text_parts);
+		p1 = cur->data;
+		cur = g_list_next (cur);
+		pdiff = rspamd_mempool_alloc (task->task_pool, sizeof (gint));
+		*pdiff = -1;
+
+		if (cur == NULL) {
+			msg_info ("bad parts list");
+			return FALSE;
+		}
+		p2 = cur->data;
+		/* First of all check parent object */
+		if (p1->parent && p1->parent == p2->parent) {
+			parent = p1->parent;
+			ct = g_mime_object_get_content_type (parent);
+#ifndef GMIME24
+			if (ct == NULL ||
+				!g_mime_content_type_is_type (ct, "multipart", "alternative")) {
+#else
+			if (ct == NULL ||
+				!g_mime_content_type_is_type ((GMimeContentType *)ct,
+				"multipart", "alternative")) {
+#endif
+				debug_task (
+					"two parts are not belong to multipart/alternative container, skip check");
+				rspamd_mempool_set_variable (task->task_pool,
+					"parts_distance",
+					pdiff,
+					NULL);
+				return FALSE;
+			}
+		}
+		else {
+			debug_task (
+				"message contains two parts but they are in different multi-parts");
+			rspamd_mempool_set_variable (task->task_pool,
+				"parts_distance",
+				pdiff,
+				NULL);
+			return FALSE;
+		}
+		if (!p1->is_empty && !p2->is_empty) {
+			if (p1->diff_str != NULL && p2->diff_str != NULL) {
+				diff = rspamd_diff_distance_normalized (p1->diff_str,
+						p2->diff_str);
+			}
+			else {
+				diff = rspamd_fuzzy_compare_parts (p1, p2);
+			}
+			debug_task (
+				"got likeliness between parts of %d%%, threshold is %d%%",
+				diff,
+				threshold);
+			*pdiff = diff;
+			rspamd_mempool_set_variable (task->task_pool,
+				"parts_distance",
+				pdiff,
+				NULL);
+			if (threshold2 > 0) {
+				if (diff >=
+					MIN (threshold,
+					threshold2) && diff < MAX (threshold, threshold2)) {
+					return TRUE;
+				}
+			}
+			else {
+				if (diff <= threshold) {
+					return TRUE;
+				}
+			}
+		}
+		else if ((p1->is_empty &&
+			!p2->is_empty) || (!p1->is_empty && p2->is_empty)) {
+			/* Empty and non empty parts are different */
+			*pdiff = 0;
+			rspamd_mempool_set_variable (task->task_pool,
+				"parts_distance",
+				pdiff,
+				NULL);
+			return TRUE;
+		}
+	}
+	else {
+		debug_task (
+			"message has too many text parts, so do not try to compare them with each other");
+		rspamd_mempool_set_variable (task->task_pool,
+			"parts_distance",
+			pdiff,
+			NULL);
+		return FALSE;
+	}
+
+	rspamd_mempool_set_variable (task->task_pool, "parts_distance", pdiff,
+		NULL);
+	return FALSE;
+}
+
+struct addr_list {
+	const gchar *name;
+	const gchar *addr;
+};
+
+#define COMPARE_RCPT_LEN 3
+#define MIN_RCPT_TO_COMPARE 7
+
+gboolean
+rspamd_recipients_distance (struct rspamd_task *task, GArray * args,
+	void *unused)
+{
+	struct expression_argument *arg;
+	InternetAddressList *cur;
+	double threshold;
+	struct addr_list *ar;
+	gchar *c;
+	gint num, i, j, hits = 0, total = 0;
+
+	if (args == NULL) {
+		msg_warn ("no parameters to function");
+		return FALSE;
+	}
+
+	arg = &g_array_index (args, struct expression_argument, 0);
+	if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
+		msg_warn ("invalid argument to function is passed");
+		return FALSE;
+	}
+
+	errno = 0;
+	threshold = strtod ((gchar *)arg->data, NULL);
+
+	if (errno != 0) {
+		msg_warn ("invalid numeric value '%s': %s",
+			(gchar *)arg->data,
+			strerror (errno));
+		return FALSE;
+	}
+
+	if (!task->rcpt_mime) {
+		return FALSE;
+	}
+
+	num = internet_address_list_length (task->rcpt_mime);
+
+	if (num < MIN_RCPT_TO_COMPARE) {
+		return FALSE;
+	}
+	ar =
+		rspamd_mempool_alloc0 (task->task_pool, num *
+			sizeof (struct addr_list));
+
+	/* Fill array */
+	cur = task->rcpt_mime;
+#ifdef GMIME24
+	for (i = 0; i < num; i++) {
+		InternetAddress *iaelt =
+			internet_address_list_get_address(cur, i);
+		InternetAddressMailbox *iamb =
+			INTERNET_ADDRESS_IS_MAILBOX(iaelt) ?
+			INTERNET_ADDRESS_MAILBOX (iaelt) : NULL;
+		if (iamb) {
+			ar[i].name = internet_address_mailbox_get_addr (iamb);
+			if (ar[i].name != NULL && (c = strchr (ar[i].name, '@')) != NULL) {
+				ar[i].addr = c + 1;
+			}
+		}
+	}
+#else
+	InternetAddress *addr;
+	i = 0;
+	while (cur) {
+		addr = internet_address_list_get_address (cur);
+		if (addr && internet_address_get_type (addr) == INTERNET_ADDRESS_NAME) {
+			ar[i].name = rspamd_mempool_strdup (task->task_pool,
+					internet_address_get_addr (addr));
+			if (ar[i].name != NULL && (c = strchr (ar[i].name, '@')) != NULL) {
+				*c = '\0';
+				ar[i].addr = c + 1;
+			}
+			cur = internet_address_list_next (cur);
+			i++;
+		}
+		else {
+			cur = internet_address_list_next (cur);
+		}
+	}
+#endif
+
+	/* Cycle all elements in array */
+	for (i = 0; i < num; i++) {
+		for (j = i + 1; j < num; j++) {
+			if (ar[i].name && ar[j].name &&
+				g_ascii_strncasecmp (ar[i].name, ar[j].name,
+				COMPARE_RCPT_LEN) == 0) {
+				/* Common name part */
+				hits++;
+			}
+			else if (ar[i].addr && ar[j].addr &&
+				g_ascii_strcasecmp (ar[i].addr, ar[j].addr) == 0) {
+				/* Common address part, but different name */
+				hits++;
+			}
+			total++;
+		}
+	}
+
+	if ((double)(hits * num / 2.) / (double)total >= threshold) {
+		return TRUE;
+	}
+
+	return FALSE;
+}
+
+gboolean
+rspamd_has_only_html_part (struct rspamd_task * task, GArray * args,
+	void *unused)
+{
+	struct mime_text_part *p;
+	GList *cur;
+	gboolean res = FALSE;
+
+	cur = g_list_first (task->text_parts);
+	while (cur) {
+		p = cur->data;
+		if (p->is_html) {
+			res = TRUE;
+		}
+		else {
+			res = FALSE;
+			break;
+		}
+		cur = g_list_next (cur);
+	}
+
+	return res;
+}
+
+static gboolean
+is_recipient_list_sorted (const InternetAddressList * ia)
+{
+	const InternetAddressList *cur;
+	InternetAddress *addr;
+	gboolean res = TRUE;
+	struct addr_list current = { NULL, NULL }, previous = {
+		NULL, NULL
+	};
+#ifdef GMIME24
+	gint num, i;
+#endif
+
+	/* Do not check to short address lists */
+	if (internet_address_list_length ((InternetAddressList *)ia) <
+		MIN_RCPT_TO_COMPARE) {
+		return FALSE;
+	}
+#ifdef GMIME24
+	num = internet_address_list_length ((InternetAddressList *)ia);
+	cur = ia;
+	for (i = 0; i < num; i++) {
+		addr =
+			internet_address_list_get_address ((InternetAddressList *)cur, i);
+		current.addr = (gchar *)internet_address_get_name (addr);
+		if (previous.addr != NULL) {
+			if (current.addr &&
+				g_ascii_strcasecmp (current.addr, previous.addr) < 0) {
+				res = FALSE;
+				break;
+			}
+		}
+		previous.addr = current.addr;
+	}
+#else
+	cur = ia;
+	while (cur) {
+		addr = internet_address_list_get_address (cur);
+		if (internet_address_get_type (addr) == INTERNET_ADDRESS_NAME) {
+			current.addr = internet_address_get_addr (addr);
+			if (previous.addr != NULL) {
+				if (current.addr &&
+					g_ascii_strcasecmp (current.addr, previous.addr) < 0) {
+					res = FALSE;
+					break;
+				}
+			}
+			previous.addr = current.addr;
+		}
+		cur = internet_address_list_next (cur);
+	}
+#endif
+
+	return res;
+}
+
+gboolean
+rspamd_is_recipients_sorted (struct rspamd_task * task,
+	GArray * args,
+	void *unused)
+{
+	/* Check all types of addresses */
+	if (is_recipient_list_sorted (g_mime_message_get_recipients (task->message,
+		GMIME_RECIPIENT_TYPE_TO)) == TRUE) {
+		return TRUE;
+	}
+	if (is_recipient_list_sorted (g_mime_message_get_recipients (task->message,
+		GMIME_RECIPIENT_TYPE_BCC)) == TRUE) {
+		return TRUE;
+	}
+	if (is_recipient_list_sorted (g_mime_message_get_recipients (task->message,
+		GMIME_RECIPIENT_TYPE_CC)) == TRUE) {
+		return TRUE;
+	}
+
+	return FALSE;
+}
+
+gboolean
+rspamd_compare_transfer_encoding (struct rspamd_task * task,
+	GArray * args,
+	void *unused)
+{
+	GMimeObject *part;
+#ifndef GMIME24
+	GMimePartEncodingType enc_req, part_enc;
+#else
+	GMimeContentEncoding enc_req, part_enc;
+#endif
+	struct expression_argument *arg;
+
+	if (args == NULL) {
+		msg_warn ("no parameters to function");
+		return FALSE;
+	}
+
+	arg = &g_array_index (args, struct expression_argument, 0);
+	if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
+		msg_warn ("invalid argument to function is passed");
+		return FALSE;
+	}
+
+#ifndef GMIME24
+	enc_req = g_mime_part_encoding_from_string (arg->data);
+	if (enc_req == GMIME_PART_ENCODING_DEFAULT) {
+#else
+	enc_req = g_mime_content_encoding_from_string (arg->data);
+	if (enc_req == GMIME_CONTENT_ENCODING_DEFAULT) {
+#endif
+		msg_warn ("bad encoding type: %s", (gchar *)arg->data);
+		return FALSE;
+	}
+
+	part = g_mime_message_get_mime_part (task->message);
+	if (part) {
+		if (GMIME_IS_PART (part)) {
+#ifndef GMIME24
+			part_enc = g_mime_part_get_encoding (GMIME_PART (part));
+			if (part_enc == GMIME_PART_ENCODING_DEFAULT) {
+				/* Assume 7bit as default transfer encoding */
+				part_enc = GMIME_PART_ENCODING_7BIT;
+			}
+#else
+			part_enc = g_mime_part_get_content_encoding (GMIME_PART (part));
+			if (part_enc == GMIME_CONTENT_ENCODING_DEFAULT) {
+				/* Assume 7bit as default transfer encoding */
+				part_enc = GMIME_CONTENT_ENCODING_7BIT;
+			}
+#endif
+
+
+			debug_task ("got encoding in part: %d and compare with %d",
+				(gint)part_enc,
+				(gint)enc_req);
+#ifndef GMIME24
+			g_object_unref (part);
+#endif
+
+			return part_enc == enc_req;
+		}
+#ifndef GMIME24
+		g_object_unref (part);
+#endif
+	}
+
+	return FALSE;
+}
+
+gboolean
+rspamd_is_html_balanced (struct rspamd_task * task, GArray * args, void *unused)
+{
+	struct mime_text_part *p;
+	GList *cur;
+	gboolean res = TRUE;
+
+	cur = g_list_first (task->text_parts);
+	while (cur) {
+		p = cur->data;
+		if (!p->is_empty && p->is_html) {
+			if (p->is_balanced) {
+				res = TRUE;
+			}
+			else {
+				res = FALSE;
+				break;
+			}
+		}
+		cur = g_list_next (cur);
+	}
+
+	return res;
+
+}
+
+struct html_callback_data {
+	struct html_tag *tag;
+	gboolean *res;
+};
+
+static gboolean
+search_html_node_callback (GNode * node, gpointer data)
+{
+	struct html_callback_data *cd = data;
+	struct html_node *nd;
+
+	nd = node->data;
+	if (nd) {
+		if (nd->tag == cd->tag) {
+			*cd->res = TRUE;
+			return TRUE;
+		}
+	}
+
+	return FALSE;
+}
+
+gboolean
+rspamd_has_html_tag (struct rspamd_task * task, GArray * args, void *unused)
+{
+	struct mime_text_part *p;
+	GList *cur;
+	struct expression_argument *arg;
+	struct html_tag *tag;
+	gboolean res = FALSE;
+	struct html_callback_data cd;
+
+	if (args == NULL) {
+		msg_warn ("no parameters to function");
+		return FALSE;
+	}
+
+	arg = &g_array_index (args, struct expression_argument, 0);
+	if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
+		msg_warn ("invalid argument to function is passed");
+		return FALSE;
+	}
+
+	tag = get_tag_by_name (arg->data);
+	if (tag == NULL) {
+		msg_warn ("unknown tag type passed as argument: %s",
+			(gchar *)arg->data);
+		return FALSE;
+	}
+
+	cur = g_list_first (task->text_parts);
+	cd.res = &res;
+	cd.tag = tag;
+
+	while (cur && res == FALSE) {
+		p = cur->data;
+		if (!p->is_empty && p->is_html && p->html_nodes) {
+			g_node_traverse (p->html_nodes,
+				G_PRE_ORDER,
+				G_TRAVERSE_ALL,
+				-1,
+				search_html_node_callback,
+				&cd);
+		}
+		cur = g_list_next (cur);
+	}
+
+	return res;
+
+}
+
+gboolean
+rspamd_has_fake_html (struct rspamd_task * task, GArray * args, void *unused)
+{
+	struct mime_text_part *p;
+	GList *cur;
+	gboolean res = FALSE;
+
+	cur = g_list_first (task->text_parts);
+
+	while (cur && res == FALSE) {
+		p = cur->data;
+		if (!p->is_empty && p->is_html && p->html_nodes == NULL) {
+			res = TRUE;
+		}
+		cur = g_list_next (cur);
+	}
+
+	return res;
+
+}
+
+static gboolean
+rspamd_raw_header_exists (struct rspamd_task *task, GArray * args, void *unused)
+{
+	struct expression_argument *arg;
+
+	if (args == NULL || task == NULL) {
+		return FALSE;
+	}
+
+	arg = &g_array_index (args, struct expression_argument, 0);
+	if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
+		msg_warn ("invalid argument to function is passed");
+		return FALSE;
+	}
+
+	return g_hash_table_lookup (task->raw_headers, arg->data) != NULL;
+}
+
+static gboolean
+match_smtp_data (struct rspamd_task *task,
+	struct expression_argument *arg,
+	const gchar *what)
+{
+	rspamd_regexp_t *re;
+	gint r;
+
+	if (arg->type == EXPRESSION_ARGUMENT_REGEXP) {
+		/* This is a regexp */
+		re = arg->data;
+		if (re == NULL) {
+			msg_warn ("cannot compile regexp for function");
+			return FALSE;
+		}
+
+		if ((r = rspamd_task_re_cache_check (task,
+				rspamd_regexp_get_pattern (re))) == -1) {
+			r = rspamd_regexp_search (re, what, 0, NULL, NULL, FALSE);
+			rspamd_task_re_cache_add (task, rspamd_regexp_get_pattern (re), r);
+		}
+		return r;
+	}
+	else if (arg->type == EXPRESSION_ARGUMENT_NORMAL &&
+			g_ascii_strcasecmp (arg->data, what) == 0) {
+		return TRUE;
+	}
+
+	return FALSE;
+}
+
+static gboolean
+rspamd_check_smtp_data (struct rspamd_task *task, GArray * args, void *unused)
+{
+	struct expression_argument *arg;
+	InternetAddressList *ia = NULL;
+	const gchar *type, *what = NULL;
+	gint i, ialen;
+
+	if (args == NULL) {
+		msg_warn ("no parameters to function");
+		return FALSE;
+	}
+
+	arg = &g_array_index (args, struct expression_argument, 0);
+
+	if (!arg || !arg->data || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
+		msg_warn ("no parameters to function");
+		return FALSE;
+	}
+	else {
+		type = arg->data;
+		switch (*type) {
+		case 'f':
+		case 'F':
+			if (g_ascii_strcasecmp (type, "from") == 0) {
+				what = rspamd_task_get_sender (task);
+			}
+			else {
+				msg_warn ("bad argument to function: %s", type);
+				return FALSE;
+			}
+			break;
+		case 'h':
+		case 'H':
+			if (g_ascii_strcasecmp (type, "helo") == 0) {
+				what = task->helo;
+			}
+			else {
+				msg_warn ("bad argument to function: %s", type);
+				return FALSE;
+			}
+			break;
+		case 'u':
+		case 'U':
+			if (g_ascii_strcasecmp (type, "user") == 0) {
+				what = task->user;
+			}
+			else {
+				msg_warn ("bad argument to function: %s", type);
+				return FALSE;
+			}
+			break;
+		case 's':
+		case 'S':
+			if (g_ascii_strcasecmp (type, "subject") == 0) {
+				what = task->subject;
+			}
+			else {
+				msg_warn ("bad argument to function: %s", type);
+				return FALSE;
+			}
+			break;
+		case 'r':
+		case 'R':
+			if (g_ascii_strcasecmp (type, "rcpt") == 0) {
+				ia = task->rcpt_mime;
+			}
+			else {
+				msg_warn ("bad argument to function: %s", type);
+				return FALSE;
+			}
+			break;
+		default:
+			msg_warn ("bad argument to function: %s", type);
+			return FALSE;
+		}
+	}
+
+	if (what == NULL && ia == NULL) {
+		/* Not enough data so regexp would NOT be found anyway */
+		return FALSE;
+	}
+
+	/* We would process only one more argument, others are ignored */
+	if (args->len >= 2) {
+		arg = &g_array_index (args, struct expression_argument, 1);
+		if (arg) {
+			if (what != NULL) {
+				return match_smtp_data (task, arg, what);
+			}
+			else {
+				if (ia != NULL) {
+					ialen = internet_address_list_length(ia);
+					for (i = 0; i < ialen; i ++) {
+						InternetAddress *iaelt =
+								internet_address_list_get_address(ia, i);
+						InternetAddressMailbox *iamb =
+							INTERNET_ADDRESS_IS_MAILBOX(iaelt) ?
+							INTERNET_ADDRESS_MAILBOX (iaelt) : NULL;
+						if (iamb &&
+							match_smtp_data (task, arg,
+								internet_address_mailbox_get_addr(iamb))) {
+							return TRUE;
+						}
+					}
+				}
+			}
+		}
+	}
+
+	return FALSE;
+}
+
+static gboolean
+rspamd_content_type_compare_param (struct rspamd_task * task,
+	GArray * args,
+	void *unused)
+{
+	const gchar *param_name;
+	const gchar *param_data;
+	rspamd_regexp_t *re;
+	struct expression_argument *arg, *arg1, *arg_pattern;
+	GMimeObject *part;
+	GMimeContentType *ct;
+	gint r;
+	gboolean recursive = FALSE, result = FALSE;
+	GList *cur = NULL;
+	struct mime_part *cur_part;
+
+	if (args == NULL || args->len < 2) {
+		msg_warn ("no parameters to function");
+		return FALSE;
+	}
+
+	arg = &g_array_index (args, struct expression_argument, 0);
+	g_assert (arg->type == EXPRESSION_ARGUMENT_NORMAL);
+	param_name = arg->data;
+	arg_pattern = &g_array_index (args, struct expression_argument, 1);
+
+
+	part = g_mime_message_get_mime_part (task->message);
+	if (part) {
+		ct = (GMimeContentType *)g_mime_object_get_content_type (part);
+		if (args->len >= 3) {
+			arg1 = &g_array_index (args, struct expression_argument, 2);
+			if (g_ascii_strncasecmp (arg1->data, "true",
+				sizeof ("true") - 1) == 0) {
+				recursive = TRUE;
+			}
+		}
+		else {
+			/*
+			 * If user did not specify argument, let's assume that he wants
+			 * recursive search if mime part is multipart/mixed
+			 */
+			if (g_mime_content_type_is_type (ct, "multipart", "*")) {
+				recursive = TRUE;
+			}
+		}
+
+		if (recursive) {
+			cur = task->parts;
+		}
+
+#ifndef GMIME24
+		g_object_unref (part);
+#endif
+		for (;; ) {
+			if ((param_data =
+				g_mime_content_type_get_parameter ((GMimeContentType *)ct,
+				param_name)) == NULL) {
+				result = FALSE;
+			}
+			else {
+				if (arg_pattern->type == EXPRESSION_ARGUMENT_REGEXP) {
+					re = arg_pattern->data;
+
+					if ((r = rspamd_task_re_cache_check (task,
+							rspamd_regexp_get_pattern (re))) == -1) {
+						r = rspamd_regexp_search (re, param_data, 0,
+								NULL, NULL, FALSE);
+						rspamd_task_re_cache_add (task,
+								rspamd_regexp_get_pattern (re), r);
+					}
+				}
+				else {
+					/* Just do strcasecmp */
+					if (g_ascii_strcasecmp (param_data, arg_pattern->data) == 0) {
+						return TRUE;
+					}
+				}
+			}
+			/* Get next part */
+			if (!recursive) {
+				return result;
+			}
+			else if (cur != NULL) {
+				cur_part = cur->data;
+				if (cur_part->type != NULL) {
+					ct = cur_part->type;
+				}
+				cur = g_list_next (cur);
+			}
+			else {
+				/* All is done */
+				return result;
+			}
+		}
+	}
+
+	return FALSE;
+}
+
+static gboolean
+rspamd_content_type_has_param (struct rspamd_task * task,
+	GArray * args,
+	void *unused)
+{
+	gchar *param_name;
+	const gchar *param_data;
+	struct expression_argument *arg, *arg1;
+	GMimeObject *part;
+	GMimeContentType *ct;
+	gboolean recursive = FALSE, result = FALSE;
+	GList *cur = NULL;
+	struct mime_part *cur_part;
+
+	if (args == NULL || args->len < 1) {
+		msg_warn ("no parameters to function");
+		return FALSE;
+	}
+
+	arg = &g_array_index (args, struct expression_argument, 0);
+	g_assert (arg->type == EXPRESSION_ARGUMENT_NORMAL);
+	param_name = arg->data;
+
+	part = g_mime_message_get_mime_part (task->message);
+	if (part) {
+		ct = (GMimeContentType *)g_mime_object_get_content_type (part);
+		if (args->len >= 2) {
+			arg1 = &g_array_index (args, struct expression_argument, 2);
+			if (g_ascii_strncasecmp (arg1->data, "true",
+					sizeof ("true") - 1) == 0) {
+				recursive = TRUE;
+			}
+		}
+		else {
+			/*
+			 * If user did not specify argument, let's assume that he wants
+			 * recursive search if mime part is multipart/mixed
+			 */
+			if (g_mime_content_type_is_type (ct, "multipart", "*")) {
+				recursive = TRUE;
+			}
+		}
+
+		if (recursive) {
+			cur = task->parts;
+		}
+
+#ifndef GMIME24
+		g_object_unref (part);
+#endif
+		for (;; ) {
+			if ((param_data =
+				g_mime_content_type_get_parameter ((GMimeContentType *)ct,
+				param_name)) != NULL) {
+				return TRUE;
+			}
+			/* Get next part */
+			if (!recursive) {
+				return result;
+			}
+			else if (cur != NULL) {
+				cur_part = cur->data;
+				if (cur_part->type != NULL) {
+					ct = cur_part->type;
+				}
+				cur = g_list_next (cur);
+			}
+			else {
+				/* All is done */
+				return result;
+			}
+		}
+
+	}
+
+	return TRUE;
+}
+
+static gboolean
+rspamd_content_type_check (struct rspamd_task *task,
+	GArray * args,
+	gboolean check_subtype)
+{
+	const gchar *param_data;
+	rspamd_regexp_t *re;
+	struct expression_argument *arg1, *arg_pattern;
+	GMimeObject *part;
+	GMimeContentType *ct;
+	gint r;
+	gboolean recursive = FALSE, result = FALSE;
+	GList *cur = NULL;
+	struct mime_part *cur_part;
+
+	if (args == NULL) {
+		msg_warn ("no parameters to function");
+		return FALSE;
+	}
+	arg_pattern = &g_array_index (args, struct expression_argument, 1);
+
+	part = g_mime_message_get_mime_part (task->message);
+	if (part) {
+		ct = (GMimeContentType *)g_mime_object_get_content_type (part);
+		if (args->len >= 2) {
+			arg1 = &g_array_index (args, struct expression_argument, 2);
+			if (g_ascii_strncasecmp (arg1->data, "true",
+					sizeof ("true") - 1) == 0) {
+				recursive = TRUE;
+			}
+		}
+		else {
+			/*
+			 * If user did not specify argument, let's assume that he wants
+			 * recursive search if mime part is multipart/mixed
+			 */
+			if (g_mime_content_type_is_type (ct, "multipart", "*")) {
+				recursive = TRUE;
+			}
+		}
+
+		if (recursive) {
+			cur = task->parts;
+		}
+
+#ifndef GMIME24
+		g_object_unref (part);
+#endif
+		for (;;) {
+
+			if (check_subtype) {
+				param_data = ct->subtype;
+			}
+			else {
+				param_data = ct->type;
+			}
+
+			if (arg_pattern->type == EXPRESSION_ARGUMENT_REGEXP) {
+				re = arg_pattern->data;
+
+				if ((r = rspamd_task_re_cache_check (task,
+						rspamd_regexp_get_pattern (re))) == -1) {
+					r = rspamd_regexp_search (re, param_data, 0,
+							NULL, NULL, FALSE);
+					rspamd_task_re_cache_add (task,
+							rspamd_regexp_get_pattern (re), r);
+				}
+			}
+			else {
+				/* Just do strcasecmp */
+				if (g_ascii_strcasecmp (param_data, arg_pattern->data) == 0) {
+					return TRUE;
+				}
+			}
+			/* Get next part */
+			if (!recursive) {
+				return result;
+			}
+			else if (cur != NULL) {
+				cur_part = cur->data;
+				if (cur_part->type != NULL) {
+					ct = cur_part->type;
+				}
+				cur = g_list_next (cur);
+			}
+			else {
+				/* All is done */
+				return result;
+			}
+		}
+
+	}
+
+	return FALSE;
+}
+
+static gboolean
+rspamd_content_type_is_type (struct rspamd_task * task,
+	GArray * args,
+	void *unused)
+{
+	return rspamd_content_type_check (task, args, FALSE);
+}
+
+static gboolean
+rspamd_content_type_is_subtype (struct rspamd_task * task,
+	GArray * args,
+	void *unused)
+{
+	return rspamd_content_type_check (task, args, TRUE);
+}
+
+static gboolean
+compare_subtype (struct rspamd_task *task, GMimeContentType * ct,
+	struct expression_argument *subtype)
+{
+	rspamd_regexp_t *re;
+	gint r = 0;
+
+	if (subtype == NULL || ct == NULL) {
+		msg_warn ("invalid parameters passed");
+		return FALSE;
+	}
+	if (subtype->type == EXPRESSION_ARGUMENT_REGEXP) {
+		re = subtype->data;
+
+		if ((r = rspamd_task_re_cache_check (task,
+				rspamd_regexp_get_pattern (re))) == -1) {
+			r = rspamd_regexp_search (re, ct->subtype, 0,
+					NULL, NULL, FALSE);
+			rspamd_task_re_cache_add (task,
+					rspamd_regexp_get_pattern (re), r);
+		}
+	}
+	else {
+		/* Just do strcasecmp */
+		if (ct->subtype && g_ascii_strcasecmp (ct->subtype, subtype->data) == 0) {
+			return TRUE;
+		}
+	}
+
+	return r;
+}
+
+static gboolean
+compare_len (struct mime_part *part, guint min, guint max)
+{
+	if (min == 0 && max == 0) {
+		return TRUE;
+	}
+
+	if (min == 0) {
+		return part->content->len <= max;
+	}
+	else if (max == 0) {
+		return part->content->len >= min;
+	}
+	else {
+		return part->content->len >= min && part->content->len <= max;
+	}
+}
+
+static gboolean
+common_has_content_part (struct rspamd_task * task,
+	struct expression_argument *param_type,
+	struct expression_argument *param_subtype,
+	gint min_len,
+	gint max_len)
+{
+	rspamd_regexp_t *re;
+	struct mime_part *part;
+	GList *cur;
+	GMimeContentType *ct;
+	gint r;
+
+	cur = g_list_first (task->parts);
+	while (cur) {
+		part = cur->data;
+		ct = part->type;
+		if (ct == NULL) {
+			cur = g_list_next (cur);
+			continue;
+		}
+
+		if (param_type->type == EXPRESSION_ARGUMENT_REGEXP) {
+			re = param_type->data;
+
+			if ((r = rspamd_task_re_cache_check (task,
+					rspamd_regexp_get_pattern (re))) == -1) {
+				r = rspamd_regexp_search (re, ct->type, 0,
+						NULL, NULL, FALSE);
+				/* Also check subtype and length of the part */
+				if (r && param_subtype) {
+					r = compare_len (part, min_len, max_len) &&
+						compare_subtype (task, ct, param_subtype);
+				}
+				rspamd_task_re_cache_add (task,
+						rspamd_regexp_get_pattern (re), r);
+			}
+		}
+		else {
+			/* Just do strcasecmp */
+			if (ct->type && g_ascii_strcasecmp (ct->type, param_type->data) == 0) {
+				if (param_subtype) {
+					if (compare_subtype (task, ct, param_subtype)) {
+						if (compare_len (part, min_len, max_len)) {
+							return TRUE;
+						}
+					}
+				}
+				else {
+					if (compare_len (part, min_len, max_len)) {
+						return TRUE;
+					}
+				}
+			}
+		}
+		cur = g_list_next (cur);
+	}
+
+	return FALSE;
+}
+
+static gboolean
+rspamd_has_content_part (struct rspamd_task * task, GArray * args, void *unused)
+{
+	struct expression_argument *param_type = NULL, *param_subtype = NULL;
+
+	if (args == NULL) {
+		msg_warn ("no parameters to function");
+		return FALSE;
+	}
+
+	param_type = &g_array_index (args, struct expression_argument, 0);
+	if (args->len >= 2) {
+		param_subtype = &g_array_index (args, struct expression_argument, 1);
+	}
+
+	return common_has_content_part (task, param_type, param_subtype, 0, 0);
+}
+
+static gboolean
+rspamd_has_content_part_len (struct rspamd_task * task,
+	GArray * args,
+	void *unused)
+{
+	struct expression_argument *param_type = NULL, *param_subtype = NULL;
+	gint min = 0, max = 0;
+	struct expression_argument *arg;
+
+	if (args == NULL) {
+		msg_warn ("no parameters to function");
+		return FALSE;
+	}
+
+	param_type = &g_array_index (args, struct expression_argument, 0);
+
+	if (args->len >= 2) {
+		param_subtype = &g_array_index (args, struct expression_argument, 1);
+
+		if (args->len >= 3) {
+			arg = &g_array_index (args, struct expression_argument, 2);
+			errno = 0;
+			min = strtoul (arg->data, NULL, 10);
+			g_assert (arg->type == EXPRESSION_ARGUMENT_NORMAL);
+
+			if (errno != 0) {
+				msg_warn ("invalid numeric value '%s': %s",
+					(gchar *)arg->data,
+					strerror (errno));
+				return FALSE;
+			}
+
+			if (args) {
+				arg = &g_array_index (args, struct expression_argument, 3);
+				g_assert (arg->type == EXPRESSION_ARGUMENT_NORMAL);
+				max = strtoul (arg->data, NULL, 10);
+
+				if (errno != 0) {
+					msg_warn ("invalid numeric value '%s': %s",
+						(gchar *)arg->data,
+						strerror (errno));
+					return FALSE;
+				}
+			}
+		}
+	}
+
+	return common_has_content_part (task, param_type, param_subtype, min, max);
+}
+
+guint
+rspamd_mime_expression_set_re_limit (guint limit)
+{
+	guint ret = max_re_data;
+
+	max_re_data = limit;
+	return ret;
+}
author	Vsevolod Stakhov <vsevolod@highsecure.ru>	2015-03-23 00:38:04 +0000
committer	Vsevolod Stakhov <vsevolod@highsecure.ru>	2015-03-23 00:38:04 +0000
commit	ff86598cc6d61f81cca34b956a064a657ed07c16 (patch)
tree	90439218952bf18bb44a55b3d6b1bb89a9b8e56f /src/libmime/mime_expressions.c
parent	58bcfe99a3ea3e39f2e851c7d12b190e6b7f9e3e (diff)
parent	2b584275d8aae7634204e047168e83f4b580ec42 (diff)
download	rspamd-ff86598cc6d61f81cca34b956a064a657ed07c16.tar.gz rspamd-ff86598cc6d61f81cca34b956a064a657ed07c16.zip