Rework rspamd mime regexp processing.

author: Vsevolod Stakhov <vsevolod@highsecure.ru> 2015-04-30 15:19:14 +0100
committer: Vsevolod Stakhov <vsevolod@highsecure.ru> 2015-04-30 15:19:14 +0100
commit: 633b66378c1eb7ea7f65887272eccc0d80fdc61a (patch)
tree: c6709cde32491340e46f790e7c85e250d0af727d /src
parent: f44290c814a4274f31dca274c4fd75471d58aa1b (diff)
download: rspamd-633b66378c1eb7ea7f65887272eccc0d80fdc61a.tar.gz
rspamd-633b66378c1eb7ea7f65887272eccc0d80fdc61a.zip
1 files changed, 70 insertions, 58 deletions
diff --git a/src/libmime/mime_expressions.c b/src/libmime/mime_expressions.c
index aaeb19619..16a3be875 100644
--- a/src/libmime/mime_expressions.c
+++ b/src/libmime/mime_expressions.c
@@ -723,10 +723,6 @@ rspamd_mime_regexp_element_process (struct rspamd_task *task,
 				r);
 	}
 
-	if (r > 0) {
-		rspamd_task_re_cache_add (task, re->regexp_text, r);
-	}
-
 	return r;
 }
 
@@ -734,7 +730,7 @@ struct url_regexp_param {
 	struct rspamd_task *task;
 	rspamd_regexp_t *regexp;
 	struct rspamd_regexp_atom *re;
-	gboolean found;
+	gint found;
 };
 
 static void
@@ -742,16 +738,18 @@ tree_url_callback (gpointer key, gpointer value, void *data)
 {
 	struct url_regexp_param *param = data;
 	struct rspamd_url *url = value;
+	gint ret;
 
-	if (param->found) {
+	if (param->found && ! param->re->is_multiple) {
 		return;
 	}
 
-	if (rspamd_mime_regexp_element_process (param->task, param->re,
-			struri (url), 0, FALSE)) {
-		param->found = TRUE;
-	}
-	else if (G_UNLIKELY (param->re->is_test)) {
+	ret = rspamd_mime_regexp_element_process (param->task, param->re,
+			struri (url), 0, FALSE);
+
+	param->found = ret;
+
+	if (G_UNLIKELY (param->re->is_test)) {
 		msg_info ("process test regexp %s for url %s returned FALSE",
 			struri (url));
 	}
@@ -765,7 +763,7 @@ rspamd_mime_expr_process_regexp (struct rspamd_regexp_atom *re,
 	gsize clen;
 	gboolean raw = FALSE;
 	const gchar *in;
-
+	gint ret = 0;
 	GList *cur, *headerlist;
 	rspamd_regexp_t *regexp;
 	struct url_regexp_param callback_param = {
@@ -815,8 +813,6 @@ rspamd_mime_expr_process_regexp (struct rspamd_regexp_atom *re,
 					re->regexp_text,
 					re->header);
 			}
-			rspamd_task_re_cache_add (task, re->regexp_text, 0);
-			return 0;
 		}
 		else {
 			/* Check whether we have regexp for it */
@@ -824,44 +820,48 @@ rspamd_mime_expr_process_regexp (struct rspamd_regexp_atom *re,
 				debug_task ("regexp contains only header and it is found %s",
 					re->header);
 				rspamd_task_re_cache_add (task, re->regexp_text, 1);
-				return 1;
-			}
-			/* Iterate through headers */
-			cur = headerlist;
-			while (cur) {
-				rh = cur->data;
-				debug_task ("found header \"%s\" with value \"%s\"",
-					re->header, rh->decoded);
-				regexp = re->regexp;
-
-				if (re->type == REGEXP_RAW_HEADER) {
-					in = rh->value;
-					raw = TRUE;
-				}
-				else {
-					in = rh->decoded;
-					/* Validate input */
-					if (!in || !g_utf8_validate (in, -1, NULL)) {
-						cur = g_list_next (cur);
-						continue;
+				ret = 1;
+			}
+			else {
+				/* Iterate through headers */
+				cur = headerlist;
+				while (cur) {
+					rh = cur->data;
+					debug_task ("found header \"%s\" with value \"%s\"",
+							re->header, rh->decoded);
+					regexp = re->regexp;
+
+					if (re->type == REGEXP_RAW_HEADER) {
+						in = rh->value;
+						raw = TRUE;
+					}
+					else {
+						in = rh->decoded;
+						/* Validate input */
+						if (!in || !g_utf8_validate (in, -1, NULL)) {
+							cur = g_list_next (cur);
+							continue;
+						}
 					}
-				}
 
-				/* Match re */
-				if (in && rspamd_mime_regexp_element_process (task, re, in,
-						strlen (in), raw)) {
+					/* Match re */
+					if (in) {
+						ret += rspamd_mime_regexp_element_process (task, re, in,
+							strlen (in), raw);
+						debug_task ("checking header %s regexp: %s -> %d",
+								re->header, re->regexp_text, ret);
 
-					return 1;
-				}
+						if (!re->is_multiple && ret) {
+							break;
+						}
+					}
 
-				cur = g_list_next (cur);
+					cur = g_list_next (cur);
+				}
 			}
-
-			rspamd_task_re_cache_add (task, re->regexp_text, 0);
 		}
 		break;
 	case REGEXP_MIME:
-		debug_task ("checking mime regexp: %s", re->regexp_text);
 		/* Iterate throught text parts */
 		cur = g_list_first (task->text_parts);
 		while (cur) {
@@ -886,23 +886,25 @@ rspamd_mime_expr_process_regexp (struct rspamd_regexp_atom *re,
 				clen = part->content->len;
 			}
 			/* If we have limit, apply regexp so much times as we can */
-			if (rspamd_mime_regexp_element_process (task, re, ct, clen, raw)) {
-				return 1;
+			ret = rspamd_mime_regexp_element_process (task, re, ct, clen, raw);
+			debug_task ("checking mime regexp: %s -> %d",
+					re->regexp_text, ret);
+
+			if (!re->is_multiple && ret) {
+				break;
 			}
+
 			cur = g_list_next (cur);
 		}
-		rspamd_task_re_cache_add (task, re->regexp_text, 0);
+
+
 		break;
 	case REGEXP_MESSAGE:
-		debug_task ("checking message regexp: %s", re->regexp_text);
 		raw = TRUE;
 		ct = (guint8 *)task->msg.start;
 		clen = task->msg.len;
 
-		if (rspamd_mime_regexp_element_process (task, re, ct, clen, raw)) {
-			return 1;
-		}
-		rspamd_task_re_cache_add (task, re->regexp_text, 0);
+		ret = rspamd_mime_regexp_element_process (task, re, ct, clen, raw);
 		break;
 	case REGEXP_URL:
 		debug_task ("checking url regexp: %s", re->regexp_text);
@@ -910,23 +912,33 @@ rspamd_mime_expr_process_regexp (struct rspamd_regexp_atom *re,
 		callback_param.task = task;
 		callback_param.regexp = regexp;
 		callback_param.re = re;
-		callback_param.found = FALSE;
+		callback_param.found = 0;
+
 		if (task->urls) {
 			g_hash_table_foreach (task->urls, tree_url_callback, &callback_param);
 		}
-		if (task->emails && callback_param.found == FALSE) {
+
+		if (task->emails && !callback_param.found) {
 			g_hash_table_foreach (task->emails, tree_url_callback, &callback_param);
 		}
-		if (callback_param.found == FALSE) {
-			rspamd_task_re_cache_add (task, re->regexp_text, 0);
-		}
+
+		ret = callback_param.found;
 		break;
 	default:
 		msg_warn ("bad error detected: %p is not a valid regexp object", re);
+		return 0;
 		break;
 	}
 
-	return 0;
+	if (re && re->regexp_text) {
+		if (ret > 1 && !re->is_multiple) {
+			ret = 1;
+		}
+
+		rspamd_task_re_cache_add (task, re->regexp_text, ret);
+	}
+
+	return ret;
 }
author	Vsevolod Stakhov <vsevolod@highsecure.ru>	2015-04-30 15:19:14 +0100
committer	Vsevolod Stakhov <vsevolod@highsecure.ru>	2015-04-30 15:19:14 +0100
commit	633b66378c1eb7ea7f65887272eccc0d80fdc61a (patch)
tree	c6709cde32491340e46f790e7c85e250d0af727d /src
parent	f44290c814a4274f31dca274c4fd75471d58aa1b (diff)
download	rspamd-633b66378c1eb7ea7f65887272eccc0d80fdc61a.tar.gz rspamd-633b66378c1eb7ea7f65887272eccc0d80fdc61a.zip