* Rewrite message parser

* Change MIME parts storage * Add HTML tag stripping (ported from PHP code) * Rework learning to process only text and stripped HTML parts
author: Vsevolod Stakhov <vsevolod@rambler-co.ru> 2009-01-21 17:25:06 +0300
committer: Vsevolod Stakhov <vsevolod@rambler-co.ru> 2009-01-21 17:25:06 +0300
commit: 1dc0f6ad2c2e97e11881a7e1b0a4142e65f50898 (patch)
tree: f0a714e2e87ebd50f6016c8cc7f2a8e03a9cc2d8 /src/worker.c
parent: 87c9659fdd08bbbc0eb796afccf7237a03181498 (diff)
download: rspamd-1dc0f6ad2c2e97e11881a7e1b0a4142e65f50898.tar.gz
rspamd-1dc0f6ad2c2e97e11881a7e1b0a4142e65f50898.zip
1 files changed, 9 insertions, 101 deletions
diff --git a/src/worker.c b/src/worker.c
index 4a6f9acdd..809adddda 100644
--- a/src/worker.c
+++ b/src/worker.c
@@ -28,6 +28,7 @@
 #include "cfg_file.h"
 #include "url.h"
 #include "modules.h"
+#include "message.h"
 
 #define TASK_POOL_SIZE 4095
 
@@ -79,16 +80,18 @@ rcpt_destruct (void *pointer)
 static void
 free_task (struct worker_task *task)
 {
-	struct mime_part *part;
+	GList *part;
+	struct mime_part *p;
 
 	if (task) {
 		if (task->memc_ctx) {
 			memc_close_ctx (task->memc_ctx);
 		}
-		while (!TAILQ_EMPTY (&task->parts)) {
-			part = TAILQ_FIRST (&task->parts);
-			g_byte_array_free (part->content, FALSE);
-			TAILQ_REMOVE (&task->parts, part, next);
+		while ((part = g_list_first (task->parts))) {
+			task->parts = g_list_remove_link (task->parts, part);
+			p = (struct mime_part *)part->data;
+			g_byte_array_free (p->content, FALSE);
+			g_list_free_1 (part);
 		}
 		memory_pool_delete (task->task_pool);
 		bufferevent_disable (task->bev, EV_READ | EV_WRITE);
@@ -98,102 +101,7 @@ free_task (struct worker_task *task)
 	}
 }
 
-static void
-mime_foreach_callback (GMimeObject *part, gpointer user_data)
-{
-	struct worker_task *task = (struct worker_task *)user_data;
-	struct mime_part *mime_part;
-	GMimeContentType *type;
-	GMimeDataWrapper *wrapper;
-	GMimeStream *part_stream;
-	GByteArray *part_content;
-	
-	task->parts_count ++;
-	
-	/* 'part' points to the current part node that g_mime_message_foreach_part() is iterating over */
-	
-	/* find out what class 'part' is... */
-	if (GMIME_IS_MESSAGE_PART (part)) {
-		/* message/rfc822 or message/news */
-		GMimeMessage *message;
-		
-		/* g_mime_message_foreach_part() won't descend into
-                   child message parts, so if we want to count any
-                   subparts of this child message, we'll have to call
-                   g_mime_message_foreach_part() again here. */
-		
-		message = g_mime_message_part_get_message ((GMimeMessagePart *) part);
-		g_mime_message_foreach_part (message, mime_foreach_callback, task);
-		g_object_unref (message);
-	} else if (GMIME_IS_MESSAGE_PARTIAL (part)) {
-		/* message/partial */
-		
-		/* this is an incomplete message part, probably a
-                   large message that the sender has broken into
-                   smaller parts and is sending us bit by bit. we
-                   could save some info about it so that we could
-                   piece this back together again once we get all the
-                   parts? */
-	} else if (GMIME_IS_MULTIPART (part)) {
-		/* multipart/mixed, multipart/alternative, multipart/related, multipart/signed, multipart/encrypted, etc... */
-		
-		/* we'll get to finding out if this is a signed/encrypted multipart later... */
-	} else if (GMIME_IS_PART (part)) {
-		/* a normal leaf part, could be text/plain or image/jpeg etc */
-		wrapper = g_mime_part_get_content_object (GMIME_PART (part));
-		if (wrapper != NULL) {
-			part_stream = g_mime_stream_mem_new ();
-			if (g_mime_data_wrapper_write_to_stream (wrapper, part_stream) != -1) {
-				part_content = g_mime_stream_mem_get_byte_array (GMIME_STREAM_MEM (part_stream));
-				type = (GMimeContentType *)g_mime_part_get_content_type (GMIME_PART (part));
-				mime_part = memory_pool_alloc (task->task_pool, sizeof (struct mime_part));
-				mime_part->type = type;
-				mime_part->content = part_content;
-				TAILQ_INSERT_TAIL (&task->parts, mime_part, next);
-				if (g_mime_content_type_is_type (type, "text", "html")) {
-					url_parse_html (task, part_content);
-				} 
-				else if (g_mime_content_type_is_type (type, "text", "plain")) {
-					url_parse_text (task, part_content);
-				}
-			}
-		}
-	} else {
-		g_assert_not_reached ();
-	}
-}
 
-static int
-process_message (struct worker_task *task)
-{
-	GMimeMessage *message;
-	GMimeParser *parser;
-	GMimeStream *stream;
-
-	stream = g_mime_stream_mem_new_with_buffer (task->msg->buf->begin, task->msg->buf->len);
-	/* create a new parser object to parse the stream */
-	parser = g_mime_parser_new_with_stream (stream);
-
-	/* unref the stream (parser owns a ref, so this object does not actually get free'd until we destroy the parser) */
-	g_object_unref (stream);
-
-	/* parse the message from the stream */
-	message = g_mime_parser_construct_message (parser);
-	
-	task->message = message;
-	memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_object_unref, task->message);
-
-	/* free the parser (and the stream) */
-	g_object_unref (parser);
-
-	g_mime_message_foreach_part (message, mime_foreach_callback, task);
-	
-	msg_info ("process_message: found %d parts in message", task->parts_count);
-
-	task->worker->srv->stat->messages_scanned ++;
-
-	return process_filters (task);
-}
 
 static void
 read_socket (struct bufferevent *bev, void *arg)
@@ -225,6 +133,7 @@ read_socket (struct bufferevent *bev, void *arg)
 				update_buf_size (task->msg);
 				if (task->msg->free == 0) {
 					r = process_message (task);
+					r = process_filters (task);
 					if (r == -1) {
 						task->last_error = "Filter processing error";
 						task->error_code = RSPAMD_FILTER_ERROR;
@@ -318,7 +227,6 @@ accept_socket (int fd, short what, void *arg)
 	new_task->sock = nfd;
 	new_task->cfg = worker->srv->cfg;
 	TAILQ_INIT (&new_task->urls);
-	TAILQ_INIT (&new_task->parts);
 	new_task->task_pool = memory_pool_new (memory_pool_get_size ());
 	/* Add destructor for recipients list (it would be better to use anonymous function here */
 	memory_pool_add_destructor (new_task->task_pool, (pool_destruct_func)rcpt_destruct, new_task);
author	Vsevolod Stakhov <vsevolod@rambler-co.ru>	2009-01-21 17:25:06 +0300
committer	Vsevolod Stakhov <vsevolod@rambler-co.ru>	2009-01-21 17:25:06 +0300
commit	1dc0f6ad2c2e97e11881a7e1b0a4142e65f50898 (patch)
tree	f0a714e2e87ebd50f6016c8cc7f2a8e03a9cc2d8 /src/worker.c
parent	87c9659fdd08bbbc0eb796afccf7237a03181498 (diff)
download	rspamd-1dc0f6ad2c2e97e11881a7e1b0a4142e65f50898.tar.gz rspamd-1dc0f6ad2c2e97e11881a7e1b0a4142e65f50898.zip