summaryrefslogtreecommitdiffstats
path: root/src/worker.c
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@rambler-co.ru>2009-01-21 17:25:06 +0300
committerVsevolod Stakhov <vsevolod@rambler-co.ru>2009-01-21 17:25:06 +0300
commit1dc0f6ad2c2e97e11881a7e1b0a4142e65f50898 (patch)
treef0a714e2e87ebd50f6016c8cc7f2a8e03a9cc2d8 /src/worker.c
parent87c9659fdd08bbbc0eb796afccf7237a03181498 (diff)
downloadrspamd-1dc0f6ad2c2e97e11881a7e1b0a4142e65f50898.tar.gz
rspamd-1dc0f6ad2c2e97e11881a7e1b0a4142e65f50898.zip
* Rewrite message parser
* Change MIME parts storage * Add HTML tag stripping (ported from PHP code) * Rework learning to process only text and stripped HTML parts
Diffstat (limited to 'src/worker.c')
-rw-r--r--src/worker.c110
1 files changed, 9 insertions, 101 deletions
diff --git a/src/worker.c b/src/worker.c
index 4a6f9acdd..809adddda 100644
--- a/src/worker.c
+++ b/src/worker.c
@@ -28,6 +28,7 @@
#include "cfg_file.h"
#include "url.h"
#include "modules.h"
+#include "message.h"
#define TASK_POOL_SIZE 4095
@@ -79,16 +80,18 @@ rcpt_destruct (void *pointer)
static void
free_task (struct worker_task *task)
{
- struct mime_part *part;
+ GList *part;
+ struct mime_part *p;
if (task) {
if (task->memc_ctx) {
memc_close_ctx (task->memc_ctx);
}
- while (!TAILQ_EMPTY (&task->parts)) {
- part = TAILQ_FIRST (&task->parts);
- g_byte_array_free (part->content, FALSE);
- TAILQ_REMOVE (&task->parts, part, next);
+ while ((part = g_list_first (task->parts))) {
+ task->parts = g_list_remove_link (task->parts, part);
+ p = (struct mime_part *)part->data;
+ g_byte_array_free (p->content, FALSE);
+ g_list_free_1 (part);
}
memory_pool_delete (task->task_pool);
bufferevent_disable (task->bev, EV_READ | EV_WRITE);
@@ -98,102 +101,7 @@ free_task (struct worker_task *task)
}
}
-static void
-mime_foreach_callback (GMimeObject *part, gpointer user_data)
-{
- struct worker_task *task = (struct worker_task *)user_data;
- struct mime_part *mime_part;
- GMimeContentType *type;
- GMimeDataWrapper *wrapper;
- GMimeStream *part_stream;
- GByteArray *part_content;
-
- task->parts_count ++;
-
- /* 'part' points to the current part node that g_mime_message_foreach_part() is iterating over */
-
- /* find out what class 'part' is... */
- if (GMIME_IS_MESSAGE_PART (part)) {
- /* message/rfc822 or message/news */
- GMimeMessage *message;
-
- /* g_mime_message_foreach_part() won't descend into
- child message parts, so if we want to count any
- subparts of this child message, we'll have to call
- g_mime_message_foreach_part() again here. */
-
- message = g_mime_message_part_get_message ((GMimeMessagePart *) part);
- g_mime_message_foreach_part (message, mime_foreach_callback, task);
- g_object_unref (message);
- } else if (GMIME_IS_MESSAGE_PARTIAL (part)) {
- /* message/partial */
-
- /* this is an incomplete message part, probably a
- large message that the sender has broken into
- smaller parts and is sending us bit by bit. we
- could save some info about it so that we could
- piece this back together again once we get all the
- parts? */
- } else if (GMIME_IS_MULTIPART (part)) {
- /* multipart/mixed, multipart/alternative, multipart/related, multipart/signed, multipart/encrypted, etc... */
-
- /* we'll get to finding out if this is a signed/encrypted multipart later... */
- } else if (GMIME_IS_PART (part)) {
- /* a normal leaf part, could be text/plain or image/jpeg etc */
- wrapper = g_mime_part_get_content_object (GMIME_PART (part));
- if (wrapper != NULL) {
- part_stream = g_mime_stream_mem_new ();
- if (g_mime_data_wrapper_write_to_stream (wrapper, part_stream) != -1) {
- part_content = g_mime_stream_mem_get_byte_array (GMIME_STREAM_MEM (part_stream));
- type = (GMimeContentType *)g_mime_part_get_content_type (GMIME_PART (part));
- mime_part = memory_pool_alloc (task->task_pool, sizeof (struct mime_part));
- mime_part->type = type;
- mime_part->content = part_content;
- TAILQ_INSERT_TAIL (&task->parts, mime_part, next);
- if (g_mime_content_type_is_type (type, "text", "html")) {
- url_parse_html (task, part_content);
- }
- else if (g_mime_content_type_is_type (type, "text", "plain")) {
- url_parse_text (task, part_content);
- }
- }
- }
- } else {
- g_assert_not_reached ();
- }
-}
-static int
-process_message (struct worker_task *task)
-{
- GMimeMessage *message;
- GMimeParser *parser;
- GMimeStream *stream;
-
- stream = g_mime_stream_mem_new_with_buffer (task->msg->buf->begin, task->msg->buf->len);
- /* create a new parser object to parse the stream */
- parser = g_mime_parser_new_with_stream (stream);
-
- /* unref the stream (parser owns a ref, so this object does not actually get free'd until we destroy the parser) */
- g_object_unref (stream);
-
- /* parse the message from the stream */
- message = g_mime_parser_construct_message (parser);
-
- task->message = message;
- memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_object_unref, task->message);
-
- /* free the parser (and the stream) */
- g_object_unref (parser);
-
- g_mime_message_foreach_part (message, mime_foreach_callback, task);
-
- msg_info ("process_message: found %d parts in message", task->parts_count);
-
- task->worker->srv->stat->messages_scanned ++;
-
- return process_filters (task);
-}
static void
read_socket (struct bufferevent *bev, void *arg)
@@ -225,6 +133,7 @@ read_socket (struct bufferevent *bev, void *arg)
update_buf_size (task->msg);
if (task->msg->free == 0) {
r = process_message (task);
+ r = process_filters (task);
if (r == -1) {
task->last_error = "Filter processing error";
task->error_code = RSPAMD_FILTER_ERROR;
@@ -318,7 +227,6 @@ accept_socket (int fd, short what, void *arg)
new_task->sock = nfd;
new_task->cfg = worker->srv->cfg;
TAILQ_INIT (&new_task->urls);
- TAILQ_INIT (&new_task->parts);
new_task->task_pool = memory_pool_new (memory_pool_get_size ());
/* Add destructor for recipients list (it would be better to use anonymous function here */
memory_pool_add_destructor (new_task->task_pool, (pool_destruct_func)rcpt_destruct, new_task);