diff options
author | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2009-01-21 17:25:06 +0300 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2009-01-21 17:25:06 +0300 |
commit | 1dc0f6ad2c2e97e11881a7e1b0a4142e65f50898 (patch) | |
tree | f0a714e2e87ebd50f6016c8cc7f2a8e03a9cc2d8 /src/worker.c | |
parent | 87c9659fdd08bbbc0eb796afccf7237a03181498 (diff) | |
download | rspamd-1dc0f6ad2c2e97e11881a7e1b0a4142e65f50898.tar.gz rspamd-1dc0f6ad2c2e97e11881a7e1b0a4142e65f50898.zip |
* Rewrite message parser
* Change mime parts storage
* Add HTML tag stripping (ported from PHP code)
* Rework learning to process only text and stripped HTML parts
Diffstat (limited to 'src/worker.c')
-rw-r--r-- | src/worker.c | 110 |
1 files changed, 9 insertions, 101 deletions
diff --git a/src/worker.c b/src/worker.c index 4a6f9acdd..809adddda 100644 --- a/src/worker.c +++ b/src/worker.c @@ -28,6 +28,7 @@ #include "cfg_file.h" #include "url.h" #include "modules.h" +#include "message.h" #define TASK_POOL_SIZE 4095 @@ -79,16 +80,18 @@ rcpt_destruct (void *pointer) static void free_task (struct worker_task *task) { - struct mime_part *part; + GList *part; + struct mime_part *p; if (task) { if (task->memc_ctx) { memc_close_ctx (task->memc_ctx); } - while (!TAILQ_EMPTY (&task->parts)) { - part = TAILQ_FIRST (&task->parts); - g_byte_array_free (part->content, FALSE); - TAILQ_REMOVE (&task->parts, part, next); + while ((part = g_list_first (task->parts))) { + task->parts = g_list_remove_link (task->parts, part); + p = (struct mime_part *)part->data; + g_byte_array_free (p->content, FALSE); + g_list_free_1 (part); } memory_pool_delete (task->task_pool); bufferevent_disable (task->bev, EV_READ | EV_WRITE); @@ -98,102 +101,7 @@ free_task (struct worker_task *task) } } -static void -mime_foreach_callback (GMimeObject *part, gpointer user_data) -{ - struct worker_task *task = (struct worker_task *)user_data; - struct mime_part *mime_part; - GMimeContentType *type; - GMimeDataWrapper *wrapper; - GMimeStream *part_stream; - GByteArray *part_content; - - task->parts_count ++; - - /* 'part' points to the current part node that g_mime_message_foreach_part() is iterating over */ - - /* find out what class 'part' is... */ - if (GMIME_IS_MESSAGE_PART (part)) { - /* message/rfc822 or message/news */ - GMimeMessage *message; - - /* g_mime_message_foreach_part() won't descend into - child message parts, so if we want to count any - subparts of this child message, we'll have to call - g_mime_message_foreach_part() again here. 
*/ - - message = g_mime_message_part_get_message ((GMimeMessagePart *) part); - g_mime_message_foreach_part (message, mime_foreach_callback, task); - g_object_unref (message); - } else if (GMIME_IS_MESSAGE_PARTIAL (part)) { - /* message/partial */ - - /* this is an incomplete message part, probably a - large message that the sender has broken into - smaller parts and is sending us bit by bit. we - could save some info about it so that we could - piece this back together again once we get all the - parts? */ - } else if (GMIME_IS_MULTIPART (part)) { - /* multipart/mixed, multipart/alternative, multipart/related, multipart/signed, multipart/encrypted, etc... */ - - /* we'll get to finding out if this is a signed/encrypted multipart later... */ - } else if (GMIME_IS_PART (part)) { - /* a normal leaf part, could be text/plain or image/jpeg etc */ - wrapper = g_mime_part_get_content_object (GMIME_PART (part)); - if (wrapper != NULL) { - part_stream = g_mime_stream_mem_new (); - if (g_mime_data_wrapper_write_to_stream (wrapper, part_stream) != -1) { - part_content = g_mime_stream_mem_get_byte_array (GMIME_STREAM_MEM (part_stream)); - type = (GMimeContentType *)g_mime_part_get_content_type (GMIME_PART (part)); - mime_part = memory_pool_alloc (task->task_pool, sizeof (struct mime_part)); - mime_part->type = type; - mime_part->content = part_content; - TAILQ_INSERT_TAIL (&task->parts, mime_part, next); - if (g_mime_content_type_is_type (type, "text", "html")) { - url_parse_html (task, part_content); - } - else if (g_mime_content_type_is_type (type, "text", "plain")) { - url_parse_text (task, part_content); - } - } - } - } else { - g_assert_not_reached (); - } -} -static int -process_message (struct worker_task *task) -{ - GMimeMessage *message; - GMimeParser *parser; - GMimeStream *stream; - - stream = g_mime_stream_mem_new_with_buffer (task->msg->buf->begin, task->msg->buf->len); - /* create a new parser object to parse the stream */ - parser = 
g_mime_parser_new_with_stream (stream); - - /* unref the stream (parser owns a ref, so this object does not actually get free'd until we destroy the parser) */ - g_object_unref (stream); - - /* parse the message from the stream */ - message = g_mime_parser_construct_message (parser); - - task->message = message; - memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_object_unref, task->message); - - /* free the parser (and the stream) */ - g_object_unref (parser); - - g_mime_message_foreach_part (message, mime_foreach_callback, task); - - msg_info ("process_message: found %d parts in message", task->parts_count); - - task->worker->srv->stat->messages_scanned ++; - - return process_filters (task); -} static void read_socket (struct bufferevent *bev, void *arg) @@ -225,6 +133,7 @@ read_socket (struct bufferevent *bev, void *arg) update_buf_size (task->msg); if (task->msg->free == 0) { r = process_message (task); + r = process_filters (task); if (r == -1) { task->last_error = "Filter processing error"; task->error_code = RSPAMD_FILTER_ERROR; @@ -318,7 +227,6 @@ accept_socket (int fd, short what, void *arg) new_task->sock = nfd; new_task->cfg = worker->srv->cfg; TAILQ_INIT (&new_task->urls); - TAILQ_INIT (&new_task->parts); new_task->task_pool = memory_pool_new (memory_pool_get_size ()); /* Add destructor for recipients list (it would be better to use anonymous function here */ memory_pool_add_destructor (new_task->task_pool, (pool_destruct_func)rcpt_destruct, new_task); |