From 7bae787900fea17ca82393886217c6287d7e8cea Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Tue, 2 Jun 2009 19:32:34 +0400 Subject: [PATCH] * Rework url parsing algorithms * Adopt all parts of rspamd for new url parser * Improve url-extracter utility by avoiding cut&paste of mime parsing * Small fixes to rspamc client * Bump version to 0.1.3 --- CMakeLists.txt | 2 +- rspamc.pl.in | 3 +- src/lmtp.c | 4 +- src/main.h | 2 +- src/message.c | 20 ++++-- src/message.h | 2 + src/plugins/regexp.c | 51 ++++++++++++-- src/plugins/surbl.c | 55 ++++++++++----- src/protocol.c | 8 ++- src/tokenizers/tokenizers.c | 6 +- src/url.c | 31 +++++---- src/url.h | 3 +- src/worker.c | 4 +- test/rspamd_url_test.c | 48 +------------ utils/url_extracter.c | 130 ++++++------------------------------ 15 files changed, 164 insertions(+), 205 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f744fff03..2d55565e9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,7 +7,7 @@ PROJECT(rspamd C) SET(RSPAMD_VERSION_MAJOR 0) SET(RSPAMD_VERSION_MINOR 1) -SET(RSPAMD_VERSION_PATCH 2) +SET(RSPAMD_VERSION_PATCH 3) SET(RSPAMD_VERSION "${RSPAMD_VERSION_MAJOR}.${RSPAMD_VERSION_MINOR}.${RSPAMD_VERSION_PATCH}") SET(RSPAMD_MASTER_SITE_URL "http://cebka.pp.ru/hg/rspamd") diff --git a/rspamc.pl.in b/rspamc.pl.in index 6681fd6d9..9d5712117 100755 --- a/rspamc.pl.in +++ b/rspamc.pl.in @@ -200,7 +200,7 @@ sub do_control_command { my %args; getopt('c:h:p:Ps:', \%args); my $cmd = shift; -my $do_parse_config = 0; +my $do_parse_config = 1; if (!defined ($cmd) || $cmd eq '') { HELP_MESSAGE(); @@ -230,6 +230,7 @@ if (defined ($args{h})) { $cfg{'host'} = $args{h}; if ($args{h} =~ /^\/.*$/) { $cfg{'is_unix'} = 1; + $do_parse_config = 0; } } if (defined ($args{p})) { diff --git a/src/lmtp.c b/src/lmtp.c index d649d11c3..5f2878a08 100644 --- a/src/lmtp.c +++ b/src/lmtp.c @@ -109,6 +109,9 @@ free_task (struct rspamd_lmtp_proto *lmtp, gboolean is_soft) else { rspamd_remove_dispatcher (lmtp->task->dispatcher); } + if (lmtp->task->urls) { + g_list_free (lmtp->task->urls); + } close (lmtp->task->sock); g_free (lmtp->task); g_free (lmtp); @@ -230,7 +233,6 @@ accept_socket (int fd, short what, void *arg) new_task->state = READ_COMMAND; new_task->sock = nfd; new_task->cfg = worker->srv->cfg; - TAILQ_INIT (&new_task->urls); new_task->task_pool = memory_pool_new (memory_pool_get_size ()); /* Add destructor for recipients list (it would be better to use anonymous function here */ memory_pool_add_destructor (new_task->task_pool, (pool_destruct_func)rcpt_destruct, new_task); diff --git a/src/main.h b/src/main.h index c1a057824..924fe13ed 100644 --- a/src/main.h +++ b/src/main.h @@ -183,7 +183,7 @@ struct worker_task { GList *parts; /**< list of parsed parts */ GList *text_parts; /**< list of text parts */ char *raw_headers; /**< list of raw headers */ - TAILQ_HEAD (uriq, uri) urls; /**< list of parsed urls */ + GList *urls; /**< list of parsed urls */ GHashTable *results; /**< hash table of metric_result indexed by * metric's name */ GHashTable *re_cache; /**< cache for matched or not matched regexps */ diff --git a/src/message.c b/src/message.c index 510d407e9..f664122d0 100644 --- a/src/message.c +++ b/src/message.c @@ -301,28 +301,36 @@ process_text_part (struct worker_task *task, GByteArray *part_content, GMimeCont text_part->is_balanced = TRUE; text_part->html_nodes = NULL; text_part->content = strip_html_tags (task->task_pool, text_part, part_content, NULL); + text_part->html_urls = g_tree_new ( (GCompareFunc)g_ascii_strcasecmp); + text_part->urls = g_tree_new ( (GCompareFunc)g_ascii_strcasecmp); if (text_part->html_nodes == NULL) { - url_parse_text (task, text_part->orig, FALSE); + url_parse_text (task->task_pool, task, text_part, FALSE); } else { - url_parse_text (task, text_part->orig, TRUE); + url_parse_text (task->task_pool, task, text_part, FALSE); + url_parse_text (task->task_pool, task, text_part, TRUE); } text_part->fuzzy = fuzzy_init_byte_array (text_part->content, task->task_pool); memory_pool_add_destructor (task->task_pool, (pool_destruct_func)free_byte_array_callback, text_part->content); + memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_tree_destroy, text_part->html_urls); + memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_tree_destroy, text_part->urls); task->text_parts = g_list_prepend (task->text_parts, text_part); } else if (g_mime_content_type_is_type (type, "text", "plain")) { msg_debug ("mime_foreach_callback: got urls from text/plain part"); - url_parse_text (task, part_content, FALSE); text_part = memory_pool_alloc (task->task_pool, sizeof (struct mime_text_part)); text_part->orig = convert_text_to_utf (task, part_content, type, text_part); text_part->content = text_part->orig; text_part->is_html = FALSE; text_part->fuzzy = fuzzy_init_byte_array (text_part->content, task->task_pool); + text_part->html_urls = NULL; + text_part->urls = g_tree_new ( (GCompareFunc)g_ascii_strcasecmp); + url_parse_text (task->task_pool, task, text_part, FALSE); task->text_parts = g_list_prepend (task->text_parts, text_part); + memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_tree_destroy, text_part->urls); } } @@ -489,8 +497,10 @@ process_message (struct worker_task *task) if (task->rcpts) { memory_pool_add_destructor (task->task_pool, (pool_destruct_func)internet_address_list_destroy, task->rcpts); } - - task->worker->srv->stat->messages_scanned ++; + + if (task->worker) { + task->worker->srv->stat->messages_scanned ++; + } /* free the parser (and the stream) */ g_object_unref (parser); diff --git a/src/message.h b/src/message.h index 72711638f..9a63b0824 100644 --- a/src/message.h +++ b/src/message.h @@ -21,6 +21,8 @@ struct mime_text_part { GByteArray *orig; GByteArray *content; GNode *html_nodes; + GTree *urls; + GTree *html_urls; fuzzy_hash_t *fuzzy; }; diff --git a/src/plugins/regexp.c b/src/plugins/regexp.c index 1b109bf55..fbe36f291 100644 --- a/src/plugins/regexp.c +++ b/src/plugins/regexp.c @@ -237,6 +237,28 @@ find_raw_header_pos (const char *headers, const char *headerv) return NULL; } +struct url_regexp_param { + struct worker_task *task; + GRegex *regexp; + struct rspamd_regexp *re; + gboolean found; +}; + +static gboolean +tree_url_callback (gpointer key, gpointer value, void *data) +{ + struct url_regexp_param *param = data; + struct uri *url = value; + + if (g_regex_match (param->regexp, struri (url), 0, NULL) == TRUE) { + task_cache_add (param->task, param->re, 1); + param->found = TRUE; + return TRUE; + } + + return FALSE; +} + static gsize process_regexp (struct rspamd_regexp *re, struct worker_task *task) { @@ -244,7 +266,7 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task) struct mime_text_part *part, *tmp; GList *cur, *headerlist; GRegex *regexp; - struct uri *url; + struct url_regexp_param callback_param; int r; @@ -333,13 +355,30 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task) return 0; case REGEXP_URL: msg_debug ("process_regexp: checking url regexp: /%s/", re->regexp_text); - TAILQ_FOREACH (url, &task->urls, next) { - if (g_regex_match (re->regexp, struri (url), 0, NULL) == TRUE) { - task_cache_add (task, re, 1); - return 1; + cur = g_list_first (task->text_parts); + while (cur) { + part = (struct mime_text_part *)cur->data; + if (part->is_raw) { + regexp = re->raw_regexp; } + else { + regexp = re->regexp; + } + callback_param.task = task; + callback_param.regexp = regexp; + callback_param.re = re; + callback_param.found = FALSE; + if (part->urls) { + g_tree_foreach (part->urls, tree_url_callback, &callback_param); + } + if (part->html_urls && callback_param.found == FALSE) { + g_tree_foreach (part->html_urls, tree_url_callback, &callback_param); + } + cur = g_list_next (cur); + } + if (callback_param.found == FALSE) { + task_cache_add (task, re, 0); } - task_cache_add (task, re, 0); return 0; case REGEXP_RAW_HEADER: msg_debug ("process_regexp: checking for raw header: %s with regexp: /%s/", re->header, re->regexp_text); diff --git a/src/plugins/surbl.c b/src/plugins/surbl.c index d44c7fbe7..1514cae1c 100644 --- a/src/plugins/surbl.c +++ b/src/plugins/surbl.c @@ -28,6 +28,7 @@ #include "../config.h" #include "../util.h" +#include "../message.h" #include #include "surbl.h" @@ -647,29 +648,53 @@ register_redirector_call (struct uri *url, struct worker_task *task, GTree *url_ event_add (¶m->ev, &timeout); } +static gboolean +tree_url_callback (gpointer key, gpointer value, void *data) +{ + struct redirector_param *param = data; + struct uri *url = value; + + msg_debug ("surbl_test_url: check url %s", struri (url)); + if (surbl_module_ctx->use_redirector) { + register_redirector_call (url, param->task, param->tree); + param->task->save.saved++; + } + else { + if (param->task->worker->srv->cfg->memcached_servers_num > 0) { + register_memcached_call (url, param->task, param->tree); + param->task->save.saved++; + } + else { + make_surbl_requests (url, param->task, param->tree); + } + } + + return FALSE; +} + static int surbl_test_url (struct worker_task *task) { - struct uri *url; GTree *url_tree; + GList *cur; + struct mime_text_part *part; + struct redirector_param param; url_tree = g_tree_new ((GCompareFunc)g_ascii_strcasecmp); - - TAILQ_FOREACH (url, &task->urls, next) { - msg_debug ("surbl_test_url: check url %s", struri (url)); - if (surbl_module_ctx->use_redirector) { - register_redirector_call (url, task, url_tree); - task->save.saved++; + + param.tree = url_tree; + param.task = task; + cur = task->text_parts; + while (cur) { + part = cur->data; + if (part->urls) { + g_tree_foreach (part->urls, tree_url_callback, ¶m); } - else { - if (task->worker->srv->cfg->memcached_servers_num > 0) { - register_memcached_call (url, task, url_tree); - task->save.saved++; - } - else { - make_surbl_requests (url, task, url_tree); - } + if (part->html_urls) { + g_tree_foreach (part->html_urls, tree_url_callback, ¶m); } + + cur = g_list_next (cur); } memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_tree_destroy, url_tree); diff --git a/src/protocol.c b/src/protocol.c index 2cd025287..c551bb783 100644 --- a/src/protocol.c +++ b/src/protocol.c @@ -369,10 +369,13 @@ show_url_header (struct worker_task *task) int r = 0; char outbuf[OUTBUFSIZ], c; struct uri *url; + GList *cur; f_str_t host; r = snprintf (outbuf, sizeof (outbuf), "Urls: "); - TAILQ_FOREACH (url, &task->urls, next) { + cur = task->urls; + while (cur) { + url = cur->data; host.begin = url->host; host.len = url->hostlen; /* Skip long hosts to avoid protocol coollisions */ @@ -386,7 +389,7 @@ show_url_header (struct worker_task *task) r = 0; } /* Write url host to buf */ - if (TAILQ_NEXT (url, next) != NULL) { + if (g_list_next (cur) != NULL) { c = *(host.begin + host.len); *(host.begin + host.len) = '\0'; msg_debug ("show_url_header: write url: %s", host.begin); @@ -400,6 +403,7 @@ show_url_header (struct worker_task *task) r += snprintf (outbuf + r, sizeof (outbuf) - r, "%s" CRLF, host.begin); *(host.begin + host.len) = c; } + cur = g_list_next (cur); } rspamd_dispatcher_write (task->dispatcher, outbuf, r, FALSE); } diff --git a/src/tokenizers/tokenizers.c b/src/tokenizers/tokenizers.c index 1b47289a2..4527e699c 100644 --- a/src/tokenizers/tokenizers.c +++ b/src/tokenizers/tokenizers.c @@ -122,6 +122,7 @@ tokenize_urls (memory_pool_t *pool, struct worker_task *task, GTree **tree) token_node_t *new = NULL; f_str_t url_domain; struct uri *url; + GList *cur; uint32_t h; if (*tree == NULL) { @@ -129,7 +130,9 @@ tokenize_urls (memory_pool_t *pool, struct worker_task *task, GTree **tree) memory_pool_add_destructor (pool, (pool_destruct_func)g_tree_destroy, *tree); } - TAILQ_FOREACH (url, &task->urls, next) { + cur = task->urls; + while (cur) { + url = cur->data; url_domain.begin = url->host; url_domain.len = url->hostlen; new = memory_pool_alloc (pool, sizeof (token_node_t)); @@ -139,6 +142,7 @@ tokenize_urls (memory_pool_t *pool, struct worker_task *task, GTree **tree) if (g_tree_lookup (*tree, new) == NULL) { g_tree_insert (*tree, new, new); } + cur = g_list_next (cur); } return TRUE; diff --git a/src/url.c b/src/url.c index 875358ae0..cc58a2caf 100644 --- a/src/url.c +++ b/src/url.c @@ -23,10 +23,11 @@ */ #include "config.h" +#include "url.h" #include "util.h" #include "fstring.h" #include "main.h" -#include "url.h" +#include "message.h" #define POST_CHAR 1 #define POST_CHAR_S "\001" @@ -853,7 +854,7 @@ parse_uri(struct uri *uri, unsigned char *uristring, memory_pool_t *pool) } void -url_parse_text (struct worker_task *task, GByteArray *content, gboolean is_html) +url_parse_text (memory_pool_t *pool, struct worker_task *task, struct mime_text_part *part, gboolean is_html) { GMatchInfo *info; GError *err = NULL; @@ -861,26 +862,32 @@ url_parse_text (struct worker_task *task, GByteArray *content, gboolean is_html) char *url_str = NULL; struct uri *new; - if (!content->data || content->len == 0) { + if (!part->orig->data || part->orig->len == 0) { msg_warn ("url_parse_text: got empty text part"); return; } if (url_init () == 0) { - rc = g_regex_match_full (is_html ? html_re : text_re, (const char *)content->data, content->len, 0, 0, &info, &err); + if (is_html) { + rc = g_regex_match_full (html_re, (const char *)part->orig->data, part->orig->len, 0, 0, &info, &err); + } + else { + rc = g_regex_match_full (text_re, (const char *)part->content->data, part->content->len, 0, 0, &info, &err); + + } if (rc) { while (g_match_info_matches (info)) { url_str = g_match_info_fetch (info, is_html ? 1 : 0); msg_debug ("url_parse_text: extracted string with regexp: '%s', html is %s", url_str, is_html ? "on" : "off"); if (url_str != NULL) { - new = memory_pool_alloc (task->task_pool, sizeof (struct uri)); - if (new != NULL) { - rc = parse_uri (new, url_str, task->task_pool); - if (rc != URI_ERRNO_OK) { - msg_debug ("url_parse_text: error while parsing url %s: %s", url_str, url_strerror (rc)); - } - if (rc != URI_ERRNO_EMPTY && rc != URI_ERRNO_NO_HOST) { - TAILQ_INSERT_TAIL (&task->urls, new, next); + if (g_tree_lookup (is_html ? part->html_urls : part->urls, url_str) == NULL) { + new = memory_pool_alloc (pool, sizeof (struct uri)); + if (new != NULL) { + rc = parse_uri (new, url_str, pool); + if (rc != URI_ERRNO_EMPTY && rc != URI_ERRNO_NO_HOST) { + g_tree_insert (is_html ? part->html_urls : part->urls, url_str, new); + task->urls = g_list_prepend (task->urls, new); + } } } } diff --git a/src/url.h b/src/url.h index 2c367548d..7860f544a 100644 --- a/src/url.h +++ b/src/url.h @@ -6,6 +6,7 @@ #include "mem_pool.h" struct worker_task; +struct mime_text_part; struct uri { /* The start of the uri (and thus start of the protocol string). */ @@ -73,7 +74,7 @@ enum protocol { #define struri(uri) ((uri)->string) -void url_parse_text (struct worker_task *task, GByteArray *part, gboolean is_html); +void url_parse_text (memory_pool_t *pool, struct worker_task *task, struct mime_text_part *part, gboolean is_html); enum uri_errno parse_uri(struct uri *uri, unsigned char *uristring, memory_pool_t *pool); #endif diff --git a/src/worker.c b/src/worker.c index 3f223241b..af4575919 100644 --- a/src/worker.c +++ b/src/worker.c @@ -130,6 +130,9 @@ free_task (struct worker_task *task, gboolean is_soft) if (task->text_parts) { g_list_free (task->text_parts); } + if (task->urls) { + g_list_free (task->urls); + } memory_pool_delete (task->task_pool); if (is_soft) { /* Plan dispatcher shutdown */ @@ -287,7 +290,6 @@ accept_socket (int fd, short what, void *arg) #endif io_tv.tv_sec = WORKER_IO_TIMEOUT; io_tv.tv_usec = 0; - TAILQ_INIT (&new_task->urls); new_task->task_pool = memory_pool_new (memory_pool_get_size ()); /* Add destructor for recipients list (it would be better to use anonymous function here */ memory_pool_add_destructor (new_task->task_pool, (pool_destruct_func)rcpt_destruct, new_task); diff --git a/test/rspamd_url_test.c b/test/rspamd_url_test.c index 36c9e439b..f716c1ab0 100644 --- a/test/rspamd_url_test.c +++ b/test/rspamd_url_test.c @@ -70,51 +70,5 @@ const char *test_html = "This is test file with data = (gchar *)test_text; - text->len = strlen (test_text); - html = g_byte_array_new(); - html->data = (gchar *)test_html; - html->len = strlen (test_html); - bzero (&task, sizeof (task)); - TAILQ_INIT (&task.urls); - task.task_pool = memory_pool_new (8192); - - g_test_timer_start (); - g_test_message ("Testing text URL regexp parser"); - url_parse_text (&task, text, FALSE); - - TAILQ_FOREACH (url, &task.urls, next) { - msg_debug ("Found url: %s, hostname: %s, data: %s", struri (url), url->host, url->data); - i ++; - } - - while (!TAILQ_EMPTY (&task.urls)) { - url = TAILQ_FIRST (&task.urls); - TAILQ_REMOVE (&task.urls, url, next); - } - /* g_assert (i == 39); */ - - msg_debug ("Time elapsed: %.2f", g_test_timer_elapsed ()); - i = 0; - g_test_timer_start (); - g_test_message ("Testing html URL regexp parser"); - url_parse_text (&task, html, TRUE); - - TAILQ_FOREACH (url, &task.urls, next) { - msg_debug ("Found url: %s, hostname: %s, data: %s", struri (url), url->host, url->data); - i ++; - } - - while (!TAILQ_EMPTY (&task.urls)) { - url = TAILQ_FIRST (&task.urls); - TAILQ_REMOVE (&task.urls, url, next); - } - g_assert (i == 1); - msg_debug ("Time elapsed: %.2f", g_test_timer_elapsed ()); + /* XXX: maybe write test for this */ } diff --git a/utils/url_extracter.c b/utils/url_extracter.c index ac8e8be4e..97bf72c47 100644 --- a/utils/url_extracter.c +++ b/utils/url_extracter.c @@ -24,107 +24,24 @@ #include "../src/main.h" #include "../src/cfg_file.h" #include "../src/url.h" +#include "../src/util.h" #include "../src/message.h" rspamd_hash_t *counters = NULL; -#ifdef GMIME24 -static void -mime_foreach_callback (GMimeObject *parent, GMimeObject *part, gpointer user_data) -#else -static void -mime_foreach_callback (GMimeObject *part, gpointer user_data) -#endif -{ - struct worker_task *task = (struct worker_task *)user_data; - struct mime_part *mime_part; - GMimeContentType *type; - GMimeDataWrapper *wrapper; - GMimeStream *part_stream; - GByteArray *part_content; - GMimeMessage *message; - - /* 'part' points to the current part node that g_mime_message_foreach_part() is iterating over */ - - /* find out what class 'part' is... */ - if (GMIME_IS_MESSAGE_PART (part)) { - /* message/rfc822 or message/news */ - printf ("Message part found\n"); - - /* g_mime_message_foreach_part() won't descend into - child message parts, so if we want to count any - subparts of this child message, we'll have to call - g_mime_message_foreach_part() again here. */ - - message = g_mime_message_part_get_message ((GMimeMessagePart *) part); -#ifdef GMIME24 - g_mime_message_foreach (message, mime_foreach_callback, task); -#else - g_mime_message_foreach_part (message, mime_foreach_callback, task); -#endif - g_object_unref (message); - } else if (GMIME_IS_MESSAGE_PARTIAL (part)) { - /* message/partial */ - printf ("Message/partial part found\n"); - - /* this is an incomplete message part, probably a - large message that the sender has broken into - smaller parts and is sending us bit by bit. we - could save some info about it so that we could - piece this back together again once we get all the - parts? */ - } else if (GMIME_IS_MULTIPART (part)) { - /* multipart/mixed, multipart/alternative, multipart/related, multipart/signed, multipart/encrypted, etc... */ - - /* we'll get to finding out if this is a signed/encrypted multipart later... */ - } else if (GMIME_IS_PART (part)) { - printf ("Normal part found\n"); - /* a normal leaf part, could be text/plain or image/jpeg etc */ - wrapper = g_mime_part_get_content_object (GMIME_PART (part)); - if (wrapper != NULL) { - part_stream = g_mime_stream_mem_new (); - printf ("Get new wrapper object for normal part\n"); - if (g_mime_data_wrapper_write_to_stream (wrapper, part_stream) != -1) { - printf ("Write wrapper to stream\n"); - part_content = g_mime_stream_mem_get_byte_array (GMIME_STREAM_MEM (part_stream)); -#ifdef GMIME24 - type = (GMimeContentType *)g_mime_object_get_content_type (GMIME_OBJECT (part)); -#else - type = (GMimeContentType *)g_mime_part_get_content_type (GMIME_PART (part)); -#endif - mime_part = g_malloc (sizeof (struct mime_part)); - mime_part->type = type; - mime_part->content = part_content; - task->parts = g_list_prepend (task->parts, mime_part); - if (g_mime_content_type_is_type (type, "text", "html")) { - printf ("Found text/html part\n"); - url_parse_text (task, part_content, TRUE); - } - else if (g_mime_content_type_is_type (type, "text", "plain")) { - printf ("Found text/plain part\n"); - url_parse_text (task, part_content, FALSE); - } - } - } - } else { - g_assert_not_reached (); - } -} - int main (int argc, char **argv) { - GMimeMessage *message; - GMimeParser *parser; - GMimeStream *stream; struct worker_task task; struct uri *url; char *buf = NULL; size_t pos = 0, size = 65535; + GList *cur; g_mem_set_vtable(glib_mem_profiler_table); g_mime_init (0); bzero (&task, sizeof (struct worker_task)); + task.task_pool = memory_pool_new (memory_pool_get_size ()); /* Preallocate buffer */ buf = g_malloc (size); @@ -137,32 +54,23 @@ main (int argc, char **argv) buf = g_realloc (buf, size); } } - - stream = g_mime_stream_mem_new_with_buffer (buf, pos); - /* create a new parser object to parse the stream */ - parser = g_mime_parser_new_with_stream (stream); - - /* unref the stream (parser owns a ref, so this object does not actually get free'd until we destroy the parser) */ - g_object_unref (stream); - - /* parse the message from the stream */ - message = g_mime_parser_construct_message (parser); - task.message = message; - task.task_pool = memory_pool_new (memory_pool_get_size ()); - TAILQ_INIT (&task.urls); - - /* free the parser (and the stream) */ - g_object_unref (parser); + task.cfg = memory_pool_alloc0 (task.task_pool, sizeof (struct config_file)); + task.cfg->log_level = G_LOG_LEVEL_CRITICAL; + task.cfg->log_fd = STDERR_FILENO; + g_log_set_default_handler (file_log_function, task.cfg); -#ifdef GMIME24 - g_mime_message_foreach (message, mime_foreach_callback, &task); -#else - g_mime_message_foreach_part (message, mime_foreach_callback, &task); -#endif - - TAILQ_FOREACH (url, &task.urls, next) { - printf ("Found url: %s, hostname: %s, data: %s\n", struri (url), url->host, url->data); + task.msg = memory_pool_alloc (task.task_pool, sizeof (f_str_t)); + task.msg->begin = buf; + task.msg->len = pos; + process_message (&task); + + cur = task.urls; + while (cur) { + url = cur->data; + printf ("%s\n", struri (url)); + cur = g_list_next (cur); } - + + return 0; } -- 2.39.5