summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- CMakeLists.txt 2
-rwxr-xr-x rspamc.pl.in 3
-rw-r--r-- src/lmtp.c 4
-rw-r--r-- src/main.h 2
-rw-r--r-- src/message.c 20
-rw-r--r-- src/message.h 2
-rw-r--r-- src/plugins/regexp.c 51
-rw-r--r-- src/plugins/surbl.c 55
-rw-r--r-- src/protocol.c 8
-rw-r--r-- src/tokenizers/tokenizers.c 6
-rw-r--r-- src/url.c 31
-rw-r--r-- src/url.h 3
-rw-r--r-- src/worker.c 4
-rw-r--r-- test/rspamd_url_test.c 48
-rw-r--r-- utils/url_extracter.c 130
15 files changed, 164 insertions, 205 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f744fff03..2d55565e9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -7,7 +7,7 @@ PROJECT(rspamd C)
SET(RSPAMD_VERSION_MAJOR 0)
SET(RSPAMD_VERSION_MINOR 1)
-SET(RSPAMD_VERSION_PATCH 2)
+SET(RSPAMD_VERSION_PATCH 3)
SET(RSPAMD_VERSION "${RSPAMD_VERSION_MAJOR}.${RSPAMD_VERSION_MINOR}.${RSPAMD_VERSION_PATCH}")
SET(RSPAMD_MASTER_SITE_URL "http://cebka.pp.ru/hg/rspamd")
diff --git a/rspamc.pl.in b/rspamc.pl.in
index 6681fd6d9..9d5712117 100755
--- a/rspamc.pl.in
+++ b/rspamc.pl.in
@@ -200,7 +200,7 @@ sub do_control_command {
my %args;
getopt('c:h:p:Ps:', \%args);
my $cmd = shift;
-my $do_parse_config = 0;
+my $do_parse_config = 1;
if (!defined ($cmd) || $cmd eq '') {
HELP_MESSAGE();
@@ -230,6 +230,7 @@ if (defined ($args{h})) {
$cfg{'host'} = $args{h};
if ($args{h} =~ /^\/.*$/) {
$cfg{'is_unix'} = 1;
+ $do_parse_config = 0;
}
}
if (defined ($args{p})) {
diff --git a/src/lmtp.c b/src/lmtp.c
index d649d11c3..5f2878a08 100644
--- a/src/lmtp.c
+++ b/src/lmtp.c
@@ -109,6 +109,9 @@ free_task (struct rspamd_lmtp_proto *lmtp, gboolean is_soft)
else {
rspamd_remove_dispatcher (lmtp->task->dispatcher);
}
+ if (lmtp->task->urls) {
+ g_list_free (lmtp->task->urls);
+ }
close (lmtp->task->sock);
g_free (lmtp->task);
g_free (lmtp);
@@ -230,7 +233,6 @@ accept_socket (int fd, short what, void *arg)
new_task->state = READ_COMMAND;
new_task->sock = nfd;
new_task->cfg = worker->srv->cfg;
- TAILQ_INIT (&new_task->urls);
new_task->task_pool = memory_pool_new (memory_pool_get_size ());
/* Add destructor for recipients list (it would be better to use anonymous function here */
memory_pool_add_destructor (new_task->task_pool, (pool_destruct_func)rcpt_destruct, new_task);
diff --git a/src/main.h b/src/main.h
index c1a057824..924fe13ed 100644
--- a/src/main.h
+++ b/src/main.h
@@ -183,7 +183,7 @@ struct worker_task {
GList *parts; /**< list of parsed parts */
GList *text_parts; /**< list of text parts */
char *raw_headers; /**< list of raw headers */
- TAILQ_HEAD (uriq, uri) urls; /**< list of parsed urls */
+ GList *urls; /**< list of parsed urls */
GHashTable *results; /**< hash table of metric_result indexed by
* metric's name */
GHashTable *re_cache; /**< cache for matched or not matched regexps */
diff --git a/src/message.c b/src/message.c
index 510d407e9..f664122d0 100644
--- a/src/message.c
+++ b/src/message.c
@@ -301,28 +301,36 @@ process_text_part (struct worker_task *task, GByteArray *part_content, GMimeCont
text_part->is_balanced = TRUE;
text_part->html_nodes = NULL;
text_part->content = strip_html_tags (task->task_pool, text_part, part_content, NULL);
+ text_part->html_urls = g_tree_new ( (GCompareFunc)g_ascii_strcasecmp);
+ text_part->urls = g_tree_new ( (GCompareFunc)g_ascii_strcasecmp);
if (text_part->html_nodes == NULL) {
- url_parse_text (task, text_part->orig, FALSE);
+ url_parse_text (task->task_pool, task, text_part, FALSE);
}
else {
- url_parse_text (task, text_part->orig, TRUE);
+ url_parse_text (task->task_pool, task, text_part, FALSE);
+ url_parse_text (task->task_pool, task, text_part, TRUE);
}
text_part->fuzzy = fuzzy_init_byte_array (text_part->content, task->task_pool);
memory_pool_add_destructor (task->task_pool, (pool_destruct_func)free_byte_array_callback, text_part->content);
+ memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_tree_destroy, text_part->html_urls);
+ memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_tree_destroy, text_part->urls);
task->text_parts = g_list_prepend (task->text_parts, text_part);
}
else if (g_mime_content_type_is_type (type, "text", "plain")) {
msg_debug ("mime_foreach_callback: got urls from text/plain part");
- url_parse_text (task, part_content, FALSE);
text_part = memory_pool_alloc (task->task_pool, sizeof (struct mime_text_part));
text_part->orig = convert_text_to_utf (task, part_content, type, text_part);
text_part->content = text_part->orig;
text_part->is_html = FALSE;
text_part->fuzzy = fuzzy_init_byte_array (text_part->content, task->task_pool);
+ text_part->html_urls = NULL;
+ text_part->urls = g_tree_new ( (GCompareFunc)g_ascii_strcasecmp);
+ url_parse_text (task->task_pool, task, text_part, FALSE);
task->text_parts = g_list_prepend (task->text_parts, text_part);
+ memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_tree_destroy, text_part->urls);
}
}
@@ -489,8 +497,10 @@ process_message (struct worker_task *task)
if (task->rcpts) {
memory_pool_add_destructor (task->task_pool, (pool_destruct_func)internet_address_list_destroy, task->rcpts);
}
-
- task->worker->srv->stat->messages_scanned ++;
+
+ if (task->worker) {
+ task->worker->srv->stat->messages_scanned ++;
+ }
/* free the parser (and the stream) */
g_object_unref (parser);
diff --git a/src/message.h b/src/message.h
index 72711638f..9a63b0824 100644
--- a/src/message.h
+++ b/src/message.h
@@ -21,6 +21,8 @@ struct mime_text_part {
GByteArray *orig;
GByteArray *content;
GNode *html_nodes;
+ GTree *urls;
+ GTree *html_urls;
fuzzy_hash_t *fuzzy;
};
diff --git a/src/plugins/regexp.c b/src/plugins/regexp.c
index 1b109bf55..fbe36f291 100644
--- a/src/plugins/regexp.c
+++ b/src/plugins/regexp.c
@@ -237,6 +237,28 @@ find_raw_header_pos (const char *headers, const char *headerv)
return NULL;
}
+struct url_regexp_param { /* Context handed to tree_url_callback through the g_tree_foreach user-data pointer */
+ struct worker_task *task; /* task forwarded to task_cache_add () when a URL matches */
+ GRegex *regexp; /* pattern each URL string is tested against */
+ struct rspamd_regexp *re; /* regexp descriptor passed to task_cache_add () on a match */
+ gboolean found; /* set to TRUE by the callback once a URL matches */
+};
+
+static gboolean /* g_tree_foreach visitor: tests one stored URL against param->regexp */
+tree_url_callback (gpointer key, gpointer value, void *data)
+{
+ struct url_regexp_param *param = data; /* caller-supplied match context */
+ struct uri *url = value; /* value is the struct uri; key is unused in this function */
+
+ if (g_regex_match (param->regexp, struri (url), 0, NULL) == TRUE) {
+ task_cache_add (param->task, param->re, 1); /* cache 1 for this regexp; the caller caches 0 when nothing matched */
+ param->found = TRUE;
+ return TRUE; /* per GLib, a TRUE return stops the g_tree_foreach traversal */
+ }
+
+ return FALSE; /* no match: let the traversal continue */
+}
+
static gsize
process_regexp (struct rspamd_regexp *re, struct worker_task *task)
{
@@ -244,7 +266,7 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task)
struct mime_text_part *part, *tmp;
GList *cur, *headerlist;
GRegex *regexp;
- struct uri *url;
+ struct url_regexp_param callback_param;
int r;
@@ -333,13 +355,30 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task)
return 0;
case REGEXP_URL:
msg_debug ("process_regexp: checking url regexp: /%s/", re->regexp_text);
- TAILQ_FOREACH (url, &task->urls, next) {
- if (g_regex_match (re->regexp, struri (url), 0, NULL) == TRUE) {
- task_cache_add (task, re, 1);
- return 1;
+ cur = g_list_first (task->text_parts);
+ while (cur) {
+ part = (struct mime_text_part *)cur->data;
+ if (part->is_raw) {
+ regexp = re->raw_regexp;
}
+ else {
+ regexp = re->regexp;
+ }
+ callback_param.task = task;
+ callback_param.regexp = regexp;
+ callback_param.re = re;
+ callback_param.found = FALSE;
+ if (part->urls) {
+ g_tree_foreach (part->urls, tree_url_callback, &callback_param);
+ }
+ if (part->html_urls && callback_param.found == FALSE) {
+ g_tree_foreach (part->html_urls, tree_url_callback, &callback_param);
+ }
+ cur = g_list_next (cur);
+ }
+ if (callback_param.found == FALSE) {
+ task_cache_add (task, re, 0);
}
- task_cache_add (task, re, 0);
return 0;
case REGEXP_RAW_HEADER:
msg_debug ("process_regexp: checking for raw header: %s with regexp: /%s/", re->header, re->regexp_text);
diff --git a/src/plugins/surbl.c b/src/plugins/surbl.c
index d44c7fbe7..1514cae1c 100644
--- a/src/plugins/surbl.c
+++ b/src/plugins/surbl.c
@@ -28,6 +28,7 @@
#include "../config.h"
#include "../util.h"
+#include "../message.h"
#include <evdns.h>
#include "surbl.h"
@@ -647,29 +648,53 @@ register_redirector_call (struct uri *url, struct worker_task *task, GTree *url_
event_add (&param->ev, &timeout);
}
+static gboolean /* g_tree_foreach visitor: hands one URL to the redirector, memcached or direct SURBL path */
+tree_url_callback (gpointer key, gpointer value, void *data)
+{
+ struct redirector_param *param = data; /* carries the task and url tree filled in by surbl_test_url */
+ struct uri *url = value; /* key is unused in this function */
+
+ msg_debug ("surbl_test_url: check url %s", struri (url));
+ if (surbl_module_ctx->use_redirector) {
+ register_redirector_call (url, param->task, param->tree);
+ param->task->save.saved++; /* presumably counts a pending asynchronous request - TODO confirm */
+ }
+ else {
+ if (param->task->worker->srv->cfg->memcached_servers_num > 0) { /* NOTE(review): task->worker is dereferenced unchecked - confirm it cannot be NULL here */
+ register_memcached_call (url, param->task, param->tree);
+ param->task->save.saved++;
+ }
+ else {
+ make_surbl_requests (url, param->task, param->tree);
+ }
+ }
+
+ return FALSE; /* always FALSE, so the traversal visits every URL in the tree */
+}
+
static int
surbl_test_url (struct worker_task *task)
{
- struct uri *url;
GTree *url_tree;
+ GList *cur;
+ struct mime_text_part *part;
+ struct redirector_param param;
url_tree = g_tree_new ((GCompareFunc)g_ascii_strcasecmp);
-
- TAILQ_FOREACH (url, &task->urls, next) {
- msg_debug ("surbl_test_url: check url %s", struri (url));
- if (surbl_module_ctx->use_redirector) {
- register_redirector_call (url, task, url_tree);
- task->save.saved++;
+
+ param.tree = url_tree;
+ param.task = task;
+ cur = task->text_parts;
+ while (cur) {
+ part = cur->data;
+ if (part->urls) {
+ g_tree_foreach (part->urls, tree_url_callback, &param);
}
- else {
- if (task->worker->srv->cfg->memcached_servers_num > 0) {
- register_memcached_call (url, task, url_tree);
- task->save.saved++;
- }
- else {
- make_surbl_requests (url, task, url_tree);
- }
+ if (part->html_urls) {
+ g_tree_foreach (part->html_urls, tree_url_callback, &param);
}
+
+ cur = g_list_next (cur);
}
memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_tree_destroy, url_tree);
diff --git a/src/protocol.c b/src/protocol.c
index 2cd025287..c551bb783 100644
--- a/src/protocol.c
+++ b/src/protocol.c
@@ -369,10 +369,13 @@ show_url_header (struct worker_task *task)
int r = 0;
char outbuf[OUTBUFSIZ], c;
struct uri *url;
+ GList *cur;
f_str_t host;
r = snprintf (outbuf, sizeof (outbuf), "Urls: ");
- TAILQ_FOREACH (url, &task->urls, next) {
+ cur = task->urls;
+ while (cur) {
+ url = cur->data;
host.begin = url->host;
host.len = url->hostlen;
/* Skip long hosts to avoid protocol collisions */
@@ -386,7 +389,7 @@ show_url_header (struct worker_task *task)
r = 0;
}
/* Write url host to buf */
- if (TAILQ_NEXT (url, next) != NULL) {
+ if (g_list_next (cur) != NULL) {
c = *(host.begin + host.len);
*(host.begin + host.len) = '\0';
msg_debug ("show_url_header: write url: %s", host.begin);
@@ -400,6 +403,7 @@ show_url_header (struct worker_task *task)
r += snprintf (outbuf + r, sizeof (outbuf) - r, "%s" CRLF, host.begin);
*(host.begin + host.len) = c;
}
+ cur = g_list_next (cur);
}
rspamd_dispatcher_write (task->dispatcher, outbuf, r, FALSE);
}
diff --git a/src/tokenizers/tokenizers.c b/src/tokenizers/tokenizers.c
index 1b47289a2..4527e699c 100644
--- a/src/tokenizers/tokenizers.c
+++ b/src/tokenizers/tokenizers.c
@@ -122,6 +122,7 @@ tokenize_urls (memory_pool_t *pool, struct worker_task *task, GTree **tree)
token_node_t *new = NULL;
f_str_t url_domain;
struct uri *url;
+ GList *cur;
uint32_t h;
if (*tree == NULL) {
@@ -129,7 +130,9 @@ tokenize_urls (memory_pool_t *pool, struct worker_task *task, GTree **tree)
memory_pool_add_destructor (pool, (pool_destruct_func)g_tree_destroy, *tree);
}
- TAILQ_FOREACH (url, &task->urls, next) {
+ cur = task->urls;
+ while (cur) {
+ url = cur->data;
url_domain.begin = url->host;
url_domain.len = url->hostlen;
new = memory_pool_alloc (pool, sizeof (token_node_t));
@@ -139,6 +142,7 @@ tokenize_urls (memory_pool_t *pool, struct worker_task *task, GTree **tree)
if (g_tree_lookup (*tree, new) == NULL) {
g_tree_insert (*tree, new, new);
}
+ cur = g_list_next (cur);
}
return TRUE;
diff --git a/src/url.c b/src/url.c
index 875358ae0..cc58a2caf 100644
--- a/src/url.c
+++ b/src/url.c
@@ -23,10 +23,11 @@
*/
#include "config.h"
+#include "url.h"
#include "util.h"
#include "fstring.h"
#include "main.h"
-#include "url.h"
+#include "message.h"
#define POST_CHAR 1
#define POST_CHAR_S "\001"
@@ -853,7 +854,7 @@ parse_uri(struct uri *uri, unsigned char *uristring, memory_pool_t *pool)
}
void
-url_parse_text (struct worker_task *task, GByteArray *content, gboolean is_html)
+url_parse_text (memory_pool_t *pool, struct worker_task *task, struct mime_text_part *part, gboolean is_html)
{
GMatchInfo *info;
GError *err = NULL;
@@ -861,26 +862,32 @@ url_parse_text (struct worker_task *task, GByteArray *content, gboolean is_html)
char *url_str = NULL;
struct uri *new;
- if (!content->data || content->len == 0) {
+ if (!part->orig->data || part->orig->len == 0) {
msg_warn ("url_parse_text: got empty text part");
return;
}
if (url_init () == 0) {
- rc = g_regex_match_full (is_html ? html_re : text_re, (const char *)content->data, content->len, 0, 0, &info, &err);
+ if (is_html) {
+ rc = g_regex_match_full (html_re, (const char *)part->orig->data, part->orig->len, 0, 0, &info, &err);
+ }
+ else {
+ rc = g_regex_match_full (text_re, (const char *)part->content->data, part->content->len, 0, 0, &info, &err);
+
+ }
if (rc) {
while (g_match_info_matches (info)) {
url_str = g_match_info_fetch (info, is_html ? 1 : 0);
msg_debug ("url_parse_text: extracted string with regexp: '%s', html is %s", url_str, is_html ? "on" : "off");
if (url_str != NULL) {
- new = memory_pool_alloc (task->task_pool, sizeof (struct uri));
- if (new != NULL) {
- rc = parse_uri (new, url_str, task->task_pool);
- if (rc != URI_ERRNO_OK) {
- msg_debug ("url_parse_text: error while parsing url %s: %s", url_str, url_strerror (rc));
- }
- if (rc != URI_ERRNO_EMPTY && rc != URI_ERRNO_NO_HOST) {
- TAILQ_INSERT_TAIL (&task->urls, new, next);
+ if (g_tree_lookup (is_html ? part->html_urls : part->urls, url_str) == NULL) {
+ new = memory_pool_alloc (pool, sizeof (struct uri));
+ if (new != NULL) {
+ rc = parse_uri (new, url_str, pool);
+ if (rc != URI_ERRNO_EMPTY && rc != URI_ERRNO_NO_HOST) {
+ g_tree_insert (is_html ? part->html_urls : part->urls, url_str, new);
+ task->urls = g_list_prepend (task->urls, new);
+ }
}
}
}
diff --git a/src/url.h b/src/url.h
index 2c367548d..7860f544a 100644
--- a/src/url.h
+++ b/src/url.h
@@ -6,6 +6,7 @@
#include "mem_pool.h"
struct worker_task;
+struct mime_text_part;
struct uri {
/* The start of the uri (and thus start of the protocol string). */
@@ -73,7 +74,7 @@ enum protocol {
#define struri(uri) ((uri)->string)
-void url_parse_text (struct worker_task *task, GByteArray *part, gboolean is_html);
+void url_parse_text (memory_pool_t *pool, struct worker_task *task, struct mime_text_part *part, gboolean is_html);
enum uri_errno parse_uri(struct uri *uri, unsigned char *uristring, memory_pool_t *pool);
#endif
diff --git a/src/worker.c b/src/worker.c
index 3f223241b..af4575919 100644
--- a/src/worker.c
+++ b/src/worker.c
@@ -130,6 +130,9 @@ free_task (struct worker_task *task, gboolean is_soft)
if (task->text_parts) {
g_list_free (task->text_parts);
}
+ if (task->urls) {
+ g_list_free (task->urls);
+ }
memory_pool_delete (task->task_pool);
if (is_soft) {
/* Plan dispatcher shutdown */
@@ -287,7 +290,6 @@ accept_socket (int fd, short what, void *arg)
#endif
io_tv.tv_sec = WORKER_IO_TIMEOUT;
io_tv.tv_usec = 0;
- TAILQ_INIT (&new_task->urls);
new_task->task_pool = memory_pool_new (memory_pool_get_size ());
/* Add destructor for recipients list (it would be better to use anonymous function here */
memory_pool_add_destructor (new_task->task_pool, (pool_destruct_func)rcpt_destruct, new_task);
diff --git a/test/rspamd_url_test.c b/test/rspamd_url_test.c
index 36c9e439b..f716c1ab0 100644
--- a/test/rspamd_url_test.c
+++ b/test/rspamd_url_test.c
@@ -70,51 +70,5 @@ const char *test_html = "<some_tag>This is test file with <a href=\"http://micro
void
rspamd_url_test_func ()
{
- GByteArray *text, *html;
- struct worker_task task;
- struct uri *url;
- int i = 0;
-
- text = g_byte_array_new();
- text->data = (gchar *)test_text;
- text->len = strlen (test_text);
- html = g_byte_array_new();
- html->data = (gchar *)test_html;
- html->len = strlen (test_html);
- bzero (&task, sizeof (task));
- TAILQ_INIT (&task.urls);
- task.task_pool = memory_pool_new (8192);
-
- g_test_timer_start ();
- g_test_message ("Testing text URL regexp parser");
- url_parse_text (&task, text, FALSE);
-
- TAILQ_FOREACH (url, &task.urls, next) {
- msg_debug ("Found url: %s, hostname: %s, data: %s", struri (url), url->host, url->data);
- i ++;
- }
-
- while (!TAILQ_EMPTY (&task.urls)) {
- url = TAILQ_FIRST (&task.urls);
- TAILQ_REMOVE (&task.urls, url, next);
- }
- /* g_assert (i == 39); */
-
- msg_debug ("Time elapsed: %.2f", g_test_timer_elapsed ());
- i = 0;
- g_test_timer_start ();
- g_test_message ("Testing html URL regexp parser");
- url_parse_text (&task, html, TRUE);
-
- TAILQ_FOREACH (url, &task.urls, next) {
- msg_debug ("Found url: %s, hostname: %s, data: %s", struri (url), url->host, url->data);
- i ++;
- }
-
- while (!TAILQ_EMPTY (&task.urls)) {
- url = TAILQ_FIRST (&task.urls);
- TAILQ_REMOVE (&task.urls, url, next);
- }
- g_assert (i == 1);
- msg_debug ("Time elapsed: %.2f", g_test_timer_elapsed ());
+ /* XXX: maybe write test for this */
}
diff --git a/utils/url_extracter.c b/utils/url_extracter.c
index ac8e8be4e..97bf72c47 100644
--- a/utils/url_extracter.c
+++ b/utils/url_extracter.c
@@ -24,107 +24,24 @@
#include "../src/main.h"
#include "../src/cfg_file.h"
#include "../src/url.h"
+#include "../src/util.h"
#include "../src/message.h"
rspamd_hash_t *counters = NULL;
-#ifdef GMIME24
-static void
-mime_foreach_callback (GMimeObject *parent, GMimeObject *part, gpointer user_data)
-#else
-static void
-mime_foreach_callback (GMimeObject *part, gpointer user_data)
-#endif
-{
- struct worker_task *task = (struct worker_task *)user_data;
- struct mime_part *mime_part;
- GMimeContentType *type;
- GMimeDataWrapper *wrapper;
- GMimeStream *part_stream;
- GByteArray *part_content;
- GMimeMessage *message;
-
- /* 'part' points to the current part node that g_mime_message_foreach_part() is iterating over */
-
- /* find out what class 'part' is... */
- if (GMIME_IS_MESSAGE_PART (part)) {
- /* message/rfc822 or message/news */
- printf ("Message part found\n");
-
- /* g_mime_message_foreach_part() won't descend into
- child message parts, so if we want to count any
- subparts of this child message, we'll have to call
- g_mime_message_foreach_part() again here. */
-
- message = g_mime_message_part_get_message ((GMimeMessagePart *) part);
-#ifdef GMIME24
- g_mime_message_foreach (message, mime_foreach_callback, task);
-#else
- g_mime_message_foreach_part (message, mime_foreach_callback, task);
-#endif
- g_object_unref (message);
- } else if (GMIME_IS_MESSAGE_PARTIAL (part)) {
- /* message/partial */
- printf ("Message/partial part found\n");
-
- /* this is an incomplete message part, probably a
- large message that the sender has broken into
- smaller parts and is sending us bit by bit. we
- could save some info about it so that we could
- piece this back together again once we get all the
- parts? */
- } else if (GMIME_IS_MULTIPART (part)) {
- /* multipart/mixed, multipart/alternative, multipart/related, multipart/signed, multipart/encrypted, etc... */
-
- /* we'll get to finding out if this is a signed/encrypted multipart later... */
- } else if (GMIME_IS_PART (part)) {
- printf ("Normal part found\n");
- /* a normal leaf part, could be text/plain or image/jpeg etc */
- wrapper = g_mime_part_get_content_object (GMIME_PART (part));
- if (wrapper != NULL) {
- part_stream = g_mime_stream_mem_new ();
- printf ("Get new wrapper object for normal part\n");
- if (g_mime_data_wrapper_write_to_stream (wrapper, part_stream) != -1) {
- printf ("Write wrapper to stream\n");
- part_content = g_mime_stream_mem_get_byte_array (GMIME_STREAM_MEM (part_stream));
-#ifdef GMIME24
- type = (GMimeContentType *)g_mime_object_get_content_type (GMIME_OBJECT (part));
-#else
- type = (GMimeContentType *)g_mime_part_get_content_type (GMIME_PART (part));
-#endif
- mime_part = g_malloc (sizeof (struct mime_part));
- mime_part->type = type;
- mime_part->content = part_content;
- task->parts = g_list_prepend (task->parts, mime_part);
- if (g_mime_content_type_is_type (type, "text", "html")) {
- printf ("Found text/html part\n");
- url_parse_text (task, part_content, TRUE);
- }
- else if (g_mime_content_type_is_type (type, "text", "plain")) {
- printf ("Found text/plain part\n");
- url_parse_text (task, part_content, FALSE);
- }
- }
- }
- } else {
- g_assert_not_reached ();
- }
-}
-
int
main (int argc, char **argv)
{
- GMimeMessage *message;
- GMimeParser *parser;
- GMimeStream *stream;
struct worker_task task;
struct uri *url;
char *buf = NULL;
size_t pos = 0, size = 65535;
+ GList *cur;
g_mem_set_vtable(glib_mem_profiler_table);
g_mime_init (0);
bzero (&task, sizeof (struct worker_task));
+ task.task_pool = memory_pool_new (memory_pool_get_size ());
/* Preallocate buffer */
buf = g_malloc (size);
@@ -137,32 +54,23 @@ main (int argc, char **argv)
buf = g_realloc (buf, size);
}
}
-
- stream = g_mime_stream_mem_new_with_buffer (buf, pos);
- /* create a new parser object to parse the stream */
- parser = g_mime_parser_new_with_stream (stream);
-
- /* unref the stream (parser owns a ref, so this object does not actually get free'd until we destroy the parser) */
- g_object_unref (stream);
-
- /* parse the message from the stream */
- message = g_mime_parser_construct_message (parser);
- task.message = message;
- task.task_pool = memory_pool_new (memory_pool_get_size ());
- TAILQ_INIT (&task.urls);
-
- /* free the parser (and the stream) */
- g_object_unref (parser);
+ task.cfg = memory_pool_alloc0 (task.task_pool, sizeof (struct config_file));
+ task.cfg->log_level = G_LOG_LEVEL_CRITICAL;
+ task.cfg->log_fd = STDERR_FILENO;
+ g_log_set_default_handler (file_log_function, task.cfg);
-#ifdef GMIME24
- g_mime_message_foreach (message, mime_foreach_callback, &task);
-#else
- g_mime_message_foreach_part (message, mime_foreach_callback, &task);
-#endif
-
- TAILQ_FOREACH (url, &task.urls, next) {
- printf ("Found url: %s, hostname: %s, data: %s\n", struri (url), url->host, url->data);
+ task.msg = memory_pool_alloc (task.task_pool, sizeof (f_str_t));
+ task.msg->begin = buf;
+ task.msg->len = pos;
+ process_message (&task);
+
+ cur = task.urls;
+ while (cur) {
+ url = cur->data;
+ printf ("%s\n", struri (url));
+ cur = g_list_next (cur);
}
-
+
+ return 0;
}