summaryrefslogtreecommitdiffstats
path: root/src/message.c
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@rambler-co.ru>2009-06-02 19:32:34 +0400
committerVsevolod Stakhov <vsevolod@rambler-co.ru>2009-06-02 19:32:34 +0400
commit7bae787900fea17ca82393886217c6287d7e8cea (patch)
tree4f358b3624d7b2ba6c86a25057d4ba7db10965ae /src/message.c
parent4eb2985d1ef3631fca82cbf18cc2e8d7aab9b096 (diff)
downloadrspamd-7bae787900fea17ca82393886217c6287d7e8cea.tar.gz
rspamd-7bae787900fea17ca82393886217c6287d7e8cea.zip
* Rework url parsing algorithms
* Adopt all parts of rspamd for new url parser * Improve url-extracter utility by avoiding cut&paste of mime parsing * Small fixes to rspamc client * Bump version to 0.1.3
Diffstat (limited to 'src/message.c')
-rw-r--r--src/message.c20
1 files changed, 15 insertions, 5 deletions
diff --git a/src/message.c b/src/message.c
index 510d407e9..f664122d0 100644
--- a/src/message.c
+++ b/src/message.c
@@ -301,28 +301,36 @@ process_text_part (struct worker_task *task, GByteArray *part_content, GMimeCont
text_part->is_balanced = TRUE;
text_part->html_nodes = NULL;
text_part->content = strip_html_tags (task->task_pool, text_part, part_content, NULL);
+ text_part->html_urls = g_tree_new ( (GCompareFunc)g_ascii_strcasecmp);
+ text_part->urls = g_tree_new ( (GCompareFunc)g_ascii_strcasecmp);
if (text_part->html_nodes == NULL) {
- url_parse_text (task, text_part->orig, FALSE);
+ url_parse_text (task->task_pool, task, text_part, FALSE);
}
else {
- url_parse_text (task, text_part->orig, TRUE);
+ url_parse_text (task->task_pool, task, text_part, FALSE);
+ url_parse_text (task->task_pool, task, text_part, TRUE);
}
text_part->fuzzy = fuzzy_init_byte_array (text_part->content, task->task_pool);
memory_pool_add_destructor (task->task_pool, (pool_destruct_func)free_byte_array_callback, text_part->content);
+ memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_tree_destroy, text_part->html_urls);
+ memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_tree_destroy, text_part->urls);
task->text_parts = g_list_prepend (task->text_parts, text_part);
}
else if (g_mime_content_type_is_type (type, "text", "plain")) {
msg_debug ("mime_foreach_callback: got urls from text/plain part");
- url_parse_text (task, part_content, FALSE);
text_part = memory_pool_alloc (task->task_pool, sizeof (struct mime_text_part));
text_part->orig = convert_text_to_utf (task, part_content, type, text_part);
text_part->content = text_part->orig;
text_part->is_html = FALSE;
text_part->fuzzy = fuzzy_init_byte_array (text_part->content, task->task_pool);
+ text_part->html_urls = NULL;
+ text_part->urls = g_tree_new ( (GCompareFunc)g_ascii_strcasecmp);
+ url_parse_text (task->task_pool, task, text_part, FALSE);
task->text_parts = g_list_prepend (task->text_parts, text_part);
+ memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_tree_destroy, text_part->urls);
}
}
@@ -489,8 +497,10 @@ process_message (struct worker_task *task)
if (task->rcpts) {
memory_pool_add_destructor (task->task_pool, (pool_destruct_func)internet_address_list_destroy, task->rcpts);
}
-
- task->worker->srv->stat->messages_scanned ++;
+
+ if (task->worker) {
+ task->worker->srv->stat->messages_scanned ++;
+ }
/* free the parser (and the stream) */
g_object_unref (parser);