summaryrefslogtreecommitdiffstats
path: root/src/message.c
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@rambler-co.ru>2009-03-23 14:10:07 +0300
committer Vsevolod Stakhov <vsevolod@rambler-co.ru>2009-03-23 14:10:07 +0300
commit c79b5ccd22cbc1c273479f4f88189a18effda533 (patch)
tree 1741743779a70146a61cd1767936aa43d671e36b /src/message.c
parent afdaddc4d0745a5bcefad73dd74fd4c03ae3de15 (diff)
downloadrspamd-c79b5ccd22cbc1c273479f4f88189a18effda533.tar.gz
rspamd-c79b5ccd22cbc1c273479f4f88189a18effda533.zip
* Fix an error in the expression parser that caused failures for expressions ending with a regexp
* Improve test for fuzzy hashes * Add new object - TextPart - to the Perl XS library that allows access to stripped parts and fuzzy hashes * Add documentation for the expressions parser and for Mail::Rspamd::TextPart * Always calculate fuzzy hash for text parts * Store text parts separately from other parts * Add compare_parts_distance for expressions, which calculates the difference between 2 parts of a message * Do not try to substitute variables in empty strings
Diffstat (limited to 'src/message.c')
-rw-r--r--src/message.c19
1 files changed, 17 insertions, 2 deletions
diff --git a/src/message.c b/src/message.c
index 76743f7de..807463a82 100644
--- a/src/message.c
+++ b/src/message.c
@@ -242,6 +242,7 @@ mime_foreach_callback (GMimeObject *part, gpointer user_data)
{
struct worker_task *task = (struct worker_task *)user_data;
struct mime_part *mime_part;
+ struct mime_text_part *text_part;
GMimeContentType *type;
GMimeDataWrapper *wrapper;
GMimeStream *part_stream;
@@ -302,13 +303,27 @@ mime_foreach_callback (GMimeObject *part, gpointer user_data)
mime_part->content = part_content;
msg_debug ("mime_foreach_callback: found part with content-type: %s/%s", type->type, type->subtype);
task->parts = g_list_prepend (task->parts, mime_part);
- if (g_mime_content_type_is_type (type, "text", "html")) {
+ /* Now do special processing for text parts of message */
+ if (g_mime_content_type_is_type (type, "text", "html") || g_mime_content_type_is_type (type, "text", "xhtml")) {
msg_debug ("mime_foreach_callback: got urls from text/html part");
url_parse_html (task, part_content);
+
+ text_part = memory_pool_alloc (task->task_pool, sizeof (struct mime_text_part));
+ text_part->content = strip_html_tags (part_content, NULL);
+ text_part->is_html = TRUE;
+ text_part->fuzzy = fuzzy_init_byte_array (text_part->content, task->task_pool);
+ memory_pool_add_destructor (task->task_pool, (pool_destruct_func)free_byte_array_callback, text_part->content);
+ task->text_parts = g_list_prepend (task->text_parts, text_part);
}
else if (g_mime_content_type_is_type (type, "text", "plain")) {
- url_parse_text (task, part_content);
msg_debug ("mime_foreach_callback: got urls from text/plain part");
+ url_parse_text (task, part_content);
+
+ text_part = memory_pool_alloc (task->task_pool, sizeof (struct mime_text_part));
+ text_part->content = part_content;
+ text_part->is_html = FALSE;
+ text_part->fuzzy = fuzzy_init_byte_array (text_part->content, task->task_pool);
+ task->text_parts = g_list_prepend (task->text_parts, text_part);
}
}
else {