diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2018-06-06 16:35:57 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2018-06-06 16:35:57 +0100 |
commit | a8e5e53c38ec2289c240008bf918ec086ec7a08a (patch) | |
tree | 3cd6148c92d8be9cafc0cb57e9e85837839de78d /src/libmime | |
parent | 573f8cd97a5f24949977b856b0108844b5bacf88 (diff) | |
download | rspamd-a8e5e53c38ec2289c240008bf918ec086ec7a08a.tar.gz rspamd-a8e5e53c38ec2289c240008bf918ec086ec7a08a.zip |
[Feature] Split message parsing and processing
Diffstat (limited to 'src/libmime')
-rw-r--r-- | src/libmime/message.c | 84 | ||||
-rw-r--r-- | src/libmime/message.h | 6 |
2 files changed, 51 insertions, 39 deletions
diff --git a/src/libmime/message.c b/src/libmime/message.c index 5681c3d00..e5e43c5be 100644 --- a/src/libmime/message.c +++ b/src/libmime/message.c @@ -907,13 +907,10 @@ rspamd_message_from_data (struct rspamd_task *task, const guchar *start, gboolean rspamd_message_parse (struct rspamd_task *task) { - struct rspamd_mime_text_part *p1, *p2; struct received_header *recv, *trecv; const gchar *p; gsize len; guint i; - gdouble diff, *pdiff; - guint tw, *ptw, dw; GError *err = NULL; rspamd_cryptobox_hash_state_t st; guchar digest_out[rspamd_cryptobox_HASHBYTES]; @@ -1016,16 +1013,6 @@ rspamd_message_parse (struct rspamd_task *task) task->queue_id = "undef"; } - for (i = 0; i < task->parts->len; i ++) { - struct rspamd_mime_part *part; - - part = g_ptr_array_index (task->parts, i); - rspamd_message_process_text_part (task, part); - } - - rspamd_images_process (task); - rspamd_archives_process (task); - if (task->received->len > 0) { gboolean need_recv_correction = FALSE; rspamd_inet_addr_t *raddr; @@ -1130,6 +1117,50 @@ rspamd_message_parse (struct rspamd_task *task) rspamd_url_task_subject_callback, task); } + for (i = 0; i < task->parts->len; i ++) { + struct rspamd_mime_part *part; + + part = g_ptr_array_index (task->parts, i); + rspamd_cryptobox_hash_update (&st, part->digest, sizeof (part->digest)); + } + + rspamd_cryptobox_hash_final (&st, digest_out); + memcpy (task->digest, digest_out, sizeof (task->digest)); + + if (task->queue_id) { + msg_info_task ("loaded message; id: <%s>; queue-id: <%s>; size: %z; " + "checksum: <%*xs>", + task->message_id, task->queue_id, task->msg.len, + (gint)sizeof (task->digest), task->digest); + } + else { + msg_info_task ("loaded message; id: <%s>; size: %z; " + "checksum: <%*xs>", + task->message_id, task->msg.len, + (gint)sizeof (task->digest), task->digest); + } + + return TRUE; +} + +void +rspamd_message_process (struct rspamd_task *task) +{ + guint i; + struct rspamd_mime_text_part *p1, *p2; + gdouble diff, *pdiff; + guint tw, *ptw, dw; + + for (i = 0; i < task->parts->len; i ++) { + struct rspamd_mime_part *part; + + part = g_ptr_array_index (task->parts, i); + rspamd_message_process_text_part (task, part); + } + + rspamd_images_process (task); + rspamd_archives_process (task); + /* Calculate distance for 2-parts messages */ if (task->text_parts->len == 2) { p1 = g_ptr_array_index (task->text_parts, 0); @@ -1144,7 +1175,7 @@ rspamd_message_parse (struct rspamd_task *task) if (rspamd_ftok_cmp (&p1->mime_part->parent_part->ct->subtype, &srch) == 0) { if (!IS_PART_EMPTY (p1) && !IS_PART_EMPTY (p2) && - p1->normalized_hashes && p2->normalized_hashes) { + p1->normalized_hashes && p2->normalized_hashes) { /* * We also detect language on one part and propagate it to * another one @@ -1219,13 +1250,6 @@ rspamd_message_parse (struct rspamd_task *task) } } - for (i = 0; i < task->parts->len; i ++) { - struct rspamd_mime_part *part; - - part = g_ptr_array_index (task->parts, i); - rspamd_cryptobox_hash_update (&st, part->digest, sizeof (part->digest)); - } - /* Calculate average words length and number of short words */ struct rspamd_mime_text_part *text_part; gdouble *var; @@ -1258,24 +1282,6 @@ rspamd_message_parse (struct rspamd_task *task) *var /= (double)total_words; } } - - rspamd_cryptobox_hash_final (&st, digest_out); - memcpy (task->digest, digest_out, sizeof (task->digest)); - - if (task->queue_id) { - msg_info_task ("loaded message; id: <%s>; queue-id: <%s>; size: %z; " - "checksum: <%*xs>", - task->message_id, task->queue_id, task->msg.len, - (gint)sizeof (task->digest), task->digest); - } - else { - msg_info_task ("loaded message; id: <%s>; size: %z; " - "checksum: <%*xs>", - task->message_id, task->msg.len, - (gint)sizeof (task->digest), task->digest); - } - - return TRUE; } diff --git a/src/libmime/message.h b/src/libmime/message.h index 0ed2a5c66..b16011666 100644 --- a/src/libmime/message.h +++ b/src/libmime/message.h @@ -143,6 +143,12 @@ struct received_header { gboolean rspamd_message_parse (struct rspamd_task *task); /** + * Process content in task (e.g. HTML parsing) + * @param task + */ +void rspamd_message_process (struct rspamd_task *task); + +/** * Get an array of header's values with specified header's name using raw headers * @param task worker task structure * @param field header's name |