diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2020-05-05 14:59:33 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2020-05-05 14:59:33 +0100 |
commit | 5dd1ccbb33e53b9a3903970bbd623569869ad008 (patch) | |
tree | a7bb39de8f90e042019e7ed7e9bcd286d174393e | |
parent | 569825264466b743f741514f117afae29c4b8dad (diff) | |
download | rspamd-5dd1ccbb33e53b9a3903970bbd623569869ad008.tar.gz rspamd-5dd1ccbb33e53b9a3903970bbd623569869ad008.zip |
[Minor] Allow attaching URLs to MIME parts
-rw-r--r-- | src/libmime/message.c | 8 | ||||
-rw-r--r-- | src/libmime/message.h | 1 | ||||
-rw-r--r-- | src/libmime/mime_parser.c | 2 | ||||
-rw-r--r-- | src/libserver/html.c | 36 | ||||
-rw-r--r-- | src/libserver/html.h | 3 | ||||
-rw-r--r-- | src/libserver/url.c | 14 |
6 files changed, 51 insertions, 13 deletions
diff --git a/src/libmime/message.c b/src/libmime/message.c index 4b00d2dd0..eec992552 100644 --- a/src/libmime/message.c +++ b/src/libmime/message.c @@ -758,7 +758,8 @@ rspamd_message_process_html_text_part (struct rspamd_task *task, text_part->html, text_part->utf_raw_content, &text_part->exceptions, - MESSAGE_FIELD (task, urls)); + MESSAGE_FIELD (task, urls), + text_part->mime_part->urls); if (text_part->utf_content->len == 0) { text_part->flags |= RSPAMD_MIME_TEXT_PART_FLAG_EMPTY; @@ -925,6 +926,7 @@ rspamd_message_from_data (struct rspamd_task *task, const guchar *start, part->parsed_data.begin = start; part->parsed_data.len = len; part->part_number = MESSAGE_FIELD (task, parts)->len; + part->urls = g_ptr_array_new (); part->raw_headers = rspamd_message_headers_new (); part->headers_order = NULL; @@ -1052,6 +1054,10 @@ rspamd_message_dtor (struct rspamd_message *msg) LUA_REGISTRYINDEX, p->specific.lua_specific.cbref); } + + if (p->urls) { + g_ptr_array_unref (p->urls); + } } PTR_ARRAY_FOREACH (msg->text_parts, i, tp) { diff --git a/src/libmime/message.h b/src/libmime/message.h index 96ed9d5d4..a921d6f38 100644 --- a/src/libmime/message.h +++ b/src/libmime/message.h @@ -91,6 +91,7 @@ struct rspamd_mime_part { struct rspamd_mime_header *headers_order; struct rspamd_mime_headers_table *raw_headers; + GPtrArray *urls; gchar *raw_headers_str; gsize raw_headers_len; diff --git a/src/libmime/mime_parser.c b/src/libmime/mime_parser.c index 590ee57d6..4fc37ad3d 100644 --- a/src/libmime/mime_parser.c +++ b/src/libmime/mime_parser.c @@ -683,6 +683,7 @@ rspamd_mime_parse_normal_part (struct rspamd_task *task, } part->part_number = MESSAGE_FIELD (task, parts)->len; + part->urls = g_ptr_array_new (); g_ptr_array_add (MESSAGE_FIELD (task, parts), part); msg_debug_mime ("parsed data part %T/%T of length %z (%z orig), %s cte", &part->ct->type, &part->ct->subtype, part->parsed_data.len, @@ -1017,6 +1018,7 @@ rspamd_mime_parse_multipart_part (struct rspamd_task *task, } 
part->part_number = MESSAGE_FIELD (task, parts)->len; + part->urls = g_ptr_array_new (); g_ptr_array_add (MESSAGE_FIELD (task, parts), part); st->nesting ++; rspamd_mime_part_get_cte (task, part->raw_headers, part, FALSE); diff --git a/src/libserver/html.c b/src/libserver/html.c index f8c43bdd5..b916019d9 100644 --- a/src/libserver/html.c +++ b/src/libserver/html.c @@ -1548,7 +1548,7 @@ rspamd_html_process_url (rspamd_mempool_t *pool, const gchar *start, guint len, static struct rspamd_url * rspamd_html_process_url_tag (rspamd_mempool_t *pool, struct html_tag *tag, - struct html_content *hc) + struct html_content *hc) { struct html_tag_component *comp; GList *cur; @@ -1628,6 +1628,7 @@ struct rspamd_html_url_query_cbd { rspamd_mempool_t *pool; khash_t (rspamd_url_hash) *url_set; struct rspamd_url *url; + GPtrArray *part_urls; }; static gboolean @@ -1651,14 +1652,18 @@ rspamd_html_url_query_callback (struct rspamd_url *url, gsize start_offset, cbd->url->querylen, rspamd_url_query_unsafe (cbd->url)); url->flags |= RSPAMD_URL_FLAG_QUERY; - rspamd_url_set_add_or_increase (cbd->url_set, url); + + if (rspamd_url_set_add_or_increase (cbd->url_set, url) && cbd->part_urls) { + g_ptr_array_add (cbd->part_urls, url); + } return TRUE; } static void rspamd_process_html_url (rspamd_mempool_t *pool, struct rspamd_url *url, - khash_t (rspamd_url_hash) *url_set) + khash_t (rspamd_url_hash) *url_set, + GPtrArray *part_urls) { if (url->flags & RSPAMD_URL_FLAG_UNNORMALISED) { url->flags |= RSPAMD_URL_FLAG_OBSCURED; @@ -1670,12 +1675,17 @@ rspamd_process_html_url (rspamd_mempool_t *pool, struct rspamd_url *url, qcbd.pool = pool; qcbd.url_set = url_set; qcbd.url = url; + qcbd.part_urls = part_urls; rspamd_url_find_multiple(pool, rspamd_url_query_unsafe (url), url->querylen, RSPAMD_URL_FIND_ALL, NULL, rspamd_html_url_query_callback, &qcbd); } + + if (part_urls) { + g_ptr_array_add (part_urls, url); + } } static void @@ -1732,7 +1742,8 @@ rspamd_html_process_data_image (rspamd_mempool_t 
*pool, static void rspamd_html_process_img_tag (rspamd_mempool_t *pool, struct html_tag *tag, - struct html_content *hc, khash_t (rspamd_url_hash) *url_set) + struct html_content *hc, khash_t (rspamd_url_hash) *url_set, + GPtrArray *part_urls) { struct html_tag_component *comp; struct html_image *img; @@ -1778,7 +1789,11 @@ rspamd_html_process_img_tag (rspamd_mempool_t *pool, struct html_tag *tag, if (img->url) { img->url->flags |= RSPAMD_URL_FLAG_IMAGE; - rspamd_url_set_add_or_increase (url_set, img->url); + + if (rspamd_url_set_add_or_increase (url_set, img->url) && + part_urls) { + g_ptr_array_add (part_urls, img->url); + } } } } @@ -2603,7 +2618,8 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool, struct html_content *hc, GByteArray *in, GList **exceptions, - khash_t (rspamd_url_hash) *url_set) + khash_t (rspamd_url_hash) *url_set, + GPtrArray *part_urls) { const guchar *p, *c, *end, *savep = NULL; guchar t; @@ -3067,7 +3083,8 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool, if (url_set != NULL) { if (rspamd_url_set_add_or_increase (url_set, url)) { - rspamd_process_html_url (pool, url, url_set); + rspamd_process_html_url (pool, url, url_set, + part_urls); } } @@ -3129,7 +3146,8 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool, } if (cur_tag->id == Tag_IMG && !(cur_tag->flags & FL_CLOSING)) { - rspamd_html_process_img_tag (pool, cur_tag, hc, url_set); + rspamd_html_process_img_tag (pool, cur_tag, hc, url_set, + part_urls); } else if (cur_tag->flags & FL_BLOCK) { struct html_block *bl; @@ -3194,5 +3212,5 @@ rspamd_html_process_part (rspamd_mempool_t *pool, struct html_content *hc, GByteArray *in) { - return rspamd_html_process_part_full (pool, hc, in, NULL, NULL); + return rspamd_html_process_part_full (pool, hc, in, NULL, NULL, NULL); } diff --git a/src/libserver/html.h b/src/libserver/html.h index 72eac8d79..b319964ce 100644 --- a/src/libserver/html.h +++ b/src/libserver/html.h @@ -143,7 +143,8 @@ GByteArray *rspamd_html_process_part 
(rspamd_mempool_t *pool, GByteArray *rspamd_html_process_part_full (rspamd_mempool_t *pool, struct html_content *hc, GByteArray *in, GList **exceptions, - khash_t (rspamd_url_hash) *url_set); + khash_t (rspamd_url_hash) *url_set, + GPtrArray *part_urls); /* * Returns true if a specified tag has been seen in a part diff --git a/src/libserver/url.c b/src/libserver/url.c index a47d732f7..c10073dcb 100644 --- a/src/libserver/url.c +++ b/src/libserver/url.c @@ -3296,7 +3296,13 @@ rspamd_url_query_callback (struct rspamd_url *url, gsize start_offset, } url->flags |= RSPAMD_URL_FLAG_QUERY; - rspamd_url_set_add_or_increase (MESSAGE_FIELD (task, urls), url); + + + if (rspamd_url_set_add_or_increase (MESSAGE_FIELD (task, urls), url)) { + if (cbd->part && cbd->part->mime_part->urls) { + g_ptr_array_add (cbd->part->mime_part->urls, url); + } + } return TRUE; } @@ -3347,7 +3353,11 @@ rspamd_url_text_part_callback (struct rspamd_url *url, gsize start_offset, } url->flags |= RSPAMD_URL_FLAG_FROM_TEXT; - rspamd_url_set_add_or_increase (MESSAGE_FIELD (task, urls), url); + + if (rspamd_url_set_add_or_increase (MESSAGE_FIELD (task, urls), url) && + cbd->part->mime_part->urls) { + g_ptr_array_add (cbd->part->mime_part->urls, url); + } cbd->part->exceptions = g_list_prepend ( cbd->part->exceptions, |