source.dussan.org Git - rspamd.git/commitdiff
[Minor] Allow attaching of urls to the mime parts
author: Vsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 5 May 2020 13:59:33 +0000 (14:59 +0100)
committer: Vsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 5 May 2020 13:59:33 +0000 (14:59 +0100)
src/libmime/message.c
src/libmime/message.h
src/libmime/mime_parser.c
src/libserver/html.c
src/libserver/html.h
src/libserver/url.c

index 4b00d2dd0575689a33f590068865d5a288c8b6e1..eec9925524cb71898709d445a38f284e2394ed8c 100644 (file)
@@ -758,7 +758,8 @@ rspamd_message_process_html_text_part (struct rspamd_task *task,
                        text_part->html,
                        text_part->utf_raw_content,
                        &text_part->exceptions,
-                       MESSAGE_FIELD (task, urls));
+                       MESSAGE_FIELD (task, urls),
+                       text_part->mime_part->urls);
 
        if (text_part->utf_content->len == 0) {
                text_part->flags |= RSPAMD_MIME_TEXT_PART_FLAG_EMPTY;
@@ -925,6 +926,7 @@ rspamd_message_from_data (struct rspamd_task *task, const guchar *start,
        part->parsed_data.begin = start;
        part->parsed_data.len = len;
        part->part_number = MESSAGE_FIELD (task, parts)->len;
+       part->urls = g_ptr_array_new ();
        part->raw_headers = rspamd_message_headers_new ();
        part->headers_order = NULL;
 
@@ -1052,6 +1054,10 @@ rspamd_message_dtor (struct rspamd_message *msg)
                                        LUA_REGISTRYINDEX,
                                        p->specific.lua_specific.cbref);
                }
+
+               if (p->urls) {
+                       g_ptr_array_unref (p->urls);
+               }
        }
 
        PTR_ARRAY_FOREACH (msg->text_parts, i, tp) {
index 96ed9d5d489522bb37546760b25810192fd7aab7..a921d6f3826477d2a86f925aca9f308d06585a8e 100644 (file)
@@ -91,6 +91,7 @@ struct rspamd_mime_part {
 
        struct rspamd_mime_header *headers_order;
        struct rspamd_mime_headers_table *raw_headers;
+       GPtrArray *urls;
 
        gchar *raw_headers_str;
        gsize raw_headers_len;
index 590ee57d6d1bbd127bbc5e15a6f8d33c7e2f4419..4fc37ad3d7b32f513e729ed6944eadefbe64b76e 100644 (file)
@@ -683,6 +683,7 @@ rspamd_mime_parse_normal_part (struct rspamd_task *task,
        }
 
        part->part_number = MESSAGE_FIELD (task, parts)->len;
+       part->urls = g_ptr_array_new ();
        g_ptr_array_add (MESSAGE_FIELD (task, parts), part);
        msg_debug_mime ("parsed data part %T/%T of length %z (%z orig), %s cte",
                        &part->ct->type, &part->ct->subtype, part->parsed_data.len,
@@ -1017,6 +1018,7 @@ rspamd_mime_parse_multipart_part (struct rspamd_task *task,
        }
 
        part->part_number = MESSAGE_FIELD (task, parts)->len;
+       part->urls = g_ptr_array_new ();
        g_ptr_array_add (MESSAGE_FIELD (task, parts), part);
        st->nesting ++;
        rspamd_mime_part_get_cte (task, part->raw_headers, part, FALSE);
index f8c43bdd5d721bbb371613c92d4a0d286ba7d6ed..b916019d9a174e5fc69e683924bf55a583bbcee2 100644 (file)
@@ -1548,7 +1548,7 @@ rspamd_html_process_url (rspamd_mempool_t *pool, const gchar *start, guint len,
 
 static struct rspamd_url *
 rspamd_html_process_url_tag (rspamd_mempool_t *pool, struct html_tag *tag,
-               struct html_content *hc)
+                                                        struct html_content *hc)
 {
        struct html_tag_component *comp;
        GList *cur;
@@ -1628,6 +1628,7 @@ struct rspamd_html_url_query_cbd {
        rspamd_mempool_t *pool;
        khash_t (rspamd_url_hash) *url_set;
        struct rspamd_url *url;
+       GPtrArray *part_urls;
 };
 
 static gboolean
@@ -1651,14 +1652,18 @@ rspamd_html_url_query_callback (struct rspamd_url *url, gsize start_offset,
                                        cbd->url->querylen, rspamd_url_query_unsafe (cbd->url));
 
        url->flags |= RSPAMD_URL_FLAG_QUERY;
-       rspamd_url_set_add_or_increase (cbd->url_set, url);
+
+       if (rspamd_url_set_add_or_increase (cbd->url_set, url) && cbd->part_urls) {
+               g_ptr_array_add (cbd->part_urls, url);
+       }
 
        return TRUE;
 }
 
 static void
 rspamd_process_html_url (rspamd_mempool_t *pool, struct rspamd_url *url,
-                                                khash_t (rspamd_url_hash) *url_set)
+                                                khash_t (rspamd_url_hash) *url_set,
+                                                GPtrArray *part_urls)
 {
        if (url->flags & RSPAMD_URL_FLAG_UNNORMALISED) {
                url->flags |= RSPAMD_URL_FLAG_OBSCURED;
@@ -1670,12 +1675,17 @@ rspamd_process_html_url (rspamd_mempool_t *pool, struct rspamd_url *url,
                qcbd.pool = pool;
                qcbd.url_set = url_set;
                qcbd.url = url;
+               qcbd.part_urls = part_urls;
 
                rspamd_url_find_multiple(pool,
                                rspamd_url_query_unsafe (url), url->querylen,
                                RSPAMD_URL_FIND_ALL, NULL,
                                rspamd_html_url_query_callback, &qcbd);
        }
+
+       if (part_urls) {
+               g_ptr_array_add (part_urls, url);
+       }
 }
 
 static void
@@ -1732,7 +1742,8 @@ rspamd_html_process_data_image (rspamd_mempool_t *pool,
 
 static void
 rspamd_html_process_img_tag (rspamd_mempool_t *pool, struct html_tag *tag,
-               struct html_content *hc, khash_t (rspamd_url_hash) *url_set)
+                                                        struct html_content *hc, khash_t (rspamd_url_hash) *url_set,
+                                                        GPtrArray *part_urls)
 {
        struct html_tag_component *comp;
        struct html_image *img;
@@ -1778,7 +1789,11 @@ rspamd_html_process_img_tag (rspamd_mempool_t *pool, struct html_tag *tag,
 
                                                if (img->url) {
                                                        img->url->flags |= RSPAMD_URL_FLAG_IMAGE;
-                                                       rspamd_url_set_add_or_increase (url_set, img->url);
+
+                                                       if (rspamd_url_set_add_or_increase (url_set, img->url) &&
+                                                               part_urls) {
+                                                               g_ptr_array_add (part_urls, img->url);
+                                                       }
                                                }
                                        }
                                }
@@ -2603,7 +2618,8 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool,
                                                           struct html_content *hc,
                                                           GByteArray *in,
                                                           GList **exceptions,
-                                                          khash_t (rspamd_url_hash) *url_set)
+                                                          khash_t (rspamd_url_hash) *url_set,
+                                                          GPtrArray *part_urls)
 {
        const guchar *p, *c, *end, *savep = NULL;
        guchar t;
@@ -3067,7 +3083,8 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool,
 
                                                        if (url_set != NULL) {
                                                                if (rspamd_url_set_add_or_increase (url_set, url)) {
-                                                                       rspamd_process_html_url (pool, url, url_set);
+                                                                       rspamd_process_html_url (pool, url, url_set,
+                                                                                       part_urls);
                                                                }
                                                        }
 
@@ -3129,7 +3146,8 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool,
                                }
 
                                if (cur_tag->id == Tag_IMG && !(cur_tag->flags & FL_CLOSING)) {
-                                       rspamd_html_process_img_tag (pool, cur_tag, hc, url_set);
+                                       rspamd_html_process_img_tag (pool, cur_tag, hc, url_set,
+                                                       part_urls);
                                }
                                else if (cur_tag->flags & FL_BLOCK) {
                                        struct html_block *bl;
@@ -3194,5 +3212,5 @@ rspamd_html_process_part (rspamd_mempool_t *pool,
                struct html_content *hc,
                GByteArray *in)
 {
-       return rspamd_html_process_part_full (pool, hc, in, NULL, NULL);
+       return rspamd_html_process_part_full (pool, hc, in, NULL, NULL, NULL);
 }
index 72eac8d79864d7a3ece2da90f2b25846ae9fbd3e..b319964ce9d7e080c968cb87c60837d0be460291 100644 (file)
@@ -143,7 +143,8 @@ GByteArray *rspamd_html_process_part (rspamd_mempool_t *pool,
 GByteArray *rspamd_html_process_part_full (rspamd_mempool_t *pool,
                                                                                   struct html_content *hc,
                                                                                   GByteArray *in, GList **exceptions,
-                                                                                  khash_t (rspamd_url_hash) *url_set);
+                                                                                  khash_t (rspamd_url_hash) *url_set,
+                                                                                  GPtrArray *part_urls);
 
 /*
  * Returns true if a specified tag has been seen in a part
index a47d732f7cd28863827ce3c324923384ec1b7311..c10073dcb3802e0160f34e13a0afff60577e23e6 100644 (file)
@@ -3296,7 +3296,13 @@ rspamd_url_query_callback (struct rspamd_url *url, gsize start_offset,
        }
 
        url->flags |= RSPAMD_URL_FLAG_QUERY;
-       rspamd_url_set_add_or_increase (MESSAGE_FIELD (task, urls), url);
+
+
+       if (rspamd_url_set_add_or_increase (MESSAGE_FIELD (task, urls), url)) {
+               if (cbd->part && cbd->part->mime_part->urls) {
+                       g_ptr_array_add (cbd->part->mime_part->urls, url);
+               }
+       }
 
        return TRUE;
 }
@@ -3347,7 +3353,11 @@ rspamd_url_text_part_callback (struct rspamd_url *url, gsize start_offset,
        }
 
        url->flags |= RSPAMD_URL_FLAG_FROM_TEXT;
-       rspamd_url_set_add_or_increase (MESSAGE_FIELD (task, urls), url);
+
+       if (rspamd_url_set_add_or_increase (MESSAGE_FIELD (task, urls), url) &&
+                       cbd->part->mime_part->urls) {
+               g_ptr_array_add (cbd->part->mime_part->urls, url);
+       }
 
        cbd->part->exceptions = g_list_prepend (
                        cbd->part->exceptions,