From 141617dab47bf741af4578c656d4cda5f18742ed Mon Sep 17 00:00:00 2001
From: Vsevolod Stakhov
Date: Tue, 1 Jun 2021 20:18:47 +0100
Subject: [PATCH] [Rework] Html: Add images processing logic

Move the lookup of embedded HTML images by Content-ID out of
rspamd_image_process_part() into the C++ HTML module and expose it to C
code as rspamd_html_find_embedded_image().

NOTE(review): the removed C loop skipped a leading "cid:" in himg->src
before comparing, while the new helper compares src verbatim. Presumably
src is stored without the scheme for embedded images - confirm against
the img tag parser.

---
 src/libmime/images.c        | 49 ++++++++++++++-----------------------
 src/libserver/html/html.cxx | 32 ++++++++++++++++++++++++
 src/libserver/html/html.h   | 10 ++++++++
 3 files changed, 60 insertions(+), 31 deletions(-)

diff --git a/src/libmime/images.c b/src/libmime/images.c
index 960036d78..4e0872f38 100644
--- a/src/libmime/images.c
+++ b/src/libmime/images.c
@@ -658,8 +658,8 @@ rspamd_image_process_part (struct rspamd_task *task, struct rspamd_mime_part *pa
 	struct rspamd_mime_header *rh;
 	struct rspamd_mime_text_part *tp;
 	struct html_image *himg;
-	const gchar *cid, *html_cid;
-	guint cid_len, i, j;
+	const gchar *cid;
+	guint cid_len, i;
 	struct rspamd_image *img;
 
 	img = (struct rspamd_image *)part->specific.img;
@@ -684,35 +684,22 @@ rspamd_image_process_part (struct rspamd_task *task, struct rspamd_mime_part *pa
 	}
 
 	PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, tp) {
-		if (IS_TEXT_PART_HTML (tp) && tp->html != NULL &&
-				tp->html->images != NULL) {
-			for (j = 0; j < tp->html->images->len; j ++) {
-				himg = g_ptr_array_index (tp->html->images, j);
-
-				if ((himg->flags & RSPAMD_HTML_FLAG_IMAGE_EMBEDDED) &&
-					himg->src) {
-					html_cid = himg->src;
-
-					if (strncmp (html_cid, "cid:", 4) == 0) {
-						html_cid += 4;
-					}
-
-					if (strlen (html_cid) == cid_len &&
-						memcmp (html_cid, cid, cid_len) == 0) {
-						img->html_image = himg;
-						himg->embedded_image = img;
-
-						msg_debug_images ("found linked image by cid: <%s>",
-								cid);
-
-						if (himg->height == 0) {
-							himg->height = img->height;
-						}
-
-						if (himg->width == 0) {
-							himg->width = img->width;
-						}
-					}
+		if (IS_TEXT_PART_HTML (tp) && tp->html != NULL) {
+			himg = rspamd_html_find_embedded_image(tp->html, cid, cid_len);
+
+			if (himg != NULL) {
+				img->html_image = himg;
+				himg->embedded_image = img;
+
+				msg_debug_images ("found linked image by cid: <%s>",
+						cid);
+
+				if (himg->height == 0) {
+					himg->height = img->height;
+				}
+
+				if (himg->width == 0) {
+					himg->width = img->width;
 				}
 			}
 		}
diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx
index 45a9afa18..00f1d331f 100644
--- a/src/libserver/html/html.cxx
+++ b/src/libserver/html/html.cxx
@@ -2277,6 +2277,23 @@ html_process_part_full (rspamd_mempool_t *pool,
 	return hc;
 }
 
+static auto
+html_find_image_by_cid(const html_content &hc, std::string_view cid)
+	-> std::optional<const html_image *>
+{
+	for (const auto *html_image : hc.images) {
+		/* Embedded images only; src is compared verbatim with cid */
+		if (html_image->flags & RSPAMD_HTML_FLAG_IMAGE_EMBEDDED &&
+				html_image->src != nullptr) {
+			if (cid == html_image->src) {
+				return html_image;
+			}
+		}
+	}
+
+	return std::nullopt;
+}
+
 }
 
 void *
@@ -2355,4 +2372,19 @@ rspamd_html_tag_name(void *p, gsize *len)
 	}
 
 	return tag->name.data();
+}
+
+struct html_image*
+rspamd_html_find_embedded_image(void *html_content,
+		const char *cid, gsize cid_len)
+{
+	auto *hc = rspamd::html::html_content::from_ptr(html_content);
+
+	auto maybe_img = rspamd::html::html_find_image_by_cid(*hc, {cid, cid_len});
+
+	if (maybe_img) {
+		return const_cast<html_image *>(maybe_img.value());
+	}
+
+	return nullptr;
 }
\ No newline at end of file
diff --git a/src/libserver/html/html.h b/src/libserver/html/html.h
index 94063b9be..1e71d0c2d 100644
--- a/src/libserver/html/html.h
+++ b/src/libserver/html/html.h
@@ -144,6 +144,16 @@ gint rspamd_html_tag_by_name(const gchar *name);
  */
 const gchar *rspamd_html_tag_name(void *tag, gsize *len);
 
+/**
+ * Find an embedded HTML image by its content id
+ * @param html_content opaque pointer to the parsed html content
+ * @param cid content id to look for (compared with the image src)
+ * @param cid_len length of `cid` in bytes
+ * @return matching embedded image or NULL if none has been found
+ */
+struct html_image* rspamd_html_find_embedded_image(void *html_content,
+		const char *cid, gsize cid_len);
+
 #ifdef __cplusplus
 }
 
-- 
2.39.5