Browse Source

[Feature] Core: Process data urls for images

tags/1.9.0
Vsevolod Stakhov 5 years ago
parent
commit
b28a5df944
1 changed files with 63 additions and 1 deletions
  1. 63
    1
      src/libserver/html.c

+ 63
- 1
src/libserver/html.c View File

@@ -23,6 +23,7 @@
#include "html_entities.h"
#include "url.h"
#include "contrib/libucl/khash.h"
#include "libmime/images.h"

#include <unicode/uversion.h>
#include <unicode/ucnv.h>
@@ -1482,6 +1483,58 @@ rspamd_process_html_url (rspamd_mempool_t *pool, struct rspamd_url *url,
}
}

static void
rspamd_html_process_data_image (rspamd_mempool_t *pool,
struct html_image *img,
struct html_tag_component *src)
{
/*
* Here, we do very basic processing of the data:
* detect if we have something like: `data:image/xxx;base64,yyyzzz==`
* We only parse base64 encoded data.
* We ignore content type so far
*/
struct rspamd_image *parsed_image;
const gchar *semicolon_pos = NULL, *end = src->start + src->len;

semicolon_pos = src->start;

while ((semicolon_pos = memchr (semicolon_pos, ';', end - semicolon_pos)) != NULL) {
if (end - semicolon_pos > sizeof ("base64,")) {
if (memcmp (semicolon_pos + 1, "base64,", sizeof ("base64,") - 1) == 0) {
const gchar *data_pos = semicolon_pos + sizeof ("base64,");
gchar *decoded;
gsize encoded_len = end - data_pos, decoded_len;
rspamd_ftok_t inp;

decoded_len = (encoded_len / 4 * 3) + 12;
decoded = rspamd_mempool_alloc (pool, decoded_len);
rspamd_cryptobox_base64_decode (data_pos, encoded_len,
decoded, &decoded_len);
inp.begin = decoded;
inp.len = decoded_len;

parsed_image = rspamd_maybe_process_image (pool, &inp);

if (parsed_image) {
msg_debug_html ("detected %s image of size %ud x %ud in data url",
rspamd_image_type_str (parsed_image->type),
parsed_image->width, parsed_image->height);
img->embedded_image = parsed_image;
}
}

break;
}
else {
/* Nothing useful */
return;
}

semicolon_pos ++;
}
}

static void
rspamd_html_process_img_tag (rspamd_mempool_t *pool, struct html_tag *tag,
struct html_content *hc)
@@ -1517,7 +1570,7 @@ rspamd_html_process_img_tag (rspamd_mempool_t *pool, struct html_tag *tag,
/* We have an embedded image in HTML tag */
img->flags |=
(RSPAMD_HTML_FLAG_IMAGE_EMBEDDED|RSPAMD_HTML_FLAG_IMAGE_DATA);
rspamd_html_process_data_image (pool, img, comp);
}
else {
img->flags |= RSPAMD_HTML_FLAG_IMAGE_EXTERNAL;
@@ -1593,6 +1646,15 @@ rspamd_html_process_img_tag (rspamd_mempool_t *pool, struct html_tag *tag,
hc->images);
}

if (img->embedded_image) {
if (!seen_height) {
img->height = img->embedded_image->height;
}
if (!seen_width) {
img->width = img->embedded_image->width;
}
}

g_ptr_array_add (hc->images, img);
tag->extra = img;
}

Loading…
Cancel
Save