diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-07-23 13:02:24 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-07-23 13:02:24 +0100 |
commit | c5580a4ac9b8eeb44b80c0fbed78f5ecb541687e (patch) | |
tree | 4831144297deb553f3c66e9315cb50a2798b3482 /src | |
parent | 908078ff19c650fb1275c5468b3011738c3cb09e (diff) | |
download | rspamd-c5580a4ac9b8eeb44b80c0fbed78f5ecb541687e.tar.gz rspamd-c5580a4ac9b8eeb44b80c0fbed78f5ecb541687e.zip |
Add HTML images concept.
Diffstat (limited to 'src')
-rw-r--r-- | src/libserver/html.c | 95 | ||||
-rw-r--r-- | src/libserver/html.h | 17 |
2 files changed, 102 insertions, 10 deletions
diff --git a/src/libserver/html.c b/src/libserver/html.c
index 16d966c81..e9b7eab18 100644
--- a/src/libserver/html.c
+++ b/src/libserver/html.c
@@ -982,6 +982,25 @@ rspamd_html_parse_tag_component (rspamd_mempool_t *pool,
 			ret = TRUE;
 		}
 	}
+	else if (tag->id == Tag_IMG) {
+		/* Check width and height if present; len must equal the name length */
+		if (len == 5 && g_ascii_strncasecmp (begin, "width", len) == 0) {
+			comp = rspamd_mempool_alloc (pool, sizeof (*comp));
+			comp->type = RSPAMD_HTML_COMPONENT_WIDTH;
+			comp->start = NULL;
+			comp->len = 0;
+			tag->params = g_list_prepend (tag->params, comp);
+			ret = TRUE;
+		}
+		else if (len == 6 && g_ascii_strncasecmp (begin, "height", len) == 0) {
+			comp = rspamd_mempool_alloc (pool, sizeof (*comp));
+			comp->type = RSPAMD_HTML_COMPONENT_HEIGHT;
+			comp->start = NULL;
+			comp->len = 0;
+			tag->params = g_list_prepend (tag->params, comp);
+			ret = TRUE;
+		}
+	}
 
 	return ret;
 }
@@ -1282,6 +1301,57 @@ rspamd_html_process_url_tag (rspamd_mempool_t *pool, struct html_tag *tag)
 
 	return NULL;
 }
+/* Collect src, width and height of an <img> tag and append it to hc->images */
+static void
+rspamd_html_process_img_tag (rspamd_mempool_t *pool, struct html_tag *tag,
+		struct html_content *hc)
+{
+	struct html_tag_component *comp;
+	struct html_image *img;
+	rspamd_fstring_t fstr;
+	GList *cur;
+	gulong val;
+
+	img = rspamd_mempool_alloc0 (pool, sizeof (*img));
+
+	/* Step to the next component on every pass so the walk terminates */
+	for (cur = tag->params; cur != NULL; cur = g_list_next (cur)) {
+		comp = cur->data;
+		if (comp->type == RSPAMD_HTML_COMPONENT_HREF && comp->len > 0) {
+			fstr.begin = (gchar *)comp->start;
+			fstr.len = comp->len;
+			img->src = rspamd_mempool_fstrdup (pool, &fstr);
+
+			if (comp->len > sizeof ("cid:") - 1 && memcmp (comp->start,
+					"cid:", sizeof ("cid:") - 1) == 0) {
+				/* We have an embedded image */
+				img->flags |= RSPAMD_HTML_FLAG_IMAGE_EMBEDDED;
+			}
+			else {
+				img->flags |= RSPAMD_HTML_FLAG_IMAGE_EXTERNAL;
+			}
+		}
+		else if (comp->type == RSPAMD_HTML_COMPONENT_HEIGHT) {
+			if (rspamd_strtoul (comp->start, comp->len, &val)) {
+				img->height = val;
+			}
+		}
+		else if (comp->type == RSPAMD_HTML_COMPONENT_WIDTH) {
+			if (rspamd_strtoul (comp->start, comp->len, &val)) {
+				img->width = val;
+			}
+		}
+	}
+
+	if (hc->images == NULL) {
+		hc->images = g_ptr_array_sized_new (4);
+		rspamd_mempool_add_destructor (pool, rspamd_ptr_array_free_hard,
+				hc->images);
+	}
+
+	g_ptr_array_add (hc->images, img);
+}
+
 GByteArray*
 rspamd_html_process_part_full (rspamd_mempool_t *pool, struct html_content *hc,
 		GByteArray *in, GList **exceptions, GHashTable *urls, GHashTable *emails)
@@ -1626,16 +1696,18 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool, struct html_content *hc,
 						target_tbl = urls;
 					}
 
-					turl = g_hash_table_lookup (target_tbl, url);
-
-					if (turl != NULL && turl->phished_url == NULL) {
-						g_hash_table_insert (target_tbl, url, url);
-					}
-					else if (turl == NULL) {
-						g_hash_table_insert (target_tbl, url, url);
-					}
-					else {
-						url = NULL;
+					if (target_tbl != NULL) {
+						turl = g_hash_table_lookup (target_tbl, url);
+
+						if (turl != NULL && turl->phished_url == NULL) {
+							g_hash_table_insert (target_tbl, url, url);
+						}
+						else if (turl == NULL) {
+							g_hash_table_insert (target_tbl, url, url);
+						}
+						else {
+							url = NULL;
+						}
 					}
 
 					href_offset = dest->len;
@@ -1662,6 +1734,9 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool, struct html_content *hc,
 						url = NULL;
 					}
 				}
+				else if (cur_tag->id == Tag_IMG && !(cur_tag->flags & FL_CLOSING)) {
+					rspamd_html_process_img_tag (pool, cur_tag, hc);
+				}
 			}
 			else {
 				state = content_write;
diff --git a/src/libserver/html.h b/src/libserver/html.h
index 5516594e4..29716eb75 100644
--- a/src/libserver/html.h
+++ b/src/libserver/html.h
@@ -8,6 +8,9 @@
 #include "config.h"
 #include "mem_pool.h"
 
+/*
+ * HTML content flags
+ */
 #define RSPAMD_HTML_FLAG_BAD_START (1 << 0)
 #define RSPAMD_HTML_FLAG_BAD_ELEMENTS (1 << 1)
 #define RSPAMD_HTML_FLAG_XML (1 << 2)
@@ -15,6 +18,12 @@
 #define RSPAMD_HTML_FLAG_UNKNOWN_ELEMENTS (1 << 4)
 #define RSPAMD_HTML_FLAG_DUPLICATE_ELEMENTS (1 << 5)
 
+/*
+ * Image flags
+ */
+#define RSPAMD_HTML_FLAG_IMAGE_EMBEDDED (1 << 0)
+#define RSPAMD_HTML_FLAG_IMAGE_EXTERNAL (1 << 1)
+
 enum html_component_type {
 	RSPAMD_HTML_COMPONENT_NAME = 0,
 	RSPAMD_HTML_COMPONENT_HREF,
@@ -29,6 +38,13 @@ struct html_tag_component {
 	guint len;
 };
 
+struct html_image {
+	guint height;
+	guint width;
+	guint flags;
+	gchar *src;
+};
+
 struct html_tag {
 	gint id;
 	struct html_tag_component name;
@@ -43,6 +59,7 @@ struct html_content {
 	GNode *html_tags;
 	gint flags;
 	guchar *tags_seen;
+	GPtrArray *images;
 };
 
 /*