aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2015-07-23 13:02:24 +0100
committerVsevolod Stakhov <vsevolod@highsecure.ru>2015-07-23 13:02:24 +0100
commitc5580a4ac9b8eeb44b80c0fbed78f5ecb541687e (patch)
tree4831144297deb553f3c66e9315cb50a2798b3482 /src
parent908078ff19c650fb1275c5468b3011738c3cb09e (diff)
downloadrspamd-c5580a4ac9b8eeb44b80c0fbed78f5ecb541687e.tar.gz
rspamd-c5580a4ac9b8eeb44b80c0fbed78f5ecb541687e.zip
Add HTML images concept.
Diffstat (limited to 'src')
-rw-r--r--src/libserver/html.c95
-rw-r--r--src/libserver/html.h17
2 files changed, 102 insertions, 10 deletions
diff --git a/src/libserver/html.c b/src/libserver/html.c
index 16d966c81..e9b7eab18 100644
--- a/src/libserver/html.c
+++ b/src/libserver/html.c
@@ -982,6 +982,25 @@ rspamd_html_parse_tag_component (rspamd_mempool_t *pool,
ret = TRUE;
}
}
+	else if (tag->id == Tag_IMG) {
+		/*
+		 * Register the dimension attributes of an <img> tag. Only the
+		 * component type is recorded here: start/len are left empty
+		 * (presumably filled in once the attribute value is parsed).
+		 */
+		if (len == sizeof ("width") - 1 &&
+				g_ascii_strncasecmp (begin, "width", len) == 0) {
+			comp = rspamd_mempool_alloc (pool, sizeof (*comp));
+			comp->type = RSPAMD_HTML_COMPONENT_WIDTH;
+			comp->start = NULL;
+			comp->len = 0;
+			tag->params = g_list_prepend (tag->params, comp);
+			ret = TRUE;
+		}
+		/*
+		 * "height" is six characters long; the length is derived from the
+		 * literal so that the name check cannot drift out of sync with it.
+		 */
+		else if (len == sizeof ("height") - 1 &&
+				g_ascii_strncasecmp (begin, "height", len) == 0) {
+			comp = rspamd_mempool_alloc (pool, sizeof (*comp));
+			comp->type = RSPAMD_HTML_COMPONENT_HEIGHT;
+			comp->start = NULL;
+			comp->len = 0;
+			tag->params = g_list_prepend (tag->params, comp);
+			ret = TRUE;
+		}
+	}
return ret;
}
@@ -1282,6 +1301,57 @@ rspamd_html_process_url_tag (rspamd_mempool_t *pool, struct html_tag *tag)
return NULL;
}
+/*
+ * Build an html_image record from the parsed components of an <img> tag
+ * and append it to hc->images.
+ *
+ * pool - memory pool used for the image record and the copy of its source;
+ *        a destructor for the images array is registered on it when the
+ *        array is first created
+ * tag  - tag whose params list (html_tag_component entries) is scanned
+ * hc   - HTML content that accumulates the images; hc->images is created
+ *        lazily on the first image
+ */
+static void
+rspamd_html_process_img_tag (rspamd_mempool_t *pool, struct html_tag *tag,
+		struct html_content *hc)
+{
+	struct html_tag_component *comp;
+	struct html_image *img;
+	rspamd_fstring_t fstr;
+	GList *cur;
+	gulong val;
+
+	img = rspamd_mempool_alloc0 (pool, sizeof (*img));
+
+	/*
+	 * The list cursor must advance on every iteration, otherwise the walk
+	 * never leaves the first component and the function does not return.
+	 */
+	for (cur = tag->params; cur != NULL; cur = g_list_next (cur)) {
+		comp = cur->data;
+
+		if (comp->type == RSPAMD_HTML_COMPONENT_HREF && comp->len > 0) {
+			/* Keep a pool-owned copy of the image source */
+			fstr.begin = (gchar *)comp->start;
+			fstr.len = comp->len;
+			img->src = rspamd_mempool_fstrdup (pool, &fstr);
+
+			if (comp->len > sizeof ("cid:") - 1 && memcmp (comp->start,
+					"cid:", sizeof ("cid:") - 1) == 0) {
+				/* We have an embedded image */
+				img->flags |= RSPAMD_HTML_FLAG_IMAGE_EMBEDDED;
+			}
+			else {
+				/* Any other non-empty source is treated as external */
+				img->flags |= RSPAMD_HTML_FLAG_IMAGE_EXTERNAL;
+			}
+		}
+		else if (comp->type == RSPAMD_HTML_COMPONENT_HEIGHT) {
+			/* The dimension is stored only when the value parses */
+			if (rspamd_strtoul (comp->start, comp->len, &val)) {
+				img->height = val;
+			}
+		}
+		else if (comp->type == RSPAMD_HTML_COMPONENT_WIDTH) {
+			if (rspamd_strtoul (comp->start, comp->len, &val)) {
+				img->width = val;
+			}
+		}
+	}
+
+	if (hc->images == NULL) {
+		/* First image of this content: create the array and tie it to the pool */
+		hc->images = g_ptr_array_sized_new (4);
+		rspamd_mempool_add_destructor (pool, rspamd_ptr_array_free_hard,
+				hc->images);
+	}
+
+	g_ptr_array_add (hc->images, img);
+}
+
GByteArray*
rspamd_html_process_part_full (rspamd_mempool_t *pool, struct html_content *hc,
GByteArray *in, GList **exceptions, GHashTable *urls, GHashTable *emails)
@@ -1626,16 +1696,18 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool, struct html_content *hc,
target_tbl = urls;
}
- turl = g_hash_table_lookup (target_tbl, url);
-
- if (turl != NULL && turl->phished_url == NULL) {
- g_hash_table_insert (target_tbl, url, url);
- }
- else if (turl == NULL) {
- g_hash_table_insert (target_tbl, url, url);
- }
- else {
- url = NULL;
+ if (target_tbl != NULL) {
+ turl = g_hash_table_lookup (target_tbl, url);
+
+ if (turl != NULL && turl->phished_url == NULL) {
+ g_hash_table_insert (target_tbl, url, url);
+ }
+ else if (turl == NULL) {
+ g_hash_table_insert (target_tbl, url, url);
+ }
+ else {
+ url = NULL;
+ }
}
href_offset = dest->len;
@@ -1662,6 +1734,9 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool, struct html_content *hc,
url = NULL;
}
}
+ else if (cur_tag->id == Tag_IMG && !(cur_tag->flags & FL_CLOSING)) {
+ rspamd_html_process_img_tag (pool, cur_tag, hc);
+ }
}
else {
state = content_write;
diff --git a/src/libserver/html.h b/src/libserver/html.h
index 5516594e4..29716eb75 100644
--- a/src/libserver/html.h
+++ b/src/libserver/html.h
@@ -8,6 +8,9 @@
#include "config.h"
#include "mem_pool.h"
+/*
+ * HTML content flags
+ */
#define RSPAMD_HTML_FLAG_BAD_START (1 << 0)
#define RSPAMD_HTML_FLAG_BAD_ELEMENTS (1 << 1)
#define RSPAMD_HTML_FLAG_XML (1 << 2)
@@ -15,6 +18,12 @@
#define RSPAMD_HTML_FLAG_UNKNOWN_ELEMENTS (1 << 4)
#define RSPAMD_HTML_FLAG_DUPLICATE_ELEMENTS (1 << 5)
+/*
+ * Image flags
+ */
+#define RSPAMD_HTML_FLAG_IMAGE_EMBEDDED (1 << 0)
+#define RSPAMD_HTML_FLAG_IMAGE_EXTERNAL (1 << 1)
+
enum html_component_type {
RSPAMD_HTML_COMPONENT_NAME = 0,
RSPAMD_HTML_COMPONENT_HREF,
@@ -29,6 +38,13 @@ struct html_tag_component {
guint len;
};
+struct html_image { /* One <img> tag as collected while processing HTML */
+ guint height; /* Value parsed from the height component; set only when parsing succeeds */
+ guint width; /* Value parsed from the width component; set only when parsing succeeds */
+ guint flags; /* Bitmask of RSPAMD_HTML_FLAG_IMAGE_* values */
+ gchar *src; /* Pool-allocated copy of the image source; "cid:" prefix marks an embedded image */
+};
+
struct html_tag {
gint id;
struct html_tag_component name;
@@ -43,6 +59,7 @@ struct html_content {
GNode *html_tags;
gint flags;
guchar *tags_seen;
+ GPtrArray *images;
};
/*