diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2018-06-25 17:59:04 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2018-06-25 17:59:04 +0100 |
commit | 0bcc686d0f5a1b3470104b6311dd85d39fa31c88 (patch) | |
tree | c085c8cf6ebf97b34d7ae976e688c84c78a95278 /src/libserver/html.c | |
parent | e077b8fe2076e33281ca73ea741d4ec39a0a5ef4 (diff) | |
download | rspamd-0bcc686d0f5a1b3470104b6311dd85d39fa31c88.tar.gz rspamd-0bcc686d0f5a1b3470104b6311dd85d39fa31c88.zip |
[Feature] Support base tag in HTML
Diffstat (limited to 'src/libserver/html.c')
-rw-r--r-- | src/libserver/html.c | 65 |
1 files changed, 61 insertions, 4 deletions
diff --git a/src/libserver/html.c b/src/libserver/html.c index 4b7f28e9b..4b3eb9d7f 100644 --- a/src/libserver/html.c +++ b/src/libserver/html.c @@ -1629,11 +1629,14 @@ rspamd_html_process_url (rspamd_mempool_t *pool, const gchar *start, guint len, } static struct rspamd_url * -rspamd_html_process_url_tag (rspamd_mempool_t *pool, struct html_tag *tag) +rspamd_html_process_url_tag (rspamd_mempool_t *pool, struct html_tag *tag, + struct html_content *hc) { struct html_tag_component *comp; GList *cur; struct rspamd_url *url; + const gchar *start; + gsize len; cur = tag->params->head; @@ -1641,7 +1644,40 @@ rspamd_html_process_url_tag (rspamd_mempool_t *pool, struct html_tag *tag) comp = cur->data; if (comp->type == RSPAMD_HTML_COMPONENT_HREF && comp->len > 0) { - url = rspamd_html_process_url (pool, comp->start, comp->len, comp); + start = comp->start; + len = comp->len; + + /* Check base url */ + if (hc && hc->base_url && comp->len > 0) { + /* + * Relative url canot start from the following: + * schema:// + * slash + */ + + if (comp->start[0] != '/' && + rspamd_substring_search (start, len, "://", 3) == -1) { + /* Assume relative url */ + gchar *buf; + gboolean need_slash = FALSE; + + len += hc->base_url->urllen; + + if (hc->base_url->string[hc->base_url->urllen - 1] != '/') { + need_slash = TRUE; + len ++; + } + + buf = rspamd_mempool_alloc (pool, len + 1); + rspamd_snprintf (buf, len + 1, "%*s%s%*s", + hc->base_url->urllen, hc->base_url->string, + need_slash ? "/" : "", + (gint)len, start); + start = buf; + } + } + + url = rspamd_html_process_url (pool, start, len, comp); if (url && tag->extra == NULL) { tag->extra = url; @@ -2889,7 +2925,7 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool, struct html_content *hc, if (cur_tag->id == Tag_A || cur_tag->id == Tag_IFRAME) { if (!(cur_tag->flags & (FL_CLOSING))) { - url = rspamd_html_process_url_tag (pool, cur_tag); + url = rspamd_html_process_url_tag (pool, cur_tag, hc); if (url != NULL) { @@ -2958,7 +2994,28 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool, struct html_content *hc, } } else if (cur_tag->id == Tag_LINK) { - url = rspamd_html_process_url_tag (pool, cur_tag); + url = rspamd_html_process_url_tag (pool, cur_tag, hc); + } + else if (cur_tag->id == Tag_BASE && !(cur_tag->flags & (FL_CLOSING))) { + struct html_tag *prev_tag = NULL; + + if (cur_level && cur_level->parent) { + prev_tag = cur_level->parent->data; + } + + /* + * Base is allowed only within head tag but we slightly + * relax that + */ + if (!prev_tag || prev_tag->id == Tag_HEAD || + prev_tag->id == Tag_HTML) { + url = rspamd_html_process_url_tag (pool, cur_tag, hc); + + if (url != NULL) { + /* We have a base tag available */ + hc->base_url = url; + } + } } if (cur_tag->id == Tag_IMG && !(cur_tag->flags & FL_CLOSING)) { |