summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author: Vsevolod Stakhov <vsevolod@highsecure.ru> 2018-06-25 17:59:04 +0100
committer: Vsevolod Stakhov <vsevolod@highsecure.ru> 2018-06-25 17:59:04 +0100
commit: 0bcc686d0f5a1b3470104b6311dd85d39fa31c88 (patch)
tree: c085c8cf6ebf97b34d7ae976e688c84c78a95278 /src
parent: e077b8fe2076e33281ca73ea741d4ec39a0a5ef4 (diff)
download: rspamd-0bcc686d0f5a1b3470104b6311dd85d39fa31c88.tar.gz
download: rspamd-0bcc686d0f5a1b3470104b6311dd85d39fa31c88.zip
[Feature] Support base tag in HTML
Diffstat (limited to 'src')
-rw-r--r-- src/libserver/html.c 65
-rw-r--r-- src/libserver/html.h 1
2 files changed, 62 insertions, 4 deletions
diff --git a/src/libserver/html.c b/src/libserver/html.c
index 4b7f28e9b..4b3eb9d7f 100644
--- a/src/libserver/html.c
+++ b/src/libserver/html.c
@@ -1629,11 +1629,14 @@ rspamd_html_process_url (rspamd_mempool_t *pool, const gchar *start, guint len,
}
static struct rspamd_url *
-rspamd_html_process_url_tag (rspamd_mempool_t *pool, struct html_tag *tag)
+rspamd_html_process_url_tag (rspamd_mempool_t *pool, struct html_tag *tag,
+ struct html_content *hc)
{
struct html_tag_component *comp;
GList *cur;
struct rspamd_url *url;
+ const gchar *start;
+ gsize len;
cur = tag->params->head;
@@ -1641,7 +1644,40 @@ rspamd_html_process_url_tag (rspamd_mempool_t *pool, struct html_tag *tag)
comp = cur->data;
if (comp->type == RSPAMD_HTML_COMPONENT_HREF && comp->len > 0) {
- url = rspamd_html_process_url (pool, comp->start, comp->len, comp);
+ start = comp->start;
+ len = comp->len;
+
+ /* Check base url */
+ if (hc && hc->base_url && comp->len > 0) {
+ /*
+ * Relative url cannot start with the following:
+ * schema://
+ * slash
+ */
+
+ if (comp->start[0] != '/' &&
+ rspamd_substring_search (start, len, "://", 3) == -1) {
+ /* Assume relative url */
+ gchar *buf;
+ gboolean need_slash = FALSE;
+
+ len += hc->base_url->urllen;
+
+ if (hc->base_url->string[hc->base_url->urllen - 1] != '/') {
+ need_slash = TRUE;
+ len ++;
+ }
+
+ buf = rspamd_mempool_alloc (pool, len + 1);
+ rspamd_snprintf (buf, len + 1, "%*s%s%*s",
+ hc->base_url->urllen, hc->base_url->string,
+ need_slash ? "/" : "",
+ (gint)len, start);
+ start = buf;
+ }
+ }
+
+ url = rspamd_html_process_url (pool, start, len, comp);
if (url && tag->extra == NULL) {
tag->extra = url;
@@ -2889,7 +2925,7 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool, struct html_content *hc,
if (cur_tag->id == Tag_A || cur_tag->id == Tag_IFRAME) {
if (!(cur_tag->flags & (FL_CLOSING))) {
- url = rspamd_html_process_url_tag (pool, cur_tag);
+ url = rspamd_html_process_url_tag (pool, cur_tag, hc);
if (url != NULL) {
@@ -2958,7 +2994,28 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool, struct html_content *hc,
}
}
else if (cur_tag->id == Tag_LINK) {
- url = rspamd_html_process_url_tag (pool, cur_tag);
+ url = rspamd_html_process_url_tag (pool, cur_tag, hc);
+ }
+ else if (cur_tag->id == Tag_BASE && !(cur_tag->flags & (FL_CLOSING))) {
+ struct html_tag *prev_tag = NULL;
+
+ if (cur_level && cur_level->parent) {
+ prev_tag = cur_level->parent->data;
+ }
+
+ /*
+ * Base is allowed only within head tag but we slightly
+ * relax that
+ */
+ if (!prev_tag || prev_tag->id == Tag_HEAD ||
+ prev_tag->id == Tag_HTML) {
+ url = rspamd_html_process_url_tag (pool, cur_tag, hc);
+
+ if (url != NULL) {
+ /* We have a base tag available */
+ hc->base_url = url;
+ }
+ }
}
if (cur_tag->id == Tag_IMG && !(cur_tag->flags & FL_CLOSING)) {
diff --git a/src/libserver/html.h b/src/libserver/html.h
index 0414f48d9..c7534d309 100644
--- a/src/libserver/html.h
+++ b/src/libserver/html.h
@@ -107,6 +107,7 @@ struct html_tag {
struct rspamd_task;
struct html_content {
+ struct rspamd_url *base_url;
GNode *html_tags;
gint flags;
guint total_tags;