From 8a5b3f4940311326250c78f3a9f0abcd7804b868 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Sat, 15 Jul 2017 23:16:03 +0100 Subject: [PATCH] [Fix] Parse HREF urls without explicit prefix Issue: #1752 --- src/libserver/html.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/libserver/html.c b/src/libserver/html.c index 7d0a30390..2d57f0b0d 100644 --- a/src/libserver/html.c +++ b/src/libserver/html.c @@ -1453,7 +1453,7 @@ rspamd_html_process_url (rspamd_mempool_t *pool, const gchar *start, guint len, const gchar *p, *s; gchar *d; guint i, dlen; - gboolean has_bad_chars = FALSE; + gboolean has_bad_chars = FALSE, no_prefix = FALSE; static const gchar hexdigests[16] = "0123456789abcdef"; p = start; @@ -1495,9 +1495,20 @@ rspamd_html_process_url (rspamd_mempool_t *pool, const gchar *start, guint len, } } + if (rspamd_substring_search (s, len, "://", 3) == (-1)) { + /* We have no prefix */ + dlen += sizeof ("http://") - 1; + no_prefix = TRUE; + } + decoded = rspamd_mempool_alloc (pool, dlen + 1); d = decoded; + if (no_prefix) { + memcpy (d, "http://", sizeof ("http://") - 1); + d += sizeof ("http://") - 1; + } + /* We also need to remove all internal newlines and encode unsafe characters */ for (i = 0; i < len; i ++) { if (G_UNLIKELY (s[i] == '\r' || s[i] == '\n')) { -- 2.39.5