From c4274f305a103d22f51d2b843720877a091e688f Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Sat, 12 Aug 2017 19:29:02 +0100 Subject: [PATCH] [Fix] Use raw urls when sending requests to redirector --- src/libserver/url.c | 72 +++++++++++++++--------------- src/libserver/url.h | 4 ++ src/plugins/lua/url_redirector.lua | 2 +- 3 files changed, 42 insertions(+), 36 deletions(-) diff --git a/src/libserver/url.c b/src/libserver/url.c index cc7c7acbf..918f7eee3 100644 --- a/src/libserver/url.c +++ b/src/libserver/url.c @@ -1414,6 +1414,7 @@ rspamd_url_shift (struct rspamd_url *uri, gsize nlen, memmove (uri->string + uri->protocollen, uri->string + old_shift, uri->urllen - uri->protocollen); uri->urllen -= shift; + uri->flags |= RSPAMD_URL_FLAG_SCHEMAENCODED; break; case UF_HOST: if (nlen >= uri->hostlen) { @@ -1428,6 +1429,7 @@ rspamd_url_shift (struct rspamd_url *uri, gsize nlen, memmove (uri->host + uri->hostlen, uri->host + old_shift, uri->datalen + uri->querylen + uri->fragmentlen); uri->urllen -= shift; + uri->flags |= RSPAMD_URL_FLAG_HOSTENCODED; break; case UF_PATH: if (nlen >= uri->datalen) { @@ -1442,6 +1444,7 @@ rspamd_url_shift (struct rspamd_url *uri, gsize nlen, memmove (uri->data + uri->datalen, uri->data + old_shift, uri->querylen + uri->fragmentlen); uri->urllen -= shift; + uri->flags |= RSPAMD_URL_FLAG_PATHENCODED; break; case UF_QUERY: if (nlen >= uri->querylen) { @@ -1456,6 +1459,7 @@ rspamd_url_shift (struct rspamd_url *uri, gsize nlen, memmove (uri->query + uri->querylen, uri->query + old_shift, uri->fragmentlen); uri->urllen -= shift; + uri->flags |= RSPAMD_URL_FLAG_QUERYENCODED; break; case UF_FRAGMENT: if (nlen >= uri->fragmentlen) { @@ -1542,42 +1546,46 @@ rspamd_url_parse (struct rspamd_url *uri, gchar *uristring, gsize len, } if (end > uristring && (guint) (end - uristring) != len) { - /* We have extra data at the end of uri, so we are ignoring it for now */ - p = rspamd_mempool_alloc (pool, end - uristring + 1); - rspamd_strlcpy (p, uristring, end - uristring + 1); len = end - uristring; } + uri->raw = p; + uri->rawlen = len; + uri->string = rspamd_mempool_alloc (pool, len + 1); + rspamd_strlcpy (uri->string, p, len + 1); + uri->urllen = len; + for (i = 0; i < UF_MAX; i++) { if (u.field_set & (1 << i)) { - comp = p + u.field_data[i].off; + comp = uri->string + u.field_data[i].off; complen = u.field_data[i].len; + switch (i) { - case UF_SCHEMA: - uri->protocollen = u.field_data[i].len; - break; - case UF_HOST: - uri->host = comp; - uri->hostlen = complen; - break; - case UF_PATH: - uri->data = comp; - uri->datalen = complen; - break; - case UF_QUERY: - uri->query = comp; - uri->querylen = complen; - break; - case UF_FRAGMENT: - uri->fragment = comp; - uri->fragmentlen = complen; - break; - case UF_USERINFO: - uri->user = comp; - uri->userlen = complen; - break; - default: - break; + case UF_SCHEMA: + uri->protocollen = u.field_data[i].len; + break; + case UF_HOST: + uri->host = comp; + uri->hostlen = complen; + break; + case UF_PATH: + uri->data = comp; + uri->datalen = complen; + break; + case UF_QUERY: + uri->query = comp; + uri->querylen = complen; + break; + case UF_FRAGMENT: + uri->fragment = comp; + uri->fragmentlen = complen; + break; + case UF_USERINFO: + uri->user = comp; + uri->userlen = complen; + break; + default: + break; } } } @@ -1592,12 +1600,6 @@ rspamd_url_parse (struct rspamd_url *uri, gchar *uristring, gsize len, uri->flags |= RSPAMD_URL_FLAG_OBSCURED; } - uri->raw = p; - uri->rawlen = len; - uri->string = rspamd_mempool_alloc (pool, len + 1); - rspamd_strlcpy (uri->string, p, len + 1); - uri->urllen = len; - /* Now decode url symbols */ unquoted_len = rspamd_url_decode (uri->string, uri->string, diff --git a/src/libserver/url.h b/src/libserver/url.h index 48f5acc12..a9466c636 100644 --- a/src/libserver/url.h +++ b/src/libserver/url.h @@ -17,6 +17,10 @@ enum rspamd_url_flags { RSPAMD_URL_FLAG_HTML_DISPLAYED = 1 << 4, RSPAMD_URL_FLAG_FROM_TEXT = 1 << 5, RSPAMD_URL_FLAG_SUBJECT = 1 << 6, + RSPAMD_URL_FLAG_HOSTENCODED = 1 << 7, + RSPAMD_URL_FLAG_SCHEMAENCODED = 1 << 8, + RSPAMD_URL_FLAG_PATHENCODED = 1 << 9, + RSPAMD_URL_FLAG_QUERYENCODED = 1 << 10, }; struct rspamd_url_tag { diff --git a/src/plugins/lua/url_redirector.lua b/src/plugins/lua/url_redirector.lua index 698ed6f8c..a6e7afc11 100644 --- a/src/plugins/lua/url_redirector.lua +++ b/src/plugins/lua/url_redirector.lua @@ -230,7 +230,7 @@ local function resolve_cached(task, orig_url, url, key, param, ntries) end local function url_redirector_handler(task, url, param) - local url_str = tostring(url) + local url_str = url:get_raw() -- 32 base32 characters are roughly 20 bytes of data or 160 bits local key = settings.key_prefix .. hash.create(url_str):base32():sub(1, 32) resolve_cached(task, url_str, url_str, key, param, 1) -- 2.39.5