From 31ef21901866b7b76755436646ab0ed2857815a1 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Thu, 2 Jul 2015 11:35:51 +0100 Subject: [PATCH] Add workarounds for numeric hosts in URLs. --- src/libserver/url.c | 85 +++++++++++++++++++++++++++++++++++++++++-- src/plugins/surbl.c | 6 +-- test/lua/unit/url.lua | 19 +++++++++- 3 files changed, 101 insertions(+), 9 deletions(-) diff --git a/src/libserver/url.c b/src/libserver/url.c index 15606118e..4a9fc7667 100644 --- a/src/libserver/url.c +++ b/src/libserver/url.c @@ -1007,11 +1007,12 @@ rspamd_tld_trie_callback (int strnum, int textpos, void *context) static gboolean rspamd_url_is_ip (struct rspamd_url *uri, rspamd_mempool_t *pool) { - const gchar *p, *end; - gchar buf[INET6_ADDRSTRLEN + 1]; + const gchar *p, *end, *c; + gchar buf[INET6_ADDRSTRLEN + 1], *errstr; struct in_addr in4; struct in6_addr in6; - gboolean ret = FALSE; + gboolean ret = FALSE, check_num = TRUE; + guint32 n, dots, t, i, shift, nshift; p = uri->host; end = p + uri->hostlen; @@ -1021,6 +1022,10 @@ rspamd_url_is_ip (struct rspamd_url *uri, rspamd_mempool_t *pool) end --; } + while (*(end - 1) == '.' && end > p) { + end --; + } + if (end - p > (gint)sizeof (buf) - 1) { return FALSE; } @@ -1047,6 +1052,78 @@ rspamd_url_is_ip (struct rspamd_url *uri, rspamd_mempool_t *pool) uri->is_numeric = TRUE; ret = TRUE; } + else { + /* Try also numeric notation */ + c = p; + n = 0; + dots = 0; + shift = 0; + + while (p <= end && check_num) { + if (shift < 32 && ((*p == '.' && dots < 3) || (p == end && dots <= 3))) { + g_assert (p - c + 1 < (gint)sizeof (buf)); + rspamd_strlcpy (buf, c, p - c + 1); + c = p + 1; + dots ++; + t = strtoul (buf, &errstr, 0); + + if (errstr == NULL || *errstr == '\0') { + + nshift = (t == 0 ? shift + 8 : shift); + + for (i = 0; i < 4; i ++) { + if ((t >> 8 * i) > 0) { + nshift += 8; + } + else { + break; + } + } + /* + * Here we need to find the proper shift of the previous + * components, so we check possible cases: + * 1) 1 octet + * 2) 2 octets + * 3) 3 octets + * 4) 4 octets + */ + switch (i) { + case 4: + n |= (GUINT32_FROM_BE (t)) << shift; + break; + case 3: + n |= (GUINT32_FROM_BE (t)) << (shift - 8); + break; + case 2: + n |= (GUINT16_FROM_BE (t)) << shift; + break; + default: + n |= t << shift; + break; + } + + shift = nshift; + } + else { + check_num = FALSE; + } + } + + p ++; + } + + if (check_num && dots <= 3) { + memcpy (&in4, &n, sizeof (in4)); + uri->host = rspamd_mempool_alloc (pool, INET_ADDRSTRLEN + 1); + memset (uri->host, 0, INET_ADDRSTRLEN + 1); + inet_ntop (AF_INET, &in4, uri->host, INET_ADDRSTRLEN); + uri->hostlen = strlen (uri->host); + uri->tld = uri->host; + uri->tldlen = uri->hostlen; + uri->is_numeric = TRUE; + ret = TRUE; + } + } return ret; } @@ -1056,7 +1133,7 @@ rspamd_url_parse (struct rspamd_url *uri, gchar *uristring, gsize len, rspamd_mempool_t *pool) { struct http_parser_url u; - gchar *p, *comp, t; + gchar *p, *comp; const gchar *end; guint i, complen, ret; gint state = 0; diff --git a/src/plugins/surbl.c b/src/plugins/surbl.c index c6c7fe414..233e7bbb8 100644 --- a/src/plugins/surbl.c +++ b/src/plugins/surbl.c @@ -670,14 +670,14 @@ format_surbl_request (rspamd_mempool_t * pool, len = hostname->len + slen + 2; p = hostname->begin; + is_numeric = url->is_numeric; + while (p - hostname->begin < (gint)hostname->len && dots_num < MAX_LEVELS) { if (*p == '.') { dots[dots_num] = p; dots_num++; } - else if (!g_ascii_isdigit (*p)) { - is_numeric = FALSE; - } + p++; } diff --git a/test/lua/unit/url.lua b/test/lua/unit/url.lua index 4362a3999..9836ed127 100644 --- a/test/lua/unit/url.lua +++ b/test/lua/unit/url.lua @@ -65,7 +65,22 @@ context("URL check functions", function() }}, {"http://%25DOMAIN:foobar@foodomain.com/", true, { host = 'foodomain.com', user = '%25DOMAIN' - }} + }}, + {"http://0.0xFFFFFF", true, { + host = '0.255.255.255' + }}, + {"http://030052000001", true, { + host = '192.168.0.1' + }}, + {"http://0xc0.052000001", true, { + host = '192.168.0.1' + }}, + {"http://192.168.0.1.", true, { + host = '192.168.0.1' + }}, + {"http://[::eeee:192.168.0.1]", true, { + host = '::eeee:c0a8:1' + }}, } for _,c in ipairs(cases) do @@ -78,7 +93,7 @@ context("URL check functions", function() for k,v in pairs(c[3]) do assert_not_nil(uf[k], k .. ' is missing in url, must be ' .. v) - assert_equal(uf[k], v, 'expected ' .. v .. ' for ' .. k .. ' but got ' .. uf[k]) + assert_equal(uf[k], v, 'expected ' .. v .. ' for ' .. k .. ' but got ' .. uf[k] .. ' in url ' .. c[1]) end for k,v in pairs(uf) do if k ~= 'url' and k ~= 'protocol' and k ~= 'tld' then -- 2.39.5