source.dussan.org Git - rspamd.git/commitdiff
Add workarounds for numeric hosts in URLs.
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 2 Jul 2015 10:35:51 +0000 (11:35 +0100)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 2 Jul 2015 10:36:28 +0000 (11:36 +0100)
src/libserver/url.c
src/plugins/surbl.c
test/lua/unit/url.lua

index 15606118eea8668a3c950ffd27816ec75ea3f683..4a9fc7667dd8a57531041ab147123dcf374b8491 100644 (file)
@@ -1007,11 +1007,12 @@ rspamd_tld_trie_callback (int strnum, int textpos, void *context)
 static gboolean
 rspamd_url_is_ip (struct rspamd_url *uri, rspamd_mempool_t *pool)
 {
-       const gchar *p, *end;
-       gchar buf[INET6_ADDRSTRLEN + 1];
+       const gchar *p, *end, *c;
+       gchar buf[INET6_ADDRSTRLEN + 1], *errstr;
        struct in_addr in4;
        struct in6_addr in6;
-       gboolean ret = FALSE;
+       gboolean ret = FALSE, check_num = TRUE;
+       guint32 n, dots, t, i, shift, nshift;
 
        p = uri->host;
        end = p + uri->hostlen;
@@ -1021,6 +1022,10 @@ rspamd_url_is_ip (struct rspamd_url *uri, rspamd_mempool_t *pool)
                end --;
        }
 
+       while (*(end - 1) == '.' && end > p) {
+               end --;
+       }
+
        if (end - p > (gint)sizeof (buf) - 1) {
                return FALSE;
        }
@@ -1047,6 +1052,78 @@ rspamd_url_is_ip (struct rspamd_url *uri, rspamd_mempool_t *pool)
                uri->is_numeric = TRUE;
                ret = TRUE;
        }
+       else {
+               /* Try also numeric notation */
+               c = p;
+               n = 0;
+               dots = 0;
+               shift = 0;
+
+               while (p <= end && check_num) {
+                       if (shift < 32 && ((*p == '.' && dots < 3) || (p == end && dots <= 3))) {
+                               g_assert (p - c + 1 < (gint)sizeof (buf));
+                               rspamd_strlcpy (buf, c, p - c + 1);
+                               c = p + 1;
+                               dots ++;
+                               t = strtoul (buf, &errstr, 0);
+
+                               if (errstr == NULL || *errstr == '\0') {
+
+                                       nshift = (t == 0 ? shift + 8 : shift);
+
+                                       for (i = 0; i < 4; i ++) {
+                                               if ((t >> 8 * i) > 0) {
+                                                       nshift += 8;
+                                               }
+                                               else {
+                                                       break;
+                                               }
+                                       }
+                                       /*
+                                        * Here we need to find the proper shift of the previous
+                                        * components, so we check possible cases:
+                                        * 1) 1 octet
+                                        * 2) 2 octets
+                                        * 3) 3 octets
+                                        * 4) 4 octets
+                                        */
+                                       switch (i) {
+                                       case 4:
+                                               n |= (GUINT32_FROM_BE (t)) << shift;
+                                               break;
+                                       case 3:
+                                               n |= (GUINT32_FROM_BE (t)) << (shift - 8);
+                                               break;
+                                       case 2:
+                                               n |= (GUINT16_FROM_BE (t)) << shift;
+                                               break;
+                                       default:
+                                               n |= t << shift;
+                                               break;
+                                       }
+
+                                       shift = nshift;
+                               }
+                               else {
+                                       check_num = FALSE;
+                               }
+                       }
+
+                       p ++;
+               }
+
+               if (check_num && dots <= 3) {
+                       memcpy (&in4, &n, sizeof (in4));
+                       uri->host = rspamd_mempool_alloc (pool, INET_ADDRSTRLEN + 1);
+                       memset (uri->host, 0, INET_ADDRSTRLEN + 1);
+                       inet_ntop (AF_INET, &in4, uri->host, INET_ADDRSTRLEN);
+                       uri->hostlen = strlen (uri->host);
+                       uri->tld = uri->host;
+                       uri->tldlen = uri->hostlen;
+                       uri->is_numeric = TRUE;
+                       ret = TRUE;
+               }
+       }
 
        return ret;
 }
@@ -1056,7 +1133,7 @@ rspamd_url_parse (struct rspamd_url *uri, gchar *uristring, gsize len,
                rspamd_mempool_t *pool)
 {
        struct http_parser_url u;
-       gchar *p, *comp, t;
+       gchar *p, *comp;
        const gchar *end;
        guint i, complen, ret;
        gint state = 0;
index c6c7fe414d26a544d2f5426b0c54a2d56b030992..233e7bbb88d46f2f7f8a02bc0e99f4cbe86a0cd1 100644 (file)
@@ -670,14 +670,14 @@ format_surbl_request (rspamd_mempool_t * pool,
        len = hostname->len + slen + 2;
 
        p = hostname->begin;
+       is_numeric = url->is_numeric;
+
        while (p - hostname->begin < (gint)hostname->len && dots_num < MAX_LEVELS) {
                if (*p == '.') {
                        dots[dots_num] = p;
                        dots_num++;
                }
-               else if (!g_ascii_isdigit (*p)) {
-                       is_numeric = FALSE;
-               }
+
                p++;
        }
 
index 4362a3999e60b4beed466b672a1aab388abd82da..9836ed127f4596549e4bc28c6903edd31e2f1517 100644 (file)
@@ -65,7 +65,22 @@ context("URL check functions", function()
       }},
       {"http://%25DOMAIN:foobar@foodomain.com/", true, {
         host = 'foodomain.com', user = '%25DOMAIN'
-      }}
+      }},
+      {"http://0.0xFFFFFF", true, {
+        host = '0.255.255.255'
+      }},
+      {"http://030052000001", true, {
+        host = '192.168.0.1'
+      }},
+      {"http://0xc0.052000001", true, {
+        host = '192.168.0.1'
+      }},
+      {"http://192.168.0.1.", true, {
+        host = '192.168.0.1'
+      }},
+      {"http://[::eeee:192.168.0.1]", true, {
+        host = '::eeee:c0a8:1'
+      }},
     }
     
     for _,c in ipairs(cases) do
@@ -78,7 +93,7 @@ context("URL check functions", function()
         
         for k,v in pairs(c[3]) do
           assert_not_nil(uf[k], k .. ' is missing in url, must be ' .. v)
-          assert_equal(uf[k], v, 'expected ' .. v .. ' for ' .. k .. ' but got ' .. uf[k])
+          assert_equal(uf[k], v, 'expected ' .. v .. ' for ' .. k .. ' but got ' .. uf[k] .. ' in url ' .. c[1])
         end
         for k,v in pairs(uf) do
           if k ~= 'url' and k ~= 'protocol' and k ~= 'tld' then