Fix IPv6 addresses and other issues in URL parser.

author Vsevolod Stakhov <vsevolod@highsecure.ru>

Thu, 23 Apr 2015 10:33:54 +0000 (11:33 +0100)

committer Vsevolod Stakhov <vsevolod@highsecure.ru>

Thu, 23 Apr 2015 10:33:54 +0000 (11:33 +0100)
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 23 Apr 2015 10:33:54 +0000 (11:33 +0100)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 23 Apr 2015 10:33:54 +0000 (11:33 +0100)
diff --git a/src/libserver/url.c b/src/libserver/url.c

index 9e0fed110700f4981b9e0e5e74f349493e6f7122..a52011346b344f6746ff41712f4a9bead87be581 100644 (file)
--- a/src/libserver/url.c
+++ b/src/libserver/url.c
@@ -638,9 +638,7 @@ rspamd_web_parse (struct http_parser_url *u, const gchar *str, gsize len,
                                         goto out;
                                 }
                         }
-                       else {
-                               p ++;
-                       }
+                       p ++;
                         break;
                 case parse_user:
                         if (t == ':') {
@@ -687,6 +685,11 @@ rspamd_web_parse (struct http_parser_url *u, const gchar *str, gsize len,
                 case parse_at:
                         c = p;
                         st = parse_domain;
+                       if (t == '[') {
+                               st = parse_ipv6;
+                               p ++;
+                               c = p;
+                       }
                         break;
                 case parse_domain:
                         if (t == '/' || t == ':') {
@@ -934,15 +937,24 @@ rspamd_tld_trie_callback (int strnum, int textpos, void *context)
         }
  
         pos = url->host + textpos - pat->len;
+       p = pos - 1;
         start = url->host;
  
         if (*pos != '.' || textpos != (gint)url->hostlen) {
                 /* Something weird has been found */
-               return 0;
+               if (textpos == (gint)url->hostlen - 1) {
+                       pos = url->host + textpos;
+                       if (*pos == '.') {
+                               /* This is dot at the end of domain */
+                               url->hostlen --;
+                       }
+               }
+               else {
+                       return 0;
+               }
         }
  
         /* Now we need to find top level domain */
-       p = pos - 1;
         pos = start;
         while (p >= start && ndots > 0) {
                 if (*p == '.') {
@@ -1259,6 +1271,16 @@ url_tld_end (const gchar *begin,
                 }
  
         }
+       else if (*p == '.') {
+               p ++;
+               if (p < end) {
+                       if (g_ascii_isspace (*p) || *p == '/' ||
+                                       *p == '?' || *p == ':') {
+                               return url_web_end (begin, end, match->m_begin, match);
+                       }
+               }
+       }
+
         return FALSE;
  }
  
@@ -1479,7 +1501,19 @@ rspamd_url_trie_callback (int strnum, int textpos, void *context)
                 pos = &cb->begin[textpos];
                 if (pos < cb->end) {
                         if (!g_ascii_isspace (*pos) && *pos != '/' && *pos != '?' && *pos != ':') {
-                               return 0;
+                               if (*pos == '.') {
+                                       /* We allow . at the end of the domain however */
+                                       pos ++;
+                                       if (pos < cb->end) {
+                                               if (!g_ascii_isspace (*pos) && *pos != '/' &&
+                                                               *pos != '?' && *pos != ':') {
+                                                       return 0;
+                                               }
+                                       }
+                               }
+                               else {
+                                       return 0;
+                               }
                         }
                 }
         }
author	Vsevolod Stakhov <vsevolod@highsecure.ru>
	Thu, 23 Apr 2015 10:33:54 +0000 (11:33 +0100)
committer	Vsevolod Stakhov <vsevolod@highsecure.ru>
	Thu, 23 Apr 2015 10:33:54 +0000 (11:33 +0100)