From 31adf2042911d755510a1992018cd3bf479b9bc8 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Thu, 23 Apr 2015 11:33:54 +0100 Subject: [PATCH] Fix ipv6 addresses and other issues in url parser. --- src/libserver/url.c | 46 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 40 insertions(+), 6 deletions(-) diff --git a/src/libserver/url.c b/src/libserver/url.c index 9e0fed110..a52011346 100644 --- a/src/libserver/url.c +++ b/src/libserver/url.c @@ -638,9 +638,7 @@ rspamd_web_parse (struct http_parser_url *u, const gchar *str, gsize len, goto out; } } - else { - p ++; - } + p ++; break; case parse_user: if (t == ':') { @@ -687,6 +685,11 @@ rspamd_web_parse (struct http_parser_url *u, const gchar *str, gsize len, case parse_at: c = p; st = parse_domain; + if (t == '[') { + st = parse_ipv6; + p ++; + c = p; + } break; case parse_domain: if (t == '/' || t == ':') { @@ -934,15 +937,24 @@ rspamd_tld_trie_callback (int strnum, int textpos, void *context) } pos = url->host + textpos - pat->len; + p = pos - 1; start = url->host; if (*pos != '.' || textpos != (gint)url->hostlen) { /* Something weird has been found */ - return 0; + if (textpos == (gint)url->hostlen - 1) { + pos = url->host + textpos; + if (*pos == '.') { + /* This is dot at the end of domain */ + url->hostlen --; + } + } + else { + return 0; + } } /* Now we need to find top level domain */ - p = pos - 1; pos = start; while (p >= start && ndots > 0) { if (*p == '.') { @@ -1259,6 +1271,16 @@ url_tld_end (const gchar *begin, } } + else if (*p == '.') { + p ++; + if (p < end) { + if (g_ascii_isspace (*p) || *p == '/' || + *p == '?' || *p == ':') { + return url_web_end (begin, end, match->m_begin, match); + } + } + } + return FALSE; } @@ -1479,7 +1501,19 @@ rspamd_url_trie_callback (int strnum, int textpos, void *context) pos = &cb->begin[textpos]; if (pos < cb->end) { if (!g_ascii_isspace (*pos) && *pos != '/' && *pos != '?' && *pos != ':') { - return 0; + if (*pos == '.') { + /* We allow . at the end of the domain however */ + pos ++; + if (pos < cb->end) { + if (!g_ascii_isspace (*pos) && *pos != '/' && + *pos != '?' && *pos != ':') { + return 0; + } + } + } + else { + return 0; + } } } } -- 2.39.5