From 32c8c7b394d2c4dfa4a520f832a79f93f41a79b7 Mon Sep 17 00:00:00 2001
From: Vsevolod Stakhov
Date: Fri, 3 May 2019 11:31:17 +0100
Subject: [PATCH] [Minor] More url parsing fixes

---
 src/libserver/url.c | 24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/src/libserver/url.c b/src/libserver/url.c
index ea46c0353..b26bad6c6 100644
--- a/src/libserver/url.c
+++ b/src/libserver/url.c
@@ -569,7 +569,7 @@ rspamd_url_init (const gchar *tld_file)
 		} \
 	} while (0)
 
-static gboolean
+static bool
 is_url_start (gchar c)
 {
 	if (c == '(' ||
@@ -583,7 +583,7 @@ is_url_start (gchar c)
 	return FALSE;
 }
 
-static gboolean
+static bool
 is_url_end (gchar c)
 {
 	if (c == ')' ||
@@ -597,6 +597,19 @@ is_url_end (gchar c)
 	return FALSE;
 }
 
+static bool
+is_domain_start (int p)
+{
+	if (g_ascii_isalnum (p) ||
+		p == '[' ||
+		p == '%' ||
+		(p & 0x80)) {
+		return TRUE;
+	}
+
+	return FALSE;
+}
+
 static gint
 rspamd_mailto_parse (struct http_parser_url *u,
 		const gchar *str, gsize len,
@@ -1115,7 +1128,7 @@ rspamd_web_parse (struct http_parser_url *u, const gchar *str, gsize len,
 			}
 			break;
 		case parse_domain_start:
-			if (g_ascii_isalnum (t) || t & 0x80) {
+			if (is_domain_start (t)) {
 				st = parse_domain;
 			}
 			else {
@@ -1965,6 +1978,11 @@ rspamd_url_parse (struct rspamd_url *uri,
 		uri->flags |= RSPAMD_URL_FLAG_UNNORMALISED;
 	}
 
+	/* Ensure that hostname starts with something sane (exclude numeric urls) */
+	if (!(is_domain_start (uri->host[0]) || uri->host[0] == ':')) {
+		return URI_ERRNO_BAD_FORMAT;
+	}
+
 	rspamd_url_shift (uri, unquoted_len, UF_HOST);
 
 	if (uri->datalen) {
-- 
2.39.5