From 193879d7466b42f8b20fccc9b0cf403f41f82672 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Thu, 19 Mar 2020 15:26:45 +0000 Subject: [PATCH] [Fix] Fix some corner cases in urls parsing, add limits --- src/libserver/url.c | 47 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 42 insertions(+), 5 deletions(-) diff --git a/src/libserver/url.c b/src/libserver/url.c index 73272799a..6b2ecdfde 100644 --- a/src/libserver/url.c +++ b/src/libserver/url.c @@ -623,6 +623,10 @@ is_domain_start (int p) return FALSE; } +static const guint max_domain_length = 253; +static const guint max_dns_label = 63; +static const guint max_email_user = 64; + static gint rspamd_mailto_parse (struct http_parser_url *u, const gchar *str, gsize len, @@ -654,6 +658,10 @@ rspamd_mailto_parse (struct http_parser_url *u, while (p < last) { t = *p; + if (p - str > max_email_user + max_domain_length + 1) { + goto out; + } + switch (st) { case parse_mailto: if (t == ':') { @@ -725,6 +733,9 @@ rspamd_mailto_parse (struct http_parser_url *u, else if (!is_mailsafe (t)) { goto out; } + else if (p - c > max_email_user) { + goto out; + } p++; break; case parse_at: @@ -739,6 +750,9 @@ rspamd_mailto_parse (struct http_parser_url *u, else if (!is_domain (t) && t != '.' && t != '_') { goto out; } + else if (p - c > max_domain_length) { + goto out; + } p++; break; case parse_suffix_question: @@ -810,6 +824,10 @@ rspamd_telephone_parse (struct http_parser_url *u, while (p < last) { t = *p; + if (p - str > max_email_user) { + goto out; + } + switch (st) { case parse_protocol: if (t == ':') { @@ -926,7 +944,7 @@ rspamd_web_parse (struct http_parser_url *u, const gchar *str, gsize len, { const gchar *p = str, *c = str, *last = str + len, *slash = NULL, *password_start = NULL, *user_start = NULL; - gchar t; + gchar t = 0; UChar32 uc; glong pt; gint ret = 1; @@ -1075,6 +1093,10 @@ rspamd_web_parse (struct http_parser_url *u, const gchar *str, gsize len, else if (!g_ascii_isgraph (t)) { goto out; } + else if (p - c > max_email_user) { + goto out; + } + p++; break; case parse_multiple_at: @@ -1130,6 +1152,9 @@ rspamd_web_parse (struct http_parser_url *u, const gchar *str, gsize len, else if (!g_ascii_isgraph (t)) { goto out; } + else if (p - c > max_domain_length) { + goto out; + } p++; break; case parse_at: @@ -1157,6 +1182,10 @@ rspamd_web_parse (struct http_parser_url *u, const gchar *str, gsize len, } break; case parse_domain: + if (p - c > max_domain_length) { + /* Too large domain */ + goto out; + } if (t == '/' || t == ':' || t == '?' || t == '#') { if (p - c == 0) { goto out; @@ -1175,7 +1204,7 @@ rspamd_web_parse (struct http_parser_url *u, const gchar *str, gsize len, st = parse_part; c = p + 1; } - else if (!user_seen) { + else if (t == ':' && !user_seen) { /* * Here we can have both port and password, hence we need * to apply some heuristic here @@ -1193,7 +1222,7 @@ rspamd_web_parse (struct http_parser_url *u, const gchar *str, gsize len, p++; } else { - if (is_url_end (t)) { + if (is_url_end (t) || is_url_start (t)) { goto set; } else if (*p == '@' && !user_seen) { @@ -2615,6 +2644,7 @@ url_web_end (struct url_callback_data *cb, } match->m_len = (last - pos); + cb->fin = last + 1; return TRUE; } @@ -2909,7 +2939,10 @@ rspamd_url_trie_callback (struct rspamd_multipattern *mp, } cb->start = m.m_begin; - cb->fin = pos; + + if (pos > cb->fin) { + cb->fin = pos; + } return 1; } @@ -3047,7 +3080,11 @@ rspamd_url_trie_generic_callback_common (struct rspamd_multipattern *mp, } cb->start = m.m_begin; - cb->fin = pos; + + if (pos > cb->fin) { + cb->fin = pos; + } + url = rspamd_mempool_alloc0 (pool, sizeof (struct rspamd_url)); g_strstrip (cb->url_str); rc = rspamd_url_parse (url, cb->url_str, -- 2.39.5