From f0462bf947bafb429f47162a84b4daaf7379463e Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Mon, 4 Mar 2019 20:18:57 +0000 Subject: [PATCH] [Minor] Ignore completely damaged urls --- src/libserver/html.c | 4 +++- src/libserver/url.c | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/libserver/html.c b/src/libserver/html.c index 7da18cdfe..63638d28b 100644 --- a/src/libserver/html.c +++ b/src/libserver/html.c @@ -1441,7 +1441,9 @@ rspamd_html_process_url (rspamd_mempool_t *pool, const gchar *start, guint len, rc = rspamd_url_parse (url, decoded, dlen, pool, RSPAMD_URL_PARSE_HREF); - if (rc == URI_ERRNO_OK) { + /* Filter some completely damaged urls */ + if (rc == URI_ERRNO_OK && url->hostlen > 0 && + !((url->flags & RSPAMD_URL_FLAG_OBSCURED) && (url->protocol & PROTOCOL_UNKNOWN))) { url->flags |= saved_flags; if (has_bad_chars) { diff --git a/src/libserver/url.c b/src/libserver/url.c index 6b4a0d2d0..f860eec0c 100644 --- a/src/libserver/url.c +++ b/src/libserver/url.c @@ -1388,7 +1388,7 @@ rspamd_web_parse (struct http_parser_url *u, const gchar *str, gsize len, /* Parse remaining */ switch (st) { case parse_domain: - if (p - c == 0) { + if (p - c == 0 || !is_domain (*(p - 1)) || !is_domain (*c)) { goto out; } SET_U (u, UF_HOST); @@ -2032,7 +2032,7 @@ rspamd_url_parse (struct rspamd_url *uri, } else { /* Hack, hack, hack */ - uri->protocol = PROTOCOL_HTTP; + uri->protocol = PROTOCOL_UNKNOWN; } } -- 2.39.5