From b8d890c115464793b78a1ce01f2ba84db2b237b4 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Sat, 26 Nov 2022 20:57:50 +0000 Subject: [PATCH] [Fix] Treat hostnames with no dots as eSLD of their own --- src/libserver/url.c | 50 +++++++++++++++++++++++++++------------------ 1 file changed, 30 insertions(+), 20 deletions(-) diff --git a/src/libserver/url.c b/src/libserver/url.c index 4984b0d2d..2ae50b95b 100644 --- a/src/libserver/url.c +++ b/src/libserver/url.c @@ -2492,31 +2492,41 @@ rspamd_url_parse (struct rspamd_url *uri, } if (uri->tldlen == 0) { - if (uri->protocol != PROTOCOL_MAILTO) { - if (url_scanner->has_tld_file && !(parse_flags & RSPAMD_URL_PARSE_HREF)) { - /* Ignore URL's without TLD if it is not a numeric URL */ - if (!rspamd_url_is_ip(uri, pool)) { - return URI_ERRNO_TLD_MISSING; + /* + * If we have not detected eSLD, but there are no dots in the hostname, + * then we should treat the whole hostname as eSLD - a rule of thumb + */ + if (uri->hostlen > 0 && memchr(rspamd_url_host_unsafe(uri), '.', uri->hostlen) == NULL) { + uri->tldlen = uri->hostlen; + uri->tldshift = uri->hostshift; + } + else { + if (uri->protocol != PROTOCOL_MAILTO) { + if (url_scanner->has_tld_file && !(parse_flags & RSPAMD_URL_PARSE_HREF)) { + /* Ignore URL's without TLD if it is not a numeric URL */ + if (!rspamd_url_is_ip(uri, pool)) { + return URI_ERRNO_TLD_MISSING; + } + } + else { + if (!rspamd_url_is_ip(uri, pool)) { + /* Assume tld equal to host */ + uri->tldshift = uri->hostshift; + uri->tldlen = uri->hostlen; + } + else if (uri->flags & RSPAMD_URL_FLAG_SCHEMALESS) { + /* Ignore urls with both no schema and no tld */ + return URI_ERRNO_TLD_MISSING; + } + + uri->flags |= RSPAMD_URL_FLAG_NO_TLD; } } else { - if (!rspamd_url_is_ip(uri, pool)) { - /* Assume tld equal to host */ - uri->tldshift = uri->hostshift; - uri->tldlen = uri->hostlen; - } - else if (uri->flags & RSPAMD_URL_FLAG_SCHEMALESS) { - /* Ignore urls with both no schema and no tld */ - return URI_ERRNO_TLD_MISSING; - } - - uri->flags |= RSPAMD_URL_FLAG_NO_TLD; + /* Ignore IP like domains for mailto, as it is really never supported */ + return URI_ERRNO_TLD_MISSING; } } - else { - /* Ignore IP like domains for mailto, as it is really never supported */ - return URI_ERRNO_TLD_MISSING; - } } /* Replace stupid '\' with '/' after schema */ -- 2.39.5