diff options
-rw-r--r-- | src/libserver/html.c | 14 | ||||
-rw-r--r-- | src/libserver/protocol.c | 11 | ||||
-rw-r--r-- | src/libserver/url.c | 78 | ||||
-rw-r--r-- | src/libserver/url.h | 8 | ||||
-rw-r--r-- | src/lua/lua_url.c | 8 |
5 files changed, 68 insertions, 51 deletions
diff --git a/src/libserver/html.c b/src/libserver/html.c index b7e78e57b..7dca72453 100644 --- a/src/libserver/html.c +++ b/src/libserver/html.c @@ -658,14 +658,14 @@ rspamd_html_url_is_phished (rspamd_mempool_t *pool, if (rc == URI_ERRNO_OK) { disp_tok.len = text_url->hostlen; - disp_tok.begin = text_url->host; + disp_tok.begin = rspamd_url_host_unsafe (text_url); #if U_ICU_VERSION_MAJOR_NUM >= 46 - if (rspamd_substring_search_caseless (text_url->host, + if (rspamd_substring_search_caseless (rspamd_url_host_unsafe (text_url), text_url->hostlen, "xn--", 4) != -1) { idn_hbuf = rspamd_mempool_alloc (pool, text_url->hostlen * 2 + 1); /* We need to convert it to the normal value first */ disp_tok.len = uidna_nameToUnicodeUTF8 (udn, - text_url->host, text_url->hostlen, + rspamd_url_host_unsafe (text_url), text_url->hostlen, idn_hbuf, text_url->hostlen * 2 + 1, &uinfo, &uc_err); if (uc_err != U_ZERO_ERROR) { @@ -679,14 +679,14 @@ rspamd_html_url_is_phished (rspamd_mempool_t *pool, } #endif href_tok.len = href_url->hostlen; - href_tok.begin = href_url->host; + href_tok.begin = rspamd_url_host_unsafe (href_url); #if U_ICU_VERSION_MAJOR_NUM >= 46 - if (rspamd_substring_search_caseless (href_url->host, + if (rspamd_substring_search_caseless (rspamd_url_host_unsafe (href_url), href_url->hostlen, "xn--", 4) != -1) { idn_hbuf = rspamd_mempool_alloc (pool, href_url->hostlen * 2 + 1); /* We need to convert it to the normal value first */ href_tok.len = uidna_nameToUnicodeUTF8 (udn, - href_url->host, href_url->hostlen, + rspamd_url_host_unsafe (href_url), href_url->hostlen, idn_hbuf, href_url->hostlen * 2 + 1, &uinfo, &uc_err); if (uc_err != U_ZERO_ERROR) { @@ -1594,7 +1594,7 @@ rspamd_html_process_url_tag (rspamd_mempool_t *pool, struct html_tag *tag, buf = rspamd_mempool_alloc (pool, len + 1); rspamd_snprintf (buf, len + 1, "%*s://%*s/%*s", hc->base_url->protocollen, hc->base_url->string, - hc->base_url->hostlen, hc->base_url->host, + hc->base_url->hostlen, rspamd_url_host_unsafe (hc->base_url), (gint)orig_len, start); start = buf; } diff --git a/src/libserver/protocol.c b/src/libserver/protocol.c index 4c1a94d99..16dc05491 100644 --- a/src/libserver/protocol.c +++ b/src/libserver/protocol.c @@ -882,7 +882,7 @@ rspamd_protocol_extended_url (struct rspamd_task *task, ucl_object_insert_key (obj, elt, "tld", 0, false); } if (url->hostlen > 0) { - elt = ucl_object_fromstring_common (url->host, url->hostlen, 0); + elt = ucl_object_fromstring_common (rspamd_url_host_unsafe (url), url->hostlen, 0); ucl_object_insert_key (obj, elt, "host", 0, false); } @@ -925,11 +925,14 @@ urls_protocol_cb (gpointer key, gpointer value, gpointer ud) goffset err_offset; - if ((err_offset = rspamd_fast_utf8_validate (url->host, url->hostlen)) == 0) { - obj = ucl_object_fromstring_common (url->host, url->hostlen, 0); + if ((err_offset = rspamd_fast_utf8_validate (rspamd_url_host_unsafe (url), + url->hostlen)) == 0) { + obj = ucl_object_fromstring_common (rspamd_url_host_unsafe (url), + url->hostlen, 0); } else { - obj = ucl_object_fromstring_common (url->host, err_offset - 1, 0); + obj = ucl_object_fromstring_common (rspamd_url_host_unsafe (url), + err_offset - 1, 0); } } else { diff --git a/src/libserver/url.c b/src/libserver/url.c index a2a9d852f..ac4c11916 100644 --- a/src/libserver/url.c +++ b/src/libserver/url.c @@ -1526,12 +1526,12 @@ rspamd_tld_trie_callback (struct rspamd_multipattern *mp, pos = text + match_start; p = pos - 1; - start = url->host; + start = rspamd_url_host_unsafe (url); if (*pos != '.' || match_pos != (gint) url->hostlen) { /* Something weird has been found */ if (match_pos == (gint) url->hostlen - 1) { - pos = url->host + match_pos; + pos = rspamd_url_host_unsafe (url) + match_pos; if (*pos == '.') { /* This is dot at the end of domain */ url->hostlen--; @@ -1560,9 +1560,9 @@ rspamd_tld_trie_callback (struct rspamd_multipattern *mp, } if ((ndots == 0 || p == start - 1) && - url->tldlen < url->host + url->hostlen - pos) { + url->tldlen < rspamd_url_host_unsafe (url) + url->hostlen - pos) { url->tld = (gchar *) pos; - url->tldlen = url->host + url->hostlen - pos; + url->tldlen = rspamd_url_host_unsafe (url) + url->hostlen - pos; } return 0; @@ -1586,13 +1586,13 @@ rspamd_url_regen_from_inet_addr (struct rspamd_url *uri, const void *addr, int a /* Allocate new string to build it from IP */ strbuf = rspamd_mempool_alloc (pool, slen + 1); r += rspamd_snprintf (strbuf + r, slen - r, "%*s", - (gint)(uri->host - uri->string), + (gint)(uri->hostshift), uri->string); - uri->host = strbuf + r; + uri->hostshift = r; inet_ntop (af, addr, strbuf + r, slen - r + 1); - uri->hostlen = strlen (uri->host); + uri->hostlen = strlen (rspamd_url_host_unsafe (uri)); r += uri->hostlen; - uri->tld = uri->host; + uri->tld = rspamd_url_host_unsafe (uri); uri->tldlen = uri->hostlen; uri->flags |= RSPAMD_URL_FLAG_NUMERIC; @@ -1638,7 +1638,7 @@ rspamd_url_is_ip (struct rspamd_url *uri, rspamd_mempool_t *pool) gboolean ret = FALSE, check_num = TRUE; guint32 n, dots, t = 0, i = 0, shift, nshift; - p = uri->host; + p = rspamd_url_host_unsafe (uri); end = p + uri->hostlen; if (*p == '[' && *(end - 1) == ']') { @@ -1814,9 +1814,10 @@ rspamd_url_shift (struct rspamd_url *uri, gsize nlen, old_shift = uri->hostlen; uri->hostlen -= shift; - remain = (uri->urllen - (uri->host - uri->string)) - old_shift; + remain = (uri->urllen - (uri->hostshift)) - old_shift; g_assert (remain >= 0); - memmove (uri->host + uri->hostlen, uri->host + old_shift, + memmove (rspamd_url_host_unsafe (uri) + uri->hostlen, + rspamd_url_host_unsafe (uri) + old_shift, remain); uri->urllen -= shift; uri->flags |= RSPAMD_URL_FLAG_HOSTENCODED; @@ -1877,7 +1878,7 @@ rspamd_url_shift (struct rspamd_url *uri, gsize nlen, uri->usershift -= shift; } if (uri->hostlen > 0) { - uri->host -= shift; + uri->hostshift -= shift; } /* Go forward */ case UF_HOST: @@ -1908,9 +1909,9 @@ rspamd_telephone_normalise_inplace (struct rspamd_url *uri) gint i = 0, w, orig_len; UChar32 uc; - t = uri->host; + t = rspamd_url_host_unsafe (uri); h = t; - end = uri->host + uri->hostlen; + end = t + uri->hostlen; orig_len = uri->hostlen; if (*h == '+') { @@ -1931,7 +1932,7 @@ rspamd_telephone_normalise_inplace (struct rspamd_url *uri) h += i; } - uri->hostlen = t - uri->host; + uri->hostlen = t - rspamd_url_host_unsafe (uri); uri->urllen -= (orig_len - uri->hostlen); } @@ -2022,7 +2023,7 @@ rspamd_url_parse (struct rspamd_url *uri, uri->protocollen = u.field_data[i].len; break; case UF_HOST: - uri->host = comp; + uri->hostshift = u.field_data[i].off; uri->hostlen = complen; break; case UF_PATH: @@ -2059,16 +2060,20 @@ rspamd_url_parse (struct rspamd_url *uri, uri->string, uri->protocollen); rspamd_url_shift (uri, unquoted_len, UF_SCHEMA); - unquoted_len = rspamd_url_decode (uri->host, uri->host, uri->hostlen); + unquoted_len = rspamd_url_decode (rspamd_url_host_unsafe (uri), + rspamd_url_host_unsafe (uri), uri->hostlen); - if (rspamd_normalise_unicode_inplace (pool, uri->host, &unquoted_len)) { + if (rspamd_normalise_unicode_inplace (pool, + rspamd_url_host_unsafe (uri), &unquoted_len)) { uri->flags |= RSPAMD_URL_FLAG_UNNORMALISED; } if (uri->protocol & (PROTOCOL_HTTP|PROTOCOL_HTTPS|PROTOCOL_MAILTO|PROTOCOL_FTP|PROTOCOL_FILE)) { /* Ensure that hostname starts with something sane (exclude numeric urls) */ - if (!(is_domain_start (uri->host[0]) || uri->host[0] == ':')) { + const gchar* host = rspamd_url_host_unsafe (uri); + + if (!(is_domain_start (host[0]) || host[0] == ':')) { return URI_ERRNO_BAD_FORMAT; } } @@ -2093,7 +2098,7 @@ rspamd_url_parse (struct rspamd_url *uri, struct UConverter *utf8_conv = rspamd_get_utf8_converter (); utf16_len = ucnv_toUChars (utf8_conv, utf16_hostname, uri->hostlen, - uri->host, uri->hostlen, &uc_err); + rspamd_url_host_unsafe (uri), uri->hostlen, &uc_err); if (!U_SUCCESS (uc_err)) { @@ -2110,7 +2115,8 @@ rspamd_url_parse (struct rspamd_url *uri, } /* Convert back to utf8, sigh... */ - norm_utf8_len = ucnv_fromUChars (utf8_conv, uri->host, uri->hostlen, + norm_utf8_len = ucnv_fromUChars (utf8_conv, + rspamd_url_host_unsafe (uri), uri->hostlen, norm_utf16, norm_utf16_len, &uc_err); if (!U_SUCCESS (uc_err)) { @@ -2154,7 +2160,7 @@ rspamd_url_parse (struct rspamd_url *uri, } rspamd_str_lc (uri->string, uri->protocollen); - unquoted_len = rspamd_str_lc_utf8 (uri->host, uri->hostlen); + unquoted_len = rspamd_str_lc_utf8 (rspamd_url_host_unsafe (uri), uri->hostlen); rspamd_url_shift (uri, unquoted_len, UF_HOST); if (uri->protocol == PROTOCOL_UNKNOWN) { @@ -2172,7 +2178,7 @@ rspamd_url_parse (struct rspamd_url *uri, if (uri->protocol & (PROTOCOL_HTTP|PROTOCOL_HTTPS|PROTOCOL_MAILTO|PROTOCOL_FTP|PROTOCOL_FILE)) { /* Find TLD part */ rspamd_multipattern_lookup (url_scanner->search_trie, - uri->host, uri->hostlen, + rspamd_url_host_unsafe (uri), uri->hostlen, rspamd_tld_trie_callback, uri, NULL); if (uri->tldlen == 0) { @@ -2184,7 +2190,7 @@ rspamd_url_parse (struct rspamd_url *uri, } else { if (!rspamd_url_is_ip (uri, pool)) { /* Assume tld equal to host */ - uri->tld = uri->host; + uri->tld = rspamd_url_host_unsafe (uri); uri->tldlen = uri->hostlen; } } @@ -2194,7 +2200,8 @@ rspamd_url_parse (struct rspamd_url *uri, if (uri->protocol & (PROTOCOL_HTTP|PROTOCOL_HTTPS|PROTOCOL_FTP) && uri->protocollen > 0 && uri->urllen > uri->protocollen + 2) { - gchar *pos = &uri->string[uri->protocollen], *host_start = uri->host; + gchar *pos = &uri->string[uri->protocollen], + *host_start = rspamd_url_host_unsafe (uri); while (pos < host_start) { if (*pos == '\\') { @@ -2209,12 +2216,12 @@ rspamd_url_parse (struct rspamd_url *uri, /* We need to normalise phone number: remove all spaces and braces */ rspamd_telephone_normalise_inplace (uri); - if (uri->host[0] == '+') { - uri->tld = uri->host + 1; + if (rspamd_url_host_unsafe (uri)[0] == '+') { + uri->tld = rspamd_url_host_unsafe (uri) + 1; uri->tldlen = uri->hostlen - 1; } else { - uri->tld = uri->host; + uri->tld = rspamd_url_host_unsafe (uri); uri->tldlen = uri->hostlen; } } @@ -3362,7 +3369,8 @@ rspamd_url_host_hash (gconstpointer u) const struct rspamd_url *url = u; if (url->hostlen > 0) { - return (guint)rspamd_cryptobox_fast_hash (url->host, url->hostlen, + return (guint)rspamd_cryptobox_fast_hash (rspamd_url_host_unsafe (url), + url->hostlen, rspamd_hash_seed ()); } @@ -3378,7 +3386,7 @@ rspamd_email_hash (gconstpointer u) rspamd_cryptobox_fast_hash_init (&st, rspamd_hash_seed ()); if (url->hostlen > 0) { - rspamd_cryptobox_fast_hash_update (&st, url->host, url->hostlen); + rspamd_cryptobox_fast_hash_update (&st, rspamd_url_host_unsafe (url), url->hostlen); } if (url->userlen > 0) { @@ -3399,7 +3407,8 @@ rspamd_emails_cmp (gconstpointer a, gconstpointer b) return FALSE; } else { - if ((r = rspamd_lc_cmp (u1->host, u2->host, u1->hostlen)) == 0) { + if ((r = rspamd_lc_cmp (rspamd_url_host_unsafe (u1), + rspamd_url_host_unsafe (u2), u1->hostlen)) == 0) { if (u1->userlen != u2->userlen || u1->userlen == 0) { return FALSE; } @@ -3443,7 +3452,8 @@ rspamd_urls_host_cmp (gconstpointer a, gconstpointer b) return FALSE; } else { - r = memcmp (u1->host, u2->host, u1->hostlen); + r = memcmp (rspamd_url_host_unsafe (u1), rspamd_url_host_unsafe (u2), + u1->hostlen); } return r == 0; @@ -3637,7 +3647,7 @@ rspamd_url_encode (struct rspamd_url *url, gsize *pdlen, g_assert (pdlen != NULL && url != NULL && pool != NULL); - CHECK_URL_COMPONENT ((guchar *)url->host, url->hostlen, + CHECK_URL_COMPONENT (rspamd_url_host_unsafe (url), url->hostlen, RSPAMD_URL_FLAGS_HOSTSAFE); CHECK_URL_COMPONENT (rspamd_url_user_unsafe(url), url->userlen, RSPAMD_URL_FLAGS_USERSAFE); @@ -3683,7 +3693,7 @@ rspamd_url_encode (struct rspamd_url *url, gsize *pdlen, *d++ = ':'; } - ENCODE_URL_COMPONENT ((guchar *)url->host, url->hostlen, + ENCODE_URL_COMPONENT (rspamd_url_host_unsafe (url), url->hostlen, RSPAMD_URL_FLAGS_HOSTSAFE); if (url->datalen > 0) { diff --git a/src/libserver/url.h b/src/libserver/url.h index 78330d814..080f005c3 100644 --- a/src/libserver/url.h +++ b/src/libserver/url.h @@ -50,7 +50,9 @@ struct rspamd_url { guint usershift; guint userlen; - gchar *host; + guint hostshift; + guint hostlen; + gchar *data; gchar *query; gchar *fragment; @@ -60,7 +62,6 @@ struct rspamd_url { struct rspamd_url *phished_url; guint protocollen; - guint hostlen; guint datalen; guint querylen; guint fragmentlen; @@ -75,6 +76,9 @@ struct rspamd_url { #define rspamd_url_user(u) ((u)->userlen > 0 ? (u)->string + (u)->usershift : NULL) #define rspamd_url_user_unsafe(u) ((u)->string + (u)->usershift) +#define rspamd_url_host(u) ((u)->hostlen > 0 ? (u)->string + (u)->hostshift : NULL) +#define rspamd_url_host_unsafe(u) ((u)->string + (u)->hostshift) + enum uri_errno { URI_ERRNO_OK = 0, /* Parsing went well */ URI_ERRNO_EMPTY, /* The URI string was empty */ diff --git a/src/lua/lua_url.c b/src/lua/lua_url.c index 7b0dee89b..bd94120e2 100644 --- a/src/lua/lua_url.c +++ b/src/lua/lua_url.c @@ -158,8 +158,8 @@ lua_url_get_host (lua_State *L) LUA_TRACE_POINT; struct rspamd_lua_url *url = lua_check_url (L, 1); - if (url != NULL) { - lua_pushlstring (L, url->url->host, url->url->hostlen); + if (url != NULL && url->url && url->url->hostlen > 0) { + lua_pushlstring (L, rspamd_url_host (url->url), url->url->hostlen); } else { lua_pushnil (L); @@ -312,7 +312,7 @@ lua_url_tostring (lua_State *L) } tmp[url->url->userlen] = '@'; - memcpy (tmp + url->url->userlen + 1, url->url->host, + memcpy (tmp + url->url->userlen + 1, rspamd_url_host_unsafe (url->url), url->url->hostlen); lua_pushlstring (L, tmp, url->url->userlen + 1 + url->url->hostlen); @@ -660,7 +660,7 @@ lua_url_to_table (lua_State *L) if (u->hostlen > 0) { lua_pushstring (L, "host"); - lua_pushlstring (L, u->host, u->hostlen); + lua_pushlstring (L, rspamd_url_host_unsafe (u), u->hostlen); lua_settable (L, -3); } |