diff options
-rw-r--r-- | src/libserver/html.c | 12 | ||||
-rw-r--r-- | src/libserver/protocol.c | 6 | ||||
-rw-r--r-- | src/libserver/url.c | 10 | ||||
-rw-r--r-- | src/libserver/url.h | 19 | ||||
-rw-r--r-- | src/libstat/tokenizers/tokenizers.c | 2 | ||||
-rw-r--r-- | src/lua/lua_url.c | 4 |
6 files changed, 27 insertions, 26 deletions
diff --git a/src/libserver/html.c b/src/libserver/html.c index e1a211d2c..981141ad8 100644 --- a/src/libserver/html.c +++ b/src/libserver/html.c @@ -704,14 +704,14 @@ rspamd_html_url_is_phished (rspamd_mempool_t *pool, /* Apply the same logic for TLD */ disp_tok.len = text_url->tldlen; - disp_tok.begin = text_url->tld; + disp_tok.begin = rspamd_url_tld_unsafe (text_url); #if U_ICU_VERSION_MAJOR_NUM >= 46 - if (rspamd_substring_search_caseless (text_url->tld, + if (rspamd_substring_search_caseless (rspamd_url_tld_unsafe (text_url), text_url->tldlen, "xn--", 4) != -1) { idn_hbuf = rspamd_mempool_alloc (pool, text_url->tldlen * 2 + 1); /* We need to convert it to the normal value first */ disp_tok.len = uidna_nameToUnicodeUTF8 (udn, - text_url->tld, text_url->tldlen, + rspamd_url_tld_unsafe (text_url), text_url->tldlen, idn_hbuf, text_url->tldlen * 2 + 1, &uinfo, &uc_err); if (uc_err != U_ZERO_ERROR) { @@ -725,14 +725,14 @@ rspamd_html_url_is_phished (rspamd_mempool_t *pool, } #endif href_tok.len = href_url->tldlen; - href_tok.begin = href_url->tld; + href_tok.begin = rspamd_url_tld_unsafe (href_url); #if U_ICU_VERSION_MAJOR_NUM >= 46 - if (rspamd_substring_search_caseless (href_url->tld, + if (rspamd_substring_search_caseless (rspamd_url_tld_unsafe (href_url), href_url->tldlen, "xn--", 4) != -1) { idn_hbuf = rspamd_mempool_alloc (pool, href_url->tldlen * 2 + 1); /* We need to convert it to the normal value first */ href_tok.len = uidna_nameToUnicodeUTF8 (udn, - href_url->tld, href_url->tldlen, + rspamd_url_tld_unsafe (href_url), href_url->tldlen, idn_hbuf, href_url->tldlen * 2 + 1, &uinfo, &uc_err); if (uc_err != U_ZERO_ERROR) { diff --git a/src/libserver/protocol.c b/src/libserver/protocol.c index 16dc05491..739d3b950 100644 --- a/src/libserver/protocol.c +++ b/src/libserver/protocol.c @@ -878,11 +878,13 @@ rspamd_protocol_extended_url (struct rspamd_task *task, ucl_object_insert_key (obj, elt, "url", 0, false); if (url->tldlen > 0) { - elt = ucl_object_fromstring_common (url->tld, url->tldlen, 0); + elt = ucl_object_fromstring_common (rspamd_url_tld_unsafe (url), + url->tldlen, 0); ucl_object_insert_key (obj, elt, "tld", 0, false); } if (url->hostlen > 0) { - elt = ucl_object_fromstring_common (rspamd_url_host_unsafe (url), url->hostlen, 0); + elt = ucl_object_fromstring_common (rspamd_url_host_unsafe (url), + url->hostlen, 0); ucl_object_insert_key (obj, elt, "host", 0, false); } diff --git a/src/libserver/url.c b/src/libserver/url.c index 7e85a460e..043f523f0 100644 --- a/src/libserver/url.c +++ b/src/libserver/url.c @@ -1561,7 +1561,7 @@ rspamd_tld_trie_callback (struct rspamd_multipattern *mp, if ((ndots == 0 || p == start - 1) && url->tldlen < rspamd_url_host_unsafe (url) + url->hostlen - pos) { - url->tld = (gchar *) pos; + url->tldshift = (pos - url->string); url->tldlen = rspamd_url_host_unsafe (url) + url->hostlen - pos; } @@ -1590,11 +1590,11 @@ rspamd_url_regen_from_inet_addr (struct rspamd_url *uri, const void *addr, int a (gint)(uri->hostshift), uri->string); uri->hostshift = r; + uri->tldshift = r; start_offset = strbuf + r; inet_ntop (af, addr, strbuf + r, slen - r + 1); uri->hostlen = strlen (start_offset); r += uri->hostlen; - uri->tld = (const gchar *)start_offset; uri->tldlen = uri->hostlen; uri->flags |= RSPAMD_URL_FLAG_NUMERIC; @@ -2214,7 +2214,7 @@ rspamd_url_parse (struct rspamd_url *uri, } else { if (!rspamd_url_is_ip (uri, pool)) { /* Assume tld equal to host */ - uri->tld = rspamd_url_host_unsafe (uri); + uri->tldshift = uri->hostshift; uri->tldlen = uri->hostlen; } } @@ -2241,11 +2241,11 @@ rspamd_url_parse (struct rspamd_url *uri, rspamd_telephone_normalise_inplace (uri); if (rspamd_url_host_unsafe (uri)[0] == '+') { - uri->tld = rspamd_url_host_unsafe (uri) + 1; + uri->tldshift = uri->hostshift + 1; uri->tldlen = uri->hostlen - 1; } else { - uri->tld = rspamd_url_host_unsafe (uri); + uri->tldshift = uri->hostshift; uri->tldlen = uri->hostlen; } } diff --git a/src/libserver/url.h b/src/libserver/url.h index 87766c4e6..00f09ac30 100644 --- a/src/libserver/url.h +++ b/src/libserver/url.h @@ -53,15 +53,7 @@ struct rspamd_url { guint datashift; guint queryshift; guint fragmentshift; - - gchar *tld; - gchar *visible_part; - - struct rspamd_url *phished_url; - - guint urllen; - guint rawlen; - guint32 flags; + guint tldshift; guint16 protocollen; guint16 userlen; @@ -70,8 +62,14 @@ struct rspamd_url { guint16 querylen; guint16 fragmentlen; guint16 tldlen; - guint16 count; + + guint urllen; + guint rawlen; + guint32 flags; + + gchar *visible_part; + struct rspamd_url *phished_url; }; #define rspamd_url_user(u) ((u)->userlen > 0 ? (u)->string + (u)->usershift : NULL) @@ -79,6 +77,7 @@ struct rspamd_url { #define rspamd_url_host(u) ((u)->hostlen > 0 ? (u)->string + (u)->hostshift : NULL) #define rspamd_url_host_unsafe(u) ((u)->string + (u)->hostshift) +#define rspamd_url_tld_unsafe(u) ((u)->string + (u)->tldshift) #define rspamd_url_data_unsafe(u) ((u)->string + (u)->datashift) #define rspamd_url_query_unsafe(u) ((u)->string + (u)->queryshift) diff --git a/src/libstat/tokenizers/tokenizers.c b/src/libstat/tokenizers/tokenizers.c index 77a924f41..9f1b14daf 100644 --- a/src/libstat/tokenizers/tokenizers.c +++ b/src/libstat/tokenizers/tokenizers.c @@ -262,7 +262,7 @@ rspamd_tokenize_exception (struct rspamd_process_exception *ex, GArray *res) uri = ex->ptr; if (uri && uri->tldlen > 0) { - token.original.begin = uri->tld; + token.original.begin = rspamd_url_tld_unsafe (uri); token.original.len = uri->tldlen; } diff --git a/src/lua/lua_url.c b/src/lua/lua_url.c index cb54a694c..efd34dc6c 100644 --- a/src/lua/lua_url.c +++ b/src/lua/lua_url.c @@ -560,7 +560,7 @@ lua_url_get_tld (lua_State *L) struct rspamd_lua_url *url = lua_check_url (L, 1); if (url != NULL && url->url->tldlen > 0) { - lua_pushlstring (L, url->url->tld, url->url->tldlen); + lua_pushlstring (L, rspamd_url_tld_unsafe (url->url), url->url->tldlen); } else { lua_pushnil (L); @@ -672,7 +672,7 @@ lua_url_to_table (lua_State *L) if (u->tldlen > 0) { lua_pushstring (L, "tld"); - lua_pushlstring (L, u->tld, u->tldlen); + lua_pushlstring (L, rspamd_url_tld_unsafe (u), u->tldlen); lua_settable (L, -3); } |