From: Vsevolod Stakhov Date: Sun, 23 Jul 2023 19:41:02 +0000 (+0100) Subject: [Feature] Reorganise struct rspamd_url to be 64 bytes size X-Git-Tag: 3.6~27 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=eef2f3cac7c975af050efaf4cf1acafcb9b501e3;p=rspamd.git [Feature] Reorganise struct rspamd_url to be 64 bytes size --- diff --git a/src/libserver/html/html_url.cxx b/src/libserver/html/html_url.cxx index 0068ea30f..ae2514ba1 100644 --- a/src/libserver/html/html_url.cxx +++ b/src/libserver/html/html_url.cxx @@ -183,8 +183,12 @@ html_url_is_phished(rspamd_mempool_t *pool, if (!rspamd_url_is_subdomain(disp_tok, href_tok)) { href_url->flags |= RSPAMD_URL_FLAG_PHISHED; - href_url->linked_url = text_url; text_url->flags |= RSPAMD_URL_FLAG_HTML_DISPLAYED; + + if (href_url->ext == nullptr) { + href_url->ext = rspamd_mempool_alloc0_type(pool, rspamd_url_ext); + } + href_url->ext->linked_url = text_url; } } } @@ -241,18 +245,21 @@ html_check_displayed_url(rspamd_mempool_t *pool, return; } - url->visible_part = rspamd_mempool_alloc_buffer(pool, visible_part.size() + 1); - rspamd_strlcpy(url->visible_part, + if (url->ext == nullptr) { + url->ext = rspamd_mempool_alloc0_type(pool, rspamd_url_ext); + } + url->ext->visible_part = rspamd_mempool_alloc_buffer(pool, visible_part.size() + 1); + rspamd_strlcpy(url->ext->visible_part, visible_part.data(), visible_part.size() + 1); dlen = visible_part.size(); /* Strip unicode spaces from the start and the end */ - url->visible_part = const_cast<char *>( - rspamd_string_unicode_trim_inplace(url->visible_part, + url->ext->visible_part = const_cast<char *>( + rspamd_string_unicode_trim_inplace(url->ext->visible_part, &dlen)); auto maybe_url = html_url_is_phished(pool, url, - {url->visible_part, dlen}); + {url->ext->visible_part, dlen}); if (maybe_url) { url->flags |= saved_flags; @@ -300,7 +307,7 @@ html_check_displayed_url(rspamd_mempool_t *pool, } } - rspamd_normalise_unicode_inplace(url->visible_part, &dlen); + 
rspamd_normalise_unicode_inplace(url->ext->visible_part, &dlen); } auto diff --git a/src/libserver/protocol.c b/src/libserver/protocol.c index 3708d823f..1d1214c27 100644 --- a/src/libserver/protocol.c +++ b/src/libserver/protocol.c @@ -909,9 +909,9 @@ rspamd_protocol_extended_url (struct rspamd_task *task, ucl_object_insert_key (obj, flags, "flags", 0, false); - if (url->linked_url) { - encoded = rspamd_url_encode (url->linked_url, &enclen, task->task_pool); - elt = rspamd_protocol_extended_url (task, url->linked_url, encoded, + if (url->ext && url->ext->linked_url) { + encoded = rspamd_url_encode (url->ext->linked_url, &enclen, task->task_pool); + elt = rspamd_protocol_extended_url (task, url->ext->linked_url, encoded, enclen); ucl_object_insert_key (obj, elt, "linked_url", 0, false); } diff --git a/src/libserver/url.c b/src/libserver/url.c index 824dc05cc..0deede068 100644 --- a/src/libserver/url.c +++ b/src/libserver/url.c @@ -1797,11 +1797,11 @@ rspamd_url_regen_from_inet_addr (struct rspamd_url *uri, const void *addr, int a uri->flags |= RSPAMD_URL_FLAG_NUMERIC; /* Reconstruct URL */ - if (uri->flags & RSPAMD_URL_FLAG_HAS_PORT) { + if (uri->flags & RSPAMD_URL_FLAG_HAS_PORT && uri->ext) { p = strbuf + r; start_offset = p + 1; r += rspamd_snprintf (strbuf + r, slen - r, ":%ud", - (unsigned int)uri->port); + (unsigned int)uri->ext->port); } if (uri->datalen > 0) { p = strbuf + r; @@ -2351,7 +2351,6 @@ rspamd_url_parse (struct rspamd_url *uri, } } - uri->port = u.port; uri->flags = flags; if (!uri->hostlen) { diff --git a/src/libserver/url.h b/src/libserver/url.h index 0b326869b..9c5b7be28 100644 --- a/src/libserver/url.h +++ b/src/libserver/url.h @@ -52,35 +52,46 @@ struct rspamd_url_tag { struct rspamd_url_tag *prev, *next; }; - +struct rspamd_url_ext; +/** + * URL structure + */ struct rspamd_url { - gchar *string; - gchar *raw; + char *string; + char *raw; + struct rspamd_url_ext *ext; - gchar *visible_part; - struct rspamd_url *linked_url; + uint32_t flags; 
- guint32 flags; + uint8_t protocol; + uint8_t protocollen; - guint8 protocol; - guint8 protocollen; - - guint16 port; + uint16_t hostshift; + uint16_t datashift; + uint16_t queryshift; + uint16_t fragmentshift; + uint16_t tldshift; guint16 usershift; - guint16 hostshift; - guint16 datashift; - guint16 queryshift; - guint16 fragmentshift; - guint16 tldshift; guint16 userlen; - guint16 hostlen; - guint16 datalen; - guint16 querylen; - guint16 fragmentlen; - guint16 tldlen; - guint16 count; - guint16 urllen; - guint16 rawlen; + + uint16_t hostlen; + uint16_t datalen; + uint16_t querylen; + uint16_t fragmentlen; + uint16_t tldlen; + uint16_t count; + uint16_t urllen; + uint16_t rawlen; +}; + +/** + * Rarely used url fields + */ +struct rspamd_url_ext { + gchar *visible_part; + struct rspamd_url *linked_url; + + guint16 port; }; #define rspamd_url_user(u) ((u)->userlen > 0 ? (u)->string + (u)->usershift : NULL) @@ -350,6 +361,22 @@ int rspamd_url_cmp(const struct rspamd_url *u1, const struct rspamd_url *u2); */ int rspamd_url_cmp_qsort(const void *u1, const void *u2); +static inline uint16_t rspamd_url_get_port(struct rspamd_url *u) +{ + if (u->flags & RSPAMD_URL_FLAG_HAS_PORT && u->ext) { + return u->ext->port; + } + else { + /* Assume standard port */ + if (u->protocol == PROTOCOL_HTTPS) { + return 443; + } + else { + return 80; + } + } +} + /** * Normalize unicode input and set out url flags as appropriate * @param pool diff --git a/src/lua/lua_url.c b/src/lua/lua_url.c index 39b0293aa..a46f4e276 100644 --- a/src/lua/lua_url.c +++ b/src/lua/lua_url.c @@ -186,7 +186,7 @@ lua_url_get_port (lua_State *L) struct rspamd_lua_url *url = lua_check_url (L, 1); if (url != NULL) { - lua_pushinteger (L, url->url->port); + lua_pushinteger (L, rspamd_url_get_port(url->url)); } else { lua_pushnil (L); @@ -475,12 +475,13 @@ lua_url_get_phished (lua_State *L) struct rspamd_lua_url *purl, *url = lua_check_url (L, 1); if (url) { - if (url->url->linked_url != NULL) { + if 
(url->url->ext && url->url->ext->linked_url != NULL) { + /* XXX: in fact, this is the only possible combination of flags, so this check is redundant */ if (url->url->flags & (RSPAMD_URL_FLAG_PHISHED|RSPAMD_URL_FLAG_REDIRECTED)) { purl = lua_newuserdata (L, sizeof (struct rspamd_lua_url)); rspamd_lua_setclass (L, "rspamd{url}", -1); - purl->url = url->url->linked_url; + purl->url = url->url->ext->linked_url; return 1; } @@ -535,7 +536,11 @@ lua_url_set_redirected (lua_State *L) redir = lua_check_url (L, -1); url->url->flags |= RSPAMD_URL_FLAG_REDIRECTED; - url->url->linked_url = redir->url; + + if (url->url->ext == NULL) { + url->url->ext = rspamd_mempool_alloc0_type(pool, struct rspamd_url_ext); + } + url->url->ext->linked_url = redir->url; } } else { @@ -546,7 +551,10 @@ lua_url_set_redirected (lua_State *L) } url->url->flags |= RSPAMD_URL_FLAG_REDIRECTED; - url->url->linked_url = redir->url; + if (url->url->ext == NULL) { + url->url->ext = rspamd_mempool_alloc0_type(pool, struct rspamd_url_ext); + } + url->url->ext->linked_url = redir->url; /* Push back on stack */ lua_pushvalue (L, 2); @@ -629,8 +637,8 @@ lua_url_get_visible (lua_State *L) LUA_TRACE_POINT; struct rspamd_lua_url *url = lua_check_url (L, 1); - if (url != NULL && url->url->visible_part) { - lua_pushstring (L, url->url->visible_part); + if (url != NULL && url->url->ext && url->url->ext->visible_part) { + lua_pushstring (L, url->url->ext->visible_part); } else { lua_pushnil (L); @@ -671,11 +679,9 @@ lua_url_to_table (lua_State *L) lua_settable (L, -3); } - if (u->port != 0) { - lua_pushstring (L, "port"); - lua_pushinteger (L, u->port); - lua_settable (L, -3); - } + lua_pushstring (L, "port"); + lua_pushinteger (L, rspamd_url_get_port(u)); + lua_settable (L, -3); if (u->tldlen > 0) { lua_pushstring (L, "tld");