]> source.dussan.org Git - rspamd.git/commitdiff
[Feature] Reorganise struct rspamd_url to be 64 bytes size
author Vsevolod Stakhov <vsevolod@rspamd.com>
Sun, 23 Jul 2023 19:41:02 +0000 (20:41 +0100)
committer Vsevolod Stakhov <vsevolod@rspamd.com>
Sun, 23 Jul 2023 19:41:02 +0000 (20:41 +0100)
src/libserver/html/html_url.cxx
src/libserver/protocol.c
src/libserver/url.c
src/libserver/url.h
src/lua/lua_url.c

index 0068ea30f591c6a02fa96c46847e26e40431fa4e..ae2514ba157a300f9a27daaea634781e28ca633a 100644 (file)
@@ -183,8 +183,12 @@ html_url_is_phished(rspamd_mempool_t *pool,
 
                                                if (!rspamd_url_is_subdomain(disp_tok, href_tok)) {
                                                        href_url->flags |= RSPAMD_URL_FLAG_PHISHED;
-                                                       href_url->linked_url = text_url;
                                                        text_url->flags |= RSPAMD_URL_FLAG_HTML_DISPLAYED;
+
+                                                       if (href_url->ext == nullptr) {
+                                                               href_url->ext = rspamd_mempool_alloc0_type(pool, rspamd_url_ext);
+                                                       }
+                                                       href_url->ext->linked_url = text_url;
                                                }
                                        }
                                }
@@ -241,18 +245,21 @@ html_check_displayed_url(rspamd_mempool_t *pool,
                return;
        }
 
-       url->visible_part = rspamd_mempool_alloc_buffer(pool, visible_part.size() + 1);
-       rspamd_strlcpy(url->visible_part,
+       if (url->ext == nullptr) {
+               url->ext = rspamd_mempool_alloc0_type(pool, rspamd_url_ext);
+       }
+       url->ext->visible_part = rspamd_mempool_alloc_buffer(pool, visible_part.size() + 1);
+       rspamd_strlcpy(url->ext->visible_part,
                        visible_part.data(),
                        visible_part.size() + 1);
        dlen = visible_part.size();
 
        /* Strip unicode spaces from the start and the end */
-       url->visible_part = const_cast<char *>(
-                       rspamd_string_unicode_trim_inplace(url->visible_part,
+       url->ext->visible_part = const_cast<char *>(
+                       rspamd_string_unicode_trim_inplace(url->ext->visible_part,
                        &dlen));
        auto maybe_url = html_url_is_phished(pool, url,
-                       {url->visible_part, dlen});
+                       {url->ext->visible_part, dlen});
 
        if (maybe_url) {
                url->flags |= saved_flags;
@@ -300,7 +307,7 @@ html_check_displayed_url(rspamd_mempool_t *pool,
                }
        }
 
-       rspamd_normalise_unicode_inplace(url->visible_part, &dlen);
+       rspamd_normalise_unicode_inplace(url->ext->visible_part, &dlen);
 }
 
 auto
index 3708d823fd84240fd92d9ed5c92e56e55961dc4b..1d1214c275eccd24741ac5e4cdfa4d4226fbd765 100644 (file)
@@ -909,9 +909,9 @@ rspamd_protocol_extended_url (struct rspamd_task *task,
 
        ucl_object_insert_key (obj, flags, "flags", 0, false);
 
-       if (url->linked_url) {
-               encoded = rspamd_url_encode (url->linked_url, &enclen, task->task_pool);
-               elt = rspamd_protocol_extended_url (task, url->linked_url, encoded,
+       if (url->ext && url->ext->linked_url) {
+               encoded = rspamd_url_encode (url->ext->linked_url, &enclen, task->task_pool);
+               elt = rspamd_protocol_extended_url (task, url->ext->linked_url, encoded,
                                enclen);
                ucl_object_insert_key (obj, elt, "linked_url", 0, false);
        }
index 824dc05cc1b8a40dcf369169c6e958e18a875bd8..0deede0680d0d12757785e642bcb66c5046d930b 100644 (file)
@@ -1797,11 +1797,11 @@ rspamd_url_regen_from_inet_addr (struct rspamd_url *uri, const void *addr, int a
        uri->flags |= RSPAMD_URL_FLAG_NUMERIC;
 
        /* Reconstruct URL */
-       if (uri->flags & RSPAMD_URL_FLAG_HAS_PORT) {
+       if (uri->flags & RSPAMD_URL_FLAG_HAS_PORT && uri->ext) {
                p = strbuf + r;
                start_offset = p + 1;
                r += rspamd_snprintf (strbuf + r, slen - r, ":%ud",
-                               (unsigned int)uri->port);
+                               (unsigned int)uri->ext->port);
        }
        if (uri->datalen > 0) {
                p = strbuf + r;
@@ -2351,7 +2351,6 @@ rspamd_url_parse (struct rspamd_url *uri,
                }
        }
 
-       uri->port = u.port;
        uri->flags = flags;
 
        if (!uri->hostlen) {
index 0b326869b25b61cfca3e0a0c1c51cdd1f40a3c3c..9c5b7be280b495274e319edf04cbecb7389f8d04 100644 (file)
@@ -52,35 +52,46 @@ struct rspamd_url_tag {
        struct rspamd_url_tag *prev, *next;
 };
 
-
+struct rspamd_url_ext;
+/**
+ * URL structure: frequently used fields only (the commit subject states a 64 byte target size); rarely used fields are reached through `ext` (struct rspamd_url_ext)
+ */
 struct rspamd_url {
-       gchar *string;
-       gchar *raw;
+       char *string;
+       char *raw;
+       struct rspamd_url_ext *ext; /* rarely used fields; may be NULL - code in this change allocates it on demand and NULL-checks it before reading */
 
-       gchar *visible_part;
-       struct rspamd_url *linked_url;
+       uint32_t flags; /* RSPAMD_URL_FLAG_* bits */
 
-       guint32 flags;
+       uint8_t protocol;
+       uint8_t protocollen;
 
-       guint8 protocol;
-       guint8 protocollen;
-
-       guint16 port;
+       uint16_t hostshift; /* NOTE(review): *shift fields are presumably offsets into `string`, as rspamd_url_user() does with usershift - confirm for the others */
+       uint16_t datashift;
+       uint16_t queryshift;
+       uint16_t fragmentshift;
+       uint16_t tldshift;
        guint16 usershift;
-       guint16 hostshift;
-       guint16 datashift;
-       guint16 queryshift;
-       guint16 fragmentshift;
-       guint16 tldshift;
        guint16 userlen;
-       guint16 hostlen;
-       guint16 datalen;
-       guint16 querylen;
-       guint16 fragmentlen;
-       guint16 tldlen;
-       guint16 count;
-       guint16 urllen;
-       guint16 rawlen;
+
+       uint16_t hostlen; /* NOTE(review): *len fields are presumably the lengths of the matching components; rspamd_url_user() treats userlen == 0 as "absent" */
+       uint16_t datalen;
+       uint16_t querylen;
+       uint16_t fragmentlen;
+       uint16_t tldlen;
+       uint16_t count;
+       uint16_t urllen;
+       uint16_t rawlen;
+};
+
+/**
+ * Rarely used url fields, kept out of struct rspamd_url and attached through its `ext` pointer; callers in this change allocate it zeroed from a memory pool when `ext` is NULL
+ */
+struct rspamd_url_ext {
+       gchar *visible_part; /* displayed text of the link as stored by html_check_displayed_url (unicode-trimmed, later normalised in place) */
+       struct rspamd_url *linked_url; /* associated URL: the displayed URL when the link is flagged PHISHED, or the URL attached by lua_url_set_redirected (REDIRECTED) */
+
+       guint16 port; /* explicit port; readers in this change consult it only when RSPAMD_URL_FLAG_HAS_PORT is set */
 };
 
 #define rspamd_url_user(u) ((u)->userlen > 0 ? (u)->string + (u)->usershift : NULL)
@@ -350,6 +361,22 @@ int rspamd_url_cmp(const struct rspamd_url *u1, const struct rspamd_url *u2);
  */
 int rspamd_url_cmp_qsort(const void *u1, const void *u2);
 
+static inline uint16_t rspamd_url_get_port(struct rspamd_url *u)
+{ /* Returns the port for `u`: the stored one when available, otherwise a protocol default; never returns 0 from the fallback path */
+       if (u->flags & RSPAMD_URL_FLAG_HAS_PORT && u->ext) { /* parsed as (flags & HAS_PORT) && ext, since `&` binds tighter than `&&` */
+               return u->ext->port; /* explicit port kept in the optional extension structure */
+       }
+       else {
+               /* Assume standard port: 443 for PROTOCOL_HTTPS, 80 for every other protocol (including non-HTTP ones) */
+               if (u->protocol == PROTOCOL_HTTPS) {
+                       return 443;
+               }
+               else {
+                       return 80;
+               }
+       }
+}
+
 /**
  * Normalize unicode input and set out url flags as appropriate
  * @param pool
index 39b0293aae8e5d5b19979b6482a01f6e3b374ccd..a46f4e2769f32e33c1852e9c1496ad039fe40c6b 100644 (file)
@@ -186,7 +186,7 @@ lua_url_get_port (lua_State *L)
        struct rspamd_lua_url *url = lua_check_url (L, 1);
 
        if (url != NULL) {
-               lua_pushinteger (L, url->url->port);
+               lua_pushinteger (L, rspamd_url_get_port(url->url));
        }
        else {
                lua_pushnil (L);
@@ -475,12 +475,13 @@ lua_url_get_phished (lua_State *L)
        struct rspamd_lua_url *purl, *url = lua_check_url (L, 1);
 
        if (url) {
-               if (url->url->linked_url != NULL) {
+               if (url->url->ext && url->url->ext->linked_url != NULL) {
+                       /* XXX: in fact, this is the only possible combination of flags, so this check is redundant */
                        if (url->url->flags &
                                        (RSPAMD_URL_FLAG_PHISHED|RSPAMD_URL_FLAG_REDIRECTED)) {
                                purl = lua_newuserdata (L, sizeof (struct rspamd_lua_url));
                                rspamd_lua_setclass (L, "rspamd{url}", -1);
-                               purl->url = url->url->linked_url;
+                               purl->url = url->url->ext->linked_url;
 
                                return 1;
                        }
@@ -535,7 +536,11 @@ lua_url_set_redirected (lua_State *L)
                        redir = lua_check_url (L, -1);
 
                        url->url->flags |= RSPAMD_URL_FLAG_REDIRECTED;
-                       url->url->linked_url = redir->url;
+
+                       if (url->url->ext == NULL) {
+                               url->url->ext = rspamd_mempool_alloc0_type(pool, struct rspamd_url_ext);
+                       }
+                       url->url->ext->linked_url = redir->url;
                }
        }
        else {
@@ -546,7 +551,10 @@ lua_url_set_redirected (lua_State *L)
                }
 
                url->url->flags |= RSPAMD_URL_FLAG_REDIRECTED;
-               url->url->linked_url = redir->url;
+               if (url->url->ext == NULL) {
+                       url->url->ext = rspamd_mempool_alloc0_type(pool, struct rspamd_url_ext);
+               }
+               url->url->ext->linked_url = redir->url;
 
                /* Push back on stack */
                lua_pushvalue (L, 2);
@@ -629,8 +637,8 @@ lua_url_get_visible (lua_State *L)
        LUA_TRACE_POINT;
        struct rspamd_lua_url *url = lua_check_url (L, 1);
 
-       if (url != NULL && url->url->visible_part) {
-               lua_pushstring (L, url->url->visible_part);
+       if (url != NULL && url->url->ext && url->url->ext->visible_part) {
+               lua_pushstring (L, url->url->ext->visible_part);
        }
        else {
                lua_pushnil (L);
@@ -671,11 +679,9 @@ lua_url_to_table (lua_State *L)
                        lua_settable (L, -3);
                }
 
-               if (u->port != 0) {
-                       lua_pushstring (L, "port");
-                       lua_pushinteger (L, u->port);
-                       lua_settable (L, -3);
-               }
+               lua_pushstring (L, "port");
+               lua_pushinteger (L, rspamd_url_get_port(u));
+               lua_settable (L, -3);
 
                if (u->tldlen > 0) {
                        lua_pushstring (L, "tld");