aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2020-03-06 13:14:41 +0000
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2020-03-09 10:46:11 +0000
commit 3e3b94276f03f520bcd1756876c1077f250127d9 (patch)
tree 950caa40276702d2bf2532deb87f4f2acda01f51
parent cd270c51b2ccd814804e4f17eb31dc7d91a69980 (diff)
download rspamd-3e3b94276f03f520bcd1756876c1077f250127d9.tar.gz
rspamd-3e3b94276f03f520bcd1756876c1077f250127d9.zip
[Rework] Rework URL structure: more structure optimisations
-rw-r--r-- src/libserver/html.c 4
-rw-r--r-- src/libserver/url.c 114
-rw-r--r-- src/libserver/url.h 37
-rw-r--r-- src/lua/lua_url.c 12
4 files changed, 98 insertions, 69 deletions
diff --git a/src/libserver/html.c b/src/libserver/html.c
index 7dca72453..e1a211d2c 100644
--- a/src/libserver/html.c
+++ b/src/libserver/html.c
@@ -1631,7 +1631,7 @@ rspamd_process_html_url (rspamd_mempool_t *pool, struct rspamd_url *url,
if (url->querylen > 0) {
- if (rspamd_url_find (pool, url->query, url->querylen, &url_str,
+ if (rspamd_url_find (pool, rspamd_url_query_unsafe (url), url->querylen, &url_str,
RSPAMD_URL_FIND_ALL,
NULL, &prefix_added)) {
query_url = rspamd_mempool_alloc0 (pool,
@@ -1646,7 +1646,7 @@ rspamd_process_html_url (rspamd_mempool_t *pool, struct rspamd_url *url,
if (rc == URI_ERRNO_OK &&
query_url->hostlen > 0) {
msg_debug_html ("found url %s in query of url"
- " %*s", url_str, url->querylen, url->query);
+ " %*s", url_str, url->querylen, rspamd_url_query_unsafe (url));
if (query_url->protocol == PROTOCOL_MAILTO) {
target_tbl = tbl_emails;
diff --git a/src/libserver/url.c b/src/libserver/url.c
index ac4c11916..7e85a460e 100644
--- a/src/libserver/url.c
+++ b/src/libserver/url.c
@@ -1573,6 +1573,7 @@ rspamd_url_regen_from_inet_addr (struct rspamd_url *uri, const void *addr, int a
rspamd_mempool_t *pool)
{
gchar *strbuf, *p;
+ const gchar *start_offset;
gsize slen = uri->urllen - uri->hostlen;
goffset r = 0;
@@ -1589,39 +1590,46 @@ rspamd_url_regen_from_inet_addr (struct rspamd_url *uri, const void *addr, int a
(gint)(uri->hostshift),
uri->string);
uri->hostshift = r;
+ start_offset = strbuf + r;
inet_ntop (af, addr, strbuf + r, slen - r + 1);
- uri->hostlen = strlen (rspamd_url_host_unsafe (uri));
+ uri->hostlen = strlen (start_offset);
r += uri->hostlen;
- uri->tld = rspamd_url_host_unsafe (uri);
+ uri->tld = (const gchar *)start_offset;
uri->tldlen = uri->hostlen;
uri->flags |= RSPAMD_URL_FLAG_NUMERIC;
/* Reconstruct URL */
if (uri->datalen > 0) {
- p = strbuf + r + 1;
+ p = strbuf + r;
+ start_offset = p + 1;
r += rspamd_snprintf (strbuf + r, slen - r, "/%*s",
(gint)uri->datalen,
- uri->data);
- uri->data = p;
+ rspamd_url_data_unsafe (uri));
+ uri->datashift = start_offset - strbuf;
}
else {
/* Add trailing slash if needed */
- r += rspamd_snprintf (strbuf + r, slen - r, "/");
+ if (uri->hostlen + uri->hostshift < uri->urllen &&
+ *(rspamd_url_host_unsafe (uri) + uri->hostlen) == '/') {
+ r += rspamd_snprintf (strbuf + r, slen - r, "/");
+ }
}
if (uri->querylen > 0) {
- p = strbuf + r + 1;
+ p = strbuf + r;
+ start_offset = p + 1;
r += rspamd_snprintf (strbuf + r, slen - r, "?%*s",
(gint)uri->querylen,
- uri->query);
- uri->query = p;
+ rspamd_url_query_unsafe (uri));
+ uri->queryshift = start_offset - strbuf;
}
if (uri->fragmentlen > 0) {
- p = strbuf + r + 1;
+ p = strbuf + r;
+ start_offset = p + 1;
r += rspamd_snprintf (strbuf + r, slen - r, "#%*s",
(gint)uri->fragmentlen,
- uri->fragment);
- uri->fragment = p;
+ rspamd_url_fragment_unsafe (uri));
+ uri->fragmentshift = start_offset - strbuf;
}
uri->string = strbuf;
@@ -1832,9 +1840,10 @@ rspamd_url_shift (struct rspamd_url *uri, gsize nlen,
old_shift = uri->datalen;
uri->datalen -= shift;
- remain = (uri->urllen - (uri->data - uri->string)) - old_shift;
+ remain = (uri->urllen - (uri->datashift)) - old_shift;
g_assert (remain >= 0);
- memmove (uri->data + uri->datalen, uri->data + old_shift,
+ memmove (rspamd_url_data_unsafe (uri) + uri->datalen,
+ rspamd_url_data_unsafe (uri) + old_shift,
remain);
uri->urllen -= shift;
uri->flags |= RSPAMD_URL_FLAG_PATHENCODED;
@@ -1849,9 +1858,10 @@ rspamd_url_shift (struct rspamd_url *uri, gsize nlen,
old_shift = uri->querylen;
uri->querylen -= shift;
- remain = (uri->urllen - (uri->query - uri->string)) - old_shift;
+ remain = (uri->urllen - (uri->queryshift)) - old_shift;
g_assert (remain >= 0);
- memmove (uri->query + uri->querylen, uri->query + old_shift,
+ memmove (rspamd_url_query_unsafe (uri) + uri->querylen,
+ rspamd_url_query_unsafe (uri) + old_shift,
remain);
uri->urllen -= shift;
uri->flags |= RSPAMD_URL_FLAG_QUERYENCODED;
@@ -1881,21 +1891,25 @@ rspamd_url_shift (struct rspamd_url *uri, gsize nlen,
uri->hostshift -= shift;
}
/* Go forward */
+ /* FALLTHRU */
case UF_HOST:
if (uri->datalen > 0) {
- uri->data -= shift;
+ uri->datashift -= shift;
}
/* Go forward */
+ /* FALLTHRU */
case UF_PATH:
if (uri->querylen > 0) {
- uri->query -= shift;
+ uri->queryshift -= shift;
}
/* Go forward */
+ /* FALLTHRU */
case UF_QUERY:
if (uri->fragmentlen > 0) {
- uri->fragment -= shift;
+ uri->fragmentshift -= shift;
}
/* Go forward */
+ /* FALLTHRU */
case UF_FRAGMENT:
default:
break;
@@ -1943,7 +1957,7 @@ rspamd_url_parse (struct rspamd_url *uri,
enum rspamd_url_parse_flags parse_flags)
{
struct http_parser_url u;
- gchar *p, *comp;
+ gchar *p;
const gchar *end;
guint i, complen, ret, flags = 0;
guint unquoted_len = 0;
@@ -2015,31 +2029,36 @@ rspamd_url_parse (struct rspamd_url *uri,
for (i = 0; i < UF_MAX; i++) {
if (u.field_set & (1 << i)) {
- comp = uri->string + u.field_data[i].off;
+ guint shift = u.field_data[i].off;
complen = u.field_data[i].len;
+ if (complen >= G_MAXUINT16) {
+ /* Too large component length */
+ return URI_ERRNO_BAD_FORMAT;
+ }
+
switch (i) {
case UF_SCHEMA:
uri->protocollen = u.field_data[i].len;
break;
case UF_HOST:
- uri->hostshift = u.field_data[i].off;
+ uri->hostshift = shift;
uri->hostlen = complen;
break;
case UF_PATH:
- uri->data = comp;
+ uri->datashift = shift;
uri->datalen = complen;
break;
case UF_QUERY:
- uri->query = comp;
+ uri->queryshift = shift;
uri->querylen = complen;
break;
case UF_FRAGMENT:
- uri->fragment = comp;
+ uri->fragmentshift = shift;
uri->fragmentlen = complen;
break;
case UF_USERINFO:
- uri->usershift = u.field_data[i].off;
+ uri->usershift = shift;
uri->userlen = complen;
break;
default:
@@ -2129,31 +2148,36 @@ rspamd_url_parse (struct rspamd_url *uri,
/* Process data part */
if (uri->datalen) {
- unquoted_len = rspamd_url_decode (uri->data, uri->data, uri->datalen);
- if (rspamd_normalise_unicode_inplace (pool, uri->data, &unquoted_len)) {
+ unquoted_len = rspamd_url_decode (rspamd_url_data_unsafe (uri),
+ rspamd_url_data_unsafe (uri), uri->datalen);
+ if (rspamd_normalise_unicode_inplace (pool, rspamd_url_data_unsafe (uri),
+ &unquoted_len)) {
uri->flags |= RSPAMD_URL_FLAG_UNNORMALISED;
}
rspamd_url_shift (uri, unquoted_len, UF_PATH);
/* We now normalize path */
- rspamd_http_normalize_path_inplace (uri->data, uri->datalen, &unquoted_len);
+ rspamd_http_normalize_path_inplace (rspamd_url_data_unsafe (uri),
+ uri->datalen, &unquoted_len);
rspamd_url_shift (uri, unquoted_len, UF_PATH);
}
if (uri->querylen) {
- unquoted_len = rspamd_url_decode (uri->query,
- uri->query,
+ unquoted_len = rspamd_url_decode (rspamd_url_query_unsafe (uri),
+ rspamd_url_query_unsafe (uri),
uri->querylen);
- if (rspamd_normalise_unicode_inplace (pool, uri->query, &unquoted_len)) {
+ if (rspamd_normalise_unicode_inplace (pool, rspamd_url_query_unsafe (uri),
+ &unquoted_len)) {
uri->flags |= RSPAMD_URL_FLAG_UNNORMALISED;
}
rspamd_url_shift (uri, unquoted_len, UF_QUERY);
}
if (uri->fragmentlen) {
- unquoted_len = rspamd_url_decode (uri->fragment,
- uri->fragment,
+ unquoted_len = rspamd_url_decode (rspamd_url_fragment_unsafe (uri),
+ rspamd_url_fragment_unsafe (uri),
uri->fragmentlen);
- if (rspamd_normalise_unicode_inplace (pool, uri->fragment, &unquoted_len)) {
+ if (rspamd_normalise_unicode_inplace (pool, rspamd_url_fragment_unsafe (uri),
+ &unquoted_len)) {
uri->flags |= RSPAMD_URL_FLAG_UNNORMALISED;
}
rspamd_url_shift (uri, unquoted_len, UF_FRAGMENT);
@@ -3148,7 +3172,7 @@ rspamd_url_text_part_callback (struct rspamd_url *url, gsize start_offset,
/* We also search the query for additional url inside */
if (url->querylen > 0) {
- if (rspamd_url_find (task->task_pool, url->query, url->querylen,
+ if (rspamd_url_find (task->task_pool, rspamd_url_query_unsafe (url), url->querylen,
&url_str, RSPAMD_URL_FIND_ALL, NULL, &prefix_added)) {
query_url = rspamd_mempool_alloc0 (task->task_pool,
sizeof (struct rspamd_url));
@@ -3161,7 +3185,7 @@ rspamd_url_text_part_callback (struct rspamd_url *url, gsize start_offset,
if (rc == URI_ERRNO_OK &&
query_url->hostlen > 0) {
msg_debug_task ("found url %s in query of url"
- " %*s", url_str, url->querylen, url->query);
+ " %*s", url_str, url->querylen, rspamd_url_query_unsafe (url));
if (prefix_added) {
query_url->flags |= RSPAMD_URL_FLAG_SCHEMALESS;
@@ -3314,7 +3338,7 @@ rspamd_url_task_subject_callback (struct rspamd_url *url, gsize start_offset,
/* We also search the query for additional url inside */
if (url->querylen > 0) {
- if (rspamd_url_find (task->task_pool, url->query, url->querylen,
+ if (rspamd_url_find (task->task_pool, rspamd_url_query_unsafe (url), url->querylen,
&url_str, RSPAMD_URL_FIND_ALL, NULL, &prefix_added)) {
query_url = rspamd_mempool_alloc0 (task->task_pool,
@@ -3328,7 +3352,7 @@ rspamd_url_task_subject_callback (struct rspamd_url *url, gsize start_offset,
if (rc == URI_ERRNO_OK &&
url->hostlen > 0) {
msg_debug_task ("found url %s in query of url"
- " %*s", url_str, url->querylen, url->query);
+ " %*s", url_str, url->querylen, rspamd_url_query_unsafe (url));
if (prefix_added) {
query_url->flags |= RSPAMD_URL_FLAG_SCHEMALESS;
@@ -3651,11 +3675,11 @@ rspamd_url_encode (struct rspamd_url *url, gsize *pdlen,
RSPAMD_URL_FLAGS_HOSTSAFE);
CHECK_URL_COMPONENT (rspamd_url_user_unsafe(url), url->userlen,
RSPAMD_URL_FLAGS_USERSAFE);
- CHECK_URL_COMPONENT ((guchar *)url->data, url->datalen,
+ CHECK_URL_COMPONENT (rspamd_url_data_unsafe (url), url->datalen,
RSPAMD_URL_FLAGS_PATHSAFE);
- CHECK_URL_COMPONENT ((guchar *)url->query, url->querylen,
+ CHECK_URL_COMPONENT (rspamd_url_query_unsafe (url), url->querylen,
RSPAMD_URL_FLAGS_QUERYSAFE);
- CHECK_URL_COMPONENT ((guchar *)url->fragment, url->fragmentlen,
+ CHECK_URL_COMPONENT (rspamd_url_fragment_unsafe (url), url->fragmentlen,
RSPAMD_URL_FLAGS_FRAGMENTSAFE);
if (dlen == 0) {
@@ -3698,19 +3722,19 @@ rspamd_url_encode (struct rspamd_url *url, gsize *pdlen,
if (url->datalen > 0) {
*d++ = '/';
- ENCODE_URL_COMPONENT ((guchar *)url->data, url->datalen,
+ ENCODE_URL_COMPONENT (rspamd_url_data_unsafe (url), url->datalen,
RSPAMD_URL_FLAGS_PATHSAFE);
}
if (url->querylen > 0) {
*d++ = '?';
- ENCODE_URL_COMPONENT ((guchar *)url->query, url->querylen,
+ ENCODE_URL_COMPONENT (rspamd_url_query_unsafe (url), url->querylen,
RSPAMD_URL_FLAGS_QUERYSAFE);
}
if (url->fragmentlen > 0) {
*d++ = '#';
- ENCODE_URL_COMPONENT ((guchar *)url->fragment, url->fragmentlen,
+ ENCODE_URL_COMPONENT (rspamd_url_fragment_unsafe (url), url->fragmentlen,
RSPAMD_URL_FLAGS_FRAGMENTSAFE);
}
diff --git a/src/libserver/url.h b/src/libserver/url.h
index 080f005c3..87766c4e6 100644
--- a/src/libserver/url.h
+++ b/src/libserver/url.h
@@ -44,33 +44,34 @@ struct rspamd_url_tag {
struct rspamd_url {
gchar *raw;
gchar *string;
- guint protocol;
- guint port;
- guint usershift;
- guint userlen;
+ guint16 protocol;
+ guint16 port;
+ guint usershift;
guint hostshift;
- guint hostlen;
+ guint datashift;
+ guint queryshift;
+ guint fragmentshift;
- gchar *data;
- gchar *query;
- gchar *fragment;
gchar *tld;
gchar *visible_part;
struct rspamd_url *phished_url;
- guint protocollen;
- guint datalen;
- guint querylen;
- guint fragmentlen;
- guint tldlen;
guint urllen;
guint rawlen;
+ guint32 flags;
- enum rspamd_url_flags flags;
- guint count;
+ guint16 protocollen;
+ guint16 userlen;
+ guint16 hostlen;
+ guint16 datalen;
+ guint16 querylen;
+ guint16 fragmentlen;
+ guint16 tldlen;
+
+ guint16 count;
};
#define rspamd_url_user(u) ((u)->userlen > 0 ? (u)->string + (u)->usershift : NULL)
@@ -79,6 +80,10 @@ struct rspamd_url {
#define rspamd_url_host(u) ((u)->hostlen > 0 ? (u)->string + (u)->hostshift : NULL)
#define rspamd_url_host_unsafe(u) ((u)->string + (u)->hostshift)
+#define rspamd_url_data_unsafe(u) ((u)->string + (u)->datashift)
+#define rspamd_url_query_unsafe(u) ((u)->string + (u)->queryshift)
+#define rspamd_url_fragment_unsafe(u) ((u)->string + (u)->fragmentshift)
+
enum uri_errno {
URI_ERRNO_OK = 0, /* Parsing went well */
URI_ERRNO_EMPTY, /* The URI string was empty */
@@ -97,7 +102,7 @@ enum rspamd_url_protocol {
PROTOCOL_HTTPS = 1u << 3u,
PROTOCOL_MAILTO = 1u << 4u,
PROTOCOL_TELEPHONE = 1u << 5u,
- PROTOCOL_UNKNOWN = 1u << 31u,
+ PROTOCOL_UNKNOWN = 1u << 15u,
};
enum rspamd_url_parse_flags {
diff --git a/src/lua/lua_url.c b/src/lua/lua_url.c
index bd94120e2..cb54a694c 100644
--- a/src/lua/lua_url.c
+++ b/src/lua/lua_url.c
@@ -220,7 +220,7 @@ lua_url_get_path (lua_State *L)
struct rspamd_lua_url *url = lua_check_url (L, 1);
if (url != NULL && url->url->datalen > 0) {
- lua_pushlstring (L, url->url->data, url->url->datalen);
+ lua_pushlstring (L, rspamd_url_data_unsafe (url->url), url->url->datalen);
}
else {
lua_pushnil (L);
@@ -241,7 +241,7 @@ lua_url_get_query (lua_State *L)
struct rspamd_lua_url *url = lua_check_url (L, 1);
if (url != NULL && url->url->querylen > 0) {
- lua_pushlstring (L, url->url->query, url->url->querylen);
+ lua_pushlstring (L, rspamd_url_query_unsafe (url->url), url->url->querylen);
}
else {
lua_pushnil (L);
@@ -262,7 +262,7 @@ lua_url_get_fragment (lua_State *L)
struct rspamd_lua_url *url = lua_check_url (L, 1);
if (url != NULL && url->url->fragmentlen > 0) {
- lua_pushlstring (L, url->url->fragment, url->url->fragmentlen);
+ lua_pushlstring (L, rspamd_url_fragment_unsafe (url->url), url->url->fragmentlen);
}
else {
lua_pushnil (L);
@@ -684,19 +684,19 @@ lua_url_to_table (lua_State *L)
if (u->datalen > 0) {
lua_pushstring (L, "path");
- lua_pushlstring (L, u->data, u->datalen);
+ lua_pushlstring (L, rspamd_url_data_unsafe (u), u->datalen);
lua_settable (L, -3);
}
if (u->querylen > 0) {
lua_pushstring (L, "query");
- lua_pushlstring (L, u->query, u->querylen);
+ lua_pushlstring (L, rspamd_url_query_unsafe (u), u->querylen);
lua_settable (L, -3);
}
if (u->fragmentlen > 0) {
lua_pushstring (L, "fragment");
- lua_pushlstring (L, u->fragment, u->fragmentlen);
+ lua_pushlstring (L, rspamd_url_fragment_unsafe (u), u->fragmentlen);
lua_settable (L, -3);
}