diff options
author | Vsevolod Stakhov <vsevolod@rspamd.com> | 2024-02-13 00:34:07 +0600 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-02-13 00:34:07 +0600 |
commit | 2cc55730c95794b427fd9fd9c1d9ad82e37b9696 (patch) | |
tree | a91530a9c1828f9698bb94ada41bee535abcd823 | |
parent | ced57f74834e021342210db9015f7a29da88dd87 (diff) | |
parent | f8210a5ba96334cc88ea876c1e496b132c1236c5 (diff) | |
download | rspamd-2cc55730c95794b427fd9fd9c1d9ad82e37b9696.tar.gz rspamd-2cc55730c95794b427fd9fd9c1d9ad82e37b9696.zip |
Merge pull request #4814 from rspamd/vstakhov-redirector-fragment
Fix issues with URL fragment in HTTP requests
-rw-r--r-- | src/libserver/http/http_message.c | 32 | ||||
-rw-r--r-- | src/libserver/http/http_message.h | 8 | ||||
-rw-r--r-- | src/lua/lua_url.c | 138 | ||||
-rw-r--r-- | test/rspamd_cxx_unit_utils.hxx | 29 |
4 files changed, 174 insertions, 33 deletions
diff --git a/src/libserver/http/http_message.c b/src/libserver/http/http_message.c index 670122d52..e02827a76 100644 --- a/src/libserver/http/http_message.c +++ b/src/libserver/http/http_message.c @@ -1,11 +1,11 @@ -/*- - * Copyright 2019 Vsevolod Stakhov +/* + * Copyright 2024 Vsevolod Stakhov * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -85,7 +85,21 @@ rspamd_http_message_from_url(const gchar *url) } else { path = url + pu.field_data[UF_PATH].off; - pathlen = urllen - pu.field_data[UF_PATH].off; + pathlen = pu.field_data[UF_PATH].len; + + if (path > url && *(path - 1) == '/') { + path--; + pathlen++; + } + + + /* Include query if needed */ + if ((pu.field_set & (1 << UF_QUERY)) != 0) { + /* Include both ? and query */ + pathlen += pu.field_data[UF_QUERY].len + 1; + } + + /* Do not include fragment here! */ } msg = rspamd_http_new_message(HTTP_REQUEST); @@ -722,4 +736,14 @@ bool rspamd_http_message_is_standard_port(struct rspamd_http_message *msg) } return msg->port == 80; +} + +const gchar *rspamd_http_message_get_url(struct rspamd_http_message *msg, gsize *len) +{ + if (msg->url) { + *len = msg->url->len; + return msg->url->str; + } + + return NULL; }
\ No newline at end of file diff --git a/src/libserver/http/http_message.h b/src/libserver/http/http_message.h index fa8ed04c2..a483b316a 100644 --- a/src/libserver/http/http_message.h +++ b/src/libserver/http/http_message.h @@ -1,11 +1,11 @@ -/*- - * Copyright 2019 Vsevolod Stakhov +/* + * Copyright 2024 Vsevolod Stakhov * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -247,6 +247,8 @@ const gchar *rspamd_http_message_get_http_host(struct rspamd_http_message *msg, */ bool rspamd_http_message_is_standard_port(struct rspamd_http_message *msg); +const gchar *rspamd_http_message_get_url(struct rspamd_http_message *msg, gsize *len); + #ifdef __cplusplus } #endif diff --git a/src/lua/lua_url.c b/src/lua/lua_url.c index 913469f6d..c09eedea6 100644 --- a/src/lua/lua_url.c +++ b/src/lua/lua_url.c @@ -1,5 +1,5 @@ /* - * Copyright 2023 Vsevolod Stakhov + * Copyright 2024 Vsevolod Stakhov * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -74,6 +74,7 @@ LUA_FUNCTION_DEF(url, lt); LUA_FUNCTION_DEF(url, eq); LUA_FUNCTION_DEF(url, get_order); LUA_FUNCTION_DEF(url, get_part_order); +LUA_FUNCTION_DEF(url, to_http); static const struct luaL_reg urllib_m[] = { LUA_INTERFACE_DEF(url, get_length), @@ -101,6 +102,7 @@ static const struct luaL_reg urllib_m[] = { LUA_INTERFACE_DEF(url, get_flags_num), LUA_INTERFACE_DEF(url, get_order), LUA_INTERFACE_DEF(url, get_part_order), + LUA_INTERFACE_DEF(url, to_http), {"get_redirected", lua_url_get_phished}, LUA_INTERFACE_DEF(url, set_redirected), {"__tostring", lua_url_tostring}, @@ -344,6 +346,89 @@ lua_url_tostring(lua_State *L) } /*** + * @method url:to_http() + * Get URL suitable for HTTP request (e.g. by trimming fragment and user parts) + * @return {string} url as a string + */ +static gint +lua_url_to_http(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_lua_url *url = lua_check_url(L, 1); + + if (url != NULL && url->url != NULL) { + if (url->url->protocol == PROTOCOL_MAILTO) { + /* Nothing to do here */ + lua_pushnil(L); + } + else { + + if (url->url->userlen > 0) { + /* We need to reconstruct url :( */ + gsize len = url->url->urllen - url->url->fragmentlen + 1; + + /* Strip the # character */ + if (url->url->fragmentlen > 0 && len > 0) { + while (url->url->string[len - 1] == '#' && len > 0) { + len--; + } + } + gchar *nstr = g_malloc(len); + gchar *d = nstr, *end = nstr + len; + memcpy(nstr, url->url->string, url->url->protocollen); + d += url->url->protocollen; + *d++ = ':'; + *d++ = '/'; + *d++ = '/'; + + /* Host part */ + memcpy(d, rspamd_url_host(url->url), url->url->hostlen); + d += url->url->hostlen; + + int port = rspamd_url_get_port_if_special(url->url); + + if (port > 0) { + d += rspamd_snprintf(d, end - d, ":%d/", port); + } + else { + *d++ = '/'; + } + + if (url->url->datalen > 0) { + memcpy(d, rspamd_url_data_unsafe(url->url), url->url->datalen); + d += url->url->datalen; + } + + if (url->url->querylen > 0) { + *d++ = '?'; + memcpy(d, rspamd_url_query_unsafe(url->url), url->url->querylen); + d += url->url->querylen; + } + + g_assert(d < end); + lua_pushlstring(L, nstr, d - nstr); + } + else { + gsize len = url->url->urllen - url->url->fragmentlen; + + /* Strip the # character */ + if (url->url->fragmentlen > 0 && len > 0) { + while (url->url->string[len - 1] == '#' && len > 0) { + len--; + } + } + lua_pushlstring(L, url->url->string, len); + } + } + } + else { + lua_pushnil(L); + } + + return 1; +} + +/*** * @method url:get_raw() * Get full content of the url as it was parsed (e.g. with urldecode) * @return {string} url string @@ -773,38 +858,41 @@ lua_url_create(lua_State *L) } else { pool = static_lua_url_pool; - t = lua_check_text_or_string(L, 2); + t = lua_check_text_or_string(L, 1); } - if (pool == NULL || t == NULL) { - return luaL_error(L, "invalid arguments"); + if (pool == NULL) { + return luaL_error(L, "invalid arguments: mempool is expected as the second argument"); } - else { - rspamd_url_find_single(pool, t->start, t->len, RSPAMD_URL_FIND_ALL, - lua_url_single_inserter, L); - if (lua_type(L, -1) != LUA_TUSERDATA) { - /* URL is actually not found */ - lua_pushnil(L); + if (t == NULL) { + return luaL_error(L, "invalid arguments: string/text is expected as the first argument"); + } - return 1; - } + rspamd_url_find_single(pool, t->start, t->len, RSPAMD_URL_FIND_ALL, + lua_url_single_inserter, L); - u = (struct rspamd_lua_url *) lua_touserdata(L, -1); + if (lua_type(L, -1) != LUA_TUSERDATA) { + /* URL is actually not found */ + lua_pushnil(L); - if (lua_type(L, 3) == LUA_TTABLE) { - /* Add flags */ - for (lua_pushnil(L); lua_next(L, 3); lua_pop(L, 1)) { - int nmask = 0; - const gchar *fname = lua_tostring(L, -1); + return 1; + } - if (rspamd_url_flag_from_string(fname, &nmask)) { - u->url->flags |= nmask; - } - else { - lua_pop(L, 1); - return luaL_error(L, "invalid flag: %s", fname); - } + u = (struct rspamd_lua_url *) lua_touserdata(L, -1); + + if (lua_type(L, 3) == LUA_TTABLE) { + /* Add flags */ + for (lua_pushnil(L); lua_next(L, 3); lua_pop(L, 1)) { + int nmask = 0; + const gchar *fname = lua_tostring(L, -1); + + if (rspamd_url_flag_from_string(fname, &nmask)) { + u->url->flags |= nmask; + } + else { + lua_pop(L, 1); + return luaL_error(L, "invalid flag: %s", fname); } } } diff --git a/test/rspamd_cxx_unit_utils.hxx b/test/rspamd_cxx_unit_utils.hxx index 126253fd6..1dea7af9d 100644 --- a/test/rspamd_cxx_unit_utils.hxx +++ b/test/rspamd_cxx_unit_utils.hxx @@ -1,5 +1,5 @@ /* - * Copyright 2023 Vsevolod Stakhov + * Copyright 2024 Vsevolod Stakhov * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,6 +25,7 @@ #include "libmime/mime_headers.h" #include "contrib/libottery/ottery.h" #include "libcryptobox/cryptobox.h" +#include "libserver/http/http_message.h" #include <vector> #include <utility> @@ -204,6 +205,32 @@ TEST_SUITE("rspamd_utils") } } } + + TEST_CASE("rspamd_http_message_from_url") + { + std::vector<std::pair<std::string, std::string>> cases{ + {"http://example.com", "/"}, + {"http://example.com/", "/"}, + {"http://example.com/lol", "/lol"}, + {"http://example.com/lol#keke", "/lol"}, + {"http://example.com/lol?omg=huh&oh", "/lol?omg=huh&oh"}, + {"http://example.com/lol?omg=huh&oh#", "/lol?omg=huh&oh"}, + {"http://example.com/lol?omg=huh&oh#keke", "/lol?omg=huh&oh"}, + {"http://example.com/lol?", "/lol"}, + {"http://example.com/lol?#", "/lol"}, + }; + + for (const auto &c: cases) { + SUBCASE(("rspamd_http_message_from_url: " + c.first).c_str()) + { + auto *msg = rspamd_http_message_from_url(c.first.c_str()); + std::size_t nlen; + auto *path = rspamd_http_message_get_url(msg, &nlen); + CHECK(std::string{path, nlen} == c.second); + rspamd_http_message_unref(msg); + } + } + } } #endif |