summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Vsevolod Stakhov <vsevolod@rspamd.com>, 2024-02-13 00:34:07 +0600
committer: GitHub <noreply@github.com>, 2024-02-13 00:34:07 +0600
commit: 2cc55730c95794b427fd9fd9c1d9ad82e37b9696 (patch)
tree: a91530a9c1828f9698bb94ada41bee535abcd823
parent: ced57f74834e021342210db9015f7a29da88dd87 (diff)
parent: f8210a5ba96334cc88ea876c1e496b132c1236c5 (diff)
download: rspamd-2cc55730c95794b427fd9fd9c1d9ad82e37b9696.tar.gz
download: rspamd-2cc55730c95794b427fd9fd9c1d9ad82e37b9696.zip
Merge pull request #4814 from rspamd/vstakhov-redirector-fragment
Fix issues with URL fragment in HTTP requests
-rw-r--r-- src/libserver/http/http_message.c | 32
-rw-r--r-- src/libserver/http/http_message.h | 8
-rw-r--r-- src/lua/lua_url.c | 138
-rw-r--r-- test/rspamd_cxx_unit_utils.hxx | 29
4 files changed, 174 insertions, 33 deletions
diff --git a/src/libserver/http/http_message.c b/src/libserver/http/http_message.c
index 670122d52..e02827a76 100644
--- a/src/libserver/http/http_message.c
+++ b/src/libserver/http/http_message.c
@@ -1,11 +1,11 @@
-/*-
- * Copyright 2019 Vsevolod Stakhov
+/*
+ * Copyright 2024 Vsevolod Stakhov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
@@ -85,7 +85,21 @@ rspamd_http_message_from_url(const gchar *url)
}
else {
path = url + pu.field_data[UF_PATH].off;
- pathlen = urllen - pu.field_data[UF_PATH].off;
+ pathlen = pu.field_data[UF_PATH].len;
+
+ if (path > url && *(path - 1) == '/') {
+ path--;
+ pathlen++;
+ }
+
+
+ /* Include query if needed */
+ if ((pu.field_set & (1 << UF_QUERY)) != 0) {
+ /* Include both ? and query */
+ pathlen += pu.field_data[UF_QUERY].len + 1;
+ }
+
+ /* Do not include fragment here! */
}
msg = rspamd_http_new_message(HTTP_REQUEST);
@@ -722,4 +736,14 @@ bool rspamd_http_message_is_standard_port(struct rspamd_http_message *msg)
}
return msg->port == 80;
+}
+
+const gchar *rspamd_http_message_get_url(struct rspamd_http_message *msg, gsize *len)
+{
+ if (msg->url) {
+ *len = msg->url->len;
+ return msg->url->str;
+ }
+
+ return NULL;
}
\ No newline at end of file
diff --git a/src/libserver/http/http_message.h b/src/libserver/http/http_message.h
index fa8ed04c2..a483b316a 100644
--- a/src/libserver/http/http_message.h
+++ b/src/libserver/http/http_message.h
@@ -1,11 +1,11 @@
-/*-
- * Copyright 2019 Vsevolod Stakhov
+/*
+ * Copyright 2024 Vsevolod Stakhov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
@@ -247,6 +247,8 @@ const gchar *rspamd_http_message_get_http_host(struct rspamd_http_message *msg,
*/
bool rspamd_http_message_is_standard_port(struct rspamd_http_message *msg);
+const gchar *rspamd_http_message_get_url(struct rspamd_http_message *msg, gsize *len);
+
#ifdef __cplusplus
}
#endif
diff --git a/src/lua/lua_url.c b/src/lua/lua_url.c
index 913469f6d..c09eedea6 100644
--- a/src/lua/lua_url.c
+++ b/src/lua/lua_url.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2023 Vsevolod Stakhov
+ * Copyright 2024 Vsevolod Stakhov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -74,6 +74,7 @@ LUA_FUNCTION_DEF(url, lt);
LUA_FUNCTION_DEF(url, eq);
LUA_FUNCTION_DEF(url, get_order);
LUA_FUNCTION_DEF(url, get_part_order);
+LUA_FUNCTION_DEF(url, to_http);
static const struct luaL_reg urllib_m[] = {
LUA_INTERFACE_DEF(url, get_length),
@@ -101,6 +102,7 @@ static const struct luaL_reg urllib_m[] = {
LUA_INTERFACE_DEF(url, get_flags_num),
LUA_INTERFACE_DEF(url, get_order),
LUA_INTERFACE_DEF(url, get_part_order),
+ LUA_INTERFACE_DEF(url, to_http),
{"get_redirected", lua_url_get_phished},
LUA_INTERFACE_DEF(url, set_redirected),
{"__tostring", lua_url_tostring},
@@ -344,6 +346,89 @@ lua_url_tostring(lua_State *L)
}
/***
+ * @method url:to_http()
+ * Get URL suitable for HTTP request (e.g. by trimming fragment and user parts)
+ * @return {string} url as a string
+ */
+static gint
+lua_url_to_http(lua_State *L)
+{
+ LUA_TRACE_POINT;
+ struct rspamd_lua_url *url = lua_check_url(L, 1);
+
+ if (url != NULL && url->url != NULL) {
+ if (url->url->protocol == PROTOCOL_MAILTO) {
+ /* Nothing to do here */
+ lua_pushnil(L);
+ }
+ else {
+
+ if (url->url->userlen > 0) {
+ /* We need to reconstruct url :( */
+ gsize len = url->url->urllen - url->url->fragmentlen + 1;
+
+ /* Strip the # character */
+ if (url->url->fragmentlen > 0 && len > 0) {
+ while (url->url->string[len - 1] == '#' && len > 0) {
+ len--;
+ }
+ }
+ gchar *nstr = g_malloc(len);
+ gchar *d = nstr, *end = nstr + len;
+ memcpy(nstr, url->url->string, url->url->protocollen);
+ d += url->url->protocollen;
+ *d++ = ':';
+ *d++ = '/';
+ *d++ = '/';
+
+ /* Host part */
+ memcpy(d, rspamd_url_host(url->url), url->url->hostlen);
+ d += url->url->hostlen;
+
+ int port = rspamd_url_get_port_if_special(url->url);
+
+ if (port > 0) {
+ d += rspamd_snprintf(d, end - d, ":%d/", port);
+ }
+ else {
+ *d++ = '/';
+ }
+
+ if (url->url->datalen > 0) {
+ memcpy(d, rspamd_url_data_unsafe(url->url), url->url->datalen);
+ d += url->url->datalen;
+ }
+
+ if (url->url->querylen > 0) {
+ *d++ = '?';
+ memcpy(d, rspamd_url_query_unsafe(url->url), url->url->querylen);
+ d += url->url->querylen;
+ }
+
+ g_assert(d < end);
+ lua_pushlstring(L, nstr, d - nstr);
+ }
+ else {
+ gsize len = url->url->urllen - url->url->fragmentlen;
+
+ /* Strip the # character */
+ if (url->url->fragmentlen > 0 && len > 0) {
+ while (url->url->string[len - 1] == '#' && len > 0) {
+ len--;
+ }
+ }
+ lua_pushlstring(L, url->url->string, len);
+ }
+ }
+ }
+ else {
+ lua_pushnil(L);
+ }
+
+ return 1;
+}
+
+/***
* @method url:get_raw()
* Get full content of the url as it was parsed (e.g. with urldecode)
* @return {string} url string
@@ -773,38 +858,41 @@ lua_url_create(lua_State *L)
}
else {
pool = static_lua_url_pool;
- t = lua_check_text_or_string(L, 2);
+ t = lua_check_text_or_string(L, 1);
}
- if (pool == NULL || t == NULL) {
- return luaL_error(L, "invalid arguments");
+ if (pool == NULL) {
+ return luaL_error(L, "invalid arguments: mempool is expected as the second argument");
}
- else {
- rspamd_url_find_single(pool, t->start, t->len, RSPAMD_URL_FIND_ALL,
- lua_url_single_inserter, L);
- if (lua_type(L, -1) != LUA_TUSERDATA) {
- /* URL is actually not found */
- lua_pushnil(L);
+ if (t == NULL) {
+ return luaL_error(L, "invalid arguments: string/text is expected as the first argument");
+ }
- return 1;
- }
+ rspamd_url_find_single(pool, t->start, t->len, RSPAMD_URL_FIND_ALL,
+ lua_url_single_inserter, L);
- u = (struct rspamd_lua_url *) lua_touserdata(L, -1);
+ if (lua_type(L, -1) != LUA_TUSERDATA) {
+ /* URL is actually not found */
+ lua_pushnil(L);
- if (lua_type(L, 3) == LUA_TTABLE) {
- /* Add flags */
- for (lua_pushnil(L); lua_next(L, 3); lua_pop(L, 1)) {
- int nmask = 0;
- const gchar *fname = lua_tostring(L, -1);
+ return 1;
+ }
- if (rspamd_url_flag_from_string(fname, &nmask)) {
- u->url->flags |= nmask;
- }
- else {
- lua_pop(L, 1);
- return luaL_error(L, "invalid flag: %s", fname);
- }
+ u = (struct rspamd_lua_url *) lua_touserdata(L, -1);
+
+ if (lua_type(L, 3) == LUA_TTABLE) {
+ /* Add flags */
+ for (lua_pushnil(L); lua_next(L, 3); lua_pop(L, 1)) {
+ int nmask = 0;
+ const gchar *fname = lua_tostring(L, -1);
+
+ if (rspamd_url_flag_from_string(fname, &nmask)) {
+ u->url->flags |= nmask;
+ }
+ else {
+ lua_pop(L, 1);
+ return luaL_error(L, "invalid flag: %s", fname);
}
}
}
diff --git a/test/rspamd_cxx_unit_utils.hxx b/test/rspamd_cxx_unit_utils.hxx
index 126253fd6..1dea7af9d 100644
--- a/test/rspamd_cxx_unit_utils.hxx
+++ b/test/rspamd_cxx_unit_utils.hxx
@@ -1,5 +1,5 @@
/*
- * Copyright 2023 Vsevolod Stakhov
+ * Copyright 2024 Vsevolod Stakhov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -25,6 +25,7 @@
#include "libmime/mime_headers.h"
#include "contrib/libottery/ottery.h"
#include "libcryptobox/cryptobox.h"
+#include "libserver/http/http_message.h"
#include <vector>
#include <utility>
@@ -204,6 +205,32 @@ TEST_SUITE("rspamd_utils")
}
}
}
+
+ TEST_CASE("rspamd_http_message_from_url")
+ {
+ std::vector<std::pair<std::string, std::string>> cases{
+ {"http://example.com", "/"},
+ {"http://example.com/", "/"},
+ {"http://example.com/lol", "/lol"},
+ {"http://example.com/lol#keke", "/lol"},
+ {"http://example.com/lol?omg=huh&oh", "/lol?omg=huh&oh"},
+ {"http://example.com/lol?omg=huh&oh#", "/lol?omg=huh&oh"},
+ {"http://example.com/lol?omg=huh&oh#keke", "/lol?omg=huh&oh"},
+ {"http://example.com/lol?", "/lol"},
+ {"http://example.com/lol?#", "/lol"},
+ };
+
+ for (const auto &c: cases) {
+ SUBCASE(("rspamd_http_message_from_url: " + c.first).c_str())
+ {
+ auto *msg = rspamd_http_message_from_url(c.first.c_str());
+ std::size_t nlen;
+ auto *path = rspamd_http_message_get_url(msg, &nlen);
+ CHECK(std::string{path, nlen} == c.second);
+ rspamd_http_message_unref(msg);
+ }
+ }
+ }
}
#endif