From 52d1dfb9acf71bd962cb3fe836818efb09dc1cc3 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Mon, 12 Feb 2024 14:59:52 +0000 Subject: [PATCH] [Feature] Lua_url: Add `to_http` method --- src/lua/lua_url.c | 87 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 86 insertions(+), 1 deletion(-) diff --git a/src/lua/lua_url.c b/src/lua/lua_url.c index 913469f6d..20f0a7121 100644 --- a/src/lua/lua_url.c +++ b/src/lua/lua_url.c @@ -1,5 +1,5 @@ /* - * Copyright 2023 Vsevolod Stakhov + * Copyright 2024 Vsevolod Stakhov * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -74,6 +74,7 @@ LUA_FUNCTION_DEF(url, lt); LUA_FUNCTION_DEF(url, eq); LUA_FUNCTION_DEF(url, get_order); LUA_FUNCTION_DEF(url, get_part_order); +LUA_FUNCTION_DEF(url, to_http); static const struct luaL_reg urllib_m[] = { LUA_INTERFACE_DEF(url, get_length), @@ -101,6 +102,7 @@ static const struct luaL_reg urllib_m[] = { LUA_INTERFACE_DEF(url, get_flags_num), LUA_INTERFACE_DEF(url, get_order), LUA_INTERFACE_DEF(url, get_part_order), + LUA_INTERFACE_DEF(url, to_http), {"get_redirected", lua_url_get_phished}, LUA_INTERFACE_DEF(url, set_redirected), {"__tostring", lua_url_tostring}, @@ -343,6 +345,89 @@ lua_url_tostring(lua_State *L) return 1; } +/*** + * @method url:to_http() + * Get URL suitable for HTTP request (e.g. by trimming fragment and user parts) + * @return {string} url as a string + */ +static gint +lua_url_to_http(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_lua_url *url = lua_check_url(L, 1); + + if (url != NULL && url->url != NULL) { + if (url->url->protocol == PROTOCOL_MAILTO) { + /* Nothing to do here */ + lua_pushnil(L); + } + else { + + if (url->url->userlen > 0) { + /* We need to reconstruct url :( */ + gsize len = url->url->urllen - url->url->fragmentlen + 1; + + /* Strip the # character */ + if (url->url->fragmentlen > 0 && len > 0) { + while (url->url->string[len - 1] == '#' && len > 0) { + len--; + } + } + gchar *nstr = g_malloc(len); + gchar *d = nstr, *end = nstr + len; + memcpy(nstr, url->url->string, url->url->protocollen); + d += url->url->protocollen; + *d++ = ':'; + *d++ = '/'; + *d++ = '/'; + + /* Host part */ + memcpy(d, rspamd_url_host(url->url), url->url->hostlen); + d += url->url->hostlen; + + int port = rspamd_url_get_port_if_special(url->url); + + if (port > 0) { + d += rspamd_snprintf(d, end - d, ":%d/", port); + } + else { + *d++ = '/'; + } + + if (url->url->datalen > 0) { + memcpy(d, rspamd_url_data_unsafe(url->url), url->url->datalen); + d += url->url->datalen; + } + + if (url->url->querylen > 0) { + *d++ = '?'; + memcpy(d, rspamd_url_query_unsafe(url->url), url->url->querylen); + d += url->url->querylen; + } + + g_assert(d < end); + lua_pushlstring(L, nstr, d - nstr); + } + else { + gsize len = url->url->urllen - url->url->fragmentlen; + + /* Strip the # character */ + if (url->url->fragmentlen > 0 && len > 0) { + while (url->url->string[len - 1] == '#' && len > 0) { + len--; + } + } + lua_pushlstring(L, url->url->string, len); + } + } + } + else { + lua_pushnil(L); + } + + return 1; +} + /*** * @method url:get_raw() * Get full content of the url as it was parsed (e.g. with urldecode) -- 2.39.5