From 3ec03548e2c72b69448bdadcc67bc008dddd43d5 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Sat, 12 Aug 2017 18:04:13 +0100 Subject: [PATCH] [Minor] Store raw urls --- src/libserver/url.c | 5 ++++- src/libserver/url.h | 2 ++ src/libutil/http.c | 1 + src/lua/lua_http.c | 3 +++ src/lua/lua_url.c | 22 ++++++++++++++++++++++ 5 files changed, 32 insertions(+), 1 deletion(-) diff --git a/src/libserver/url.c b/src/libserver/url.c index 0e28cf2d8..c908685bb 100644 --- a/src/libserver/url.c +++ b/src/libserver/url.c @@ -1592,7 +1592,10 @@ rspamd_url_parse (struct rspamd_url *uri, gchar *uristring, gsize len, uri->flags |= RSPAMD_URL_FLAG_OBSCURED; } - uri->string = p; + uri->raw = p; + uri->rawlen = len; + uri->string = rspamd_mempool_alloc (pool, len + 1); + rspamd_strlcpy (uri->string, p, len + 1); uri->urllen = len; /* Now decode url symbols */ diff --git a/src/libserver/url.h b/src/libserver/url.h index e4834d9bc..48f5acc12 100644 --- a/src/libserver/url.h +++ b/src/libserver/url.h @@ -25,6 +25,7 @@ struct rspamd_url_tag { }; struct rspamd_url { + gchar *raw; gchar *string; gint protocol; guint port; @@ -48,6 +49,7 @@ struct rspamd_url { guint surbllen; guint tldlen; guint urllen; + guint rawlen; enum rspamd_url_flags flags; guint count; diff --git a/src/libutil/http.c b/src/libutil/http.c index 80eff63d3..7f69843ea 100644 --- a/src/libutil/http.c +++ b/src/libutil/http.c @@ -2385,6 +2385,7 @@ rspamd_http_message_from_url (const gchar *url) urllen = strlen (url); memset (&pu, 0, sizeof (pu)); + if (http_parser_parse_url (url, urllen, FALSE, &pu) != 0) { msg_warn ("cannot parse URL: %s", url); return NULL; diff --git a/src/lua/lua_http.c b/src/lua/lua_http.c index 99a967db7..ad2a7d981 100644 --- a/src/lua/lua_http.c +++ b/src/lua/lua_http.c @@ -371,13 +371,16 @@ lua_http_request (lua_State *L) if (lua_gettop (L) >= 2) { /* url, callback and event_base format */ url = luaL_checkstring (L, 1); + if (url == NULL || lua_type (L, 2) != LUA_TFUNCTION) { msg_err ("http 
request has bad params"); lua_pushboolean (L, FALSE); return 1; } + lua_pushvalue (L, 2); cbref = luaL_ref (L, LUA_REGISTRYINDEX); + if (lua_gettop (L) >= 3 && rspamd_lua_check_udata_maybe (L, 3, "rspamd{ev_base}")) { ev_base = *(struct event_base **)lua_touserdata (L, 3); } diff --git a/src/lua/lua_url.c b/src/lua/lua_url.c index 15966bfa3..cbe9148a3 100644 --- a/src/lua/lua_url.c +++ b/src/lua/lua_url.c @@ -47,6 +47,7 @@ LUA_FUNCTION_DEF (url, get_path); LUA_FUNCTION_DEF (url, get_query); LUA_FUNCTION_DEF (url, get_fragment); LUA_FUNCTION_DEF (url, get_text); +LUA_FUNCTION_DEF (url, get_raw); LUA_FUNCTION_DEF (url, get_tld); LUA_FUNCTION_DEF (url, to_table); LUA_FUNCTION_DEF (url, is_phished); @@ -73,6 +74,7 @@ static const struct luaL_reg urllib_m[] = { LUA_INTERFACE_DEF (url, get_fragment), LUA_INTERFACE_DEF (url, get_text), LUA_INTERFACE_DEF (url, get_tld), + LUA_INTERFACE_DEF (url, get_raw), LUA_INTERFACE_DEF (url, to_table), LUA_INTERFACE_DEF (url, is_phished), LUA_INTERFACE_DEF (url, is_redirected), @@ -262,6 +264,26 @@ lua_url_get_text (lua_State *L) return 1; } +/*** + * @method url:get_raw() + * Get full content of the url as it was found in the source, before any decoding (i.e. without urldecode) + * @return {string} raw url string + */ +static gint +lua_url_get_raw (lua_State *L) +{ + struct rspamd_lua_url *url = lua_check_url (L, 1); + + if (url != NULL) { + lua_pushlstring (L, url->url->raw, url->url->rawlen); + } + else { + lua_pushnil (L); + } + + return 1; +} + /*** * @method url:is_phished() * Check whether URL is treated as phished -- 2.39.5