diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2019-03-09 14:18:06 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-03-09 14:18:06 +0000 |
commit | fff6f70c33bbf080301230cbecb6aa00bbb6c5b3 (patch) | |
tree | 9b02bf0adba221b8ea6aff79fb982081b10286de /src | |
parent | f3277faf934a449ca680ba4ce4c3b98906b22af0 (diff) | |
parent | 33e60cf84e4c87149e6d00bb12e6744d28e702e9 (diff) | |
download | rspamd-fff6f70c33bbf080301230cbecb6aa00bbb6c5b3.tar.gz rspamd-fff6f70c33bbf080301230cbecb6aa00bbb6c5b3.zip |
Merge pull request #2771 from miecio45/feat_url_visible_part
[Minor] Export url visible part to lua and add new url flag
Diffstat (limited to 'src')
-rw-r--r-- | src/libserver/html.c | 26 | ||||
-rw-r--r-- | src/libserver/url.h | 3 | ||||
-rw-r--r-- | src/lua/lua_url.c | 25 |
3 files changed, 54 insertions, 0 deletions
diff --git a/src/libserver/html.c b/src/libserver/html.c index 63638d28b..c831dc14e 100644 --- a/src/libserver/html.c +++ b/src/libserver/html.c @@ -2377,11 +2377,37 @@ rspamd_html_check_displayed_url (rspamd_mempool_t *pool, return; } + url->visible_part = rspamd_mempool_alloc0(pool, dest->len - href_offset+1); + gchar *current_processed_char = dest->data + href_offset; + gchar *current_char_in_struct = url->visible_part; + gboolean previous_char_was_space = false; + + while (current_processed_char < (gchar*) dest->data + dest->len) { + if (g_ascii_isspace(*current_processed_char)) { + if (previous_char_was_space) { + current_processed_char++; + continue; + } + previous_char_was_space = true; + *current_char_in_struct = ' '; + } else { + *current_char_in_struct = *current_processed_char; + previous_char_was_space = false; + } + current_char_in_struct++; + current_processed_char++; + } + *current_char_in_struct = '\0'; + url->visible_partlen = current_char_in_struct - url->visible_part; + rspamd_html_url_is_phished (pool, url, dest->data + href_offset, dest->len - href_offset, &url_found, &displayed_url); + if (url_found) { + url->flags |= RSPAMD_URL_FLAG_DISPLAY_URL; + } if (exceptions && url_found) { ex = rspamd_mempool_alloc (pool, sizeof (*ex)); diff --git a/src/libserver/url.h b/src/libserver/url.h index 3deeb8cf5..ad09c33ec 100644 --- a/src/libserver/url.h +++ b/src/libserver/url.h @@ -28,6 +28,7 @@ enum rspamd_url_flags { RSPAMD_URL_FLAG_SCHEMALESS = 1 << 15, RSPAMD_URL_FLAG_UNNORMALISED = 1 << 16, RSPAMD_URL_FLAG_ZW_SPACES = 1 << 17, + RSPAMD_URL_FLAG_DISPLAY_URL = 1 << 18, }; struct rspamd_url_tag { @@ -48,6 +49,7 @@ struct rspamd_url { gchar *fragment; gchar *surbl; gchar *tld; + gchar *visible_part; struct rspamd_url *phished_url; @@ -61,6 +63,7 @@ struct rspamd_url { guint tldlen; guint urllen; guint rawlen; + guint visible_partlen; enum rspamd_url_flags flags; guint count; diff --git a/src/lua/lua_url.c b/src/lua/lua_url.c index 0a301e96d..58c6a83be 
100644 --- a/src/lua/lua_url.c +++ b/src/lua/lua_url.c @@ -63,6 +63,7 @@ LUA_FUNCTION_DEF (url, get_tag); LUA_FUNCTION_DEF (url, get_count); LUA_FUNCTION_DEF (url, get_tags); LUA_FUNCTION_DEF (url, add_tag); +LUA_FUNCTION_DEF (url, get_visible); LUA_FUNCTION_DEF (url, create); LUA_FUNCTION_DEF (url, init); LUA_FUNCTION_DEF (url, all); @@ -89,6 +90,7 @@ static const struct luaL_reg urllib_m[] = { LUA_INTERFACE_DEF (url, get_tag), LUA_INTERFACE_DEF (url, get_tags), LUA_INTERFACE_DEF (url, add_tag), + LUA_INTERFACE_DEF (url, get_visible), LUA_INTERFACE_DEF (url, get_count), LUA_INTERFACE_DEF (url, get_flags), {"get_redirected", lua_url_get_phished}, @@ -650,6 +652,27 @@ lua_url_get_count (lua_State *L) return 1; } + /*** +* @method url:get_visible() +* Get visible part of the url with html tags stripped +* @return {string} visible part of the url or nil if the url has no visible part +*/ +static gint +lua_url_get_visible (lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_lua_url *url = lua_check_url (L, 1); + + if (url != NULL && url->url->visible_part != NULL) { + lua_pushlstring (L, url->url->visible_part, url->url->visible_partlen); + } + else { + lua_pushnil (L); + } + +return 1; +} + /*** * @method url:to_table() * Return url as a table with the following fields: @@ -878,6 +901,7 @@ lua_url_all (lua_State *L) * - `schemaless`: URL has no schema * - `unnormalised`: URL has some unicode unnormalities * - `zw_spaces`: URL has some zero width spaces + * - `url_displayed`: URL has some other url-like string in visible part * @return {table} URL flags */ #define PUSH_FLAG(fl, name) do { \ @@ -918,6 +942,7 @@ lua_url_get_flags (lua_State *L) PUSH_FLAG (RSPAMD_URL_FLAG_SCHEMALESS, "schemaless"); PUSH_FLAG (RSPAMD_URL_FLAG_UNNORMALISED, "unnormalised"); PUSH_FLAG (RSPAMD_URL_FLAG_ZW_SPACES, "zw_spaces"); + PUSH_FLAG (RSPAMD_URL_FLAG_DISPLAY_URL, "url_displayed"); } else { return luaL_error (L, "invalid arguments"); |