aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author: Vsevolod Stakhov <vsevolod@highsecure.ru> 2019-03-09 14:18:06 +0000
committer: GitHub <noreply@github.com> 2019-03-09 14:18:06 +0000
commit: fff6f70c33bbf080301230cbecb6aa00bbb6c5b3 (patch)
tree: 9b02bf0adba221b8ea6aff79fb982081b10286de /src
parent: f3277faf934a449ca680ba4ce4c3b98906b22af0 (diff)
parent: 33e60cf84e4c87149e6d00bb12e6744d28e702e9 (diff)
download: rspamd-fff6f70c33bbf080301230cbecb6aa00bbb6c5b3.tar.gz
download: rspamd-fff6f70c33bbf080301230cbecb6aa00bbb6c5b3.zip
Merge pull request #2771 from miecio45/feat_url_visible_part
[Minor] Export url visible part to lua and add new url flag
Diffstat (limited to 'src')
-rw-r--r-- src/libserver/html.c 26
-rw-r--r-- src/libserver/url.h 3
-rw-r--r-- src/lua/lua_url.c 25
3 files changed, 54 insertions, 0 deletions
diff --git a/src/libserver/html.c b/src/libserver/html.c
index 63638d28b..c831dc14e 100644
--- a/src/libserver/html.c
+++ b/src/libserver/html.c
@@ -2377,11 +2377,37 @@ rspamd_html_check_displayed_url (rspamd_mempool_t *pool,
return;
}
+ url->visible_part = rspamd_mempool_alloc0(pool, dest->len - href_offset+1);
+ gchar *current_processed_char = dest->data + href_offset;
+ gchar *current_char_in_struct = url->visible_part;
+ gboolean previous_char_was_space = false;
+
+ while (current_processed_char < (gchar*) dest->data + dest->len) {
+ if (g_ascii_isspace(*current_processed_char)) {
+ if (previous_char_was_space) {
+ current_processed_char++;
+ continue;
+ }
+ previous_char_was_space = true;
+ *current_char_in_struct = ' ';
+ } else {
+ *current_char_in_struct = *current_processed_char;
+ previous_char_was_space = false;
+ }
+ current_char_in_struct++;
+ current_processed_char++;
+ }
+ *current_char_in_struct = '\0';
+ url->visible_partlen = current_char_in_struct - url->visible_part;
+
rspamd_html_url_is_phished (pool, url,
dest->data + href_offset,
dest->len - href_offset,
&url_found, &displayed_url);
+ if (url_found) {
+ url->flags |= RSPAMD_URL_FLAG_DISPLAY_URL;
+ }
if (exceptions && url_found) {
ex = rspamd_mempool_alloc (pool,
sizeof (*ex));
diff --git a/src/libserver/url.h b/src/libserver/url.h
index 3deeb8cf5..ad09c33ec 100644
--- a/src/libserver/url.h
+++ b/src/libserver/url.h
@@ -28,6 +28,7 @@ enum rspamd_url_flags {
RSPAMD_URL_FLAG_SCHEMALESS = 1 << 15,
RSPAMD_URL_FLAG_UNNORMALISED = 1 << 16,
RSPAMD_URL_FLAG_ZW_SPACES = 1 << 17,
+ RSPAMD_URL_FLAG_DISPLAY_URL = 1 << 18,
};
struct rspamd_url_tag {
@@ -48,6 +49,7 @@ struct rspamd_url {
gchar *fragment;
gchar *surbl;
gchar *tld;
+ gchar *visible_part;
struct rspamd_url *phished_url;
@@ -61,6 +63,7 @@ struct rspamd_url {
guint tldlen;
guint urllen;
guint rawlen;
+ guint visible_partlen;
enum rspamd_url_flags flags;
guint count;
diff --git a/src/lua/lua_url.c b/src/lua/lua_url.c
index 0a301e96d..58c6a83be 100644
--- a/src/lua/lua_url.c
+++ b/src/lua/lua_url.c
@@ -63,6 +63,7 @@ LUA_FUNCTION_DEF (url, get_tag);
LUA_FUNCTION_DEF (url, get_count);
LUA_FUNCTION_DEF (url, get_tags);
LUA_FUNCTION_DEF (url, add_tag);
+LUA_FUNCTION_DEF (url, get_visible);
LUA_FUNCTION_DEF (url, create);
LUA_FUNCTION_DEF (url, init);
LUA_FUNCTION_DEF (url, all);
@@ -89,6 +90,7 @@ static const struct luaL_reg urllib_m[] = {
LUA_INTERFACE_DEF (url, get_tag),
LUA_INTERFACE_DEF (url, get_tags),
LUA_INTERFACE_DEF (url, add_tag),
+ LUA_INTERFACE_DEF (url, get_visible),
LUA_INTERFACE_DEF (url, get_count),
LUA_INTERFACE_DEF (url, get_flags),
{"get_redirected", lua_url_get_phished},
@@ -650,6 +652,27 @@ lua_url_get_count (lua_State *L)
return 1;
}
+/***
+ * @method url:get_visible()
+ * Get visible part of the url with html tags stripped
+ * @return {string} visible text of the url, or nil when it has none
+ */
+static gint
+lua_url_get_visible (lua_State *L)
+{
+	LUA_TRACE_POINT;
+	struct rspamd_lua_url *url = lua_check_url (L, 1);
+
+	/* visible_part is assigned by the html displayed-url check; it may be NULL */
+	if (url != NULL && url->url->visible_part != NULL) {
+		lua_pushlstring (L, url->url->visible_part, url->url->visible_partlen);
+	}
+	else {
+		lua_pushnil (L);
+	}
+	return 1;
+}
+
/***
* @method url:to_table()
* Return url as a table with the following fields:
@@ -878,6 +901,7 @@ lua_url_all (lua_State *L)
* - `schemaless`: URL has no schema
* - `unnormalised`: URL has some unicode unnormalities
* - `zw_spaces`: URL has some zero width spaces
+ * - `url_displayed`: URL has some other url-like string in visible part
* @return {table} URL flags
*/
#define PUSH_FLAG(fl, name) do { \
@@ -918,6 +942,7 @@ lua_url_get_flags (lua_State *L)
PUSH_FLAG (RSPAMD_URL_FLAG_SCHEMALESS, "schemaless");
PUSH_FLAG (RSPAMD_URL_FLAG_UNNORMALISED, "unnormalised");
PUSH_FLAG (RSPAMD_URL_FLAG_ZW_SPACES, "zw_spaces");
+ PUSH_FLAG (RSPAMD_URL_FLAG_DISPLAY_URL, "url_displayed");
}
else {
return luaL_error (L, "invalid arguments");