diff options
-rw-r--r-- | src/libserver/url.c | 92 | ||||
-rw-r--r-- | src/libserver/url.h | 17 | ||||
-rw-r--r-- | src/lua/lua_url.c | 72 |
3 files changed, 159 insertions, 22 deletions
diff --git a/src/libserver/url.c b/src/libserver/url.c
index db89073f5..195727c13 100644
--- a/src/libserver/url.c
+++ b/src/libserver/url.c
@@ -214,6 +214,40 @@ struct url_matcher static_matchers[] = {
 				0}
 };
 
+/*
+ * Maps the textual names of URL flags (as exposed to Lua) to flag bits.
+ * `hash` holds a fast hash of `name` truncated to gint; it is -1 until
+ * rspamd_url_init () fills it in.
+ */
+static struct rspamd_url_flag_name {
+	const gchar *name;
+	gint flag;
+	gint hash;
+} url_flag_names[] = {
+	{"phished", RSPAMD_URL_FLAG_PHISHED, -1},
+	{"numeric", RSPAMD_URL_FLAG_NUMERIC, -1},
+	{"obscured", RSPAMD_URL_FLAG_OBSCURED, -1},
+	{"redirected", RSPAMD_URL_FLAG_REDIRECTED, -1},
+	{"html_displayed", RSPAMD_URL_FLAG_HTML_DISPLAYED, -1},
+	{"text", RSPAMD_URL_FLAG_FROM_TEXT, -1},
+	{"subject", RSPAMD_URL_FLAG_SUBJECT, -1},
+	{"host_encoded", RSPAMD_URL_FLAG_HOSTENCODED, -1},
+	{"schema_encoded", RSPAMD_URL_FLAG_SCHEMAENCODED, -1},
+	{"path_encoded", RSPAMD_URL_FLAG_PATHENCODED, -1},
+	{"query_encoded", RSPAMD_URL_FLAG_QUERYENCODED, -1},
+	{"missing_slahes", RSPAMD_URL_FLAG_MISSINGSLASHES, -1},
+	{"idn", RSPAMD_URL_FLAG_IDN, -1},
+	{"has_port", RSPAMD_URL_FLAG_HAS_PORT, -1},
+	{"has_user", RSPAMD_URL_FLAG_HAS_USER, -1},
+	{"schemaless", RSPAMD_URL_FLAG_SCHEMALESS, -1},
+	{"unnormalised", RSPAMD_URL_FLAG_UNNORMALISED, -1},
+	{"zw_spaces", RSPAMD_URL_FLAG_ZW_SPACES, -1},
+	{"url_displayed", RSPAMD_URL_FLAG_DISPLAY_URL, -1},
+	{"image", RSPAMD_URL_FLAG_IMAGE, -1},
+	{"query", RSPAMD_URL_FLAG_QUERY, -1},
+	{"content", RSPAMD_URL_FLAG_CONTENT, -1}
+};
+
 static inline khint_t
 rspamd_url_hash (struct rspamd_url *u);
 
@@ -610,6 +644,26 @@ rspamd_url_init (const gchar *tld_file)
 					url_scanner->matchers_strict->len);
 		}
 	}
+
+	/* Generate hashes for flags */
+	for (guint i = 0; i < G_N_ELEMENTS (url_flag_names); i ++) {
+		url_flag_names[i].hash =
+				(gint)rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_HASHFAST_INDEPENDENT,
+						url_flag_names[i].name,
+						strlen (url_flag_names[i].name), 0);
+	}
+	/* Ensure that there are no hash collisions; O(N^2), but this array is small */
+	for (guint i = 0; i < G_N_ELEMENTS (url_flag_names) - 1; i ++) {
+		for (guint j = i + 1; j < G_N_ELEMENTS (url_flag_names); j ++) {
+			if (url_flag_names[i].hash == url_flag_names[j].hash) {
+				msg_err ("collision: both %s and %s map to %d",
+						url_flag_names[i].name, url_flag_names[j].name,
+						url_flag_names[i].hash);
+				abort ();
+			}
+		}
+	}
+
 }
 
 #define SET_U(u, field) do { \
@@ -3991,3 +4045,41 @@ rspamd_url_host_set_has (khash_t (rspamd_url_host_hash) *set, struct rspamd_url
 
 	return false;
 }
+
+bool
+rspamd_url_flag_from_string (const gchar *str, gint *flag)
+{
+	gint h;
+
+	if (str == NULL || flag == NULL) {
+		return false;
+	}
+
+	h = (gint)rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_HASHFAST_INDEPENDENT,
+			str, strlen (str), 0);
+
+	for (guint i = 0; i < G_N_ELEMENTS (url_flag_names); i ++) {
+		/* The hash is only a prefilter: confirm the name to rule out collisions */
+		if (url_flag_names[i].hash == h &&
+				strcmp (url_flag_names[i].name, str) == 0) {
+			*flag |= url_flag_names[i].flag;
+
+			return true;
+		}
+	}
+
+	return false;
+}
+
+
+const gchar *
+rspamd_url_flag_to_string (int flag)
+{
+	for (guint i = 0; i < G_N_ELEMENTS (url_flag_names); i ++) {
+		if (url_flag_names[i].flag & flag) {
+			return url_flag_names[i].name;
+		}
+	}
+
+	return NULL;
+}
diff --git a/src/libserver/url.h b/src/libserver/url.h
index bb9c57399..2a5892fc5 100644
--- a/src/libserver/url.h
+++ b/src/libserver/url.h
@@ -36,6 +36,7 @@ enum rspamd_url_flags {
 	RSPAMD_URL_FLAG_DISPLAY_URL = 1u << 18u,
 	RSPAMD_URL_FLAG_IMAGE = 1u << 19u,
 	RSPAMD_URL_FLAG_QUERY = 1u << 20u,
+	RSPAMD_URL_FLAG_CONTENT = 1u << 21u,
 };
 
 struct rspamd_url_tag {
@@ -268,6 +269,22 @@ const gchar *rspamd_url_protocol_name (enum rspamd_url_protocol proto);
  */
 enum rspamd_url_protocol rspamd_url_protocol_from_string (const gchar *str);
 
+/**
+ * Converts string to a url flag
+ * @param str flag name, e.g. "phished"
+ * @param flag output mask; the matching flag bit is OR'ed into it
+ * @return true if `str` names a known flag, false otherwise (also for NULL arguments)
+ */
+bool rspamd_url_flag_from_string (const gchar *str, gint *flag);
+
+/**
+ * Converts url flag to a string
+ * @param flag flag bit to look up
+ * @return name of the first known flag that has a bit in common with `flag`,
+ * or NULL if there is none
+ */
+const gchar * rspamd_url_flag_to_string (int flag);
+
 /* Defines sets of urls indexed by url as is */
 KHASH_DECLARE (rspamd_url_hash, struct rspamd_url *, char);
 KHASH_DECLARE (rspamd_url_host_hash, struct rspamd_url *, char);
diff --git a/src/lua/lua_url.c b/src/lua/lua_url.c
index 6540919ea..94cb51dbd 100644
--- a/src/lua/lua_url.c
+++ b/src/lua/lua_url.c
@@ -728,6 +728,7 @@ lua_url_create (lua_State *L)
 	const gchar *text;
 	size_t length;
 	gboolean own_pool = FALSE;
+	struct rspamd_lua_url *u;
 
 	if (lua_type (L, 1) == LUA_TUSERDATA) {
 		pool = rspamd_lua_check_mempool (L, 1);
@@ -753,6 +754,31 @@ lua_url_create (lua_State *L)
 		if (lua_type (L, -1) != LUA_TUSERDATA) {
 			/* URL is actually not found */
 			lua_pushnil (L);
+
+			return 1;
+		}
+
+		u = (struct rspamd_lua_url *)lua_touserdata (L, -1);
+
+		if (lua_type (L, 3) == LUA_TTABLE) {
+			/* Add flags */
+			for (lua_pushnil (L); lua_next (L, 3); lua_pop (L, 1)) {
+				int nmask = 0;
+				const gchar *fname = lua_tostring (L, -1);
+
+				if (fname == NULL) {
+					return luaL_error (L, "invalid flag: expected string, got %s",
+							luaL_typename (L, -1));
+				}
+
+				if (rspamd_url_flag_from_string (fname, &nmask)) {
+					u->url->flags |= nmask;
+				}
+				else {
+					/* No pop here: `fname` points into the value on the stack */
+					return luaL_error (L, "invalid flag: %s", fname);
+				}
+			}
 		}
 	}
 
@@ -854,9 +880,9 @@ lua_url_all (lua_State *L)
  * - `image`: URL is from src attribute of img HTML tag
  * @return {table} URL flags
  */
-#define PUSH_FLAG(fl, name) do { \
+#define PUSH_FLAG(fl) do { \
 	if (flags & (fl)) { \
-		lua_pushstring (L, (name)); \
+		lua_pushstring (L, rspamd_url_flag_to_string (fl)); \
 		lua_pushboolean (L, true); \
 		lua_settable (L, -3); \
 	} \
@@ -874,26 +900,28 @@ lua_url_get_flags (lua_State *L)
 
 		lua_createtable (L, 0, 4);
 
-		PUSH_FLAG (RSPAMD_URL_FLAG_PHISHED, "phished");
-		PUSH_FLAG (RSPAMD_URL_FLAG_NUMERIC, "numeric");
-		PUSH_FLAG (RSPAMD_URL_FLAG_OBSCURED, "obscured");
-		PUSH_FLAG (RSPAMD_URL_FLAG_REDIRECTED, "redirected");
-		PUSH_FLAG (RSPAMD_URL_FLAG_HTML_DISPLAYED, "html_displayed");
-		PUSH_FLAG (RSPAMD_URL_FLAG_FROM_TEXT, "text");
-		PUSH_FLAG (RSPAMD_URL_FLAG_SUBJECT, "subject");
-		PUSH_FLAG (RSPAMD_URL_FLAG_HOSTENCODED, "host_encoded");
-		PUSH_FLAG (RSPAMD_URL_FLAG_SCHEMAENCODED, "schema_encoded");
-		PUSH_FLAG (RSPAMD_URL_FLAG_PATHENCODED, "path_encoded");
-		PUSH_FLAG (RSPAMD_URL_FLAG_QUERYENCODED, "query_encoded");
-		PUSH_FLAG (RSPAMD_URL_FLAG_MISSINGSLASHES, "missing_slahes");
-		PUSH_FLAG (RSPAMD_URL_FLAG_IDN, "idn");
-		PUSH_FLAG (RSPAMD_URL_FLAG_HAS_PORT, "has_port");
-		PUSH_FLAG (RSPAMD_URL_FLAG_HAS_USER, "has_user");
-		PUSH_FLAG (RSPAMD_URL_FLAG_SCHEMALESS, "schemaless");
-		PUSH_FLAG (RSPAMD_URL_FLAG_UNNORMALISED, "unnormalised");
-		PUSH_FLAG (RSPAMD_URL_FLAG_ZW_SPACES, "zw_spaces");
-		PUSH_FLAG (RSPAMD_URL_FLAG_DISPLAY_URL, "url_displayed");
-		PUSH_FLAG (RSPAMD_URL_FLAG_IMAGE, "image");
+		PUSH_FLAG (RSPAMD_URL_FLAG_PHISHED);
+		PUSH_FLAG (RSPAMD_URL_FLAG_NUMERIC);
+		PUSH_FLAG (RSPAMD_URL_FLAG_OBSCURED);
+		PUSH_FLAG (RSPAMD_URL_FLAG_REDIRECTED);
+		PUSH_FLAG (RSPAMD_URL_FLAG_HTML_DISPLAYED);
+		PUSH_FLAG (RSPAMD_URL_FLAG_FROM_TEXT);
+		PUSH_FLAG (RSPAMD_URL_FLAG_SUBJECT);
+		PUSH_FLAG (RSPAMD_URL_FLAG_HOSTENCODED);
+		PUSH_FLAG (RSPAMD_URL_FLAG_SCHEMAENCODED);
+		PUSH_FLAG (RSPAMD_URL_FLAG_PATHENCODED);
+		PUSH_FLAG (RSPAMD_URL_FLAG_QUERYENCODED);
+		PUSH_FLAG (RSPAMD_URL_FLAG_MISSINGSLASHES);
+		PUSH_FLAG (RSPAMD_URL_FLAG_IDN);
+		PUSH_FLAG (RSPAMD_URL_FLAG_HAS_PORT);
+		PUSH_FLAG (RSPAMD_URL_FLAG_HAS_USER);
+		PUSH_FLAG (RSPAMD_URL_FLAG_SCHEMALESS);
+		PUSH_FLAG (RSPAMD_URL_FLAG_UNNORMALISED);
+		PUSH_FLAG (RSPAMD_URL_FLAG_ZW_SPACES);
+		PUSH_FLAG (RSPAMD_URL_FLAG_DISPLAY_URL);
+		PUSH_FLAG (RSPAMD_URL_FLAG_IMAGE);
+		PUSH_FLAG (RSPAMD_URL_FLAG_QUERY);
+		PUSH_FLAG (RSPAMD_URL_FLAG_CONTENT);
 	}
 	else {
 		return luaL_error (L, "invalid arguments");