From ec8a472f21d83d3bb93a200acdd0d5b257e4e88c Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Mon, 21 Jan 2019 12:41:03 +0000 Subject: [PATCH] [Feature] Add dedicated ZW spaces detection for URLs Issue: #2725 --- rules/misc.lua | 20 ++++++++++++++++++-- src/libserver/html.c | 4 ++++ src/libserver/url.h | 1 + src/lua/lua_url.c | 2 ++ 4 files changed, 25 insertions(+), 2 deletions(-) diff --git a/rules/misc.lua b/rules/misc.lua index 26f2a1357..05d4af5d5 100644 --- a/rules/misc.lua +++ b/rules/misc.lua @@ -101,25 +101,41 @@ rspamd_config.DATE_IN_PAST = { type = 'mime', } -rspamd_config.R_SUSPICIOUS_URL = { +local obscured_id = rspamd_config:register_symbol{ callback = function(task) local urls = task:get_urls() if urls then for _,u in ipairs(urls) do - if u:is_obscured() then + local fl = u:get_flags() + if fl.obscured then task:insert_result('R_SUSPICIOUS_URL', 1.0, u:get_host()) end + if fl.zw_spaces then + task:insert_result('ZERO_WIDTH_SPACE_URL', 1.0, u:get_host()) + end end end + return false end, + name = 'R_SUSPICIOUS_URL', score = 5.0, one_shot = true, description = 'Obfusicated or suspicious URL has been found in a message', group = 'url' } +rspamd_config:register_symbol{ + type = 'virtual', + name = 'ZERO_WIDTH_SPACE_URL', + score = 7.0, + one_shot = true, + description = 'Zero width space in url', + group = 'url', + parent = obscured_id, +} + rspamd_config.ENVFROM_PRVS = { callback = function (task) diff --git a/src/libserver/html.c b/src/libserver/html.c index e97a010fe..afaeae4c5 100644 --- a/src/libserver/html.c +++ b/src/libserver/html.c @@ -1351,6 +1351,10 @@ rspamd_html_process_url (rspamd_mempool_t *pool, const gchar *start, guint len, if (norm_res & (RSPAMD_UNICODE_NORM_ZERO_SPACES|RSPAMD_UNICODE_NORM_ERROR)) { saved_flags |= RSPAMD_URL_FLAG_OBSCURED; + + if (norm_res & RSPAMD_UNICODE_NORM_ZERO_SPACES) { + saved_flags |= RSPAMD_URL_FLAG_ZW_SPACES; + } } rc = rspamd_url_parse (url, decoded, dlen, pool, RSPAMD_URL_PARSE_HREF); diff --git a/src/libserver/url.h b/src/libserver/url.h index a9eda71de..12a649ec7 100644 --- a/src/libserver/url.h +++ b/src/libserver/url.h @@ -27,6 +27,7 @@ enum rspamd_url_flags { RSPAMD_URL_FLAG_HAS_USER = 1 << 14, RSPAMD_URL_FLAG_SCHEMALESS = 1 << 15, RSPAMD_URL_FLAG_UNNORMALISED = 1 << 16, + RSPAMD_URL_FLAG_ZW_SPACES = 1 << 17, }; struct rspamd_url_tag { diff --git a/src/lua/lua_url.c b/src/lua/lua_url.c index 8b18c7c3d..8bc0cf657 100644 --- a/src/lua/lua_url.c +++ b/src/lua/lua_url.c @@ -875,6 +875,7 @@ lua_url_all (lua_State *L) * - `has_user`: URL has user part * - `schemaless`: URL has no schema * - `unnormalised`: URL has some unicode unnormalities + * - `zw_spaces`: URL has some zero width spaces * @return {table} URL flags */ #define PUSH_FLAG(fl, name) do { \ @@ -914,6 +915,7 @@ lua_url_get_flags (lua_State *L) PUSH_FLAG (RSPAMD_URL_FLAG_HAS_USER, "has_user"); PUSH_FLAG (RSPAMD_URL_FLAG_SCHEMALESS, "schemaless"); PUSH_FLAG (RSPAMD_URL_FLAG_UNNORMALISED, "unnormalised"); + PUSH_FLAG (RSPAMD_URL_FLAG_ZW_SPACES, "zw_spaces"); } else { return luaL_error (L, "invalid arguments"); -- 2.39.5