aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2017-02-04 13:41:21 +0100
committerVsevolod Stakhov <vsevolod@highsecure.ru>2017-02-04 13:41:38 +0100
commit6c8b19b9422f6fb4b776a59617750d53a48da5f1 (patch)
tree1df2c37cb4027e258ef953bda529de7f473041e6
parent6670767e3a55bc9313ac5019e36e510c49ef3709 (diff)
downloadrspamd-6c8b19b9422f6fb4b776a59617750d53a48da5f1.tar.gz
rspamd-6c8b19b9422f6fb4b776a59617750d53a48da5f1.zip
[Feature] Detect URLs with suspicious omographs
-rw-r--r--rules/misc.lua26
-rw-r--r--src/lua/lua_util.c42
2 files changed, 67 insertions, 1 deletions
diff --git a/rules/misc.lua b/rules/misc.lua
index 6a1eec4fc..56de79a6b 100644
--- a/rules/misc.lua
+++ b/rules/misc.lua
@@ -756,4 +756,28 @@ local freemail_reply_neq_from_id = rspamd_config:register_symbol({
score = 3.0
})
rspamd_config:register_dependency(freemail_reply_neq_from_id, 'FREEMAIL_REPLYTO')
-rspamd_config:register_dependency(freemail_reply_neq_from_id, 'FREEMAIL_FROM') \ No newline at end of file
+rspamd_config:register_dependency(freemail_reply_neq_from_id, 'FREEMAIL_FROM')
+
+rspamd_config.OMOGRAPH_URL = { -- rule: fires when a URL host mixes ASCII letters with non-ASCII bytes
+ callback = function(task)
+ local urls = task:get_urls()
+
+ if urls then
+ for _,u in ipairs(urls) do
+ local h = u:get_host()
+
+ if h then
+ local non_latin,total = util.count_non_ascii(h) -- NOTE(review): `util` is not defined in this hunk; presumably required earlier in misc.lua
+
+ if non_latin ~= total and non_latin > 0 then -- some, but not all, of the counted bytes are non-ASCII
+ return true, 1.0, h -- stop at the first such host and hand it back as the third value
+ end
+ end
+ end
+ end
+
+ return false -- no URLs, or no host with a mix of ASCII letters and non-ASCII bytes
+ end,
+ score = 5.0,
+ description = 'Url contains both latin and non-latin characters'
+}
diff --git a/src/lua/lua_util.c b/src/lua/lua_util.c
index b1bfdce28..7b858f4ce 100644
--- a/src/lua/lua_util.c
+++ b/src/lua/lua_util.c
@@ -362,6 +362,13 @@ LUA_FUNCTION_DEF (util, zstd_decompress);
* @return {number} normalized number
*/
LUA_FUNCTION_DEF (util, normalize_prob);
+/***
+ * @function util.count_non_ascii(str)
+ * Returns the number of non-ASCII bytes (bytes with the high bit set) in the specified string,
+ * together with the total of those bytes plus ASCII letters. The string need not be valid UTF-8.
+ * @return {number,number} number of non-ASCII bytes and total of non-ASCII bytes plus ASCII letters
+ */
+LUA_FUNCTION_DEF (util, count_non_ascii);
/***
* @function util.pack(fmt, ...)
@@ -485,6 +492,7 @@ static const struct luaL_reg utillib_f[] = {
LUA_INTERFACE_DEF (util, zstd_decompress),
LUA_INTERFACE_DEF (util, normalize_prob),
LUA_INTERFACE_DEF (util, caseless_hash),
+ LUA_INTERFACE_DEF (util, count_non_ascii),
LUA_INTERFACE_DEF (util, pack),
LUA_INTERFACE_DEF (util, unpack),
LUA_INTERFACE_DEF (util, packsize),
@@ -1825,6 +1833,40 @@ lua_util_caseless_hash (lua_State *L)
return 1;
}
+static gint
+lua_util_count_non_ascii (lua_State *L) /* Lua binding: util.count_non_ascii(str) -> non_ascii, total */
+{
+ gsize len;
+ const gchar *str = lua_tolstring (L, 1, &len); /* NULL unless argument 1 is a string or a number */
+ const gchar *p, *end;
+ gint ret = 0, total = 0; /* ret: non-ASCII bytes; total: non-ASCII bytes plus ASCII letters */
+
+ if (str != NULL) {
+ end = str + len;
+ p = str;
+
+ while (p < end) { /* walks the buffer one byte at a time, using the length and not a NUL terminator */
+ if (*p & 0x80) { /* high bit set: counted per byte, not per character */
+ ret ++;
+ total ++;
+ }
+ else if (g_ascii_isalpha (*p)) { /* ASCII letter: counts toward total only */
+ total ++;
+ }
+
+ p ++; /* every other byte (digits, dots, hyphens, ...) is ignored */
+ }
+
+ lua_pushnumber (L, ret);
+ lua_pushnumber (L, total);
+ }
+ else {
+ return luaL_error (L, "invalid arguments"); /* raises a Lua error */
+ }
+
+ return 2; /* two results on the stack: non-ASCII count, then total */
+}
+
/* Backport from Lua 5.3 */
/******************************************************************************