]> source.dussan.org Git - rspamd.git/commitdiff
[Feature] Detect URLs with suspicious homographs
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Sat, 4 Feb 2017 12:41:21 +0000 (13:41 +0100)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Sat, 4 Feb 2017 12:41:38 +0000 (13:41 +0100)
rules/misc.lua
src/lua/lua_util.c

index 6a1eec4fc769f9d806cb4c42177c6b58ebe304e0..56de79a6b57be61eb48946b56f07f56705f68873 100644 (file)
@@ -756,4 +756,28 @@ local freemail_reply_neq_from_id = rspamd_config:register_symbol({
   score = 3.0
 })
 rspamd_config:register_dependency(freemail_reply_neq_from_id, 'FREEMAIL_REPLYTO')
-rspamd_config:register_dependency(freemail_reply_neq_from_id, 'FREEMAIL_FROM')
\ No newline at end of file
+rspamd_config:register_dependency(freemail_reply_neq_from_id, 'FREEMAIL_FROM')
+
+rspamd_config.OMOGRAPH_URL = { -- rule: fires on the first URL host that mixes ASCII letters with non-ASCII bytes
+  callback = function(task)
+    local urls = task:get_urls()
+
+    if urls then -- skip the scan when get_urls() returned nil/false
+      for _,u in ipairs(urls) do
+        local h = u:get_host()
+
+        if h then
+          local non_latin,total = util.count_non_ascii(h) -- NOTE(review): `util` is not defined in this hunk; presumably a module-level local in misc.lua - confirm
+
+          if non_latin ~= total and non_latin > 0 then -- non_latin = bytes with the high bit set, total = those plus ASCII letters; true only when both kinds occur
+            return true, 1.0, h -- returns on the first matching host; later URLs are not examined
+          end
+        end
+      end
+    end
+
+    return false -- no URL host mixed the two kinds of symbols
+  end,
+  score = 5.0,
+  description = 'Url contains both latin and non-latin characters'
+}
index b1bfdce2833535af2e39c25a714e2b0f5b773bcf..7b858f4ce76d33d03a7d395944ea574003e89404 100644 (file)
@@ -362,6 +362,13 @@ LUA_FUNCTION_DEF (util, zstd_decompress);
  * @return {number} normalized number
  */
 LUA_FUNCTION_DEF (util, normalize_prob);
+/***
+ * @function util.count_non_ascii(str)
+ * Counts non-ASCII bytes (bytes with the high bit set) and ASCII letters in the given string;
+ * all other ASCII bytes are ignored. The string does not have to be valid UTF-8.
+ * @return {number,number} count of non-ASCII bytes, then that count plus the count of ASCII letters
+ */
+LUA_FUNCTION_DEF (util, count_non_ascii);
 
 /***
  * @function util.pack(fmt, ...)
@@ -485,6 +492,7 @@ static const struct luaL_reg utillib_f[] = {
        LUA_INTERFACE_DEF (util, zstd_decompress),
        LUA_INTERFACE_DEF (util, normalize_prob),
        LUA_INTERFACE_DEF (util, caseless_hash),
+       LUA_INTERFACE_DEF (util, count_non_ascii),
        LUA_INTERFACE_DEF (util, pack),
        LUA_INTERFACE_DEF (util, unpack),
        LUA_INTERFACE_DEF (util, packsize),
@@ -1825,6 +1833,40 @@ lua_util_caseless_hash (lua_State *L)
        return 1;
 }
 
+static gint
+lua_util_count_non_ascii (lua_State *L) /* Lua-callable: takes a string at stack index 1, pushes two counters */
+{
+       gsize len;
+       const gchar *str = lua_tolstring (L, 1, &len); /* per the Lua manual: NULL unless the value is a string or a number */
+       const gchar *p, *end;
+       gint ret = 0, total = 0; /* ret: bytes with the high bit set; total: ret plus ASCII letters */
+
+       if (str != NULL) {
+               end = str + len; /* bounded by len, so embedded NUL bytes do not stop the scan */
+               p = str;
+
+               while (p < end) { /* advances one byte per iteration, so a multi-byte sequence is counted once per byte */
+                       if (*p & 0x80) { /* high bit set: outside 7-bit ASCII */
+                               ret ++;
+                               total ++;
+                       }
+                       else if (g_ascii_isalpha (*p)) { /* ASCII letter; any other ASCII byte is counted nowhere */
+                               total ++;
+                       }
+
+                       p ++;
+               }
+
+               lua_pushnumber (L, ret); /* first result: non-ASCII byte count */
+               lua_pushnumber (L, total); /* second result: non-ASCII bytes plus ASCII letters */
+       }
+       else {
+               return luaL_error (L, "invalid arguments"); /* argument 1 could not be read as a string */
+       }
+
+       return 2; /* number of values pushed above */
+}
+
 /* Backport from Lua 5.3 */
 
 /******************************************************************************