source.dussan.org Git - rspamd.git/commitdiff
[Fix] Improve OMOGRAPH_URL rule
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Mon, 13 Feb 2017 12:17:58 +0000 (12:17 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Mon, 13 Feb 2017 12:19:20 +0000 (12:19 +0000)
- Calculate omographs in each dot component separately
- Normalize omographs
- Count utf8 characters properly

rules/misc.lua
src/lua/lua_util.c

index 56de79a6b57be61eb48946b56f07f56705f68873..75f31f39c7a030c80c37410f797adcab6aa2a961 100644 (file)
@@ -767,10 +767,21 @@ rspamd_config.OMOGRAPH_URL = {
         local h = u:get_host()
 
         if h then
-          local non_latin,total = util.count_non_ascii(h)
+          local parts = rspamd_str_split(h, '.')
 
-          if non_latin ~= total and non_latin > 0 then
-            return true, 1.0, h
+          local bad_omographs = 0
+
+          for _,p in ipairs(parts) do
+            local cnlat,ctot = util.count_non_ascii(p)
+
+            if cnlat > 0 and cnlat ~= ctot then
+              bad_omographs = bad_omographs + 1.0 / cnlat
+            end
+          end
+
+          if bad_omographs > 0 then
+            if bad_omographs > 1 then bad_omographs = 1.0 end
+            return true, bad_omographs, h
           end
         end
       end
index 220467664a2faa2fbe3beef289e41e99fc939457..669ac5bd90bf3a715f02fe7a28f89ea8ea2c01b0 100644 (file)
@@ -1891,7 +1891,7 @@ lua_util_count_non_ascii (lua_State *L)
 {
        gsize len;
        const gchar *str = lua_tolstring (L, 1, &len);
-       const gchar *p, *end;
+       const gchar *p, *end, *np;
        gint ret = 0, total = 0;
 
        if (str != NULL) {
@@ -1900,8 +1900,13 @@ lua_util_count_non_ascii (lua_State *L)
 
                while (p < end) {
                        if (*p & 0x80) {
+                               np = g_utf8_find_next_char (p, end);
                                ret ++;
                                total ++;
+
+                               p = (np != p) ? np : p + 1;
+
+                               continue;
                        }
                        else if (g_ascii_isalpha (*p)) {
                                total ++;