source.dussan.org Git - rspamd.git/commitdiff
[Minor] Lua_magic: Add ical and vcard support
author: Vsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 21 Nov 2019 13:59:50 +0000 (13:59 +0000)
committer: Vsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 21 Nov 2019 13:59:50 +0000 (13:59 +0000)
lualib/lua_magic/heuristics.lua
lualib/lua_magic/types.lua

index 3da6a84ab30ded9ec4fd42dedcedde57f273d918..d9d4081702e1d307a5705dda1b73ebf22fa25f55 100644 (file)
@@ -61,18 +61,24 @@ local zip_patterns = {
 local txt_trie
 local txt_patterns = {
   html = {
-    [[(?i)\s*<html]],
-    [[(?i)\s*<\!DOCTYPE HTML]],
-    [[(?i)\s*<xml]],
-    [[(?i)\s*<body]],
-    [[(?i)\s*<table]],
-    [[(?i)\s*<a]],
-    [[(?i)\s*<p]],
-    [[(?i)\s*<div]],
-    [[(?i)\s*<span]],
+    {[[(?i)\s*<html]], 30},
+    {[[(?i)\s*<\!DOCTYPE HTML]], 30},
+    {[[(?i)\s*<xml]], 20},
+    {[[(?i)\s*<body]], 20},
+    {[[(?i)\s*<table]], 20},
+    {[[(?i)\s*<a]], 10},
+    {[[(?i)\s*<p]], 10},
+    {[[(?i)\s*<div]], 10},
+    {[[(?i)\s*<span]], 10},
   },
   csv = {
-    [[(?:[-a-zA-Z0-9_]+\s*,){2,}(?:[-a-zA-Z0-9_]+,?[ ]*[\r\n])]]
+    {[[(?:[-a-zA-Z0-9_]+\s*,){2,}(?:[-a-zA-Z0-9_]+,?[ ]*[\r\n])]], 20}
+  },
+  ics = {
+    {[[^BEGIN:VCALENDAR\r?\n]], 40},
+  },
+  vcf = {
+    {[[^BEGIN:VCARD\r?\n]], 40},
   },
 }
 
@@ -95,7 +101,7 @@ local function compile_tries()
       for _,pat in ipairs(pats) do
         -- These are utf16 strings in fact...
         strs[#strs + 1] = transform_func(pat)
-        indexes[#indexes + 1] = ext
+        indexes[#indexes + 1] = {ext, pat}
       end
     end
 
@@ -131,7 +137,7 @@ local function compile_tries()
         function(pat) return pat end)
     -- Text patterns at the initial fragment
     txt_trie = compile_pats(txt_patterns, txt_patterns_indexes,
-        function(pat) return pat end,
+        function(pat_tbl) return pat_tbl[1] end,
         bit.bor(rspamd_trie.flags.re,
             rspamd_trie.flags.dot_all,
             rspamd_trie.flags.no_start))
@@ -184,8 +190,8 @@ local function detect_ole_format(input, log_obj)
         for n,_ in pairs(matches) do
           if msoffice_clsid_indexes[n] then
             lua_util.debugm(N, log_obj, "found valid clsid for %s",
-                msoffice_clsid_indexes[n])
-            return true,msoffice_clsid_indexes[n]
+                msoffice_clsid_indexes[n][1])
+            return true,msoffice_clsid_indexes[n][1]
           end
         end
       end
@@ -195,7 +201,7 @@ local function detect_ole_format(input, log_obj)
       if matches then
         for n,_ in pairs(matches) do
           if msoffice_patterns_indexes[n] then
-            return true,msoffice_patterns_indexes[n]
+            return true,msoffice_patterns_indexes[n][1]
           end
         end
       end
@@ -295,8 +301,8 @@ local function detect_archive_flaw(part, arch, log_obj)
         for n,_ in pairs(matches) do
           if zip_patterns_indexes[n] then
             lua_util.debugm(N, log_obj, "found zip pattern for %s",
-                zip_patterns_indexes[n])
-            return zip_patterns_indexes[n],40
+                zip_patterns_indexes[n][1])
+            return zip_patterns_indexes[n][1],40
           end
         end
       end
@@ -392,11 +398,11 @@ exports.text_part_heuristic = function(part, log_obj)
       if matches then
         -- Require at least 2 occurrences of those patterns
         for n,positions in pairs(matches) do
-          local ext = txt_patterns_indexes[n]
+          local ext,weight = txt_patterns_indexes[n][1], txt_patterns_indexes[n][2][2]
           if ext then
-            res[ext] = (res[ext] or 0) + 20 * #positions
+            res[ext] = (res[ext] or 0) + weight * #positions
             lua_util.debugm(N, log_obj, "found txt pattern for %s: %s, total: %s",
-                ext, #positions, res[ext])
+                ext, weight * #positions, res[ext])
           end
         end
 
index 2ee3e62d79ae925273cd489081d088702f9d4696..d15eec6e1d96da93305e8b45ff783d5a1f835297 100644 (file)
@@ -282,6 +282,16 @@ local types = {
     ct = 'text/csv',
     av_check = false,
   },
+  ics = {
+    type = 'text',
+    ct = 'text/calendar',
+    av_check = false,
+  },
+  vcf = {
+    type = 'text',
+    ct = 'text/vcard',
+    av_check = false,
+  },
   eml = {
     type = 'message',
     ct = 'message/rfc822',