source.dussan.org Git - rspamd.git/commitdiff
[Minor] Lua_magic: Return utf8 check as it is useful for many cases
author: Vsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 22 Apr 2021 13:22:52 +0000 (14:22 +0100)
committer: Vsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 22 Apr 2021 13:22:52 +0000 (14:22 +0100)
lualib/lua_magic/heuristics.lua

index aa8e9e81903e7ced081e30cd1a4a6025ada66217..66e186906daf3411083daa298a2e79fe7c288d8a 100644 (file)
@@ -334,6 +334,21 @@ exports.text_part_heuristic = function(part, log_obj, _)
       local n8bit = 0
 
       while b >= 127 and n8bit < remain do
+        -- utf8 part
+        if bit.band(b, 0xe0) == 0xc0 and remain > 1 and
+                bit.band(bytes[idx + 1], 0xc0) == 0x80 then
+          return true,1
+        elseif bit.band(b, 0xf0) == 0xe0 and remain > 2 and
+                bit.band(bytes[idx + 1], 0xc0) == 0x80 and
+                bit.band(bytes[idx + 2], 0xc0) == 0x80 then
+          return true,2
+        elseif bit.band(b, 0xf8) == 0xf0 and remain > 3 and
+                bit.band(bytes[idx + 1], 0xc0) == 0x80 and
+                bit.band(bytes[idx + 2], 0xc0) == 0x80 and
+                bit.band(bytes[idx + 3], 0xc0) == 0x80 then
+          return true,3
+        end
+
         n8bit = n8bit + 1
         idx = idx + 1
         b = bytes[idx]