summary | refs | log | tree | commit | diff | stats
path: root/lualib
diff options
context:
space:
mode:
author: Vsevolod Stakhov <vsevolod@highsecure.ru>, 2021-04-22 14:22:52 +0100
committer: Vsevolod Stakhov <vsevolod@highsecure.ru>, 2021-04-22 14:22:52 +0100
commit: f4d5f9c4e45952d37d3aa2549f8a8cd133d09181 (patch)
tree: 97ad34918cbf010b237b2fc903cf79ff9ddd7f4c /lualib
parent: 12e030093062ff6fbbb31f601d2f690337a9941b (diff)
download: rspamd-f4d5f9c4e45952d37d3aa2549f8a8cd133d09181.tar.gz
download: rspamd-f4d5f9c4e45952d37d3aa2549f8a8cd133d09181.zip
[Minor] Lua_magic: Return utf8 check as it is useful for many cases
Diffstat (limited to 'lualib')
-rw-r--r-- lualib/lua_magic/heuristics.lua | 15
1 files changed, 15 insertions, 0 deletions
diff --git a/lualib/lua_magic/heuristics.lua b/lualib/lua_magic/heuristics.lua
index aa8e9e819..66e186906 100644
--- a/lualib/lua_magic/heuristics.lua
+++ b/lualib/lua_magic/heuristics.lua
@@ -334,6 +334,21 @@ exports.text_part_heuristic = function(part, log_obj, _)
local n8bit = 0
while b >= 127 and n8bit < remain do
+ -- utf8 part
+ if bit.band(b, 0xe0) == 0xc0 and remain > 1 and
+ bit.band(bytes[idx + 1], 0xc0) == 0x80 then
+ return true,1
+ elseif bit.band(b, 0xf0) == 0xe0 and remain > 2 and
+ bit.band(bytes[idx + 1], 0xc0) == 0x80 and
+ bit.band(bytes[idx + 2], 0xc0) == 0x80 then
+ return true,2
+ elseif bit.band(b, 0xf8) == 0xf0 and remain > 3 and
+ bit.band(bytes[idx + 1], 0xc0) == 0x80 and
+ bit.band(bytes[idx + 2], 0xc0) == 0x80 and
+ bit.band(bytes[idx + 3], 0xc0) == 0x80 then
+ return true,3
+ end
+
n8bit = n8bit + 1
idx = idx + 1
b = bytes[idx]