2019-09-05 16:29:35 +02:00
|
|
|
--[[
Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com>

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
]]--
|
|
|
|
|
|
|
|
--[[[
-- @module lua_magic/patterns
-- This module contains the most common patterns
--]]
|
|
|
|
|
2019-09-07 16:11:21 +02:00
|
|
|
local heuristics = require "lua_magic/heuristics"
|
|
|
|
|
2019-09-05 16:29:35 +02:00
|
|
|
-- Table of magic signatures, keyed by a short type name (pdf, elf, png, ...).
--
-- Every entry holds a `matches` list of alternative signatures. A signature is
-- written either as `string` (regular-expression syntax) or as `hex` (raw
-- bytes spelled as hex digits) and carries the following fields:
--   * `weight`   - numeric score attached to the signature;
--   * `relative_position` / `position` - where the signature is expected.
--     NOTE(review): presumably `relative_position` is the offset of the first
--     matched byte, while `position` constrains where the match ends (either a
--     number or a { comparison, offset } pair) - confirm against the code that
--     consumes this table;
--   * `tail`     - optional; NOTE(review): presumably the number of trailing
--     bytes that have to be inspected - confirm;
--   * `heuristic` - optional function taken from lua_magic/heuristics.
local patterns = {
  pdf = {
    -- These are alternatives
    matches = {
      {
        string = [[%PDF-[12]\.\d]],
        position = { '<=', 1024 },
        weight = 60,
        heuristic = heuristics.pdf_format_heuristic
      },
      {
        string = [[%FDF-[12]\.\d]],
        position = { '<=', 1024 },
        weight = 60,
        heuristic = heuristics.pdf_format_heuristic
      },
    },
  },
  ps = {
    matches = {
      {
        string = [[%!PS-Adobe]],
        relative_position = 0,
        weight = 60,
      },
    },
  },
  -- RTF document
  rtf = {
    matches = {
      {
        string = [[^{\\rt]],
        position = 4,
        weight = 60,
      }
    }
  },
  chm = {
    matches = {
      {
        string = [[ITSF]],
        relative_position = 0,
        weight = 60,
      }
    }
  },
  djvu = {
    matches = {
      {
        string = [[AT&TFORM]],
        relative_position = 0,
        weight = 60,
      },
      {
        string = [[DJVM]],
        relative_position = 0x0c,
        weight = 60,
      }
    }
  },
  -- MS Office format, needs heuristic
  ole = {
    matches = {
      {
        hex = [[d0cf11e0a1b11ae1]],
        relative_position = 0,
        weight = 60,
        heuristic = heuristics.ole_format_heuristic
      }
    }
  },
  -- MS Exe file
  exe = {
    matches = {
      {
        string = [[MZ]],
        relative_position = 0,
        weight = 15,
      },
      -- PE part
      {
        string = [[PE\x{00}\x{00}]],
        position = { '>=', 0x3c + 4 },
        weight = 15,
        heuristic = heuristics.pe_part_heuristic,
      }
    }
  },
  elf = {
    matches = {
      {
        hex = [[7f454c46]], -- 0x7f "ELF"
        relative_position = 0,
        weight = 60,
      },
    }
  },
  lnk = {
    matches = {
      {
        hex = [[4C0000000114020000000000C000000000000046]],
        relative_position = 0,
        weight = 60,
      },
    }
  },
  bat = {
    matches = {
      {
        string = [[(?i)@\s*ECHO\s+OFF]],
        position = { '>=', 0 },
        weight = 60,
      },
    }
  },
  class = {
    -- The same magic number is also used by Mach-O files, so those are
    -- reported as `class` too. This is deliberate: Java classes, Mach objects
    -- and anything else starting with 0xCAFEBABE are considered bad and are
    -- meant to be blocked.
    matches = {
      {
        hex = [[cafebabe]],
        relative_position = 0,
        weight = 60,
      },
    }
  },
  ics = {
    matches = {
      {
        string = [[BEGIN:VCALENDAR]],
        weight = 60,
        relative_position = 0,
      }
    }
  },
  vcf = {
    matches = {
      {
        string = [[BEGIN:VCARD]],
        weight = 60,
        relative_position = 0,
      }
    }
  },
  -- Archives
  arj = {
    matches = {
      {
        hex = '60EA',
        relative_position = 0,
        weight = 60,
      },
    }
  },
  ace = {
    matches = {
      {
        string = [[\*\*ACE\*\*]],
        position = 14,
        weight = 60,
      },
    }
  },
  cab = {
    matches = {
      {
        hex = [[4d53434600000000]], -- "MSCF" + 4 zero bytes; can be anywhere for SFX :(
        position = { '>=', 8 },
        weight = 60,
      },
    }
  },
  tar = {
    matches = {
      {
        string = [[ustar]],
        relative_position = 257,
        weight = 60,
      },
    }
  },
  bz2 = {
    matches = {
      {
        string = "^BZ[h0]",
        position = 3,
        weight = 60,
      },
    }
  },
  lz4 = {
    matches = {
      {
        hex = "04224d18",
        relative_position = 0,
        weight = 60,
      },
      {
        hex = "03214c18",
        relative_position = 0,
        weight = 60,
      },
      {
        hex = "02214c18",
        relative_position = 0,
        weight = 60,
      },
      {
        -- MozLZ4
        hex = '6d6f7a4c7a343000', -- "mozLz40" + zero byte
        relative_position = 0,
        weight = 60,
      }
    }
  },
  zst = {
    matches = {
      {
        string = [[^[\x{22}-\x{40}]\x{B5}\x{2F}\x{FD}]],
        position = 4,
        weight = 60,
      },
    }
  },
  zoo = {
    matches = {
      {
        hex = [[dca7c4fd]],
        relative_position = 20,
        weight = 60,
      },
    }
  },
  xar = {
    matches = {
      {
        string = [[xar!]],
        relative_position = 0,
        weight = 60,
      },
    }
  },
  iso = {
    matches = {
      {
        string = [[\x{01}CD001\x{01}]],
        position = { '>=', 0x8000 + 7 }, -- first 32k is unused
        weight = 60,
      },
    }
  },
  egg = {
    -- ALZip egg
    matches = {
      {
        string = [[EGGA]],
        weight = 60,
        relative_position = 0,
      },
    }
  },
  alz = {
    -- ALZip alz
    matches = {
      {
        string = [[ALZ\x{01}]],
        weight = 60,
        relative_position = 0,
      },
    }
  },
  -- Apple disk image: unlike the other formats, this signature needs to be
  -- matched at the tail of the data
  dmg = {
    matches = {
      {
        string = [[koly\x{00}\x{00}\x{00}\x{04}]],
        position = -512 + 8,
        weight = 61,
        tail = 512,
      },
    }
  },
  szdd = {
    matches = {
      {
        hex = [[535a4444]], -- "SZDD"
        relative_position = 0,
        weight = 60,
      },
    }
  },
  xz = {
    matches = {
      {
        hex = [[FD377A585A00]], -- 0xfd "7zXZ" 0x00
        relative_position = 0,
        weight = 60,
      },
    }
  },
  -- Images
  psd = {
    matches = {
      {
        string = [[8BPS]],
        relative_position = 0,
        weight = 60,
      },
    }
  },
  ico = {
    matches = {
      {
        hex = [[00000100]],
        relative_position = 0,
        weight = 60,
      },
    }
  },
  pcx = {
    matches = {
      {
        hex = [[0A050108]],
        relative_position = 0,
        weight = 60,
      },
    }
  },
  pic = {
    matches = {
      {
        hex = [[FF80C9C71A00]],
        relative_position = 0,
        weight = 60,
      },
    }
  },
  swf = {
    matches = {
      {
        hex = [[5a5753]], -- "ZWS": LZMA
        relative_position = 0,
        weight = 60,
      },
      {
        hex = [[435753]], -- "CWS": Zlib
        relative_position = 0,
        weight = 60,
      },
      {
        hex = [[465753]], -- "FWS": Uncompressed
        relative_position = 0,
        weight = 60,
      },
    }
  },
  tiff = {
    -- Both byte orders check the complete 4-byte header: the byte-order mark
    -- ("II" or "MM") followed by the version number 42 stored in that byte
    -- order. A bare "MM" prefix is far too short to be a reliable signature.
    matches = {
      {
        hex = [[49492a00]], -- LE encoded: "II", 42 as little-endian 16 bit
        relative_position = 0,
        weight = 60,
      },
      {
        hex = [[4d4d002a]], -- BE encoded: "MM", 42 as big-endian 16 bit
        relative_position = 0,
        weight = 60,
      },
    }
  },
  -- Other
  pgp = {
    matches = {
      {
        hex = [[A803504750]], -- 0xa8 0x03 "PGP"
        relative_position = 0,
        weight = 60,
      },
      {
        -- ASCII "-BEGIN PGP MESSAGE-"
        -- NOTE(review): armored messages normally start with five dashes;
        -- confirm that this signature can match with relative_position = 0
        hex = [[2D424547494E20504750204D4553534147452D]],
        relative_position = 0,
        weight = 60,
      },
    }
  },
  uue = {
    matches = {
      {
        hex = [[626567696e20]], -- "begin "
        relative_position = 0,
        weight = 60,
      },
    }
  },
  dwg = {
    matches = {
      {
        string = '^AC10[12][2-9]',
        position = 6,
        weight = 60,
      }
    }
  },
  jpg = {
    matches = {
      { -- JPEG2000
        hex = [[0000000c6a5020200d0a870a]],
        relative_position = 0,
        weight = 60,
      },
      {
        string = [[^\x{ff}\x{d8}\x{ff}]],
        weight = 60,
        position = 3,
      },
    },
  },
  png = {
    matches = {
      {
        string = [[^\x{89}PNG\x{0d}\x{0a}\x{1a}\x{0a}]],
        position = 8,
        weight = 60,
      },
    }
  },
  gif = {
    matches = {
      {
        string = [[^GIF8\d]],
        position = 5,
        weight = 60,
      },
    }
  },
  bmp = {
    matches = {
      {
        string = [[^BM...\x{00}\x{00}\x{00}\x{00}]],
        position = 9,
        weight = 60,
      },
    }
  },
}
|
|
|
|
|
2021-01-04 12:41:04 +01:00
|
|
|
-- Export the signature table as the value of this module
return patterns
|