123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452 |
- --[[
- Copyright (c) 2019, Vsevolod Stakhov <vsevolod@highsecure.ru>
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- ]]--
-
- --[[[
- -- @module lua_magic/patterns
- -- This module contains most common patterns
- --]]
-
- local heuristics = require "lua_magic/heuristics"
-
-- Table of file-type signatures, keyed by a short type name.
--
-- Every entry carries a `matches` list whose elements are alternatives.
-- Fields used by the alternatives in this file:
--   * string            - signature as a pattern string (the syntax looks
--                         PCRE-like: `\x{..}`, `\d`, `(?i)`, `^`)
--   * hex               - signature as a string of hex digits (two per byte)
--   * position          - a number, or a {comparison, number} pair
--   * relative_position - a number
--   * weight            - numeric score attached to the alternative
--   * heuristic         - optional function taken from lua_magic/heuristics
--   * tail              - only set for `dmg`, which is matched at the data tail
--
-- NOTE(review): the matcher itself is not part of this file. Judging by the
-- entries, `relative_position` is the zero-based offset of the first signature
-- byte (tar: 257, zoo: 20, DJVM: 0x0c), while a numeric `position` is the
-- offset just past the signature (png: 8, bz2: 3, ace: 14) - confirm against
-- the matcher before relying on this.
local patterns = {
  pdf = {
    -- These are alternatives
    matches = {
      {
        string = [[%PDF-[12]\.\d]],
        position = {'<=', 1024},
        weight = 60,
        heuristic = heuristics.pdf_format_heuristic
      },
      {
        string = [[%FDF-[12]\.\d]],
        position = {'<=', 1024},
        weight = 60,
        heuristic = heuristics.pdf_format_heuristic
      },
    },
  },
  ps = {
    matches = {
      {
        string = [[%!PS-Adobe]],
        relative_position = 0,
        weight = 60,
      },
    },
  },
  -- RTF document
  rtf = {
    matches = {
      {
        string = [[^{\\rt]],
        position = 4,
        weight = 60,
      }
    }
  },
  chm = {
    matches = {
      {
        string = [[ITSF]],
        relative_position = 0,
        weight = 60,
      }
    }
  },
  djvu = {
    matches = {
      {
        string = [[AT&TFORM]],
        relative_position = 0,
        weight = 60,
      },
      {
        string = [[DJVM]],
        relative_position = 0x0c,
        weight = 60,
      }
    }
  },
  -- MS Office format, needs heuristic
  ole = {
    matches = {
      {
        hex = [[d0cf11e0a1b11ae1]],
        relative_position = 0,
        weight = 60,
        heuristic = heuristics.ole_format_heuristic
      }
    }
  },
  -- MS Exe file
  -- Both alternatives carry a low weight (15 each, versus 60 elsewhere)
  exe = {
    matches = {
      {
        string = [[MZ]],
        relative_position = 0,
        weight = 15,
      },
      -- PE part
      {
        string = [[PE\x{00}\x{00}]],
        position = {'>=', 0x3c + 4},
        weight = 15,
      }
    }
  },
  elf = {
    matches = {
      {
        hex = [[7f454c46]], -- "\x7fELF"
        relative_position = 0,
        weight = 60,
      },
    }
  },
  lnk = {
    matches = {
      {
        hex = [[4C0000000114020000000000C000000000000046]],
        relative_position = 0,
        weight = 60,
      },
    }
  },
  bat = {
    matches = {
      {
        -- Case-insensitive `@ECHO OFF`, accepted at any offset
        string = [[(?i)@\s*ECHO\s+OFF]],
        position = {'>=', 0},
        weight = 60,
      },
    }
  },
  class = {
    -- The 0xCAFEBABE magic is not unique to Java: Mach-O files use it as
    -- well. This is deliberate - Java classes, Mach objects and anything else
    -- starting with these bytes are all reported as `class`, so that they can
    -- be treated as bad and blocked together.
    matches = {
      {
        hex = [[cafebabe]],
        relative_position = 0,
        weight = 60,
      },
    }
  },
  -- Archives
  arj = {
    matches = {
      {
        hex = '60EA',
        relative_position = 0,
        weight = 60,
      },
    }
  },
  ace = {
    matches = {
      {
        string = [[\*\*ACE\*\*]],
        position = 14,
        weight = 60,
      },
    }
  },
  cab = {
    matches = {
      {
        hex = [[4d53434600000000]], -- "MSCF" + 4 zero bytes; can be anywhere for SFX :(
        position = {'>=', 8},
        weight = 60,
      },
    }
  },
  tar = {
    matches = {
      {
        string = [[ustar]],
        relative_position = 257,
        weight = 60,
      },
    }
  },
  bz2 = {
    matches = {
      {
        string = "^BZ[h0]",
        position = 3,
        weight = 60,
      },
    }
  },
  lz4 = {
    matches = {
      {
        hex = "04224d18",
        relative_position = 0,
        weight = 60,
      },
      {
        hex = "03214c18",
        relative_position = 0,
        weight = 60,
      },
      {
        hex = "02214c18",
        relative_position = 0,
        weight = 60,
      },
      {
        -- MozLZ4: "mozLz40\0"
        hex = '6d6f7a4c7a343000',
        relative_position = 0,
        weight = 60,
      }
    }
  },
  zst = {
    matches = {
      {
        string = [[^[\x{22}-\x{40}]\x{B5}\x{2F}\x{FD}]],
        position = 4,
        weight = 60,
      },
    }
  },
  zoo = {
    matches = {
      {
        hex = [[dca7c4fd]],
        relative_position = 20,
        weight = 60,
      },
    }
  },
  xar = {
    matches = {
      {
        string = [[xar!]],
        relative_position = 0,
        weight = 60,
      },
    }
  },
  iso = {
    matches = {
      {
        string = [[\x{01}CD001\x{01}]],
        position = {'>=', 0x8000 + 7}, -- first 32k is unused
        weight = 60,
      },
    }
  },
  egg = {
    -- ALZip egg
    matches = {
      {
        string = [[EGGA]],
        weight = 60,
        relative_position = 0,
      },
    }
  },
  alz = {
    -- ALZip alz
    matches = {
      {
        string = [[ALZ\x{01}]],
        weight = 60,
        relative_position = 0,
      },
    }
  },
  -- Apple disk images are the odd one out: the signature has to be matched
  -- at the data tail rather than at the start. This is the only entry that
  -- sets `tail`, uses a negative `position`, and has a weight of 61.
  dmg = {
    matches = {
      {
        string = [[koly\x{00}\x{00}\x{00}\x{04}]],
        position = -512 + 8,
        weight = 61,
        tail = 512,
      },
    }
  },
  szdd = {
    matches = {
      {
        hex = [[535a4444]], -- "SZDD"
        relative_position = 0,
        weight = 60,
      },
    }
  },
  xz = {
    matches = {
      {
        hex = [[FD377A585A00]], -- "\xFD7zXZ\0"
        relative_position = 0,
        weight = 60,
      },
    }
  },
  -- Images
  psd = {
    matches = {
      {
        string = [[8BPS]],
        relative_position = 0,
        weight = 60,
      },
    }
  },
  ico = {
    matches = {
      {
        hex = [[00000100]],
        relative_position = 0,
        weight = 60,
      },
    }
  },
  pcx = {
    matches = {
      {
        hex = [[0A050108]],
        relative_position = 0,
        weight = 60,
      },
    }
  },
  pic = {
    matches = {
      {
        hex = [[FF80C9C71A00]],
        relative_position = 0,
        weight = 60,
      },
    }
  },
  swf = {
    matches = {
      {
        hex = [[5a5753]], -- "ZWS": LZMA
        relative_position = 0,
        weight = 60,
      },
      {
        hex = [[435753]], -- "CWS": Zlib
        relative_position = 0,
        weight = 60,
      },
      {
        hex = [[465753]], -- "FWS": Uncompressed
        relative_position = 0,
        weight = 60,
      },
    }
  },
  tiff = {
    matches = {
      {
        hex = [[49492a00]], -- "II" + 42 as LE 16 bit
        relative_position = 0,
        weight = 60,
      },
      {
        -- "MM" + 42 as BE 16 bit. The byte-order mark alone ("MM") is not
        -- enough: it would flag any data starting with these two letters,
        -- so the version field is checked exactly as in the LE alternative.
        hex = [[4d4d002a]],
        relative_position = 0,
        weight = 60,
      },
    }
  },
  -- Other
  pgp = {
    matches = {
      {
        hex = [[A803504750]], -- A8 03 "PGP"
        relative_position = 0,
        weight = 60,
      },
      {
        -- ASCII armor: "-----BEGIN PGP MESSAGE-". An armor header line opens
        -- with five dashes, so all of them must be part of a signature that
        -- is pinned to offset 0; with a single dash the second byte would
        -- have to be "B" and real armored data could never match.
        hex = [[2D2D2D2D2D424547494E20504750204D4553534147452D]],
        relative_position = 0,
        weight = 60,
      },
    }
  },
  uue = {
    matches = {
      {
        hex = [[626567696e20]], -- "begin "
        relative_position = 0,
        weight = 60,
      },
    }
  },
  dwg = {
    matches = {
      {
        string = '^AC10[12][2-9]',
        position = 6,
        weight = 60,
      }
    }
  },
  jpg = {
    matches = {
      { -- JPEG2000
        hex = [[0000000c6a5020200d0a870a]],
        relative_position = 0,
        weight = 60,
      },
      {
        string = [[^\x{ff}\x{d8}\x{ff}]],
        weight = 60,
        position = 3,
      },
    },
  },
  png = {
    matches = {
      {
        string = [[^\x{89}PNG\x{0d}\x{0a}\x{1a}\x{0a}]],
        position = 8,
        weight = 60,
      },
    }
  },
  gif = {
    matches = {
      {
        string = [[^GIF8\d]],
        position = 5,
        weight = 60,
      },
    }
  },
  bmp = {
    matches = {
      {
        string = [[^BM...\x{00}\x{00}\x{00}\x{00}]],
        position = 9,
        weight = 60,
      },
    }
  },
}

return patterns
|