You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

patterns.lua 7.9KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452
  1. --[[
  2. Copyright (c) 2019, Vsevolod Stakhov <vsevolod@highsecure.ru>
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ]]--
  13. --[[[
  14. -- @module lua_magic/patterns
  15. -- This module contains most common patterns
  16. --]]
  17. local heuristics = require "lua_magic/heuristics"
  -- Signature table: maps a file type name to the magic patterns that
  -- identify it. Every entry has a `matches` list whose items are
  -- alternatives for that type.
  --
  -- Fields used by the items in this file:
  --   string            - signature written as pattern text (uses regexp
  --                       syntax such as `\d`, `[12]`, `\x{00}`, `(?i)`)
  --   hex               - signature written as a string of hex digits
  --   position          - a number, or a pair {comparison, number} such as
  --                       {'>=', 8}; for the anchored (`^`) patterns here
  --                       the number equals the signature length, so it
  --                       presumably bounds where the match ends
  --   relative_position - a number; presumably the offset at which the
  --                       signature starts (0 for most, 257 for tar's
  --                       "ustar", 0x0c for djvu's "DJVM")
  --   weight            - score attached to the match (60 for most
  --                       signatures, 15 for each of the two `exe` parts,
  --                       61 for `dmg`)
  --   heuristic         - optional function taken from lua_magic/heuristics
  --   tail              - number used only by `dmg`, whose signature sits
  --                       at the end of the data
  --
  -- NOTE(review): the exact interpretation of these fields belongs to the
  -- lua_magic matcher, which is not part of this file - confirm it there
  -- before changing positions or weights.
  local patterns = {
    pdf = {
      -- These are alternatives
      matches = {
        {
          string = [[%PDF-[12]\.\d]],
          position = {'<=', 1024},
          weight = 60,
          heuristic = heuristics.pdf_format_heuristic
        },
        {
          string = [[%FDF-[12]\.\d]],
          position = {'<=', 1024},
          weight = 60,
          heuristic = heuristics.pdf_format_heuristic
        },
      },
    },
    ps = {
      matches = {
        {
          string = [[%!PS-Adobe]],
          relative_position = 0,
          weight = 60,
        },
      },
    },
    -- RTF document
    rtf = {
      matches = {
        {
          string = [[^{\\rt]],
          position = 4,
          weight = 60,
        }
      }
    },
    chm = {
      matches = {
        {
          string = [[ITSF]],
          relative_position = 0,
          weight = 60,
        }
      }
    },
    djvu = {
      matches = {
        {
          string = [[AT&TFORM]],
          relative_position = 0,
          weight = 60,
        },
        {
          string = [[DJVM]],
          relative_position = 0x0c,
          weight = 60,
        }
      }
    },
    -- MS Office format, needs heuristic
    ole = {
      matches = {
        {
          hex = [[d0cf11e0a1b11ae1]],
          relative_position = 0,
          weight = 60,
          heuristic = heuristics.ole_format_heuristic
        }
      }
    },
    -- MS Exe file: two weak parts (15 each) instead of one strong signature
    exe = {
      matches = {
        {
          string = [[MZ]],
          relative_position = 0,
          weight = 15,
        },
        -- PE part
        {
          string = [[PE\x{00}\x{00}]],
          position = {'>=', 0x3c + 4},
          weight = 15,
        }
      }
    },
    elf = {
      matches = {
        {
          hex = [[7f454c46]],
          relative_position = 0,
          weight = 60,
        },
      }
    },
    lnk = {
      matches = {
        {
          hex = [[4C0000000114020000000000C000000000000046]],
          relative_position = 0,
          weight = 60,
        },
      }
    },
    bat = {
      matches = {
        {
          string = [[(?i)@\s*ECHO\s+OFF]],
          position = {'>=', 0},
          weight = 60,
        },
      }
    },
    class = {
      -- The 0xCAFEBABE magic is shared by Java class files and Mach-O
      -- universal binaries. No attempt is made to tell them apart: every
      -- file starting with these bytes is reported as `class`, so all of
      -- them can be treated as bad and blocked.
      matches = {
        {
          hex = [[cafebabe]],
          relative_position = 0,
          weight = 60,
        },
      }
    },
    -- Archives
    arj = {
      matches = {
        {
          hex = '60EA',
          relative_position = 0,
          weight = 60,
        },
      }
    },
    ace = {
      matches = {
        {
          string = [[\*\*ACE\*\*]],
          position = 14,
          weight = 60,
        },
      }
    },
    cab = {
      matches = {
        {
          hex = [[4d53434600000000]], -- Can be anywhere for SFX :(
          position = {'>=', 8},
          weight = 60,
        },
      }
    },
    tar = {
      matches = {
        {
          string = [[ustar]],
          relative_position = 257,
          weight = 60,
        },
      }
    },
    bz2 = {
      matches = {
        {
          string = "^BZ[h0]",
          position = 3,
          weight = 60,
        },
      }
    },
    lz4 = {
      matches = {
        {
          hex = "04224d18",
          relative_position = 0,
          weight = 60,
        },
        {
          hex = "03214c18",
          relative_position = 0,
          weight = 60,
        },
        {
          hex = "02214c18",
          relative_position = 0,
          weight = 60,
        },
        {
          -- MozLZ4
          hex = '6d6f7a4c7a343000',
          relative_position = 0,
          weight = 60,
        }
      }
    },
    zst = {
      matches = {
        {
          string = [[^[\x{22}-\x{40}]\x{B5}\x{2F}\x{FD}]],
          position = 4,
          weight = 60,
        },
      }
    },
    zoo = {
      matches = {
        {
          hex = [[dca7c4fd]],
          relative_position = 20,
          weight = 60,
        },
      }
    },
    xar = {
      matches = {
        {
          string = [[xar!]],
          relative_position = 0,
          weight = 60,
        },
      }
    },
    iso = {
      matches = {
        {
          string = [[\x{01}CD001\x{01}]],
          position = {'>=', 0x8000 + 7}, -- first 32k is unused
          weight = 60,
        },
      }
    },
    egg = {
      -- ALZip egg
      matches = {
        {
          string = [[EGGA]],
          weight = 60,
          relative_position = 0,
        },
      }
    },
    alz = {
      -- ALZip alz
      matches = {
        {
          string = [[ALZ\x{01}]],
          weight = 60,
          relative_position = 0,
        },
      }
    },
    -- Apple disk image: unlike the other formats, the signature has to be
    -- matched at the data tail (see `tail` and the negative `position`).
    dmg = {
      matches = {
        {
          string = [[koly\x{00}\x{00}\x{00}\x{04}]],
          position = -512 + 8,
          weight = 61,
          tail = 512,
        },
      }
    },
    szdd = {
      matches = {
        {
          hex = [[535a4444]],
          relative_position = 0,
          weight = 60,
        },
      }
    },
    xz = {
      matches = {
        {
          hex = [[FD377A585A00]],
          relative_position = 0,
          weight = 60,
        },
      }
    },
    -- Images
    psd = {
      matches = {
        {
          string = [[8BPS]],
          relative_position = 0,
          weight = 60,
        },
      }
    },
    ico = {
      matches = {
        {
          hex = [[00000100]],
          relative_position = 0,
          weight = 60,
        },
      }
    },
    pcx = {
      matches = {
        {
          hex = [[0A050108]],
          relative_position = 0,
          weight = 60,
        },
      }
    },
    pic = {
      matches = {
        {
          hex = [[FF80C9C71A00]],
          relative_position = 0,
          weight = 60,
        },
      }
    },
    swf = {
      matches = {
        {
          hex = [[5a5753]], -- LZMA
          relative_position = 0,
          weight = 60,
        },
        {
          hex = [[435753]], -- Zlib
          relative_position = 0,
          weight = 60,
        },
        {
          hex = [[465753]], -- Uncompressed
          relative_position = 0,
          weight = 60,
        },
      }
    },
    tiff = {
      -- A TIFF header is the byte-order mark ("II" or "MM") followed by the
      -- number 42 stored in that byte order, so both signatures are four
      -- bytes long. BigTIFF (version 43) is not covered by either of them.
      matches = {
        {
          hex = [[49492a00]], -- LE encoded
          relative_position = 0,
          weight = 60,
        },
        {
          -- BE tiff; the bare "MM" prefix alone would claim any data that
          -- merely starts with those two letters
          hex = [[4d4d002a]],
          relative_position = 0,
          weight = 60,
        },
      }
    },
    -- Other
    pgp = {
      matches = {
        {
          hex = [[A803504750]],
          relative_position = 0,
          weight = 60,
        },
        {
          -- Hex for the text "-BEGIN PGP MESSAGE-".
          -- NOTE(review): an armored message starts with five dashes, so
          -- this text begins at offset 4 of such data - confirm that
          -- relative_position = 0 is what the matcher needs here.
          hex = [[2D424547494E20504750204D4553534147452D]],
          relative_position = 0,
          weight = 60,
        },
      }
    },
    uue = {
      matches = {
        {
          hex = [[626567696e20]], -- the text "begin "
          relative_position = 0,
          weight = 60,
        },
      }
    },
    dwg = {
      matches = {
        {
          string = '^AC10[12][2-9]',
          position = 6,
          weight = 60,
        }
      }
    },
    jpg = {
      matches = {
        { -- JPEG2000
          hex = [[0000000c6a5020200d0a870a]],
          relative_position = 0,
          weight = 60,
        },
        {
          string = [[^\x{ff}\x{d8}\x{ff}]],
          weight = 60,
          position = 3,
        },
      },
    },
    png = {
      matches = {
        {
          string = [[^\x{89}PNG\x{0d}\x{0a}\x{1a}\x{0a}]],
          position = 8,
          weight = 60,
        },
      }
    },
    gif = {
      matches = {
        {
          string = [[^GIF8\d]],
          position = 5,
          weight = 60,
        },
      }
    },
    bmp = {
      matches = {
        {
          -- NOTE(review): "BM" + three arbitrary bytes + four zero bytes is
          -- nine bytes, while the BMP size and reserved fields end at byte
          -- 10 - confirm the intended alignment of the zero bytes.
          string = [[^BM...\x{00}\x{00}\x{00}\x{00}]],
          position = 9,
          weight = 60,
        },
      }
    },
  }

  return patterns