You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

patterns.lua 8.2KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471
  1. --[[
  2. Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com>
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ]]--
  13. --[[[
  14. -- @module lua_magic/patterns
  15. -- This module contains most common patterns
  16. --]]
  17. local heuristics = require "lua_magic/heuristics"
  -- Table of file-type signatures exported by this module.
  --
  -- Each key is a type name; its value holds a `matches` list whose elements
  -- are alternative signatures for that type. Fields used by the elements:
  --   string            - signature as pattern text (regexp-like syntax such
  --                       as `^`, `\d`, `\x{..}`, `(?i)` appears below)
  --   hex               - signature as a string of hex digits
  --   position          - a number, or an `{ operator, number }` pair
  --   relative_position - a number; the values used below coincide with the
  --                       usual byte offset of each magic (e.g. 257 for tar's
  --                       `ustar`), so this is presumably the offset of the
  --                       signature start -- TODO confirm against the matcher
  --   weight            - numeric score attached to a successful match
  --   heuristic         - optional function from `lua_magic/heuristics`
  --   tail              - only used by `dmg`; presumably the size of the
  --                       trailing window to inspect -- TODO confirm
  -- NOTE(review): for anchored `string` signatures `position` equals the
  -- signature length (e.g. 8 for PNG), so it looks like an end offset of the
  -- match; verify against the matching code before relying on this.
  local patterns = {
    pdf = {
      -- These are alternatives
      matches = {
        {
          string = [[%PDF-[12]\.\d]],
          position = { '<=', 1024 },
          weight = 60,
          heuristic = heuristics.pdf_format_heuristic
        },
        {
          string = [[%FDF-[12]\.\d]],
          position = { '<=', 1024 },
          weight = 60,
          heuristic = heuristics.pdf_format_heuristic
        },
      },
    },
    -- PostScript
    ps = {
      matches = {
        {
          string = [[%!PS-Adobe]],
          relative_position = 0,
          weight = 60,
        },
      },
    },
    -- RTF document
    rtf = {
      matches = {
        {
          string = [[^{\\rt]],
          position = 4,
          weight = 60,
        }
      }
    },
    chm = {
      matches = {
        {
          string = [[ITSF]],
          relative_position = 0,
          weight = 60,
        }
      }
    },
    djvu = {
      matches = {
        {
          string = [[AT&TFORM]],
          relative_position = 0,
          weight = 60,
        },
        {
          string = [[DJVM]],
          relative_position = 0x0c,
          weight = 60,
        }
      }
    },
    -- MS Office format, needs heuristic
    ole = {
      matches = {
        {
          hex = [[d0cf11e0a1b11ae1]],
          relative_position = 0,
          weight = 60,
          heuristic = heuristics.ole_format_heuristic
        }
      }
    },
    -- MS Exe file
    exe = {
      matches = {
        -- DOS stub magic; low weight because two bytes alone are weak evidence
        {
          string = [[MZ]],
          relative_position = 0,
          weight = 15,
        },
        -- PE part
        {
          string = [[PE\x{00}\x{00}]],
          position = { '>=', 0x3c + 4 },
          weight = 15,
          heuristic = heuristics.pe_part_heuristic,
        }
      }
    },
    elf = {
      matches = {
        {
          hex = [[7f454c46]],
          relative_position = 0,
          weight = 60,
        },
      }
    },
    -- Windows shortcut
    lnk = {
      matches = {
        {
          hex = [[4C0000000114020000000000C000000000000046]],
          relative_position = 0,
          weight = 60,
        },
      }
    },
    bat = {
      matches = {
        {
          string = [[(?i)@\s*ECHO\s+OFF]],
          position = { '>=', 0 },
          weight = 60,
        },
      }
    },
    class = {
      -- 0xCAFEBABE is shared by Java class files and Mach-O fat binaries.
      -- The overlap is deliberate: everything starting with this magic is
      -- reported as `class` and is meant to be treated as undesirable.
      matches = {
        {
          hex = [[cafebabe]],
          relative_position = 0,
          weight = 60,
        },
      }
    },
    ics = {
      matches = {
        {
          string = [[BEGIN:VCALENDAR]],
          weight = 60,
          relative_position = 0,
        }
      }
    },
    vcf = {
      matches = {
        {
          string = [[BEGIN:VCARD]],
          weight = 60,
          relative_position = 0,
        }
      }
    },
    -- Archives
    arj = {
      matches = {
        {
          hex = '60EA',
          relative_position = 0,
          weight = 60,
        },
      }
    },
    ace = {
      matches = {
        {
          string = [[\*\*ACE\*\*]],
          position = 14,
          weight = 60,
        },
      }
    },
    cab = {
      matches = {
        {
          hex = [[4d53434600000000]], -- Can be anywhere for SFX :(
          position = { '>=', 8 },
          weight = 60,
        },
      }
    },
    tar = {
      matches = {
        {
          string = [[ustar]],
          relative_position = 257,
          weight = 60,
        },
      }
    },
    bz2 = {
      matches = {
        {
          string = "^BZ[h0]",
          position = 3,
          weight = 60,
        },
      }
    },
    lz4 = {
      matches = {
        {
          hex = "04224d18",
          relative_position = 0,
          weight = 60,
        },
        {
          hex = "03214c18",
          relative_position = 0,
          weight = 60,
        },
        {
          hex = "02214c18",
          relative_position = 0,
          weight = 60,
        },
        {
          -- MozLZ4
          hex = '6d6f7a4c7a343000',
          relative_position = 0,
          weight = 60,
        }
      }
    },
    zst = {
      matches = {
        {
          string = [[^[\x{22}-\x{40}]\x{B5}\x{2F}\x{FD}]],
          position = 4,
          weight = 60,
        },
      }
    },
    zoo = {
      matches = {
        {
          hex = [[dca7c4fd]],
          relative_position = 20,
          weight = 60,
        },
      }
    },
    xar = {
      matches = {
        {
          string = [[xar!]],
          relative_position = 0,
          weight = 60,
        },
      }
    },
    iso = {
      matches = {
        {
          string = [[\x{01}CD001\x{01}]],
          position = { '>=', 0x8000 + 7 }, -- first 32k is unused
          weight = 60,
        },
      }
    },
    egg = {
      -- ALZip egg
      matches = {
        {
          string = [[EGGA]],
          weight = 60,
          relative_position = 0,
        },
      }
    },
    alz = {
      -- ALZip alz
      matches = {
        {
          string = [[ALZ\x{01}]],
          weight = 60,
          relative_position = 0,
        },
      }
    },
    -- Apple disk image: the signature has to be matched at the data tail,
    -- hence the negative position together with `tail`
    dmg = {
      matches = {
        {
          string = [[koly\x{00}\x{00}\x{00}\x{04}]],
          position = -512 + 8,
          weight = 61,
          tail = 512,
        },
      }
    },
    szdd = {
      matches = {
        {
          hex = [[535a4444]],
          relative_position = 0,
          weight = 60,
        },
      }
    },
    xz = {
      matches = {
        {
          hex = [[FD377A585A00]],
          relative_position = 0,
          weight = 60,
        },
      }
    },
    -- Images
    psd = {
      matches = {
        {
          string = [[8BPS]],
          relative_position = 0,
          weight = 60,
        },
      }
    },
    ico = {
      matches = {
        {
          hex = [[00000100]],
          relative_position = 0,
          weight = 60,
        },
      }
    },
    pcx = {
      matches = {
        {
          hex = [[0A050108]],
          relative_position = 0,
          weight = 60,
        },
      }
    },
    pic = {
      matches = {
        {
          hex = [[FF80C9C71A00]],
          relative_position = 0,
          weight = 60,
        },
      }
    },
    swf = {
      matches = {
        {
          hex = [[5a5753]], -- LZMA
          relative_position = 0,
          weight = 60,
        },
        {
          hex = [[435753]], -- Zlib
          relative_position = 0,
          weight = 60,
        },
        {
          hex = [[465753]], -- Uncompressed
          relative_position = 0,
          weight = 60,
        },
      }
    },
    -- TIFF header: byte-order mark ("II" or "MM") followed by the 16-bit
    -- magic number 42 written in that byte order
    tiff = {
      matches = {
        {
          hex = [[49492a00]], -- LE encoded
          relative_position = 0,
          weight = 60,
        },
        {
          -- BE encoded. The magic number is part of the signature: a bare
          -- "MM" would classify any file starting with those two letters
          -- as TIFF.
          hex = [[4d4d002a]],
          relative_position = 0,
          weight = 60,
        },
      }
    },
    -- Other
    pgp = {
      matches = {
        {
          hex = [[A803504750]],
          relative_position = 0,
          weight = 60,
        },
        {
          -- ASCII for "-BEGIN PGP MESSAGE-".
          -- NOTE(review): armoured messages start with five dashes, so if
          -- relative_position = 0 anchors at the very first byte this
          -- signature cannot match real armour -- confirm and adjust.
          hex = [[2D424547494E20504750204D4553534147452D]],
          relative_position = 0,
          weight = 60,
        },
      }
    },
    uue = {
      matches = {
        {
          hex = [[626567696e20]], -- ASCII "begin "
          relative_position = 0,
          weight = 60,
        },
      }
    },
    dwg = {
      matches = {
        {
          string = '^AC10[12][2-9]',
          position = 6,
          weight = 60,
        }
      }
    },
    jpg = {
      matches = {
        { -- JPEG2000
          hex = [[0000000c6a5020200d0a870a]],
          relative_position = 0,
          weight = 60,
        },
        {
          string = [[^\x{ff}\x{d8}\x{ff}]],
          weight = 60,
          position = 3,
        },
      },
    },
    png = {
      matches = {
        {
          string = [[^\x{89}PNG\x{0d}\x{0a}\x{1a}\x{0a}]],
          position = 8,
          weight = 60,
        },
      }
    },
    gif = {
      matches = {
        {
          string = [[^GIF8\d]],
          position = 5,
          weight = 60,
        },
      }
    },
    bmp = {
      matches = {
        {
          string = [[^BM...\x{00}\x{00}\x{00}\x{00}]],
          position = 9,
          weight = 60,
        },
      }
    },
  }

  return patterns