You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

dec_bcj.go 12KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461
  1. /*
  2. * Branch/Call/Jump (BCJ) filter decoders
  3. *
  4. * Authors: Lasse Collin <lasse.collin@tukaani.org>
  5. * Igor Pavlov <http://7-zip.org/>
  6. *
  7. * Translation to Go: Michael Cross <https://github.com/xi2>
  8. *
  9. * This file has been put into the public domain.
  10. * You can do whatever you want with this file.
  11. */
  12. package xz
  13. /* from linux/lib/xz/xz_dec_bcj.c *************************************/
  14. type xzDecBCJ struct {
  15. /* Type of the BCJ filter being used */
  16. typ xzFilterID
  17. /*
  18. * Return value of the next filter in the chain. We need to preserve
  19. * this information across calls, because we must not call the next
  20. * filter anymore once it has returned xzStreamEnd
  21. */
  22. ret xzRet
  23. /*
  24. * Absolute position relative to the beginning of the uncompressed
  25. * data (in a single .xz Block).
  26. */
  27. pos int
  28. /* x86 filter state */
  29. x86PrevMask uint32
  30. /* Temporary space to hold the variables from xzBuf */
  31. out []byte
  32. outPos int
  33. temp struct {
  34. /* Amount of already filtered data in the beginning of buf */
  35. filtered int
  36. /*
  37. * Buffer to hold a mix of filtered and unfiltered data. This
  38. * needs to be big enough to hold Alignment + 2 * Look-ahead:
  39. *
  40. * Type Alignment Look-ahead
  41. * x86 1 4
  42. * PowerPC 4 0
  43. * IA-64 16 0
  44. * ARM 4 0
  45. * ARM-Thumb 2 2
  46. * SPARC 4 0
  47. */
  48. buf []byte // slice buf will be backed by bufArray
  49. bufArray [16]byte
  50. }
  51. }
  52. /*
  53. * This is used to test the most significant byte of a memory address
  54. * in an x86 instruction.
  55. */
  56. func bcjX86TestMSByte(b byte) bool {
  57. return b == 0x00 || b == 0xff
  58. }
  59. func bcjX86Filter(s *xzDecBCJ, buf []byte) int {
  60. var maskToAllowedStatus = []bool{
  61. true, true, true, false, true, false, false, false,
  62. }
  63. var maskToBitNum = []byte{0, 1, 2, 2, 3, 3, 3, 3}
  64. var i int
  65. var prevPos int = -1
  66. var prevMask uint32 = s.x86PrevMask
  67. var src uint32
  68. var dest uint32
  69. var j uint32
  70. var b byte
  71. if len(buf) <= 4 {
  72. return 0
  73. }
  74. for i = 0; i < len(buf)-4; i++ {
  75. if buf[i]&0xfe != 0xe8 {
  76. continue
  77. }
  78. prevPos = i - prevPos
  79. if prevPos > 3 {
  80. prevMask = 0
  81. } else {
  82. prevMask = (prevMask << (uint(prevPos) - 1)) & 7
  83. if prevMask != 0 {
  84. b = buf[i+4-int(maskToBitNum[prevMask])]
  85. if !maskToAllowedStatus[prevMask] || bcjX86TestMSByte(b) {
  86. prevPos = i
  87. prevMask = prevMask<<1 | 1
  88. continue
  89. }
  90. }
  91. }
  92. prevPos = i
  93. if bcjX86TestMSByte(buf[i+4]) {
  94. src = getLE32(buf[i+1:])
  95. for {
  96. dest = src - uint32(s.pos+i+5)
  97. if prevMask == 0 {
  98. break
  99. }
  100. j = uint32(maskToBitNum[prevMask]) * 8
  101. b = byte(dest >> (24 - j))
  102. if !bcjX86TestMSByte(b) {
  103. break
  104. }
  105. src = dest ^ (1<<(32-j) - 1)
  106. }
  107. dest &= 0x01FFFFFF
  108. dest |= 0 - dest&0x01000000
  109. putLE32(dest, buf[i+1:])
  110. i += 4
  111. } else {
  112. prevMask = prevMask<<1 | 1
  113. }
  114. }
  115. prevPos = i - prevPos
  116. if prevPos > 3 {
  117. s.x86PrevMask = 0
  118. } else {
  119. s.x86PrevMask = prevMask << (uint(prevPos) - 1)
  120. }
  121. return i
  122. }
  123. func bcjPowerPCFilter(s *xzDecBCJ, buf []byte) int {
  124. var i int
  125. var instr uint32
  126. for i = 0; i+4 <= len(buf); i += 4 {
  127. instr = getBE32(buf[i:])
  128. if instr&0xFC000003 == 0x48000001 {
  129. instr &= 0x03FFFFFC
  130. instr -= uint32(s.pos + i)
  131. instr &= 0x03FFFFFC
  132. instr |= 0x48000001
  133. putBE32(instr, buf[i:])
  134. }
  135. }
  136. return i
  137. }
  138. var bcjIA64BranchTable = [...]byte{
  139. 0, 0, 0, 0, 0, 0, 0, 0,
  140. 0, 0, 0, 0, 0, 0, 0, 0,
  141. 4, 4, 6, 6, 0, 0, 7, 7,
  142. 4, 4, 0, 0, 4, 4, 0, 0,
  143. }
  144. func bcjIA64Filter(s *xzDecBCJ, buf []byte) int {
  145. var branchTable = bcjIA64BranchTable[:]
  146. /*
  147. * The local variables take a little bit stack space, but it's less
  148. * than what LZMA2 decoder takes, so it doesn't make sense to reduce
  149. * stack usage here without doing that for the LZMA2 decoder too.
  150. */
  151. /* Loop counters */
  152. var i int
  153. var j int
  154. /* Instruction slot (0, 1, or 2) in the 128-bit instruction word */
  155. var slot uint32
  156. /* Bitwise offset of the instruction indicated by slot */
  157. var bitPos uint32
  158. /* bit_pos split into byte and bit parts */
  159. var bytePos uint32
  160. var bitRes uint32
  161. /* Address part of an instruction */
  162. var addr uint32
  163. /* Mask used to detect which instructions to convert */
  164. var mask uint32
  165. /* 41-bit instruction stored somewhere in the lowest 48 bits */
  166. var instr uint64
  167. /* Instruction normalized with bit_res for easier manipulation */
  168. var norm uint64
  169. for i = 0; i+16 <= len(buf); i += 16 {
  170. mask = uint32(branchTable[buf[i]&0x1f])
  171. for slot, bitPos = 0, 5; slot < 3; slot, bitPos = slot+1, bitPos+41 {
  172. if (mask>>slot)&1 == 0 {
  173. continue
  174. }
  175. bytePos = bitPos >> 3
  176. bitRes = bitPos & 7
  177. instr = 0
  178. for j = 0; j < 6; j++ {
  179. instr |= uint64(buf[i+j+int(bytePos)]) << (8 * uint(j))
  180. }
  181. norm = instr >> bitRes
  182. if (norm>>37)&0x0f == 0x05 && (norm>>9)&0x07 == 0 {
  183. addr = uint32((norm >> 13) & 0x0fffff)
  184. addr |= (uint32(norm>>36) & 1) << 20
  185. addr <<= 4
  186. addr -= uint32(s.pos + i)
  187. addr >>= 4
  188. norm &= ^(uint64(0x8fffff) << 13)
  189. norm |= uint64(addr&0x0fffff) << 13
  190. norm |= uint64(addr&0x100000) << (36 - 20)
  191. instr &= 1<<bitRes - 1
  192. instr |= norm << bitRes
  193. for j = 0; j < 6; j++ {
  194. buf[i+j+int(bytePos)] = byte(instr >> (8 * uint(j)))
  195. }
  196. }
  197. }
  198. }
  199. return i
  200. }
  201. func bcjARMFilter(s *xzDecBCJ, buf []byte) int {
  202. var i int
  203. var addr uint32
  204. for i = 0; i+4 <= len(buf); i += 4 {
  205. if buf[i+3] == 0xeb {
  206. addr = uint32(buf[i]) | uint32(buf[i+1])<<8 |
  207. uint32(buf[i+2])<<16
  208. addr <<= 2
  209. addr -= uint32(s.pos + i + 8)
  210. addr >>= 2
  211. buf[i] = byte(addr)
  212. buf[i+1] = byte(addr >> 8)
  213. buf[i+2] = byte(addr >> 16)
  214. }
  215. }
  216. return i
  217. }
  218. func bcjARMThumbFilter(s *xzDecBCJ, buf []byte) int {
  219. var i int
  220. var addr uint32
  221. for i = 0; i+4 <= len(buf); i += 2 {
  222. if buf[i+1]&0xf8 == 0xf0 && buf[i+3]&0xf8 == 0xf8 {
  223. addr = uint32(buf[i+1]&0x07)<<19 |
  224. uint32(buf[i])<<11 |
  225. uint32(buf[i+3]&0x07)<<8 |
  226. uint32(buf[i+2])
  227. addr <<= 1
  228. addr -= uint32(s.pos + i + 4)
  229. addr >>= 1
  230. buf[i+1] = byte(0xf0 | (addr>>19)&0x07)
  231. buf[i] = byte(addr >> 11)
  232. buf[i+3] = byte(0xf8 | (addr>>8)&0x07)
  233. buf[i+2] = byte(addr)
  234. i += 2
  235. }
  236. }
  237. return i
  238. }
  239. func bcjSPARCFilter(s *xzDecBCJ, buf []byte) int {
  240. var i int
  241. var instr uint32
  242. for i = 0; i+4 <= len(buf); i += 4 {
  243. instr = getBE32(buf[i:])
  244. if instr>>22 == 0x100 || instr>>22 == 0x1ff {
  245. instr <<= 2
  246. instr -= uint32(s.pos + i)
  247. instr >>= 2
  248. instr = (0x40000000 - instr&0x400000) |
  249. 0x40000000 | (instr & 0x3FFFFF)
  250. putBE32(instr, buf[i:])
  251. }
  252. }
  253. return i
  254. }
  255. /*
  256. * Apply the selected BCJ filter. Update *pos and s.pos to match the amount
  257. * of data that got filtered.
  258. */
  259. func bcjApply(s *xzDecBCJ, buf []byte, pos *int) {
  260. var filtered int
  261. buf = buf[*pos:]
  262. switch s.typ {
  263. case idBCJX86:
  264. filtered = bcjX86Filter(s, buf)
  265. case idBCJPowerPC:
  266. filtered = bcjPowerPCFilter(s, buf)
  267. case idBCJIA64:
  268. filtered = bcjIA64Filter(s, buf)
  269. case idBCJARM:
  270. filtered = bcjARMFilter(s, buf)
  271. case idBCJARMThumb:
  272. filtered = bcjARMThumbFilter(s, buf)
  273. case idBCJSPARC:
  274. filtered = bcjSPARCFilter(s, buf)
  275. default:
  276. /* Never reached */
  277. }
  278. *pos += filtered
  279. s.pos += filtered
  280. }
  281. /*
  282. * Flush pending filtered data from temp to the output buffer.
  283. * Move the remaining mixture of possibly filtered and unfiltered
  284. * data to the beginning of temp.
  285. */
  286. func bcjFlush(s *xzDecBCJ, b *xzBuf) {
  287. var copySize int
  288. copySize = len(b.out) - b.outPos
  289. if copySize > s.temp.filtered {
  290. copySize = s.temp.filtered
  291. }
  292. copy(b.out[b.outPos:], s.temp.buf[:copySize])
  293. b.outPos += copySize
  294. s.temp.filtered -= copySize
  295. copy(s.temp.buf, s.temp.buf[copySize:])
  296. s.temp.buf = s.temp.buf[:len(s.temp.buf)-copySize]
  297. }
  298. /*
  299. * Decode raw stream which has a BCJ filter as the first filter.
  300. *
  301. * The BCJ filter functions are primitive in sense that they process the
  302. * data in chunks of 1-16 bytes. To hide this issue, this function does
  303. * some buffering.
  304. */
  305. func xzDecBCJRun(s *xzDecBCJ, b *xzBuf, chain func(*xzBuf) xzRet) xzRet {
  306. var outStart int
  307. /*
  308. * Flush pending already filtered data to the output buffer. Return
  309. * immediately if we couldn't flush everything, or if the next
  310. * filter in the chain had already returned xzStreamEnd.
  311. */
  312. if s.temp.filtered > 0 {
  313. bcjFlush(s, b)
  314. if s.temp.filtered > 0 {
  315. return xzOK
  316. }
  317. if s.ret == xzStreamEnd {
  318. return xzStreamEnd
  319. }
  320. }
  321. /*
  322. * If we have more output space than what is currently pending in
  323. * temp, copy the unfiltered data from temp to the output buffer
  324. * and try to fill the output buffer by decoding more data from the
  325. * next filter in the chain. Apply the BCJ filter on the new data
  326. * in the output buffer. If everything cannot be filtered, copy it
  327. * to temp and rewind the output buffer position accordingly.
  328. *
  329. * This needs to be always run when len(temp.buf) == 0 to handle a special
  330. * case where the output buffer is full and the next filter has no
  331. * more output coming but hasn't returned xzStreamEnd yet.
  332. */
  333. if len(s.temp.buf) < len(b.out)-b.outPos || len(s.temp.buf) == 0 {
  334. outStart = b.outPos
  335. copy(b.out[b.outPos:], s.temp.buf)
  336. b.outPos += len(s.temp.buf)
  337. s.ret = chain(b)
  338. if s.ret != xzStreamEnd && s.ret != xzOK {
  339. return s.ret
  340. }
  341. bcjApply(s, b.out[:b.outPos], &outStart)
  342. /*
  343. * As an exception, if the next filter returned xzStreamEnd,
  344. * we can do that too, since the last few bytes that remain
  345. * unfiltered are meant to remain unfiltered.
  346. */
  347. if s.ret == xzStreamEnd {
  348. return xzStreamEnd
  349. }
  350. s.temp.buf = s.temp.bufArray[:b.outPos-outStart]
  351. b.outPos -= len(s.temp.buf)
  352. copy(s.temp.buf, b.out[b.outPos:])
  353. /*
  354. * If there wasn't enough input to the next filter to fill
  355. * the output buffer with unfiltered data, there's no point
  356. * to try decoding more data to temp.
  357. */
  358. if b.outPos+len(s.temp.buf) < len(b.out) {
  359. return xzOK
  360. }
  361. }
  362. /*
  363. * We have unfiltered data in temp. If the output buffer isn't full
  364. * yet, try to fill the temp buffer by decoding more data from the
  365. * next filter. Apply the BCJ filter on temp. Then we hopefully can
  366. * fill the actual output buffer by copying filtered data from temp.
  367. * A mix of filtered and unfiltered data may be left in temp; it will
  368. * be taken care on the next call to this function.
  369. */
  370. if b.outPos < len(b.out) {
  371. /* Make b.out temporarily point to s.temp. */
  372. s.out = b.out
  373. s.outPos = b.outPos
  374. b.out = s.temp.bufArray[:]
  375. b.outPos = len(s.temp.buf)
  376. s.ret = chain(b)
  377. s.temp.buf = s.temp.bufArray[:b.outPos]
  378. b.out = s.out
  379. b.outPos = s.outPos
  380. if s.ret != xzOK && s.ret != xzStreamEnd {
  381. return s.ret
  382. }
  383. bcjApply(s, s.temp.buf, &s.temp.filtered)
  384. /*
  385. * If the next filter returned xzStreamEnd, we mark that
  386. * everything is filtered, since the last unfiltered bytes
  387. * of the stream are meant to be left as is.
  388. */
  389. if s.ret == xzStreamEnd {
  390. s.temp.filtered = len(s.temp.buf)
  391. }
  392. bcjFlush(s, b)
  393. if s.temp.filtered > 0 {
  394. return xzOK
  395. }
  396. }
  397. return s.ret
  398. }
  399. /*
  400. * Allocate memory for BCJ decoders. xzDecBCJReset must be used before
  401. * calling xzDecBCJRun.
  402. */
  403. func xzDecBCJCreate() *xzDecBCJ {
  404. return new(xzDecBCJ)
  405. }
  406. /*
  407. * Decode the Filter ID of a BCJ filter and check the start offset is
  408. * valid. Returns xzOK if the given Filter ID and offset is
  409. * supported. Otherwise xzOptionsError is returned.
  410. */
  411. func xzDecBCJReset(s *xzDecBCJ, id xzFilterID, offset int) xzRet {
  412. switch id {
  413. case idBCJX86:
  414. case idBCJPowerPC:
  415. case idBCJIA64:
  416. case idBCJARM:
  417. case idBCJARMThumb:
  418. case idBCJSPARC:
  419. default:
  420. /* Unsupported Filter ID */
  421. return xzOptionsError
  422. }
  423. // check offset is a multiple of alignment
  424. switch id {
  425. case idBCJPowerPC, idBCJARM, idBCJSPARC:
  426. if offset%4 != 0 {
  427. return xzOptionsError
  428. }
  429. case idBCJIA64:
  430. if offset%16 != 0 {
  431. return xzOptionsError
  432. }
  433. case idBCJARMThumb:
  434. if offset%2 != 0 {
  435. return xzOptionsError
  436. }
  437. }
  438. s.typ = id
  439. s.ret = xzOK
  440. s.pos = offset
  441. s.x86PrevMask = 0
  442. s.temp.filtered = 0
  443. s.temp.buf = nil
  444. return xzOK
  445. }