123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461 |
- /*
- * Branch/Call/Jump (BCJ) filter decoders
- *
- * Authors: Lasse Collin <lasse.collin@tukaani.org>
- * Igor Pavlov <http://7-zip.org/>
- *
- * Translation to Go: Michael Cross <https://github.com/xi2>
- *
- * This file has been put into the public domain.
- * You can do whatever you want with this file.
- */
-
- package xz
-
- /* from linux/lib/xz/xz_dec_bcj.c *************************************/
-
- type xzDecBCJ struct {
- /* Type of the BCJ filter being used */
- typ xzFilterID
- /*
- * Return value of the next filter in the chain. We need to preserve
- * this information across calls, because we must not call the next
- * filter anymore once it has returned xzStreamEnd
- */
- ret xzRet
- /*
- * Absolute position relative to the beginning of the uncompressed
- * data (in a single .xz Block).
- */
- pos int
- /* x86 filter state */
- x86PrevMask uint32
- /* Temporary space to hold the variables from xzBuf */
- out []byte
- outPos int
- temp struct {
- /* Amount of already filtered data in the beginning of buf */
- filtered int
- /*
- * Buffer to hold a mix of filtered and unfiltered data. This
- * needs to be big enough to hold Alignment + 2 * Look-ahead:
- *
- * Type Alignment Look-ahead
- * x86 1 4
- * PowerPC 4 0
- * IA-64 16 0
- * ARM 4 0
- * ARM-Thumb 2 2
- * SPARC 4 0
- */
- buf []byte // slice buf will be backed by bufArray
- bufArray [16]byte
- }
- }
-
/*
 * Report whether b is 0x00 or 0xff. The x86 filter applies this check to
 * the most significant byte of a memory address in an x86 instruction.
 */
func bcjX86TestMSByte(b byte) bool {
	switch b {
	case 0x00, 0xff:
		return true
	}
	return false
}
-
- func bcjX86Filter(s *xzDecBCJ, buf []byte) int {
- var maskToAllowedStatus = []bool{
- true, true, true, false, true, false, false, false,
- }
- var maskToBitNum = []byte{0, 1, 2, 2, 3, 3, 3, 3}
- var i int
- var prevPos int = -1
- var prevMask uint32 = s.x86PrevMask
- var src uint32
- var dest uint32
- var j uint32
- var b byte
- if len(buf) <= 4 {
- return 0
- }
- for i = 0; i < len(buf)-4; i++ {
- if buf[i]&0xfe != 0xe8 {
- continue
- }
- prevPos = i - prevPos
- if prevPos > 3 {
- prevMask = 0
- } else {
- prevMask = (prevMask << (uint(prevPos) - 1)) & 7
- if prevMask != 0 {
- b = buf[i+4-int(maskToBitNum[prevMask])]
- if !maskToAllowedStatus[prevMask] || bcjX86TestMSByte(b) {
- prevPos = i
- prevMask = prevMask<<1 | 1
- continue
- }
- }
- }
- prevPos = i
- if bcjX86TestMSByte(buf[i+4]) {
- src = getLE32(buf[i+1:])
- for {
- dest = src - uint32(s.pos+i+5)
- if prevMask == 0 {
- break
- }
- j = uint32(maskToBitNum[prevMask]) * 8
- b = byte(dest >> (24 - j))
- if !bcjX86TestMSByte(b) {
- break
- }
- src = dest ^ (1<<(32-j) - 1)
- }
- dest &= 0x01FFFFFF
- dest |= 0 - dest&0x01000000
- putLE32(dest, buf[i+1:])
- i += 4
- } else {
- prevMask = prevMask<<1 | 1
- }
- }
- prevPos = i - prevPos
- if prevPos > 3 {
- s.x86PrevMask = 0
- } else {
- s.x86PrevMask = prevMask << (uint(prevPos) - 1)
- }
- return i
- }
-
- func bcjPowerPCFilter(s *xzDecBCJ, buf []byte) int {
- var i int
- var instr uint32
- for i = 0; i+4 <= len(buf); i += 4 {
- instr = getBE32(buf[i:])
- if instr&0xFC000003 == 0x48000001 {
- instr &= 0x03FFFFFC
- instr -= uint32(s.pos + i)
- instr &= 0x03FFFFFC
- instr |= 0x48000001
- putBE32(instr, buf[i:])
- }
- }
- return i
- }
-
/*
 * Lookup table for the IA-64 filter, indexed by the low five bits of the
 * first byte of a 16-byte instruction word. Bit n of an entry selects
 * instruction slot n for conversion. Entries 0x00-0x0f are all zero.
 */
var bcjIA64BranchTable = [32]byte{
	0x10: 4, 4, 6, 6, 0, 0, 7, 7,
	0x18: 4, 4, 0, 0, 4, 4, 0, 0,
}
-
- func bcjIA64Filter(s *xzDecBCJ, buf []byte) int {
- var branchTable = bcjIA64BranchTable[:]
- /*
- * The local variables take a little bit stack space, but it's less
- * than what LZMA2 decoder takes, so it doesn't make sense to reduce
- * stack usage here without doing that for the LZMA2 decoder too.
- */
- /* Loop counters */
- var i int
- var j int
- /* Instruction slot (0, 1, or 2) in the 128-bit instruction word */
- var slot uint32
- /* Bitwise offset of the instruction indicated by slot */
- var bitPos uint32
- /* bit_pos split into byte and bit parts */
- var bytePos uint32
- var bitRes uint32
- /* Address part of an instruction */
- var addr uint32
- /* Mask used to detect which instructions to convert */
- var mask uint32
- /* 41-bit instruction stored somewhere in the lowest 48 bits */
- var instr uint64
- /* Instruction normalized with bit_res for easier manipulation */
- var norm uint64
- for i = 0; i+16 <= len(buf); i += 16 {
- mask = uint32(branchTable[buf[i]&0x1f])
- for slot, bitPos = 0, 5; slot < 3; slot, bitPos = slot+1, bitPos+41 {
- if (mask>>slot)&1 == 0 {
- continue
- }
- bytePos = bitPos >> 3
- bitRes = bitPos & 7
- instr = 0
- for j = 0; j < 6; j++ {
- instr |= uint64(buf[i+j+int(bytePos)]) << (8 * uint(j))
- }
- norm = instr >> bitRes
- if (norm>>37)&0x0f == 0x05 && (norm>>9)&0x07 == 0 {
- addr = uint32((norm >> 13) & 0x0fffff)
- addr |= (uint32(norm>>36) & 1) << 20
- addr <<= 4
- addr -= uint32(s.pos + i)
- addr >>= 4
- norm &= ^(uint64(0x8fffff) << 13)
- norm |= uint64(addr&0x0fffff) << 13
- norm |= uint64(addr&0x100000) << (36 - 20)
- instr &= 1<<bitRes - 1
- instr |= norm << bitRes
- for j = 0; j < 6; j++ {
- buf[i+j+int(bytePos)] = byte(instr >> (8 * uint(j)))
- }
- }
- }
- }
- return i
- }
-
- func bcjARMFilter(s *xzDecBCJ, buf []byte) int {
- var i int
- var addr uint32
- for i = 0; i+4 <= len(buf); i += 4 {
- if buf[i+3] == 0xeb {
- addr = uint32(buf[i]) | uint32(buf[i+1])<<8 |
- uint32(buf[i+2])<<16
- addr <<= 2
- addr -= uint32(s.pos + i + 8)
- addr >>= 2
- buf[i] = byte(addr)
- buf[i+1] = byte(addr >> 8)
- buf[i+2] = byte(addr >> 16)
- }
- }
- return i
- }
-
- func bcjARMThumbFilter(s *xzDecBCJ, buf []byte) int {
- var i int
- var addr uint32
- for i = 0; i+4 <= len(buf); i += 2 {
- if buf[i+1]&0xf8 == 0xf0 && buf[i+3]&0xf8 == 0xf8 {
- addr = uint32(buf[i+1]&0x07)<<19 |
- uint32(buf[i])<<11 |
- uint32(buf[i+3]&0x07)<<8 |
- uint32(buf[i+2])
- addr <<= 1
- addr -= uint32(s.pos + i + 4)
- addr >>= 1
- buf[i+1] = byte(0xf0 | (addr>>19)&0x07)
- buf[i] = byte(addr >> 11)
- buf[i+3] = byte(0xf8 | (addr>>8)&0x07)
- buf[i+2] = byte(addr)
- i += 2
- }
- }
- return i
- }
-
- func bcjSPARCFilter(s *xzDecBCJ, buf []byte) int {
- var i int
- var instr uint32
- for i = 0; i+4 <= len(buf); i += 4 {
- instr = getBE32(buf[i:])
- if instr>>22 == 0x100 || instr>>22 == 0x1ff {
- instr <<= 2
- instr -= uint32(s.pos + i)
- instr >>= 2
- instr = (0x40000000 - instr&0x400000) |
- 0x40000000 | (instr & 0x3FFFFF)
- putBE32(instr, buf[i:])
- }
- }
- return i
- }
-
- /*
- * Apply the selected BCJ filter. Update *pos and s.pos to match the amount
- * of data that got filtered.
- */
- func bcjApply(s *xzDecBCJ, buf []byte, pos *int) {
- var filtered int
- buf = buf[*pos:]
- switch s.typ {
- case idBCJX86:
- filtered = bcjX86Filter(s, buf)
- case idBCJPowerPC:
- filtered = bcjPowerPCFilter(s, buf)
- case idBCJIA64:
- filtered = bcjIA64Filter(s, buf)
- case idBCJARM:
- filtered = bcjARMFilter(s, buf)
- case idBCJARMThumb:
- filtered = bcjARMThumbFilter(s, buf)
- case idBCJSPARC:
- filtered = bcjSPARCFilter(s, buf)
- default:
- /* Never reached */
- }
- *pos += filtered
- s.pos += filtered
- }
-
- /*
- * Flush pending filtered data from temp to the output buffer.
- * Move the remaining mixture of possibly filtered and unfiltered
- * data to the beginning of temp.
- */
- func bcjFlush(s *xzDecBCJ, b *xzBuf) {
- var copySize int
- copySize = len(b.out) - b.outPos
- if copySize > s.temp.filtered {
- copySize = s.temp.filtered
- }
- copy(b.out[b.outPos:], s.temp.buf[:copySize])
- b.outPos += copySize
- s.temp.filtered -= copySize
- copy(s.temp.buf, s.temp.buf[copySize:])
- s.temp.buf = s.temp.buf[:len(s.temp.buf)-copySize]
- }
-
- /*
- * Decode raw stream which has a BCJ filter as the first filter.
- *
- * The BCJ filter functions are primitive in sense that they process the
- * data in chunks of 1-16 bytes. To hide this issue, this function does
- * some buffering.
- */
- func xzDecBCJRun(s *xzDecBCJ, b *xzBuf, chain func(*xzBuf) xzRet) xzRet {
- var outStart int
- /*
- * Flush pending already filtered data to the output buffer. Return
- * immediately if we couldn't flush everything, or if the next
- * filter in the chain had already returned xzStreamEnd.
- */
- if s.temp.filtered > 0 {
- bcjFlush(s, b)
- if s.temp.filtered > 0 {
- return xzOK
- }
- if s.ret == xzStreamEnd {
- return xzStreamEnd
- }
- }
- /*
- * If we have more output space than what is currently pending in
- * temp, copy the unfiltered data from temp to the output buffer
- * and try to fill the output buffer by decoding more data from the
- * next filter in the chain. Apply the BCJ filter on the new data
- * in the output buffer. If everything cannot be filtered, copy it
- * to temp and rewind the output buffer position accordingly.
- *
- * This needs to be always run when len(temp.buf) == 0 to handle a special
- * case where the output buffer is full and the next filter has no
- * more output coming but hasn't returned xzStreamEnd yet.
- */
- if len(s.temp.buf) < len(b.out)-b.outPos || len(s.temp.buf) == 0 {
- outStart = b.outPos
- copy(b.out[b.outPos:], s.temp.buf)
- b.outPos += len(s.temp.buf)
- s.ret = chain(b)
- if s.ret != xzStreamEnd && s.ret != xzOK {
- return s.ret
- }
- bcjApply(s, b.out[:b.outPos], &outStart)
- /*
- * As an exception, if the next filter returned xzStreamEnd,
- * we can do that too, since the last few bytes that remain
- * unfiltered are meant to remain unfiltered.
- */
- if s.ret == xzStreamEnd {
- return xzStreamEnd
- }
- s.temp.buf = s.temp.bufArray[:b.outPos-outStart]
- b.outPos -= len(s.temp.buf)
- copy(s.temp.buf, b.out[b.outPos:])
- /*
- * If there wasn't enough input to the next filter to fill
- * the output buffer with unfiltered data, there's no point
- * to try decoding more data to temp.
- */
- if b.outPos+len(s.temp.buf) < len(b.out) {
- return xzOK
- }
- }
- /*
- * We have unfiltered data in temp. If the output buffer isn't full
- * yet, try to fill the temp buffer by decoding more data from the
- * next filter. Apply the BCJ filter on temp. Then we hopefully can
- * fill the actual output buffer by copying filtered data from temp.
- * A mix of filtered and unfiltered data may be left in temp; it will
- * be taken care on the next call to this function.
- */
- if b.outPos < len(b.out) {
- /* Make b.out temporarily point to s.temp. */
- s.out = b.out
- s.outPos = b.outPos
- b.out = s.temp.bufArray[:]
- b.outPos = len(s.temp.buf)
- s.ret = chain(b)
- s.temp.buf = s.temp.bufArray[:b.outPos]
- b.out = s.out
- b.outPos = s.outPos
- if s.ret != xzOK && s.ret != xzStreamEnd {
- return s.ret
- }
- bcjApply(s, s.temp.buf, &s.temp.filtered)
- /*
- * If the next filter returned xzStreamEnd, we mark that
- * everything is filtered, since the last unfiltered bytes
- * of the stream are meant to be left as is.
- */
- if s.ret == xzStreamEnd {
- s.temp.filtered = len(s.temp.buf)
- }
- bcjFlush(s, b)
- if s.temp.filtered > 0 {
- return xzOK
- }
- }
- return s.ret
- }
-
- /*
- * Allocate memory for BCJ decoders. xzDecBCJReset must be used before
- * calling xzDecBCJRun.
- */
- func xzDecBCJCreate() *xzDecBCJ {
- return new(xzDecBCJ)
- }
-
- /*
- * Decode the Filter ID of a BCJ filter and check the start offset is
- * valid. Returns xzOK if the given Filter ID and offset is
- * supported. Otherwise xzOptionsError is returned.
- */
- func xzDecBCJReset(s *xzDecBCJ, id xzFilterID, offset int) xzRet {
- switch id {
- case idBCJX86:
- case idBCJPowerPC:
- case idBCJIA64:
- case idBCJARM:
- case idBCJARMThumb:
- case idBCJSPARC:
- default:
- /* Unsupported Filter ID */
- return xzOptionsError
- }
- // check offset is a multiple of alignment
- switch id {
- case idBCJPowerPC, idBCJARM, idBCJSPARC:
- if offset%4 != 0 {
- return xzOptionsError
- }
- case idBCJIA64:
- if offset%16 != 0 {
- return xzOptionsError
- }
- case idBCJARMThumb:
- if offset%2 != 0 {
- return xzOptionsError
- }
- }
- s.typ = id
- s.ret = xzOK
- s.pos = offset
- s.x86PrevMask = 0
- s.temp.filtered = 0
- s.temp.buf = nil
- return xzOK
- }
|