1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000 |
- // Copyright 2009 The Go Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
-
- // Package flate implements the DEFLATE compressed data format, described in
- // RFC 1951. The gzip and zlib packages implement access to DEFLATE-based file
- // formats.
- package flate
-
- import (
- "bufio"
- "fmt"
- "io"
- "math/bits"
- "strconv"
- "sync"
- )
-
- const (
- maxCodeLen = 16 // max length of Huffman code
- maxCodeLenMask = 15 // mask for max length of Huffman code
- // The next three numbers come from the RFC section 3.2.7, with the
- // additional proviso in section 3.2.5 which implies that distance codes
- // 30 and 31 should never occur in compressed data.
- maxNumLit = 286
- maxNumDist = 30
- numCodes = 19 // number of codes in Huffman meta-code
-
- debugDecode = false
- )
-
- // Initialize the fixedHuffmanDecoder only once upon first use.
- var fixedOnce sync.Once
- var fixedHuffmanDecoder huffmanDecoder
-
- // A CorruptInputError reports the presence of corrupt input at a given offset.
- type CorruptInputError int64
-
- func (e CorruptInputError) Error() string {
- return "flate: corrupt input before offset " + strconv.FormatInt(int64(e), 10)
- }
-
- // An InternalError reports an error in the flate code itself.
- type InternalError string
-
- func (e InternalError) Error() string { return "flate: internal error: " + string(e) }
-
- // A ReadError reports an error encountered while reading input.
- //
- // Deprecated: No longer returned.
- type ReadError struct {
- Offset int64 // byte offset where error occurred
- Err error // error returned by underlying Read
- }
-
- func (e *ReadError) Error() string {
- return "flate: read error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error()
- }
-
- // A WriteError reports an error encountered while writing output.
- //
- // Deprecated: No longer returned.
- type WriteError struct {
- Offset int64 // byte offset where error occurred
- Err error // error returned by underlying Write
- }
-
- func (e *WriteError) Error() string {
- return "flate: write error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error()
- }
-
- // Resetter resets a ReadCloser returned by NewReader or NewReaderDict to
- // to switch to a new underlying Reader. This permits reusing a ReadCloser
- // instead of allocating a new one.
- type Resetter interface {
- // Reset discards any buffered data and resets the Resetter as if it was
- // newly initialized with the given reader.
- Reset(r io.Reader, dict []byte) error
- }
-
- // The data structure for decoding Huffman tables is based on that of
- // zlib. There is a lookup table of a fixed bit width (huffmanChunkBits),
- // For codes smaller than the table width, there are multiple entries
- // (each combination of trailing bits has the same value). For codes
- // larger than the table width, the table contains a link to an overflow
- // table. The width of each entry in the link table is the maximum code
- // size minus the chunk width.
- //
- // Note that you can do a lookup in the table even without all bits
- // filled. Since the extra bits are zero, and the DEFLATE Huffman codes
- // have the property that shorter codes come before longer ones, the
- // bit length estimate in the result is a lower bound on the actual
- // number of bits.
- //
- // See the following:
- // http://www.gzip.org/algorithm.txt
-
- // chunk & 15 is number of bits
- // chunk >> 4 is value, including table link
-
- const (
- huffmanChunkBits = 9
- huffmanNumChunks = 1 << huffmanChunkBits
- huffmanCountMask = 15
- huffmanValueShift = 4
- )
-
- type huffmanDecoder struct {
- maxRead int // the maximum number of bits we can read and not overread
- chunks *[huffmanNumChunks]uint16 // chunks as described above
- links [][]uint16 // overflow links
- linkMask uint32 // mask the width of the link table
- }
-
- // Initialize Huffman decoding tables from array of code lengths.
- // Following this function, h is guaranteed to be initialized into a complete
- // tree (i.e., neither over-subscribed nor under-subscribed). The exception is a
- // degenerate case where the tree has only a single symbol with length 1. Empty
- // trees are permitted.
- func (h *huffmanDecoder) init(lengths []int) bool {
- // Sanity enables additional runtime tests during Huffman
- // table construction. It's intended to be used during
- // development to supplement the currently ad-hoc unit tests.
- const sanity = false
-
- if h.chunks == nil {
- h.chunks = &[huffmanNumChunks]uint16{}
- }
- if h.maxRead != 0 {
- *h = huffmanDecoder{chunks: h.chunks, links: h.links}
- }
-
- // Count number of codes of each length,
- // compute maxRead and max length.
- var count [maxCodeLen]int
- var min, max int
- for _, n := range lengths {
- if n == 0 {
- continue
- }
- if min == 0 || n < min {
- min = n
- }
- if n > max {
- max = n
- }
- count[n&maxCodeLenMask]++
- }
-
- // Empty tree. The decompressor.huffSym function will fail later if the tree
- // is used. Technically, an empty tree is only valid for the HDIST tree and
- // not the HCLEN and HLIT tree. However, a stream with an empty HCLEN tree
- // is guaranteed to fail since it will attempt to use the tree to decode the
- // codes for the HLIT and HDIST trees. Similarly, an empty HLIT tree is
- // guaranteed to fail later since the compressed data section must be
- // composed of at least one symbol (the end-of-block marker).
- if max == 0 {
- return true
- }
-
- code := 0
- var nextcode [maxCodeLen]int
- for i := min; i <= max; i++ {
- code <<= 1
- nextcode[i&maxCodeLenMask] = code
- code += count[i&maxCodeLenMask]
- }
-
- // Check that the coding is complete (i.e., that we've
- // assigned all 2-to-the-max possible bit sequences).
- // Exception: To be compatible with zlib, we also need to
- // accept degenerate single-code codings. See also
- // TestDegenerateHuffmanCoding.
- if code != 1<<uint(max) && !(code == 1 && max == 1) {
- if debugDecode {
- fmt.Println("coding failed, code, max:", code, max, code == 1<<uint(max), code == 1 && max == 1, "(one should be true)")
- }
- return false
- }
-
- h.maxRead = min
- chunks := h.chunks[:]
- for i := range chunks {
- chunks[i] = 0
- }
-
- if max > huffmanChunkBits {
- numLinks := 1 << (uint(max) - huffmanChunkBits)
- h.linkMask = uint32(numLinks - 1)
-
- // create link tables
- link := nextcode[huffmanChunkBits+1] >> 1
- if cap(h.links) < huffmanNumChunks-link {
- h.links = make([][]uint16, huffmanNumChunks-link)
- } else {
- h.links = h.links[:huffmanNumChunks-link]
- }
- for j := uint(link); j < huffmanNumChunks; j++ {
- reverse := int(bits.Reverse16(uint16(j)))
- reverse >>= uint(16 - huffmanChunkBits)
- off := j - uint(link)
- if sanity && h.chunks[reverse] != 0 {
- panic("impossible: overwriting existing chunk")
- }
- h.chunks[reverse] = uint16(off<<huffmanValueShift | (huffmanChunkBits + 1))
- if cap(h.links[off]) < numLinks {
- h.links[off] = make([]uint16, numLinks)
- } else {
- links := h.links[off][:0]
- h.links[off] = links[:numLinks]
- }
- }
- } else {
- h.links = h.links[:0]
- }
-
- for i, n := range lengths {
- if n == 0 {
- continue
- }
- code := nextcode[n]
- nextcode[n]++
- chunk := uint16(i<<huffmanValueShift | n)
- reverse := int(bits.Reverse16(uint16(code)))
- reverse >>= uint(16 - n)
- if n <= huffmanChunkBits {
- for off := reverse; off < len(h.chunks); off += 1 << uint(n) {
- // We should never need to overwrite
- // an existing chunk. Also, 0 is
- // never a valid chunk, because the
- // lower 4 "count" bits should be
- // between 1 and 15.
- if sanity && h.chunks[off] != 0 {
- panic("impossible: overwriting existing chunk")
- }
- h.chunks[off] = chunk
- }
- } else {
- j := reverse & (huffmanNumChunks - 1)
- if sanity && h.chunks[j]&huffmanCountMask != huffmanChunkBits+1 {
- // Longer codes should have been
- // associated with a link table above.
- panic("impossible: not an indirect chunk")
- }
- value := h.chunks[j] >> huffmanValueShift
- linktab := h.links[value]
- reverse >>= huffmanChunkBits
- for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) {
- if sanity && linktab[off] != 0 {
- panic("impossible: overwriting existing chunk")
- }
- linktab[off] = chunk
- }
- }
- }
-
- if sanity {
- // Above we've sanity checked that we never overwrote
- // an existing entry. Here we additionally check that
- // we filled the tables completely.
- for i, chunk := range h.chunks {
- if chunk == 0 {
- // As an exception, in the degenerate
- // single-code case, we allow odd
- // chunks to be missing.
- if code == 1 && i%2 == 1 {
- continue
- }
- panic("impossible: missing chunk")
- }
- }
- for _, linktab := range h.links {
- for _, chunk := range linktab {
- if chunk == 0 {
- panic("impossible: missing chunk")
- }
- }
- }
- }
-
- return true
- }
-
- // The actual read interface needed by NewReader.
- // If the passed in io.Reader does not also have ReadByte,
- // the NewReader will introduce its own buffering.
- type Reader interface {
- io.Reader
- io.ByteReader
- }
-
- // Decompress state.
- type decompressor struct {
- // Input source.
- r Reader
- roffset int64
-
- // Input bits, in top of b.
- b uint32
- nb uint
-
- // Huffman decoders for literal/length, distance.
- h1, h2 huffmanDecoder
-
- // Length arrays used to define Huffman codes.
- bits *[maxNumLit + maxNumDist]int
- codebits *[numCodes]int
-
- // Output history, buffer.
- dict dictDecoder
-
- // Temporary buffer (avoids repeated allocation).
- buf [4]byte
-
- // Next step in the decompression,
- // and decompression state.
- step func(*decompressor)
- stepState int
- final bool
- err error
- toRead []byte
- hl, hd *huffmanDecoder
- copyLen int
- copyDist int
- }
-
- func (f *decompressor) nextBlock() {
- for f.nb < 1+2 {
- if f.err = f.moreBits(); f.err != nil {
- return
- }
- }
- f.final = f.b&1 == 1
- f.b >>= 1
- typ := f.b & 3
- f.b >>= 2
- f.nb -= 1 + 2
- switch typ {
- case 0:
- f.dataBlock()
- case 1:
- // compressed, fixed Huffman tables
- f.hl = &fixedHuffmanDecoder
- f.hd = nil
- f.huffmanBlockDecoder()()
- case 2:
- // compressed, dynamic Huffman tables
- if f.err = f.readHuffman(); f.err != nil {
- break
- }
- f.hl = &f.h1
- f.hd = &f.h2
- f.huffmanBlockDecoder()()
- default:
- // 3 is reserved.
- if debugDecode {
- fmt.Println("reserved data block encountered")
- }
- f.err = CorruptInputError(f.roffset)
- }
- }
-
- func (f *decompressor) Read(b []byte) (int, error) {
- for {
- if len(f.toRead) > 0 {
- n := copy(b, f.toRead)
- f.toRead = f.toRead[n:]
- if len(f.toRead) == 0 {
- return n, f.err
- }
- return n, nil
- }
- if f.err != nil {
- return 0, f.err
- }
- f.step(f)
- if f.err != nil && len(f.toRead) == 0 {
- f.toRead = f.dict.readFlush() // Flush what's left in case of error
- }
- }
- }
-
- // Support the io.WriteTo interface for io.Copy and friends.
- func (f *decompressor) WriteTo(w io.Writer) (int64, error) {
- total := int64(0)
- flushed := false
- for {
- if len(f.toRead) > 0 {
- n, err := w.Write(f.toRead)
- total += int64(n)
- if err != nil {
- f.err = err
- return total, err
- }
- if n != len(f.toRead) {
- return total, io.ErrShortWrite
- }
- f.toRead = f.toRead[:0]
- }
- if f.err != nil && flushed {
- if f.err == io.EOF {
- return total, nil
- }
- return total, f.err
- }
- if f.err == nil {
- f.step(f)
- }
- if len(f.toRead) == 0 && f.err != nil && !flushed {
- f.toRead = f.dict.readFlush() // Flush what's left in case of error
- flushed = true
- }
- }
- }
-
- func (f *decompressor) Close() error {
- if f.err == io.EOF {
- return nil
- }
- return f.err
- }
-
- // RFC 1951 section 3.2.7.
- // Compression with dynamic Huffman codes
-
- var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
-
- func (f *decompressor) readHuffman() error {
- // HLIT[5], HDIST[5], HCLEN[4].
- for f.nb < 5+5+4 {
- if err := f.moreBits(); err != nil {
- return err
- }
- }
- nlit := int(f.b&0x1F) + 257
- if nlit > maxNumLit {
- if debugDecode {
- fmt.Println("nlit > maxNumLit", nlit)
- }
- return CorruptInputError(f.roffset)
- }
- f.b >>= 5
- ndist := int(f.b&0x1F) + 1
- if ndist > maxNumDist {
- if debugDecode {
- fmt.Println("ndist > maxNumDist", ndist)
- }
- return CorruptInputError(f.roffset)
- }
- f.b >>= 5
- nclen := int(f.b&0xF) + 4
- // numCodes is 19, so nclen is always valid.
- f.b >>= 4
- f.nb -= 5 + 5 + 4
-
- // (HCLEN+4)*3 bits: code lengths in the magic codeOrder order.
- for i := 0; i < nclen; i++ {
- for f.nb < 3 {
- if err := f.moreBits(); err != nil {
- return err
- }
- }
- f.codebits[codeOrder[i]] = int(f.b & 0x7)
- f.b >>= 3
- f.nb -= 3
- }
- for i := nclen; i < len(codeOrder); i++ {
- f.codebits[codeOrder[i]] = 0
- }
- if !f.h1.init(f.codebits[0:]) {
- if debugDecode {
- fmt.Println("init codebits failed")
- }
- return CorruptInputError(f.roffset)
- }
-
- // HLIT + 257 code lengths, HDIST + 1 code lengths,
- // using the code length Huffman code.
- for i, n := 0, nlit+ndist; i < n; {
- x, err := f.huffSym(&f.h1)
- if err != nil {
- return err
- }
- if x < 16 {
- // Actual length.
- f.bits[i] = x
- i++
- continue
- }
- // Repeat previous length or zero.
- var rep int
- var nb uint
- var b int
- switch x {
- default:
- return InternalError("unexpected length code")
- case 16:
- rep = 3
- nb = 2
- if i == 0 {
- if debugDecode {
- fmt.Println("i==0")
- }
- return CorruptInputError(f.roffset)
- }
- b = f.bits[i-1]
- case 17:
- rep = 3
- nb = 3
- b = 0
- case 18:
- rep = 11
- nb = 7
- b = 0
- }
- for f.nb < nb {
- if err := f.moreBits(); err != nil {
- if debugDecode {
- fmt.Println("morebits:", err)
- }
- return err
- }
- }
- rep += int(f.b & uint32(1<<nb-1))
- f.b >>= nb
- f.nb -= nb
- if i+rep > n {
- if debugDecode {
- fmt.Println("i+rep > n", i, rep, n)
- }
- return CorruptInputError(f.roffset)
- }
- for j := 0; j < rep; j++ {
- f.bits[i] = b
- i++
- }
- }
-
- if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) {
- if debugDecode {
- fmt.Println("init2 failed")
- }
- return CorruptInputError(f.roffset)
- }
-
- // As an optimization, we can initialize the maxRead bits to read at a time
- // for the HLIT tree to the length of the EOB marker since we know that
- // every block must terminate with one. This preserves the property that
- // we never read any extra bytes after the end of the DEFLATE stream.
- if f.h1.maxRead < f.bits[endBlockMarker] {
- f.h1.maxRead = f.bits[endBlockMarker]
- }
- if !f.final {
- // If not the final block, the smallest block possible is
- // a predefined table, BTYPE=01, with a single EOB marker.
- // This will take up 3 + 7 bits.
- f.h1.maxRead += 10
- }
-
- return nil
- }
-
- // Decode a single Huffman block from f.
- // hl and hd are the Huffman states for the lit/length values
- // and the distance values, respectively. If hd == nil, using the
- // fixed distance encoding associated with fixed Huffman blocks.
- func (f *decompressor) huffmanBlockGeneric() {
- const (
- stateInit = iota // Zero value must be stateInit
- stateDict
- )
-
- switch f.stepState {
- case stateInit:
- goto readLiteral
- case stateDict:
- goto copyHistory
- }
-
- readLiteral:
- // Read literal and/or (length, distance) according to RFC section 3.2.3.
- {
- var v int
- {
- // Inlined v, err := f.huffSym(f.hl)
- // Since a huffmanDecoder can be empty or be composed of a degenerate tree
- // with single element, huffSym must error on these two edge cases. In both
- // cases, the chunks slice will be 0 for the invalid sequence, leading it
- // satisfy the n == 0 check below.
- n := uint(f.hl.maxRead)
- // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
- // but is smart enough to keep local variables in registers, so use nb and b,
- // inline call to moreBits and reassign b,nb back to f on return.
- nb, b := f.nb, f.b
- for {
- for nb < n {
- c, err := f.r.ReadByte()
- if err != nil {
- f.b = b
- f.nb = nb
- f.err = noEOF(err)
- return
- }
- f.roffset++
- b |= uint32(c) << (nb & 31)
- nb += 8
- }
- chunk := f.hl.chunks[b&(huffmanNumChunks-1)]
- n = uint(chunk & huffmanCountMask)
- if n > huffmanChunkBits {
- chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask]
- n = uint(chunk & huffmanCountMask)
- }
- if n <= nb {
- if n == 0 {
- f.b = b
- f.nb = nb
- if debugDecode {
- fmt.Println("huffsym: n==0")
- }
- f.err = CorruptInputError(f.roffset)
- return
- }
- f.b = b >> (n & 31)
- f.nb = nb - n
- v = int(chunk >> huffmanValueShift)
- break
- }
- }
- }
-
- var n uint // number of bits extra
- var length int
- var err error
- switch {
- case v < 256:
- f.dict.writeByte(byte(v))
- if f.dict.availWrite() == 0 {
- f.toRead = f.dict.readFlush()
- f.step = (*decompressor).huffmanBlockGeneric
- f.stepState = stateInit
- return
- }
- goto readLiteral
- case v == 256:
- f.finishBlock()
- return
- // otherwise, reference to older data
- case v < 265:
- length = v - (257 - 3)
- n = 0
- case v < 269:
- length = v*2 - (265*2 - 11)
- n = 1
- case v < 273:
- length = v*4 - (269*4 - 19)
- n = 2
- case v < 277:
- length = v*8 - (273*8 - 35)
- n = 3
- case v < 281:
- length = v*16 - (277*16 - 67)
- n = 4
- case v < 285:
- length = v*32 - (281*32 - 131)
- n = 5
- case v < maxNumLit:
- length = 258
- n = 0
- default:
- if debugDecode {
- fmt.Println(v, ">= maxNumLit")
- }
- f.err = CorruptInputError(f.roffset)
- return
- }
- if n > 0 {
- for f.nb < n {
- if err = f.moreBits(); err != nil {
- if debugDecode {
- fmt.Println("morebits n>0:", err)
- }
- f.err = err
- return
- }
- }
- length += int(f.b & uint32(1<<n-1))
- f.b >>= n
- f.nb -= n
- }
-
- var dist int
- if f.hd == nil {
- for f.nb < 5 {
- if err = f.moreBits(); err != nil {
- if debugDecode {
- fmt.Println("morebits f.nb<5:", err)
- }
- f.err = err
- return
- }
- }
- dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3)))
- f.b >>= 5
- f.nb -= 5
- } else {
- if dist, err = f.huffSym(f.hd); err != nil {
- if debugDecode {
- fmt.Println("huffsym:", err)
- }
- f.err = err
- return
- }
- }
-
- switch {
- case dist < 4:
- dist++
- case dist < maxNumDist:
- nb := uint(dist-2) >> 1
- // have 1 bit in bottom of dist, need nb more.
- extra := (dist & 1) << nb
- for f.nb < nb {
- if err = f.moreBits(); err != nil {
- if debugDecode {
- fmt.Println("morebits f.nb<nb:", err)
- }
- f.err = err
- return
- }
- }
- extra |= int(f.b & uint32(1<<nb-1))
- f.b >>= nb
- f.nb -= nb
- dist = 1<<(nb+1) + 1 + extra
- default:
- if debugDecode {
- fmt.Println("dist too big:", dist, maxNumDist)
- }
- f.err = CorruptInputError(f.roffset)
- return
- }
-
- // No check on length; encoding can be prescient.
- if dist > f.dict.histSize() {
- if debugDecode {
- fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize())
- }
- f.err = CorruptInputError(f.roffset)
- return
- }
-
- f.copyLen, f.copyDist = length, dist
- goto copyHistory
- }
-
- copyHistory:
- // Perform a backwards copy according to RFC section 3.2.3.
- {
- cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen)
- if cnt == 0 {
- cnt = f.dict.writeCopy(f.copyDist, f.copyLen)
- }
- f.copyLen -= cnt
-
- if f.dict.availWrite() == 0 || f.copyLen > 0 {
- f.toRead = f.dict.readFlush()
- f.step = (*decompressor).huffmanBlockGeneric // We need to continue this work
- f.stepState = stateDict
- return
- }
- goto readLiteral
- }
- }
-
- // Copy a single uncompressed data block from input to output.
- func (f *decompressor) dataBlock() {
- // Uncompressed.
- // Discard current half-byte.
- left := (f.nb) & 7
- f.nb -= left
- f.b >>= left
-
- offBytes := f.nb >> 3
- // Unfilled values will be overwritten.
- f.buf[0] = uint8(f.b)
- f.buf[1] = uint8(f.b >> 8)
- f.buf[2] = uint8(f.b >> 16)
- f.buf[3] = uint8(f.b >> 24)
-
- f.roffset += int64(offBytes)
- f.nb, f.b = 0, 0
-
- // Length then ones-complement of length.
- nr, err := io.ReadFull(f.r, f.buf[offBytes:4])
- f.roffset += int64(nr)
- if err != nil {
- f.err = noEOF(err)
- return
- }
- n := uint16(f.buf[0]) | uint16(f.buf[1])<<8
- nn := uint16(f.buf[2]) | uint16(f.buf[3])<<8
- if nn != ^n {
- if debugDecode {
- ncomp := ^n
- fmt.Println("uint16(nn) != uint16(^n)", nn, ncomp)
- }
- f.err = CorruptInputError(f.roffset)
- return
- }
-
- if n == 0 {
- f.toRead = f.dict.readFlush()
- f.finishBlock()
- return
- }
-
- f.copyLen = int(n)
- f.copyData()
- }
-
- // copyData copies f.copyLen bytes from the underlying reader into f.hist.
- // It pauses for reads when f.hist is full.
- func (f *decompressor) copyData() {
- buf := f.dict.writeSlice()
- if len(buf) > f.copyLen {
- buf = buf[:f.copyLen]
- }
-
- cnt, err := io.ReadFull(f.r, buf)
- f.roffset += int64(cnt)
- f.copyLen -= cnt
- f.dict.writeMark(cnt)
- if err != nil {
- f.err = noEOF(err)
- return
- }
-
- if f.dict.availWrite() == 0 || f.copyLen > 0 {
- f.toRead = f.dict.readFlush()
- f.step = (*decompressor).copyData
- return
- }
- f.finishBlock()
- }
-
- func (f *decompressor) finishBlock() {
- if f.final {
- if f.dict.availRead() > 0 {
- f.toRead = f.dict.readFlush()
- }
- f.err = io.EOF
- }
- f.step = (*decompressor).nextBlock
- }
-
- // noEOF returns err, unless err == io.EOF, in which case it returns io.ErrUnexpectedEOF.
- func noEOF(e error) error {
- if e == io.EOF {
- return io.ErrUnexpectedEOF
- }
- return e
- }
-
- func (f *decompressor) moreBits() error {
- c, err := f.r.ReadByte()
- if err != nil {
- return noEOF(err)
- }
- f.roffset++
- f.b |= uint32(c) << f.nb
- f.nb += 8
- return nil
- }
-
- // Read the next Huffman-encoded symbol from f according to h.
- func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) {
- // Since a huffmanDecoder can be empty or be composed of a degenerate tree
- // with single element, huffSym must error on these two edge cases. In both
- // cases, the chunks slice will be 0 for the invalid sequence, leading it
- // satisfy the n == 0 check below.
- n := uint(h.maxRead)
- // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
- // but is smart enough to keep local variables in registers, so use nb and b,
- // inline call to moreBits and reassign b,nb back to f on return.
- nb, b := f.nb, f.b
- for {
- for nb < n {
- c, err := f.r.ReadByte()
- if err != nil {
- f.b = b
- f.nb = nb
- return 0, noEOF(err)
- }
- f.roffset++
- b |= uint32(c) << (nb & 31)
- nb += 8
- }
- chunk := h.chunks[b&(huffmanNumChunks-1)]
- n = uint(chunk & huffmanCountMask)
- if n > huffmanChunkBits {
- chunk = h.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&h.linkMask]
- n = uint(chunk & huffmanCountMask)
- }
- if n <= nb {
- if n == 0 {
- f.b = b
- f.nb = nb
- if debugDecode {
- fmt.Println("huffsym: n==0")
- }
- f.err = CorruptInputError(f.roffset)
- return 0, f.err
- }
- f.b = b >> (n & 31)
- f.nb = nb - n
- return int(chunk >> huffmanValueShift), nil
- }
- }
- }
-
- func makeReader(r io.Reader) Reader {
- if rr, ok := r.(Reader); ok {
- return rr
- }
- return bufio.NewReader(r)
- }
-
- func fixedHuffmanDecoderInit() {
- fixedOnce.Do(func() {
- // These come from the RFC section 3.2.6.
- var bits [288]int
- for i := 0; i < 144; i++ {
- bits[i] = 8
- }
- for i := 144; i < 256; i++ {
- bits[i] = 9
- }
- for i := 256; i < 280; i++ {
- bits[i] = 7
- }
- for i := 280; i < 288; i++ {
- bits[i] = 8
- }
- fixedHuffmanDecoder.init(bits[:])
- })
- }
-
- func (f *decompressor) Reset(r io.Reader, dict []byte) error {
- *f = decompressor{
- r: makeReader(r),
- bits: f.bits,
- codebits: f.codebits,
- h1: f.h1,
- h2: f.h2,
- dict: f.dict,
- step: (*decompressor).nextBlock,
- }
- f.dict.init(maxMatchOffset, dict)
- return nil
- }
-
- // NewReader returns a new ReadCloser that can be used
- // to read the uncompressed version of r.
- // If r does not also implement io.ByteReader,
- // the decompressor may read more data than necessary from r.
- // It is the caller's responsibility to call Close on the ReadCloser
- // when finished reading.
- //
- // The ReadCloser returned by NewReader also implements Resetter.
- func NewReader(r io.Reader) io.ReadCloser {
- fixedHuffmanDecoderInit()
-
- var f decompressor
- f.r = makeReader(r)
- f.bits = new([maxNumLit + maxNumDist]int)
- f.codebits = new([numCodes]int)
- f.step = (*decompressor).nextBlock
- f.dict.init(maxMatchOffset, nil)
- return &f
- }
-
- // NewReaderDict is like NewReader but initializes the reader
- // with a preset dictionary. The returned Reader behaves as if
- // the uncompressed data stream started with the given dictionary,
- // which has already been read. NewReaderDict is typically used
- // to read data compressed by NewWriterDict.
- //
- // The ReadCloser returned by NewReader also implements Resetter.
- func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser {
- fixedHuffmanDecoderInit()
-
- var f decompressor
- f.r = makeReader(r)
- f.bits = new([maxNumLit + maxNumDist]int)
- f.codebits = new([numCodes]int)
- f.step = (*decompressor).nextBlock
- f.dict.init(maxMatchOffset, dict)
- return &f
- }
|