Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

inflate.go 20KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846
  1. // Copyright 2009 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // Package flate implements the DEFLATE compressed data format, described in
  5. // RFC 1951. The gzip and zlib packages implement access to DEFLATE-based file
  6. // formats.
  7. package flate
  8. import (
  9. "bufio"
  10. "io"
  11. "strconv"
  12. "sync"
  13. )
  14. const (
  15. maxCodeLen = 16 // max length of Huffman code
  16. // The next three numbers come from the RFC section 3.2.7, with the
  17. // additional proviso in section 3.2.5 which implies that distance codes
  18. // 30 and 31 should never occur in compressed data.
  19. maxNumLit = 286
  20. maxNumDist = 30
  21. numCodes = 19 // number of codes in Huffman meta-code
  22. )
  23. // Initialize the fixedHuffmanDecoder only once upon first use.
  24. var fixedOnce sync.Once
  25. var fixedHuffmanDecoder huffmanDecoder
  26. // A CorruptInputError reports the presence of corrupt input at a given offset.
  27. type CorruptInputError int64
  28. func (e CorruptInputError) Error() string {
  29. return "flate: corrupt input before offset " + strconv.FormatInt(int64(e), 10)
  30. }
  31. // An InternalError reports an error in the flate code itself.
  32. type InternalError string
  33. func (e InternalError) Error() string { return "flate: internal error: " + string(e) }
  34. // A ReadError reports an error encountered while reading input.
  35. //
  36. // Deprecated: No longer returned.
  37. type ReadError struct {
  38. Offset int64 // byte offset where error occurred
  39. Err error // error returned by underlying Read
  40. }
  41. func (e *ReadError) Error() string {
  42. return "flate: read error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error()
  43. }
  44. // A WriteError reports an error encountered while writing output.
  45. //
  46. // Deprecated: No longer returned.
  47. type WriteError struct {
  48. Offset int64 // byte offset where error occurred
  49. Err error // error returned by underlying Write
  50. }
  51. func (e *WriteError) Error() string {
  52. return "flate: write error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error()
  53. }
  54. // Resetter resets a ReadCloser returned by NewReader or NewReaderDict to
  55. // to switch to a new underlying Reader. This permits reusing a ReadCloser
  56. // instead of allocating a new one.
  57. type Resetter interface {
  58. // Reset discards any buffered data and resets the Resetter as if it was
  59. // newly initialized with the given reader.
  60. Reset(r io.Reader, dict []byte) error
  61. }
  62. // The data structure for decoding Huffman tables is based on that of
  63. // zlib. There is a lookup table of a fixed bit width (huffmanChunkBits),
  64. // For codes smaller than the table width, there are multiple entries
  65. // (each combination of trailing bits has the same value). For codes
  66. // larger than the table width, the table contains a link to an overflow
  67. // table. The width of each entry in the link table is the maximum code
  68. // size minus the chunk width.
  69. //
  70. // Note that you can do a lookup in the table even without all bits
  71. // filled. Since the extra bits are zero, and the DEFLATE Huffman codes
  72. // have the property that shorter codes come before longer ones, the
  73. // bit length estimate in the result is a lower bound on the actual
  74. // number of bits.
  75. //
  76. // See the following:
  77. // http://www.gzip.org/algorithm.txt
  78. // chunk & 15 is number of bits
  79. // chunk >> 4 is value, including table link
  80. const (
  81. huffmanChunkBits = 9
  82. huffmanNumChunks = 1 << huffmanChunkBits
  83. huffmanCountMask = 15
  84. huffmanValueShift = 4
  85. )
  86. type huffmanDecoder struct {
  87. min int // the minimum code length
  88. chunks [huffmanNumChunks]uint32 // chunks as described above
  89. links [][]uint32 // overflow links
  90. linkMask uint32 // mask the width of the link table
  91. }
  92. // Initialize Huffman decoding tables from array of code lengths.
  93. // Following this function, h is guaranteed to be initialized into a complete
  94. // tree (i.e., neither over-subscribed nor under-subscribed). The exception is a
  95. // degenerate case where the tree has only a single symbol with length 1. Empty
  96. // trees are permitted.
  97. func (h *huffmanDecoder) init(bits []int) bool {
  98. // Sanity enables additional runtime tests during Huffman
  99. // table construction. It's intended to be used during
  100. // development to supplement the currently ad-hoc unit tests.
  101. const sanity = false
  102. if h.min != 0 {
  103. *h = huffmanDecoder{}
  104. }
  105. // Count number of codes of each length,
  106. // compute min and max length.
  107. var count [maxCodeLen]int
  108. var min, max int
  109. for _, n := range bits {
  110. if n == 0 {
  111. continue
  112. }
  113. if min == 0 || n < min {
  114. min = n
  115. }
  116. if n > max {
  117. max = n
  118. }
  119. count[n]++
  120. }
  121. // Empty tree. The decompressor.huffSym function will fail later if the tree
  122. // is used. Technically, an empty tree is only valid for the HDIST tree and
  123. // not the HCLEN and HLIT tree. However, a stream with an empty HCLEN tree
  124. // is guaranteed to fail since it will attempt to use the tree to decode the
  125. // codes for the HLIT and HDIST trees. Similarly, an empty HLIT tree is
  126. // guaranteed to fail later since the compressed data section must be
  127. // composed of at least one symbol (the end-of-block marker).
  128. if max == 0 {
  129. return true
  130. }
  131. code := 0
  132. var nextcode [maxCodeLen]int
  133. for i := min; i <= max; i++ {
  134. code <<= 1
  135. nextcode[i] = code
  136. code += count[i]
  137. }
  138. // Check that the coding is complete (i.e., that we've
  139. // assigned all 2-to-the-max possible bit sequences).
  140. // Exception: To be compatible with zlib, we also need to
  141. // accept degenerate single-code codings. See also
  142. // TestDegenerateHuffmanCoding.
  143. if code != 1<<uint(max) && !(code == 1 && max == 1) {
  144. return false
  145. }
  146. h.min = min
  147. if max > huffmanChunkBits {
  148. numLinks := 1 << (uint(max) - huffmanChunkBits)
  149. h.linkMask = uint32(numLinks - 1)
  150. // create link tables
  151. link := nextcode[huffmanChunkBits+1] >> 1
  152. h.links = make([][]uint32, huffmanNumChunks-link)
  153. for j := uint(link); j < huffmanNumChunks; j++ {
  154. reverse := int(reverseByte[j>>8]) | int(reverseByte[j&0xff])<<8
  155. reverse >>= uint(16 - huffmanChunkBits)
  156. off := j - uint(link)
  157. if sanity && h.chunks[reverse] != 0 {
  158. panic("impossible: overwriting existing chunk")
  159. }
  160. h.chunks[reverse] = uint32(off<<huffmanValueShift | (huffmanChunkBits + 1))
  161. h.links[off] = make([]uint32, numLinks)
  162. }
  163. }
  164. for i, n := range bits {
  165. if n == 0 {
  166. continue
  167. }
  168. code := nextcode[n]
  169. nextcode[n]++
  170. chunk := uint32(i<<huffmanValueShift | n)
  171. reverse := int(reverseByte[code>>8]) | int(reverseByte[code&0xff])<<8
  172. reverse >>= uint(16 - n)
  173. if n <= huffmanChunkBits {
  174. for off := reverse; off < len(h.chunks); off += 1 << uint(n) {
  175. // We should never need to overwrite
  176. // an existing chunk. Also, 0 is
  177. // never a valid chunk, because the
  178. // lower 4 "count" bits should be
  179. // between 1 and 15.
  180. if sanity && h.chunks[off] != 0 {
  181. panic("impossible: overwriting existing chunk")
  182. }
  183. h.chunks[off] = chunk
  184. }
  185. } else {
  186. j := reverse & (huffmanNumChunks - 1)
  187. if sanity && h.chunks[j]&huffmanCountMask != huffmanChunkBits+1 {
  188. // Longer codes should have been
  189. // associated with a link table above.
  190. panic("impossible: not an indirect chunk")
  191. }
  192. value := h.chunks[j] >> huffmanValueShift
  193. linktab := h.links[value]
  194. reverse >>= huffmanChunkBits
  195. for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) {
  196. if sanity && linktab[off] != 0 {
  197. panic("impossible: overwriting existing chunk")
  198. }
  199. linktab[off] = chunk
  200. }
  201. }
  202. }
  203. if sanity {
  204. // Above we've sanity checked that we never overwrote
  205. // an existing entry. Here we additionally check that
  206. // we filled the tables completely.
  207. for i, chunk := range h.chunks {
  208. if chunk == 0 {
  209. // As an exception, in the degenerate
  210. // single-code case, we allow odd
  211. // chunks to be missing.
  212. if code == 1 && i%2 == 1 {
  213. continue
  214. }
  215. panic("impossible: missing chunk")
  216. }
  217. }
  218. for _, linktab := range h.links {
  219. for _, chunk := range linktab {
  220. if chunk == 0 {
  221. panic("impossible: missing chunk")
  222. }
  223. }
  224. }
  225. }
  226. return true
  227. }
  228. // The actual read interface needed by NewReader.
  229. // If the passed in io.Reader does not also have ReadByte,
  230. // the NewReader will introduce its own buffering.
  231. type Reader interface {
  232. io.Reader
  233. io.ByteReader
  234. }
  235. // Decompress state.
  236. type decompressor struct {
  237. // Input source.
  238. r Reader
  239. roffset int64
  240. // Input bits, in top of b.
  241. b uint32
  242. nb uint
  243. // Huffman decoders for literal/length, distance.
  244. h1, h2 huffmanDecoder
  245. // Length arrays used to define Huffman codes.
  246. bits *[maxNumLit + maxNumDist]int
  247. codebits *[numCodes]int
  248. // Output history, buffer.
  249. dict dictDecoder
  250. // Temporary buffer (avoids repeated allocation).
  251. buf [4]byte
  252. // Next step in the decompression,
  253. // and decompression state.
  254. step func(*decompressor)
  255. stepState int
  256. final bool
  257. err error
  258. toRead []byte
  259. hl, hd *huffmanDecoder
  260. copyLen int
  261. copyDist int
  262. }
  263. func (f *decompressor) nextBlock() {
  264. for f.nb < 1+2 {
  265. if f.err = f.moreBits(); f.err != nil {
  266. return
  267. }
  268. }
  269. f.final = f.b&1 == 1
  270. f.b >>= 1
  271. typ := f.b & 3
  272. f.b >>= 2
  273. f.nb -= 1 + 2
  274. switch typ {
  275. case 0:
  276. f.dataBlock()
  277. case 1:
  278. // compressed, fixed Huffman tables
  279. f.hl = &fixedHuffmanDecoder
  280. f.hd = nil
  281. f.huffmanBlock()
  282. case 2:
  283. // compressed, dynamic Huffman tables
  284. if f.err = f.readHuffman(); f.err != nil {
  285. break
  286. }
  287. f.hl = &f.h1
  288. f.hd = &f.h2
  289. f.huffmanBlock()
  290. default:
  291. // 3 is reserved.
  292. f.err = CorruptInputError(f.roffset)
  293. }
  294. }
  295. func (f *decompressor) Read(b []byte) (int, error) {
  296. for {
  297. if len(f.toRead) > 0 {
  298. n := copy(b, f.toRead)
  299. f.toRead = f.toRead[n:]
  300. if len(f.toRead) == 0 {
  301. return n, f.err
  302. }
  303. return n, nil
  304. }
  305. if f.err != nil {
  306. return 0, f.err
  307. }
  308. f.step(f)
  309. if f.err != nil && len(f.toRead) == 0 {
  310. f.toRead = f.dict.readFlush() // Flush what's left in case of error
  311. }
  312. }
  313. }
  314. // Support the io.WriteTo interface for io.Copy and friends.
  315. func (f *decompressor) WriteTo(w io.Writer) (int64, error) {
  316. total := int64(0)
  317. flushed := false
  318. for {
  319. if len(f.toRead) > 0 {
  320. n, err := w.Write(f.toRead)
  321. total += int64(n)
  322. if err != nil {
  323. f.err = err
  324. return total, err
  325. }
  326. if n != len(f.toRead) {
  327. return total, io.ErrShortWrite
  328. }
  329. f.toRead = f.toRead[:0]
  330. }
  331. if f.err != nil && flushed {
  332. if f.err == io.EOF {
  333. return total, nil
  334. }
  335. return total, f.err
  336. }
  337. if f.err == nil {
  338. f.step(f)
  339. }
  340. if len(f.toRead) == 0 && f.err != nil && !flushed {
  341. f.toRead = f.dict.readFlush() // Flush what's left in case of error
  342. flushed = true
  343. }
  344. }
  345. }
  346. func (f *decompressor) Close() error {
  347. if f.err == io.EOF {
  348. return nil
  349. }
  350. return f.err
  351. }
  352. // RFC 1951 section 3.2.7.
  353. // Compression with dynamic Huffman codes
  354. var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
  355. func (f *decompressor) readHuffman() error {
  356. // HLIT[5], HDIST[5], HCLEN[4].
  357. for f.nb < 5+5+4 {
  358. if err := f.moreBits(); err != nil {
  359. return err
  360. }
  361. }
  362. nlit := int(f.b&0x1F) + 257
  363. if nlit > maxNumLit {
  364. return CorruptInputError(f.roffset)
  365. }
  366. f.b >>= 5
  367. ndist := int(f.b&0x1F) + 1
  368. if ndist > maxNumDist {
  369. return CorruptInputError(f.roffset)
  370. }
  371. f.b >>= 5
  372. nclen := int(f.b&0xF) + 4
  373. // numCodes is 19, so nclen is always valid.
  374. f.b >>= 4
  375. f.nb -= 5 + 5 + 4
  376. // (HCLEN+4)*3 bits: code lengths in the magic codeOrder order.
  377. for i := 0; i < nclen; i++ {
  378. for f.nb < 3 {
  379. if err := f.moreBits(); err != nil {
  380. return err
  381. }
  382. }
  383. f.codebits[codeOrder[i]] = int(f.b & 0x7)
  384. f.b >>= 3
  385. f.nb -= 3
  386. }
  387. for i := nclen; i < len(codeOrder); i++ {
  388. f.codebits[codeOrder[i]] = 0
  389. }
  390. if !f.h1.init(f.codebits[0:]) {
  391. return CorruptInputError(f.roffset)
  392. }
  393. // HLIT + 257 code lengths, HDIST + 1 code lengths,
  394. // using the code length Huffman code.
  395. for i, n := 0, nlit+ndist; i < n; {
  396. x, err := f.huffSym(&f.h1)
  397. if err != nil {
  398. return err
  399. }
  400. if x < 16 {
  401. // Actual length.
  402. f.bits[i] = x
  403. i++
  404. continue
  405. }
  406. // Repeat previous length or zero.
  407. var rep int
  408. var nb uint
  409. var b int
  410. switch x {
  411. default:
  412. return InternalError("unexpected length code")
  413. case 16:
  414. rep = 3
  415. nb = 2
  416. if i == 0 {
  417. return CorruptInputError(f.roffset)
  418. }
  419. b = f.bits[i-1]
  420. case 17:
  421. rep = 3
  422. nb = 3
  423. b = 0
  424. case 18:
  425. rep = 11
  426. nb = 7
  427. b = 0
  428. }
  429. for f.nb < nb {
  430. if err := f.moreBits(); err != nil {
  431. return err
  432. }
  433. }
  434. rep += int(f.b & uint32(1<<nb-1))
  435. f.b >>= nb
  436. f.nb -= nb
  437. if i+rep > n {
  438. return CorruptInputError(f.roffset)
  439. }
  440. for j := 0; j < rep; j++ {
  441. f.bits[i] = b
  442. i++
  443. }
  444. }
  445. if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) {
  446. return CorruptInputError(f.roffset)
  447. }
  448. // As an optimization, we can initialize the min bits to read at a time
  449. // for the HLIT tree to the length of the EOB marker since we know that
  450. // every block must terminate with one. This preserves the property that
  451. // we never read any extra bytes after the end of the DEFLATE stream.
  452. if f.h1.min < f.bits[endBlockMarker] {
  453. f.h1.min = f.bits[endBlockMarker]
  454. }
  455. return nil
  456. }
  457. // Decode a single Huffman block from f.
  458. // hl and hd are the Huffman states for the lit/length values
  459. // and the distance values, respectively. If hd == nil, using the
  460. // fixed distance encoding associated with fixed Huffman blocks.
  461. func (f *decompressor) huffmanBlock() {
  462. const (
  463. stateInit = iota // Zero value must be stateInit
  464. stateDict
  465. )
  466. switch f.stepState {
  467. case stateInit:
  468. goto readLiteral
  469. case stateDict:
  470. goto copyHistory
  471. }
  472. readLiteral:
  473. // Read literal and/or (length, distance) according to RFC section 3.2.3.
  474. {
  475. v, err := f.huffSym(f.hl)
  476. if err != nil {
  477. f.err = err
  478. return
  479. }
  480. var n uint // number of bits extra
  481. var length int
  482. switch {
  483. case v < 256:
  484. f.dict.writeByte(byte(v))
  485. if f.dict.availWrite() == 0 {
  486. f.toRead = f.dict.readFlush()
  487. f.step = (*decompressor).huffmanBlock
  488. f.stepState = stateInit
  489. return
  490. }
  491. goto readLiteral
  492. case v == 256:
  493. f.finishBlock()
  494. return
  495. // otherwise, reference to older data
  496. case v < 265:
  497. length = v - (257 - 3)
  498. n = 0
  499. case v < 269:
  500. length = v*2 - (265*2 - 11)
  501. n = 1
  502. case v < 273:
  503. length = v*4 - (269*4 - 19)
  504. n = 2
  505. case v < 277:
  506. length = v*8 - (273*8 - 35)
  507. n = 3
  508. case v < 281:
  509. length = v*16 - (277*16 - 67)
  510. n = 4
  511. case v < 285:
  512. length = v*32 - (281*32 - 131)
  513. n = 5
  514. case v < maxNumLit:
  515. length = 258
  516. n = 0
  517. default:
  518. f.err = CorruptInputError(f.roffset)
  519. return
  520. }
  521. if n > 0 {
  522. for f.nb < n {
  523. if err = f.moreBits(); err != nil {
  524. f.err = err
  525. return
  526. }
  527. }
  528. length += int(f.b & uint32(1<<n-1))
  529. f.b >>= n
  530. f.nb -= n
  531. }
  532. var dist int
  533. if f.hd == nil {
  534. for f.nb < 5 {
  535. if err = f.moreBits(); err != nil {
  536. f.err = err
  537. return
  538. }
  539. }
  540. dist = int(reverseByte[(f.b&0x1F)<<3])
  541. f.b >>= 5
  542. f.nb -= 5
  543. } else {
  544. if dist, err = f.huffSym(f.hd); err != nil {
  545. f.err = err
  546. return
  547. }
  548. }
  549. switch {
  550. case dist < 4:
  551. dist++
  552. case dist < maxNumDist:
  553. nb := uint(dist-2) >> 1
  554. // have 1 bit in bottom of dist, need nb more.
  555. extra := (dist & 1) << nb
  556. for f.nb < nb {
  557. if err = f.moreBits(); err != nil {
  558. f.err = err
  559. return
  560. }
  561. }
  562. extra |= int(f.b & uint32(1<<nb-1))
  563. f.b >>= nb
  564. f.nb -= nb
  565. dist = 1<<(nb+1) + 1 + extra
  566. default:
  567. f.err = CorruptInputError(f.roffset)
  568. return
  569. }
  570. // No check on length; encoding can be prescient.
  571. if dist > f.dict.histSize() {
  572. f.err = CorruptInputError(f.roffset)
  573. return
  574. }
  575. f.copyLen, f.copyDist = length, dist
  576. goto copyHistory
  577. }
  578. copyHistory:
  579. // Perform a backwards copy according to RFC section 3.2.3.
  580. {
  581. cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen)
  582. if cnt == 0 {
  583. cnt = f.dict.writeCopy(f.copyDist, f.copyLen)
  584. }
  585. f.copyLen -= cnt
  586. if f.dict.availWrite() == 0 || f.copyLen > 0 {
  587. f.toRead = f.dict.readFlush()
  588. f.step = (*decompressor).huffmanBlock // We need to continue this work
  589. f.stepState = stateDict
  590. return
  591. }
  592. goto readLiteral
  593. }
  594. }
  595. // Copy a single uncompressed data block from input to output.
  596. func (f *decompressor) dataBlock() {
  597. // Uncompressed.
  598. // Discard current half-byte.
  599. f.nb = 0
  600. f.b = 0
  601. // Length then ones-complement of length.
  602. nr, err := io.ReadFull(f.r, f.buf[0:4])
  603. f.roffset += int64(nr)
  604. if err != nil {
  605. if err == io.EOF {
  606. err = io.ErrUnexpectedEOF
  607. }
  608. f.err = err
  609. return
  610. }
  611. n := int(f.buf[0]) | int(f.buf[1])<<8
  612. nn := int(f.buf[2]) | int(f.buf[3])<<8
  613. if uint16(nn) != uint16(^n) {
  614. f.err = CorruptInputError(f.roffset)
  615. return
  616. }
  617. if n == 0 {
  618. f.toRead = f.dict.readFlush()
  619. f.finishBlock()
  620. return
  621. }
  622. f.copyLen = n
  623. f.copyData()
  624. }
  625. // copyData copies f.copyLen bytes from the underlying reader into f.hist.
  626. // It pauses for reads when f.hist is full.
  627. func (f *decompressor) copyData() {
  628. buf := f.dict.writeSlice()
  629. if len(buf) > f.copyLen {
  630. buf = buf[:f.copyLen]
  631. }
  632. cnt, err := io.ReadFull(f.r, buf)
  633. f.roffset += int64(cnt)
  634. f.copyLen -= cnt
  635. f.dict.writeMark(cnt)
  636. if err != nil {
  637. if err == io.EOF {
  638. err = io.ErrUnexpectedEOF
  639. }
  640. f.err = err
  641. return
  642. }
  643. if f.dict.availWrite() == 0 || f.copyLen > 0 {
  644. f.toRead = f.dict.readFlush()
  645. f.step = (*decompressor).copyData
  646. return
  647. }
  648. f.finishBlock()
  649. }
  650. func (f *decompressor) finishBlock() {
  651. if f.final {
  652. if f.dict.availRead() > 0 {
  653. f.toRead = f.dict.readFlush()
  654. }
  655. f.err = io.EOF
  656. }
  657. f.step = (*decompressor).nextBlock
  658. }
  659. func (f *decompressor) moreBits() error {
  660. c, err := f.r.ReadByte()
  661. if err != nil {
  662. if err == io.EOF {
  663. err = io.ErrUnexpectedEOF
  664. }
  665. return err
  666. }
  667. f.roffset++
  668. f.b |= uint32(c) << f.nb
  669. f.nb += 8
  670. return nil
  671. }
  672. // Read the next Huffman-encoded symbol from f according to h.
  673. func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) {
  674. // Since a huffmanDecoder can be empty or be composed of a degenerate tree
  675. // with single element, huffSym must error on these two edge cases. In both
  676. // cases, the chunks slice will be 0 for the invalid sequence, leading it
  677. // satisfy the n == 0 check below.
  678. n := uint(h.min)
  679. for {
  680. for f.nb < n {
  681. if err := f.moreBits(); err != nil {
  682. return 0, err
  683. }
  684. }
  685. chunk := h.chunks[f.b&(huffmanNumChunks-1)]
  686. n = uint(chunk & huffmanCountMask)
  687. if n > huffmanChunkBits {
  688. chunk = h.links[chunk>>huffmanValueShift][(f.b>>huffmanChunkBits)&h.linkMask]
  689. n = uint(chunk & huffmanCountMask)
  690. }
  691. if n <= f.nb {
  692. if n == 0 {
  693. f.err = CorruptInputError(f.roffset)
  694. return 0, f.err
  695. }
  696. f.b >>= n
  697. f.nb -= n
  698. return int(chunk >> huffmanValueShift), nil
  699. }
  700. }
  701. }
  702. func makeReader(r io.Reader) Reader {
  703. if rr, ok := r.(Reader); ok {
  704. return rr
  705. }
  706. return bufio.NewReader(r)
  707. }
  708. func fixedHuffmanDecoderInit() {
  709. fixedOnce.Do(func() {
  710. // These come from the RFC section 3.2.6.
  711. var bits [288]int
  712. for i := 0; i < 144; i++ {
  713. bits[i] = 8
  714. }
  715. for i := 144; i < 256; i++ {
  716. bits[i] = 9
  717. }
  718. for i := 256; i < 280; i++ {
  719. bits[i] = 7
  720. }
  721. for i := 280; i < 288; i++ {
  722. bits[i] = 8
  723. }
  724. fixedHuffmanDecoder.init(bits[:])
  725. })
  726. }
  727. func (f *decompressor) Reset(r io.Reader, dict []byte) error {
  728. *f = decompressor{
  729. r: makeReader(r),
  730. bits: f.bits,
  731. codebits: f.codebits,
  732. dict: f.dict,
  733. step: (*decompressor).nextBlock,
  734. }
  735. f.dict.init(maxMatchOffset, dict)
  736. return nil
  737. }
  738. // NewReader returns a new ReadCloser that can be used
  739. // to read the uncompressed version of r.
  740. // If r does not also implement io.ByteReader,
  741. // the decompressor may read more data than necessary from r.
  742. // It is the caller's responsibility to call Close on the ReadCloser
  743. // when finished reading.
  744. //
  745. // The ReadCloser returned by NewReader also implements Resetter.
  746. func NewReader(r io.Reader) io.ReadCloser {
  747. fixedHuffmanDecoderInit()
  748. var f decompressor
  749. f.r = makeReader(r)
  750. f.bits = new([maxNumLit + maxNumDist]int)
  751. f.codebits = new([numCodes]int)
  752. f.step = (*decompressor).nextBlock
  753. f.dict.init(maxMatchOffset, nil)
  754. return &f
  755. }
  756. // NewReaderDict is like NewReader but initializes the reader
  757. // with a preset dictionary. The returned Reader behaves as if
  758. // the uncompressed data stream started with the given dictionary,
  759. // which has already been read. NewReaderDict is typically used
  760. // to read data compressed by NewWriterDict.
  761. //
  762. // The ReadCloser returned by NewReader also implements Resetter.
  763. func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser {
  764. fixedHuffmanDecoderInit()
  765. var f decompressor
  766. f.r = makeReader(r)
  767. f.bits = new([maxNumLit + maxNumDist]int)
  768. f.codebits = new([numCodes]int)
  769. f.step = (*decompressor).nextBlock
  770. f.dict.init(maxMatchOffset, dict)
  771. return &f
  772. }