You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

reader.go 7.1KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256
  1. /*
  2. * Package xz Go Reader API
  3. *
  4. * Author: Michael Cross <https://github.com/xi2>
  5. *
  6. * This file has been put into the public domain.
  7. * You can do whatever you want with this file.
  8. */
  9. package xz
  10. import (
  11. "errors"
  12. "io"
  13. )
  14. // Package specific errors.
  15. var (
  16. ErrUnsupportedCheck = errors.New("xz: integrity check type not supported")
  17. ErrMemlimit = errors.New("xz: LZMA2 dictionary size exceeds max")
  18. ErrFormat = errors.New("xz: file format not recognized")
  19. ErrOptions = errors.New("xz: compression options not supported")
  20. ErrData = errors.New("xz: data is corrupt")
  21. ErrBuf = errors.New("xz: data is truncated or corrupt")
  22. )
  23. // DefaultDictMax is the default maximum dictionary size in bytes used
  24. // by the decoder. This value is sufficient to decompress files
  25. // created with XZ Utils "xz -9".
  26. const DefaultDictMax = 1 << 26 // 64 MiB
  27. // inBufSize is the input buffer size used by the decoder.
  28. const inBufSize = 1 << 13 // 8 KiB
  29. // A Reader is an io.Reader that can be used to retrieve uncompressed
  30. // data from an XZ file.
  31. //
  32. // In general, an XZ file can be a concatenation of other XZ
  33. // files. Reads from the Reader return the concatenation of the
  34. // uncompressed data of each.
  35. type Reader struct {
  36. Header
  37. r io.Reader // the wrapped io.Reader
  38. multistream bool // true if reader is in multistream mode
  39. rEOF bool // true after io.EOF received on r
  40. dEOF bool // true after decoder has completed
  41. padding int // bytes of stream padding read (or -1)
  42. in [inBufSize]byte // backing array for buf.in
  43. buf *xzBuf // decoder input/output buffers
  44. dec *xzDec // decoder state
  45. err error // the result of the last decoder call
  46. }
  47. // NewReader creates a new Reader reading from r. The decompressor
  48. // will use an LZMA2 dictionary size up to dictMax bytes in
  49. // size. Passing a value of zero sets dictMax to DefaultDictMax. If
  50. // an individual XZ stream requires a dictionary size greater than
  51. // dictMax in order to decompress, Read will return ErrMemlimit.
  52. //
  53. // If NewReader is passed a value of nil for r then a Reader is
  54. // created such that all read attempts will return io.EOF. This is
  55. // useful if you just want to allocate memory for a Reader which will
  56. // later be initialized with Reset.
  57. //
  58. // Due to internal buffering, the Reader may read more data than
  59. // necessary from r.
  60. func NewReader(r io.Reader, dictMax uint32) (*Reader, error) {
  61. if dictMax == 0 {
  62. dictMax = DefaultDictMax
  63. }
  64. z := &Reader{
  65. r: r,
  66. multistream: true,
  67. padding: -1,
  68. buf: &xzBuf{},
  69. }
  70. if r == nil {
  71. z.rEOF, z.dEOF = true, true
  72. }
  73. z.dec = xzDecInit(dictMax, &z.Header)
  74. var err error
  75. if r != nil {
  76. _, err = z.Read(nil) // read stream header
  77. }
  78. return z, err
  79. }
  80. // decode is a wrapper around xzDecRun that additionally handles
  81. // stream padding. It treats the padding as a kind of stream that
  82. // decodes to nothing.
  83. //
  84. // When decoding padding, z.padding >= 0
  85. // When decoding a real stream, z.padding == -1
  86. func (z *Reader) decode() (ret xzRet) {
  87. if z.padding >= 0 {
  88. // read all padding in input buffer
  89. for z.buf.inPos < len(z.buf.in) &&
  90. z.buf.in[z.buf.inPos] == 0 {
  91. z.buf.inPos++
  92. z.padding++
  93. }
  94. switch {
  95. case z.buf.inPos == len(z.buf.in) && z.rEOF:
  96. // case: out of padding. no more input data available
  97. if z.padding%4 != 0 {
  98. ret = xzDataError
  99. } else {
  100. ret = xzStreamEnd
  101. }
  102. case z.buf.inPos == len(z.buf.in):
  103. // case: read more padding next loop iteration
  104. ret = xzOK
  105. default:
  106. // case: out of padding. more input data available
  107. if z.padding%4 != 0 {
  108. ret = xzDataError
  109. } else {
  110. xzDecReset(z.dec)
  111. ret = xzStreamEnd
  112. }
  113. }
  114. } else {
  115. ret = xzDecRun(z.dec, z.buf)
  116. }
  117. return
  118. }
  119. func (z *Reader) Read(p []byte) (n int, err error) {
  120. // restore err
  121. err = z.err
  122. // set decoder output buffer to p
  123. z.buf.out = p
  124. z.buf.outPos = 0
  125. for {
  126. // update n
  127. n = z.buf.outPos
  128. // if last call to decoder ended with an error, return that error
  129. if err != nil {
  130. break
  131. }
  132. // if decoder has finished, return with err == io.EOF
  133. if z.dEOF {
  134. err = io.EOF
  135. break
  136. }
  137. // if p full, return with err == nil, unless we have not yet
  138. // read the stream header with Read(nil)
  139. if n == len(p) && z.CheckType != checkUnset {
  140. break
  141. }
  142. // if needed, read more data from z.r
  143. if z.buf.inPos == len(z.buf.in) && !z.rEOF {
  144. rn, e := z.r.Read(z.in[:])
  145. if e != nil && e != io.EOF {
  146. // read error
  147. err = e
  148. break
  149. }
  150. if e == io.EOF {
  151. z.rEOF = true
  152. }
  153. // set new input buffer in z.buf
  154. z.buf.in = z.in[:rn]
  155. z.buf.inPos = 0
  156. }
  157. // decode more data
  158. ret := z.decode()
  159. switch ret {
  160. case xzOK:
  161. // no action needed
  162. case xzStreamEnd:
  163. if z.padding >= 0 {
  164. z.padding = -1
  165. if !z.multistream || z.rEOF {
  166. z.dEOF = true
  167. }
  168. } else {
  169. z.padding = 0
  170. }
  171. case xzUnsupportedCheck:
  172. err = ErrUnsupportedCheck
  173. case xzMemlimitError:
  174. err = ErrMemlimit
  175. case xzFormatError:
  176. err = ErrFormat
  177. case xzOptionsError:
  178. err = ErrOptions
  179. case xzDataError:
  180. err = ErrData
  181. case xzBufError:
  182. err = ErrBuf
  183. }
  184. // save err
  185. z.err = err
  186. }
  187. return
  188. }
  189. // Multistream controls whether the reader is operating in multistream
  190. // mode.
  191. //
  192. // If enabled (the default), the Reader expects the input to be a
  193. // sequence of XZ streams, possibly interspersed with stream padding,
  194. // which it reads one after another. The effect is that the
  195. // concatenation of a sequence of XZ streams or XZ files is
  196. // treated as equivalent to the compressed result of the concatenation
  197. // of the sequence. This is standard behaviour for XZ readers.
  198. //
  199. // Calling Multistream(false) disables this behaviour; disabling the
  200. // behaviour can be useful when reading file formats that distinguish
  201. // individual XZ streams. In this mode, when the Reader reaches the
  202. // end of the stream, Read returns io.EOF. To start the next stream,
  203. // call z.Reset(nil) followed by z.Multistream(false). If there is no
  204. // next stream, z.Reset(nil) will return io.EOF.
  205. func (z *Reader) Multistream(ok bool) {
  206. z.multistream = ok
  207. }
  208. // Reset, for non-nil values of io.Reader r, discards the Reader z's
  209. // state and makes it equivalent to the result of its original state
  210. // from NewReader, but reading from r instead. This permits reusing a
  211. // Reader rather than allocating a new one.
  212. //
  213. // If you wish to leave r unchanged use z.Reset(nil). This keeps r
  214. // unchanged and ensures internal buffering is preserved. If the
  215. // Reader was at the end of a stream it is then ready to read any
  216. // follow on streams. If there are no follow on streams z.Reset(nil)
  217. // returns io.EOF. If the Reader was not at the end of a stream then
  218. // z.Reset(nil) does nothing.
  219. func (z *Reader) Reset(r io.Reader) error {
  220. switch {
  221. case r == nil:
  222. z.multistream = true
  223. if !z.dEOF {
  224. return nil
  225. }
  226. if z.rEOF {
  227. return io.EOF
  228. }
  229. z.dEOF = false
  230. _, err := z.Read(nil) // read stream header
  231. return err
  232. default:
  233. z.r = r
  234. z.multistream = true
  235. z.rEOF = false
  236. z.dEOF = false
  237. z.padding = -1
  238. z.buf.in = nil
  239. z.buf.inPos = 0
  240. xzDecReset(z.dec)
  241. z.err = nil
  242. _, err := z.Read(nil) // read stream header
  243. return err
  244. }
  245. }