123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256 |
- /*
- * Package xz Go Reader API
- *
- * Author: Michael Cross <https://github.com/xi2>
- *
- * This file has been put into the public domain.
- * You can do whatever you want with this file.
- */
-
- package xz
-
- import (
- "errors"
- "io"
- )
-
- // Package specific errors.
- var (
- ErrUnsupportedCheck = errors.New("xz: integrity check type not supported")
- ErrMemlimit = errors.New("xz: LZMA2 dictionary size exceeds max")
- ErrFormat = errors.New("xz: file format not recognized")
- ErrOptions = errors.New("xz: compression options not supported")
- ErrData = errors.New("xz: data is corrupt")
- ErrBuf = errors.New("xz: data is truncated or corrupt")
- )
-
- // DefaultDictMax is the default maximum dictionary size in bytes used
- // by the decoder. This value is sufficient to decompress files
- // created with XZ Utils "xz -9".
- const DefaultDictMax = 1 << 26 // 64 MiB
-
- // inBufSize is the input buffer size used by the decoder.
- const inBufSize = 1 << 13 // 8 KiB
-
- // A Reader is an io.Reader that can be used to retrieve uncompressed
- // data from an XZ file.
- //
- // In general, an XZ file can be a concatenation of other XZ
- // files. Reads from the Reader return the concatenation of the
- // uncompressed data of each.
- type Reader struct {
- Header
- r io.Reader // the wrapped io.Reader
- multistream bool // true if reader is in multistream mode
- rEOF bool // true after io.EOF received on r
- dEOF bool // true after decoder has completed
- padding int // bytes of stream padding read (or -1)
- in [inBufSize]byte // backing array for buf.in
- buf *xzBuf // decoder input/output buffers
- dec *xzDec // decoder state
- err error // the result of the last decoder call
- }
-
- // NewReader creates a new Reader reading from r. The decompressor
- // will use an LZMA2 dictionary size up to dictMax bytes in
- // size. Passing a value of zero sets dictMax to DefaultDictMax. If
- // an individual XZ stream requires a dictionary size greater than
- // dictMax in order to decompress, Read will return ErrMemlimit.
- //
- // If NewReader is passed a value of nil for r then a Reader is
- // created such that all read attempts will return io.EOF. This is
- // useful if you just want to allocate memory for a Reader which will
- // later be initialized with Reset.
- //
- // Due to internal buffering, the Reader may read more data than
- // necessary from r.
- func NewReader(r io.Reader, dictMax uint32) (*Reader, error) {
- if dictMax == 0 {
- dictMax = DefaultDictMax
- }
- z := &Reader{
- r: r,
- multistream: true,
- padding: -1,
- buf: &xzBuf{},
- }
- if r == nil {
- z.rEOF, z.dEOF = true, true
- }
- z.dec = xzDecInit(dictMax, &z.Header)
- var err error
- if r != nil {
- _, err = z.Read(nil) // read stream header
- }
- return z, err
- }
-
- // decode is a wrapper around xzDecRun that additionally handles
- // stream padding. It treats the padding as a kind of stream that
- // decodes to nothing.
- //
- // When decoding padding, z.padding >= 0
- // When decoding a real stream, z.padding == -1
- func (z *Reader) decode() (ret xzRet) {
- if z.padding >= 0 {
- // read all padding in input buffer
- for z.buf.inPos < len(z.buf.in) &&
- z.buf.in[z.buf.inPos] == 0 {
- z.buf.inPos++
- z.padding++
- }
- switch {
- case z.buf.inPos == len(z.buf.in) && z.rEOF:
- // case: out of padding. no more input data available
- if z.padding%4 != 0 {
- ret = xzDataError
- } else {
- ret = xzStreamEnd
- }
- case z.buf.inPos == len(z.buf.in):
- // case: read more padding next loop iteration
- ret = xzOK
- default:
- // case: out of padding. more input data available
- if z.padding%4 != 0 {
- ret = xzDataError
- } else {
- xzDecReset(z.dec)
- ret = xzStreamEnd
- }
- }
- } else {
- ret = xzDecRun(z.dec, z.buf)
- }
- return
- }
-
- func (z *Reader) Read(p []byte) (n int, err error) {
- // restore err
- err = z.err
- // set decoder output buffer to p
- z.buf.out = p
- z.buf.outPos = 0
- for {
- // update n
- n = z.buf.outPos
- // if last call to decoder ended with an error, return that error
- if err != nil {
- break
- }
- // if decoder has finished, return with err == io.EOF
- if z.dEOF {
- err = io.EOF
- break
- }
- // if p full, return with err == nil, unless we have not yet
- // read the stream header with Read(nil)
- if n == len(p) && z.CheckType != checkUnset {
- break
- }
- // if needed, read more data from z.r
- if z.buf.inPos == len(z.buf.in) && !z.rEOF {
- rn, e := z.r.Read(z.in[:])
- if e != nil && e != io.EOF {
- // read error
- err = e
- break
- }
- if e == io.EOF {
- z.rEOF = true
- }
- // set new input buffer in z.buf
- z.buf.in = z.in[:rn]
- z.buf.inPos = 0
- }
- // decode more data
- ret := z.decode()
- switch ret {
- case xzOK:
- // no action needed
- case xzStreamEnd:
- if z.padding >= 0 {
- z.padding = -1
- if !z.multistream || z.rEOF {
- z.dEOF = true
- }
- } else {
- z.padding = 0
- }
- case xzUnsupportedCheck:
- err = ErrUnsupportedCheck
- case xzMemlimitError:
- err = ErrMemlimit
- case xzFormatError:
- err = ErrFormat
- case xzOptionsError:
- err = ErrOptions
- case xzDataError:
- err = ErrData
- case xzBufError:
- err = ErrBuf
- }
- // save err
- z.err = err
- }
- return
- }
-
- // Multistream controls whether the reader is operating in multistream
- // mode.
- //
- // If enabled (the default), the Reader expects the input to be a
- // sequence of XZ streams, possibly interspersed with stream padding,
- // which it reads one after another. The effect is that the
- // concatenation of a sequence of XZ streams or XZ files is
- // treated as equivalent to the compressed result of the concatenation
- // of the sequence. This is standard behaviour for XZ readers.
- //
- // Calling Multistream(false) disables this behaviour; disabling the
- // behaviour can be useful when reading file formats that distinguish
- // individual XZ streams. In this mode, when the Reader reaches the
- // end of the stream, Read returns io.EOF. To start the next stream,
- // call z.Reset(nil) followed by z.Multistream(false). If there is no
- // next stream, z.Reset(nil) will return io.EOF.
- func (z *Reader) Multistream(ok bool) {
- z.multistream = ok
- }
-
- // Reset, for non-nil values of io.Reader r, discards the Reader z's
- // state and makes it equivalent to the result of its original state
- // from NewReader, but reading from r instead. This permits reusing a
- // Reader rather than allocating a new one.
- //
- // If you wish to leave r unchanged use z.Reset(nil). This keeps r
- // unchanged and ensures internal buffering is preserved. If the
- // Reader was at the end of a stream it is then ready to read any
- // follow on streams. If there are no follow on streams z.Reset(nil)
- // returns io.EOF. If the Reader was not at the end of a stream then
- // z.Reset(nil) does nothing.
- func (z *Reader) Reset(r io.Reader) error {
- switch {
- case r == nil:
- z.multistream = true
- if !z.dEOF {
- return nil
- }
- if z.rEOF {
- return io.EOF
- }
- z.dEOF = false
- _, err := z.Read(nil) // read stream header
- return err
- default:
- z.r = r
- z.multistream = true
- z.rEOF = false
- z.dEOF = false
- z.padding = -1
- z.buf.in = nil
- z.buf.inPos = 0
- xzDecReset(z.dec)
- z.err = nil
- _, err := z.Read(nil) // read stream header
- return err
- }
- }
|