diff options
author | techknowlogick <techknowlogick@gitea.io> | 2020-08-27 22:47:17 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-08-27 22:47:17 -0400 |
commit | c5d5d63c9cb28b45d936c937388a3668523857a3 (patch) | |
tree | 86f2a89c89c1a9f9a9dcd5e225d512caedb26923 /vendor/github.com | |
parent | 211321fb936683815c4033fdfb9ac46af8a3b357 (diff) | |
download | gitea-c5d5d63c9cb28b45d936c937388a3668523857a3.tar.gz gitea-c5d5d63c9cb28b45d936c937388a3668523857a3.zip |
Macaron 1.5 (#12596)
* update macaron to v1.5 of fork
* update macaron to v1.5 of fork
* test gzip PR
* add push method impl to context_tests
* use proper gzip commit
Co-authored-by: zeripath <art27@cantab.net>
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
Diffstat (limited to 'vendor/github.com')
32 files changed, 2464 insertions, 575 deletions
diff --git a/vendor/github.com/klauspost/compress/flate/deflate.go b/vendor/github.com/klauspost/compress/flate/deflate.go index 2b101d26b2..25dbe3e15f 100644 --- a/vendor/github.com/klauspost/compress/flate/deflate.go +++ b/vendor/github.com/klauspost/compress/flate/deflate.go @@ -80,9 +80,7 @@ type advancedState struct { // deflate state length int offset int - hash uint32 maxInsertIndex int - ii uint16 // position of last match, intended to overflow to reset. // Input hash chains // hashHead[hashValue] contains the largest inputIndex with the specified hash value @@ -97,6 +95,9 @@ type advancedState struct { // input window: unprocessed data is window[index:windowEnd] index int hashMatch [maxMatchLength + minMatchLength]uint32 + + hash uint32 + ii uint16 // position of last match, intended to overflow to reset. } type compressor struct { @@ -107,18 +108,19 @@ type compressor struct { // compression algorithm fill func(*compressor, []byte) int // copy data to window step func(*compressor) // process window - sync bool // requesting flush - window []byte - windowEnd int - blockStart int // window index where current tokens start - byteAvailable bool // if true, still need to process window[index-1]. - err error + window []byte + windowEnd int + blockStart int // window index where current tokens start + err error // queued output tokens tokens tokens fast fastEnc state *advancedState + + sync bool // requesting flush + byteAvailable bool // if true, still need to process window[index-1]. } func (d *compressor) fillDeflate(b []byte) int { diff --git a/vendor/github.com/klauspost/compress/flate/inflate.go b/vendor/github.com/klauspost/compress/flate/inflate.go index 7f175a4ec2..3e4259f157 100644 --- a/vendor/github.com/klauspost/compress/flate/inflate.go +++ b/vendor/github.com/klauspost/compress/flate/inflate.go @@ -295,10 +295,6 @@ type decompressor struct { r Reader roffset int64 - // Input bits, in top of b. - b uint32 - nb uint - // Huffman decoders for literal/length, distance. h1, h2 huffmanDecoder @@ -309,19 +305,24 @@ type decompressor struct { // Output history, buffer. dict dictDecoder - // Temporary buffer (avoids repeated allocation). - buf [4]byte - // Next step in the decompression, // and decompression state. step func(*decompressor) stepState int - final bool err error toRead []byte hl, hd *huffmanDecoder copyLen int copyDist int + + // Temporary buffer (avoids repeated allocation). + buf [4]byte + + // Input bits, in top of b. + b uint32 + + nb uint + final bool } func (f *decompressor) nextBlock() { diff --git a/vendor/github.com/klauspost/compress/fse/bitreader.go b/vendor/github.com/klauspost/compress/fse/bitreader.go index b9db204f59..f65eb3909c 100644 --- a/vendor/github.com/klauspost/compress/fse/bitreader.go +++ b/vendor/github.com/klauspost/compress/fse/bitreader.go @@ -6,6 +6,7 @@ package fse import ( + "encoding/binary" "errors" "io" ) @@ -34,8 +35,12 @@ func (b *bitReader) init(in []byte) error { } b.bitsRead = 64 b.value = 0 - b.fill() - b.fill() + if len(in) >= 8 { + b.fillFastStart() + } else { + b.fill() + b.fill() + } b.bitsRead += 8 - uint8(highBits(uint32(v))) return nil } @@ -63,8 +68,9 @@ func (b *bitReader) fillFast() { if b.bitsRead < 32 { return } - // Do single re-slice to avoid bounds checks. - v := b.in[b.off-4 : b.off] + // 2 bounds checks. + v := b.in[b.off-4:] + v = v[:4] low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) b.value = (b.value << 32) | uint64(low) b.bitsRead -= 32 @@ -77,7 +83,8 @@ func (b *bitReader) fill() { return } if b.off > 4 { - v := b.in[b.off-4 : b.off] + v := b.in[b.off-4:] + v = v[:4] low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) b.value = (b.value << 32) | uint64(low) b.bitsRead -= 32 @@ -91,9 +98,17 @@ func (b *bitReader) fill() { } } +// fillFastStart() assumes the bitreader is empty and there is at least 8 bytes to read. +func (b *bitReader) fillFastStart() { + // Do single re-slice to avoid bounds checks. + b.value = binary.LittleEndian.Uint64(b.in[b.off-8:]) + b.bitsRead = 0 + b.off -= 8 +} + // finished returns true if all bits have been read from the bit stream. func (b *bitReader) finished() bool { - return b.off == 0 && b.bitsRead >= 64 + return b.bitsRead >= 64 && b.off == 0 } // close the bitstream and returns an error if out-of-buffer reads occurred. diff --git a/vendor/github.com/klauspost/compress/fse/bytereader.go b/vendor/github.com/klauspost/compress/fse/bytereader.go index f228a46cdf..abade2d605 100644 --- a/vendor/github.com/klauspost/compress/fse/bytereader.go +++ b/vendor/github.com/klauspost/compress/fse/bytereader.go @@ -25,19 +25,10 @@ func (b *byteReader) advance(n uint) { b.off += int(n) } -// Int32 returns a little endian int32 starting at current offset. -func (b byteReader) Int32() int32 { - b2 := b.b[b.off : b.off+4 : b.off+4] - v3 := int32(b2[3]) - v2 := int32(b2[2]) - v1 := int32(b2[1]) - v0 := int32(b2[0]) - return v0 | (v1 << 8) | (v2 << 16) | (v3 << 24) -} - // Uint32 returns a little endian uint32 starting at current offset. func (b byteReader) Uint32() uint32 { - b2 := b.b[b.off : b.off+4 : b.off+4] + b2 := b.b[b.off:] + b2 = b2[:4] v3 := uint32(b2[3]) v2 := uint32(b2[2]) v1 := uint32(b2[1]) diff --git a/vendor/github.com/klauspost/compress/fse/fse.go b/vendor/github.com/klauspost/compress/fse/fse.go index 075357b5b1..535cbadfde 100644 --- a/vendor/github.com/klauspost/compress/fse/fse.go +++ b/vendor/github.com/klauspost/compress/fse/fse.go @@ -44,18 +44,14 @@ var ( // Scratch provides temporary storage for compression and decompression. type Scratch struct { // Private - count [maxSymbolValue + 1]uint32 - norm [maxSymbolValue + 1]int16 - symbolLen uint16 // Length of active part of the symbol table. - actualTableLog uint8 // Selected tablelog. - br byteReader - bits bitReader - bw bitWriter - ct cTable // Compression tables. - decTable []decSymbol // Decompression table. - zeroBits bool // no bits has prob > 50%. - clearCount bool // clear count - maxCount int // count of the most probable symbol + count [maxSymbolValue + 1]uint32 + norm [maxSymbolValue + 1]int16 + br byteReader + bits bitReader + bw bitWriter + ct cTable // Compression tables. + decTable []decSymbol // Decompression table. + maxCount int // count of the most probable symbol // Per block parameters. // These can be used to override compression parameters of the block. @@ -68,17 +64,22 @@ type Scratch struct { // and allocation will be avoided. Out []byte - // MaxSymbolValue will override the maximum symbol value of the next block. - MaxSymbolValue uint8 - - // TableLog will attempt to override the tablelog for the next block. - TableLog uint8 - // DecompressLimit limits the maximum decoded size acceptable. // If > 0 decompression will stop when approximately this many bytes // has been decoded. // If 0, maximum size will be 2GB. DecompressLimit int + + symbolLen uint16 // Length of active part of the symbol table. + actualTableLog uint8 // Selected tablelog. + zeroBits bool // no bits has prob > 50%. + clearCount bool // clear count + + // MaxSymbolValue will override the maximum symbol value of the next block. + MaxSymbolValue uint8 + + // TableLog will attempt to override the tablelog for the next block. + TableLog uint8 } // Histogram allows to populate the histogram and skip that step in the compression, diff --git a/vendor/github.com/klauspost/compress/gzip/gzip.go b/vendor/github.com/klauspost/compress/gzip/gzip.go index 6794cf48f4..26203851bd 100644 --- a/vendor/github.com/klauspost/compress/gzip/gzip.go +++ b/vendor/github.com/klauspost/compress/gzip/gzip.go @@ -37,13 +37,13 @@ type Writer struct { Header // written at first call to Write, Flush, or Close w io.Writer level int - wroteHeader bool + err error compressor *flate.Writer digest uint32 // CRC-32, IEEE polynomial (section 8) size uint32 // Uncompressed size (section 2.3.1) + wroteHeader bool closed bool buf [10]byte - err error } // NewWriter returns a new Writer. diff --git a/vendor/github.com/klauspost/compress/huff0/README.md b/vendor/github.com/klauspost/compress/huff0/README.md index 0a8448ce9f..e12da4db2f 100644 --- a/vendor/github.com/klauspost/compress/huff0/README.md +++ b/vendor/github.com/klauspost/compress/huff0/README.md @@ -12,8 +12,6 @@ but it can be used as a secondary step to compressors (like Snappy) that does no * [Godoc documentation](https://godoc.org/github.com/klauspost/compress/huff0)
-THIS PACKAGE IS NOT CONSIDERED STABLE AND API OR ENCODING MAY CHANGE IN THE FUTURE.
-
## News
* Mar 2018: First implementation released. Consider this beta software for now.
@@ -75,6 +73,8 @@ which can be given to the decompressor. Decompressing is done by calling the [`Decompress1X`](https://godoc.org/github.com/klauspost/compress/huff0#Scratch.Decompress1X)
or [`Decompress4X`](https://godoc.org/github.com/klauspost/compress/huff0#Scratch.Decompress4X) function.
+For concurrently decompressing content with a fixed table a stateless [`Decoder`](https://godoc.org/github.com/klauspost/compress/huff0#Decoder) can be requested which will remain correct as long as the scratch is unchanged. The capacity of the provided slice indicates the expected output size.
+
You must provide the output from the compression stage, at exactly the size you got back. If you receive an error back
your input was likely corrupted.
@@ -84,4 +84,4 @@ There are no integrity checks, so relying on errors from the decompressor does n # Contributing
Contributions are always welcome. Be aware that adding public functions will require good justification and breaking
-changes will likely not be accepted. If in doubt open an issue before writing the PR.
\ No newline at end of file +changes will likely not be accepted. If in doubt open an issue before writing the PR.
diff --git a/vendor/github.com/klauspost/compress/huff0/bitreader.go b/vendor/github.com/klauspost/compress/huff0/bitreader.go index 7d0903c701..a4979e8868 100644 --- a/vendor/github.com/klauspost/compress/huff0/bitreader.go +++ b/vendor/github.com/klauspost/compress/huff0/bitreader.go @@ -6,6 +6,7 @@ package huff0 import ( + "encoding/binary" "errors" "io" ) @@ -34,29 +35,16 @@ func (b *bitReader) init(in []byte) error { } b.bitsRead = 64 b.value = 0 - b.fill() - b.fill() + if len(in) >= 8 { + b.fillFastStart() + } else { + b.fill() + b.fill() + } b.bitsRead += 8 - uint8(highBit32(uint32(v))) return nil } -// getBits will return n bits. n can be 0. -func (b *bitReader) getBits(n uint8) uint16 { - if n == 0 || b.bitsRead >= 64 { - return 0 - } - return b.getBitsFast(n) -} - -// getBitsFast requires that at least one bit is requested every time. -// There are no checks if the buffer is filled. -func (b *bitReader) getBitsFast(n uint8) uint16 { - const regMask = 64 - 1 - v := uint16((b.value << (b.bitsRead & regMask)) >> ((regMask + 1 - n) & regMask)) - b.bitsRead += n - return v -} - // peekBitsFast requires that at least one bit is requested every time. // There are no checks if the buffer is filled. func (b *bitReader) peekBitsFast(n uint8) uint16 { @@ -71,21 +59,36 @@ func (b *bitReader) fillFast() { if b.bitsRead < 32 { return } - // Do single re-slice to avoid bounds checks. + + // 2 bounds checks. v := b.in[b.off-4 : b.off] + v = v[:4] low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) b.value = (b.value << 32) | uint64(low) b.bitsRead -= 32 b.off -= 4 } +func (b *bitReader) advance(n uint8) { + b.bitsRead += n +} + +// fillFastStart() assumes the bitreader is empty and there is at least 8 bytes to read. +func (b *bitReader) fillFastStart() { + // Do single re-slice to avoid bounds checks. + b.value = binary.LittleEndian.Uint64(b.in[b.off-8:]) + b.bitsRead = 0 + b.off -= 8 +} + // fill() will make sure at least 32 bits are available. func (b *bitReader) fill() { if b.bitsRead < 32 { return } if b.off > 4 { - v := b.in[b.off-4 : b.off] + v := b.in[b.off-4:] + v = v[:4] low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) b.value = (b.value << 32) | uint64(low) b.bitsRead -= 32 @@ -113,3 +116,214 @@ func (b *bitReader) close() error { } return nil } + +// bitReader reads a bitstream in reverse. +// The last set bit indicates the start of the stream and is used +// for aligning the input. +type bitReaderBytes struct { + in []byte + off uint // next byte to read is at in[off - 1] + value uint64 + bitsRead uint8 +} + +// init initializes and resets the bit reader. +func (b *bitReaderBytes) init(in []byte) error { + if len(in) < 1 { + return errors.New("corrupt stream: too short") + } + b.in = in + b.off = uint(len(in)) + // The highest bit of the last byte indicates where to start + v := in[len(in)-1] + if v == 0 { + return errors.New("corrupt stream, did not find end of stream") + } + b.bitsRead = 64 + b.value = 0 + if len(in) >= 8 { + b.fillFastStart() + } else { + b.fill() + b.fill() + } + b.advance(8 - uint8(highBit32(uint32(v)))) + return nil +} + +// peekBitsFast requires that at least one bit is requested every time. +// There are no checks if the buffer is filled. +func (b *bitReaderBytes) peekByteFast() uint8 { + got := uint8(b.value >> 56) + return got +} + +func (b *bitReaderBytes) advance(n uint8) { + b.bitsRead += n + b.value <<= n & 63 +} + +// fillFast() will make sure at least 32 bits are available. +// There must be at least 4 bytes available. +func (b *bitReaderBytes) fillFast() { + if b.bitsRead < 32 { + return + } + + // 2 bounds checks. + v := b.in[b.off-4 : b.off] + v = v[:4] + low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) + b.value |= uint64(low) << (b.bitsRead - 32) + b.bitsRead -= 32 + b.off -= 4 +} + +// fillFastStart() assumes the bitReaderBytes is empty and there is at least 8 bytes to read. +func (b *bitReaderBytes) fillFastStart() { + // Do single re-slice to avoid bounds checks. + b.value = binary.LittleEndian.Uint64(b.in[b.off-8:]) + b.bitsRead = 0 + b.off -= 8 +} + +// fill() will make sure at least 32 bits are available. +func (b *bitReaderBytes) fill() { + if b.bitsRead < 32 { + return + } + if b.off > 4 { + v := b.in[b.off-4:] + v = v[:4] + low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) + b.value |= uint64(low) << (b.bitsRead - 32) + b.bitsRead -= 32 + b.off -= 4 + return + } + for b.off > 0 { + b.value |= uint64(b.in[b.off-1]) << (b.bitsRead - 8) + b.bitsRead -= 8 + b.off-- + } +} + +// finished returns true if all bits have been read from the bit stream. +func (b *bitReaderBytes) finished() bool { + return b.off == 0 && b.bitsRead >= 64 +} + +// close the bitstream and returns an error if out-of-buffer reads occurred. +func (b *bitReaderBytes) close() error { + // Release reference. + b.in = nil + if b.bitsRead > 64 { + return io.ErrUnexpectedEOF + } + return nil +} + +// bitReaderShifted reads a bitstream in reverse. +// The last set bit indicates the start of the stream and is used +// for aligning the input. +type bitReaderShifted struct { + in []byte + off uint // next byte to read is at in[off - 1] + value uint64 + bitsRead uint8 +} + +// init initializes and resets the bit reader. +func (b *bitReaderShifted) init(in []byte) error { + if len(in) < 1 { + return errors.New("corrupt stream: too short") + } + b.in = in + b.off = uint(len(in)) + // The highest bit of the last byte indicates where to start + v := in[len(in)-1] + if v == 0 { + return errors.New("corrupt stream, did not find end of stream") + } + b.bitsRead = 64 + b.value = 0 + if len(in) >= 8 { + b.fillFastStart() + } else { + b.fill() + b.fill() + } + b.advance(8 - uint8(highBit32(uint32(v)))) + return nil +} + +// peekBitsFast requires that at least one bit is requested every time. +// There are no checks if the buffer is filled. +func (b *bitReaderShifted) peekBitsFast(n uint8) uint16 { + return uint16(b.value >> ((64 - n) & 63)) +} + +func (b *bitReaderShifted) advance(n uint8) { + b.bitsRead += n + b.value <<= n & 63 +} + +// fillFast() will make sure at least 32 bits are available. +// There must be at least 4 bytes available. +func (b *bitReaderShifted) fillFast() { + if b.bitsRead < 32 { + return + } + + // 2 bounds checks. + v := b.in[b.off-4 : b.off] + v = v[:4] + low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) + b.value |= uint64(low) << ((b.bitsRead - 32) & 63) + b.bitsRead -= 32 + b.off -= 4 +} + +// fillFastStart() assumes the bitReaderShifted is empty and there is at least 8 bytes to read. +func (b *bitReaderShifted) fillFastStart() { + // Do single re-slice to avoid bounds checks. + b.value = binary.LittleEndian.Uint64(b.in[b.off-8:]) + b.bitsRead = 0 + b.off -= 8 +} + +// fill() will make sure at least 32 bits are available. +func (b *bitReaderShifted) fill() { + if b.bitsRead < 32 { + return + } + if b.off > 4 { + v := b.in[b.off-4:] + v = v[:4] + low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) + b.value |= uint64(low) << ((b.bitsRead - 32) & 63) + b.bitsRead -= 32 + b.off -= 4 + return + } + for b.off > 0 { + b.value |= uint64(b.in[b.off-1]) << ((b.bitsRead - 8) & 63) + b.bitsRead -= 8 + b.off-- + } +} + +// finished returns true if all bits have been read from the bit stream. +func (b *bitReaderShifted) finished() bool { + return b.off == 0 && b.bitsRead >= 64 +} + +// close the bitstream and returns an error if out-of-buffer reads occurred. +func (b *bitReaderShifted) close() error { + // Release reference. + b.in = nil + if b.bitsRead > 64 { + return io.ErrUnexpectedEOF + } + return nil +} diff --git a/vendor/github.com/klauspost/compress/huff0/bitwriter.go b/vendor/github.com/klauspost/compress/huff0/bitwriter.go index bda4021efd..6bce4e87d4 100644 --- a/vendor/github.com/klauspost/compress/huff0/bitwriter.go +++ b/vendor/github.com/klauspost/compress/huff0/bitwriter.go @@ -43,6 +43,11 @@ func (b *bitWriter) addBits16Clean(value uint16, bits uint8) { func (b *bitWriter) encSymbol(ct cTable, symbol byte) { enc := ct[symbol] b.bitContainer |= uint64(enc.val) << (b.nBits & 63) + if false { + if enc.nBits == 0 { + panic("nbits 0") + } + } b.nBits += enc.nBits } @@ -54,6 +59,14 @@ func (b *bitWriter) encTwoSymbols(ct cTable, av, bv byte) { sh := b.nBits & 63 combined := uint64(encA.val) | (uint64(encB.val) << (encA.nBits & 63)) b.bitContainer |= combined << sh + if false { + if encA.nBits == 0 { + panic("nbitsA 0") + } + if encB.nBits == 0 { + panic("nbitsB 0") + } + } b.nBits += encA.nBits + encB.nBits } diff --git a/vendor/github.com/klauspost/compress/huff0/compress.go b/vendor/github.com/klauspost/compress/huff0/compress.go index 0843cb014f..f9ed5f8306 100644 --- a/vendor/github.com/klauspost/compress/huff0/compress.go +++ b/vendor/github.com/klauspost/compress/huff0/compress.go @@ -77,8 +77,11 @@ func compress(in []byte, s *Scratch, compressor func(src []byte) ([]byte, error) // Each symbol present maximum once or too well distributed. return nil, false, ErrIncompressible } - - if s.Reuse == ReusePolicyPrefer && canReuse { + if s.Reuse == ReusePolicyMust && !canReuse { + // We must reuse, but we can't. + return nil, false, ErrIncompressible + } + if (s.Reuse == ReusePolicyPrefer || s.Reuse == ReusePolicyMust) && canReuse { keepTable := s.cTable keepTL := s.actualTableLog s.cTable = s.prevTable @@ -90,6 +93,9 @@ func compress(in []byte, s *Scratch, compressor func(src []byte) ([]byte, error) s.OutData = s.Out return s.Out, true, nil } + if s.Reuse == ReusePolicyMust { + return nil, false, ErrIncompressible + } // Do not attempt to re-use later. s.prevTable = s.prevTable[:0] } diff --git a/vendor/github.com/klauspost/compress/huff0/decompress.go b/vendor/github.com/klauspost/compress/huff0/decompress.go index 97ae66a4ac..41703bba4d 100644 --- a/vendor/github.com/klauspost/compress/huff0/decompress.go +++ b/vendor/github.com/klauspost/compress/huff0/decompress.go @@ -25,11 +25,14 @@ type dEntryDouble struct { len uint8 } +// Uses special code for all tables that are < 8 bits. +const use8BitTables = true + // ReadTable will read a table from the input. // The size of the input may be larger than the table definition. // Any content remaining after the table definition will be returned. // If no Scratch is provided a new one is allocated. -// The returned Scratch can be used for decoding input using this table. +// The returned Scratch can be used for encoding or decoding input using this table. func ReadTable(in []byte, s *Scratch) (s2 *Scratch, remain []byte, err error) { s, err = s.prepare(in) if err != nil { @@ -55,8 +58,8 @@ func ReadTable(in []byte, s *Scratch) (s2 *Scratch, remain []byte, err error) { s.symbolLen = uint16(oSize) in = in[iSize:] } else { - if len(in) <= int(iSize) { - return s, nil, errors.New("input too small for table") + if len(in) < int(iSize) { + return s, nil, fmt.Errorf("input too small for table, want %d bytes, have %d", iSize, len(in)) } // FSE compressed weights s.fse.DecompressLimit = 255 @@ -83,6 +86,7 @@ func ReadTable(in []byte, s *Scratch) (s2 *Scratch, remain []byte, err error) { } v2 := v & 15 rankStats[v2]++ + // (1 << (v2-1)) is slower since the compiler cannot prove that v2 isn't 0. weightTotal += (1 << v2) >> 1 } if weightTotal == 0 { @@ -134,20 +138,40 @@ func ReadTable(in []byte, s *Scratch) (s2 *Scratch, remain []byte, err error) { if len(s.dt.single) != tSize { s.dt.single = make([]dEntrySingle, tSize) } + cTable := s.prevTable + if cap(cTable) < maxSymbolValue+1 { + cTable = make([]cTableEntry, 0, maxSymbolValue+1) + } + cTable = cTable[:maxSymbolValue+1] + s.prevTable = cTable[:s.symbolLen] + s.prevTableLog = s.actualTableLog + for n, w := range s.huffWeight[:s.symbolLen] { if w == 0 { + cTable[n] = cTableEntry{ + val: 0, + nBits: 0, + } continue } length := (uint32(1) << w) >> 1 d := dEntrySingle{ entry: uint16(s.actualTableLog+1-w) | (uint16(n) << 8), } - single := s.dt.single[rankStats[w] : rankStats[w]+length] + + rank := &rankStats[w] + cTable[n] = cTableEntry{ + val: uint16(*rank >> (w - 1)), + nBits: uint8(d.entry), + } + + single := s.dt.single[*rank : *rank+length] for i := range single { single[i] = d } - rankStats[w] += length + *rank += length } + return s, in, nil } @@ -155,237 +179,905 @@ func ReadTable(in []byte, s *Scratch) (s2 *Scratch, remain []byte, err error) { // The length of the supplied input must match the end of a block exactly. // Before this is called, the table must be initialized with ReadTable unless // the encoder re-used the table. +// deprecated: Use the stateless Decoder() to get a concurrent version. func (s *Scratch) Decompress1X(in []byte) (out []byte, err error) { - if len(s.dt.single) == 0 { - return nil, errors.New("no table loaded") + if cap(s.Out) < s.MaxDecodedSize { + s.Out = make([]byte, s.MaxDecodedSize) } - var br bitReader - err = br.init(in) - if err != nil { - return nil, err + s.Out = s.Out[:0:s.MaxDecodedSize] + s.Out, err = s.Decoder().Decompress1X(s.Out, in) + return s.Out, err +} + +// Decompress4X will decompress a 4X encoded stream. +// Before this is called, the table must be initialized with ReadTable unless +// the encoder re-used the table. +// The length of the supplied input must match the end of a block exactly. +// The destination size of the uncompressed data must be known and provided. +// deprecated: Use the stateless Decoder() to get a concurrent version. +func (s *Scratch) Decompress4X(in []byte, dstSize int) (out []byte, err error) { + if dstSize > s.MaxDecodedSize { + return nil, ErrMaxDecodedSizeExceeded + } + if cap(s.Out) < dstSize { + s.Out = make([]byte, s.MaxDecodedSize) + } + s.Out = s.Out[:0:dstSize] + s.Out, err = s.Decoder().Decompress4X(s.Out, in) + return s.Out, err +} + +// Decoder will return a stateless decoder that can be used by multiple +// decompressors concurrently. +// Before this is called, the table must be initialized with ReadTable. +// The Decoder is still linked to the scratch buffer so that cannot be reused. +// However, it is safe to discard the scratch. +func (s *Scratch) Decoder() *Decoder { + return &Decoder{ + dt: s.dt, + actualTableLog: s.actualTableLog, } - s.Out = s.Out[:0] +} - decode := func() byte { - val := br.peekBitsFast(s.actualTableLog) /* note : actualTableLog >= 1 */ - v := s.dt.single[val] - br.bitsRead += uint8(v.entry) - return uint8(v.entry >> 8) +// Decoder provides stateless decoding. +type Decoder struct { + dt dTable + actualTableLog uint8 +} + +// Decompress1X will decompress a 1X encoded stream. +// The cap of the output buffer will be the maximum decompressed size. +// The length of the supplied input must match the end of a block exactly. +func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) { + if len(d.dt.single) == 0 { + return nil, errors.New("no table loaded") } - hasDec := func(v dEntrySingle) byte { - br.bitsRead += uint8(v.entry) - return uint8(v.entry >> 8) + if use8BitTables && d.actualTableLog <= 8 { + return d.decompress1X8Bit(dst, src) } + var br bitReaderShifted + err := br.init(src) + if err != nil { + return dst, err + } + maxDecodedSize := cap(dst) + dst = dst[:0] // Avoid bounds check by always having full sized table. const tlSize = 1 << tableLogMax const tlMask = tlSize - 1 - dt := s.dt.single[:tlSize] + dt := d.dt.single[:tlSize] // Use temp table to avoid bound checks/append penalty. - var tmp = s.huffWeight[:256] + var buf [256]byte var off uint8 for br.off >= 8 { br.fillFast() - tmp[off+0] = hasDec(dt[br.peekBitsFast(s.actualTableLog)&tlMask]) - tmp[off+1] = hasDec(dt[br.peekBitsFast(s.actualTableLog)&tlMask]) + v := dt[br.peekBitsFast(d.actualTableLog)&tlMask] + br.advance(uint8(v.entry)) + buf[off+0] = uint8(v.entry >> 8) + + v = dt[br.peekBitsFast(d.actualTableLog)&tlMask] + br.advance(uint8(v.entry)) + buf[off+1] = uint8(v.entry >> 8) + + // Refill br.fillFast() - tmp[off+2] = hasDec(dt[br.peekBitsFast(s.actualTableLog)&tlMask]) - tmp[off+3] = hasDec(dt[br.peekBitsFast(s.actualTableLog)&tlMask]) + + v = dt[br.peekBitsFast(d.actualTableLog)&tlMask] + br.advance(uint8(v.entry)) + buf[off+2] = uint8(v.entry >> 8) + + v = dt[br.peekBitsFast(d.actualTableLog)&tlMask] + br.advance(uint8(v.entry)) + buf[off+3] = uint8(v.entry >> 8) + off += 4 if off == 0 { - if len(s.Out)+256 > s.MaxDecodedSize { + if len(dst)+256 > maxDecodedSize { br.close() return nil, ErrMaxDecodedSizeExceeded } - s.Out = append(s.Out, tmp...) + dst = append(dst, buf[:]...) } } - if len(s.Out)+int(off) > s.MaxDecodedSize { + if len(dst)+int(off) > maxDecodedSize { br.close() return nil, ErrMaxDecodedSizeExceeded } - s.Out = append(s.Out, tmp[:off]...) + dst = append(dst, buf[:off]...) - for !br.finished() { + // br < 8, so uint8 is fine + bitsLeft := uint8(br.off)*8 + 64 - br.bitsRead + for bitsLeft > 0 { br.fill() - if len(s.Out) >= s.MaxDecodedSize { + if false && br.bitsRead >= 32 { + if br.off >= 4 { + v := br.in[br.off-4:] + v = v[:4] + low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) + br.value = (br.value << 32) | uint64(low) + br.bitsRead -= 32 + br.off -= 4 + } else { + for br.off > 0 { + br.value = (br.value << 8) | uint64(br.in[br.off-1]) + br.bitsRead -= 8 + br.off-- + } + } + } + if len(dst) >= maxDecodedSize { + br.close() + return nil, ErrMaxDecodedSizeExceeded + } + v := d.dt.single[br.peekBitsFast(d.actualTableLog)&tlMask] + nBits := uint8(v.entry) + br.advance(nBits) + bitsLeft -= nBits + dst = append(dst, uint8(v.entry>>8)) + } + return dst, br.close() +} + +// decompress1X8Bit will decompress a 1X encoded stream with tablelog <= 8. +// The cap of the output buffer will be the maximum decompressed size. +// The length of the supplied input must match the end of a block exactly. +func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) { + if d.actualTableLog == 8 { + return d.decompress1X8BitExactly(dst, src) + } + var br bitReaderBytes + err := br.init(src) + if err != nil { + return dst, err + } + maxDecodedSize := cap(dst) + dst = dst[:0] + + // Avoid bounds check by always having full sized table. + dt := d.dt.single[:256] + + // Use temp table to avoid bound checks/append penalty. + var buf [256]byte + var off uint8 + + shift := (8 - d.actualTableLog) & 7 + + //fmt.Printf("mask: %b, tl:%d\n", mask, d.actualTableLog) + for br.off >= 4 { + br.fillFast() + v := dt[br.peekByteFast()>>shift] + br.advance(uint8(v.entry)) + buf[off+0] = uint8(v.entry >> 8) + + v = dt[br.peekByteFast()>>shift] + br.advance(uint8(v.entry)) + buf[off+1] = uint8(v.entry >> 8) + + v = dt[br.peekByteFast()>>shift] + br.advance(uint8(v.entry)) + buf[off+2] = uint8(v.entry >> 8) + + v = dt[br.peekByteFast()>>shift] + br.advance(uint8(v.entry)) + buf[off+3] = uint8(v.entry >> 8) + + off += 4 + if off == 0 { + if len(dst)+256 > maxDecodedSize { + br.close() + return nil, ErrMaxDecodedSizeExceeded + } + dst = append(dst, buf[:]...) + } + } + + if len(dst)+int(off) > maxDecodedSize { + br.close() + return nil, ErrMaxDecodedSizeExceeded + } + dst = append(dst, buf[:off]...) + + // br < 4, so uint8 is fine + bitsLeft := int8(uint8(br.off)*8 + (64 - br.bitsRead)) + for bitsLeft > 0 { + if br.bitsRead >= 64-8 { + for br.off > 0 { + br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8) + br.bitsRead -= 8 + br.off-- + } + } + if len(dst) >= maxDecodedSize { + br.close() + return nil, ErrMaxDecodedSizeExceeded + } + v := dt[br.peekByteFast()>>shift] + nBits := uint8(v.entry) + br.advance(nBits) + bitsLeft -= int8(nBits) + dst = append(dst, uint8(v.entry>>8)) + } + return dst, br.close() +} + +// decompress1X8Bit will decompress a 1X encoded stream with tablelog <= 8. +// The cap of the output buffer will be the maximum decompressed size. +// The length of the supplied input must match the end of a block exactly. +func (d *Decoder) decompress1X8BitExactly(dst, src []byte) ([]byte, error) { + var br bitReaderBytes + err := br.init(src) + if err != nil { + return dst, err + } + maxDecodedSize := cap(dst) + dst = dst[:0] + + // Avoid bounds check by always having full sized table. + dt := d.dt.single[:256] + + // Use temp table to avoid bound checks/append penalty. + var buf [256]byte + var off uint8 + + const shift = 0 + + //fmt.Printf("mask: %b, tl:%d\n", mask, d.actualTableLog) + for br.off >= 4 { + br.fillFast() + v := dt[br.peekByteFast()>>shift] + br.advance(uint8(v.entry)) + buf[off+0] = uint8(v.entry >> 8) + + v = dt[br.peekByteFast()>>shift] + br.advance(uint8(v.entry)) + buf[off+1] = uint8(v.entry >> 8) + + v = dt[br.peekByteFast()>>shift] + br.advance(uint8(v.entry)) + buf[off+2] = uint8(v.entry >> 8) + + v = dt[br.peekByteFast()>>shift] + br.advance(uint8(v.entry)) + buf[off+3] = uint8(v.entry >> 8) + + off += 4 + if off == 0 { + if len(dst)+256 > maxDecodedSize { + br.close() + return nil, ErrMaxDecodedSizeExceeded + } + dst = append(dst, buf[:]...) + } + } + + if len(dst)+int(off) > maxDecodedSize { + br.close() + return nil, ErrMaxDecodedSizeExceeded + } + dst = append(dst, buf[:off]...) + + // br < 4, so uint8 is fine + bitsLeft := int8(uint8(br.off)*8 + (64 - br.bitsRead)) + for bitsLeft > 0 { + if br.bitsRead >= 64-8 { + for br.off > 0 { + br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8) + br.bitsRead -= 8 + br.off-- + } + } + if len(dst) >= maxDecodedSize { br.close() return nil, ErrMaxDecodedSizeExceeded } - s.Out = append(s.Out, decode()) + v := dt[br.peekByteFast()>>shift] + nBits := uint8(v.entry) + br.advance(nBits) + bitsLeft -= int8(nBits) + dst = append(dst, uint8(v.entry>>8)) } - return s.Out, br.close() + return dst, br.close() } // Decompress4X will decompress a 4X encoded stream. -// Before this is called, the table must be initialized with ReadTable unless -// the encoder re-used the table. // The length of the supplied input must match the end of a block exactly. -// The destination size of the uncompressed data must be known and provided. -func (s *Scratch) Decompress4X(in []byte, dstSize int) (out []byte, err error) { - if len(s.dt.single) == 0 { +// The *capacity* of the dst slice must match the destination size of +// the uncompressed data exactly. +func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) { + if len(d.dt.single) == 0 { return nil, errors.New("no table loaded") } - if len(in) < 6+(4*1) { + if len(src) < 6+(4*1) { return nil, errors.New("input too small") } - if dstSize > s.MaxDecodedSize { - return nil, ErrMaxDecodedSizeExceeded + if use8BitTables && d.actualTableLog <= 8 { + return d.decompress4X8bit(dst, src) } - // TODO: We do not detect when we overrun a buffer, except if the last one does. - var br [4]bitReader + var br [4]bitReaderShifted start := 6 for i := 0; i < 3; i++ { - length := int(in[i*2]) | (int(in[i*2+1]) << 8) - if start+length >= len(in) { + length := int(src[i*2]) | (int(src[i*2+1]) << 8) + if start+length >= len(src) { return nil, errors.New("truncated input (or invalid offset)") } - err = br[i].init(in[start : start+length]) + err := br[i].init(src[start : start+length]) if err != nil { return nil, err } start += length } - err = br[3].init(in[start:]) + err := br[3].init(src[start:]) if err != nil { return nil, err } - // Prepare output - if cap(s.Out) < dstSize { - s.Out = make([]byte, 0, dstSize) - } - s.Out = s.Out[:dstSize] // destination, offset to match first output - dstOut := s.Out + dstSize := cap(dst) + dst = dst[:dstSize] + out := dst dstEvery := (dstSize + 3) / 4 const tlSize = 1 << tableLogMax const tlMask = tlSize - 1 - single := s.dt.single[:tlSize] - - decode := func(br *bitReader) byte { - val := br.peekBitsFast(s.actualTableLog) /* note : actualTableLog >= 1 */ - v := single[val&tlMask] - br.bitsRead += uint8(v.entry) - return uint8(v.entry >> 8) - } + single := d.dt.single[:tlSize] // Use temp table to avoid bound checks/append penalty. - var tmp = s.huffWeight[:256] + var buf [256]byte var off uint8 var decoded int // Decode 2 values from each decoder/loop. const bufoff = 256 / 4 -bigloop: for { - for i := range br { - br := &br[i] - if br.off < 4 { - break bigloop - } - br.fillFast() + if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 { + break } { const stream = 0 - val := br[stream].peekBitsFast(s.actualTableLog) + const stream2 = 1 + br[stream].fillFast() + br[stream2].fillFast() + + val := br[stream].peekBitsFast(d.actualTableLog) v := single[val&tlMask] - br[stream].bitsRead += uint8(v.entry) + br[stream].advance(uint8(v.entry)) + buf[off+bufoff*stream] = uint8(v.entry >> 8) - val2 := br[stream].peekBitsFast(s.actualTableLog) + val2 := br[stream2].peekBitsFast(d.actualTableLog) v2 := single[val2&tlMask] - tmp[off+bufoff*stream+1] = uint8(v2.entry >> 8) - tmp[off+bufoff*stream] = uint8(v.entry >> 8) - br[stream].bitsRead += uint8(v2.entry) + br[stream2].advance(uint8(v2.entry)) + buf[off+bufoff*stream2] = uint8(v2.entry >> 8) + + val = br[stream].peekBitsFast(d.actualTableLog) + v = single[val&tlMask] + br[stream].advance(uint8(v.entry)) + buf[off+bufoff*stream+1] = uint8(v.entry >> 8) + + val2 = br[stream2].peekBitsFast(d.actualTableLog) + v2 = single[val2&tlMask] + br[stream2].advance(uint8(v2.entry)) + buf[off+bufoff*stream2+1] = uint8(v2.entry >> 8) } { - const stream = 1 - val := br[stream].peekBitsFast(s.actualTableLog) + const stream = 2 + const stream2 = 3 + br[stream].fillFast() + br[stream2].fillFast() + + val := br[stream].peekBitsFast(d.actualTableLog) v := single[val&tlMask] - br[stream].bitsRead += uint8(v.entry) + br[stream].advance(uint8(v.entry)) + buf[off+bufoff*stream] = uint8(v.entry >> 8) - val2 := br[stream].peekBitsFast(s.actualTableLog) + val2 := br[stream2].peekBitsFast(d.actualTableLog) v2 := single[val2&tlMask] - tmp[off+bufoff*stream+1] = uint8(v2.entry >> 8) - tmp[off+bufoff*stream] = uint8(v.entry >> 8) - br[stream].bitsRead += uint8(v2.entry) + br[stream2].advance(uint8(v2.entry)) + buf[off+bufoff*stream2] = uint8(v2.entry >> 8) + + val = br[stream].peekBitsFast(d.actualTableLog) + v = single[val&tlMask] + br[stream].advance(uint8(v.entry)) + buf[off+bufoff*stream+1] = uint8(v.entry >> 8) + + val2 = br[stream2].peekBitsFast(d.actualTableLog) + v2 = single[val2&tlMask] + br[stream2].advance(uint8(v2.entry)) + buf[off+bufoff*stream2+1] = uint8(v2.entry >> 8) + } + + off += 2 + + if off == bufoff { + if bufoff > dstEvery { + return nil, errors.New("corruption detected: stream overrun 1") + } + copy(out, buf[:bufoff]) + copy(out[dstEvery:], buf[bufoff:bufoff*2]) + copy(out[dstEvery*2:], buf[bufoff*2:bufoff*3]) + copy(out[dstEvery*3:], buf[bufoff*3:bufoff*4]) + off = 0 + out = out[bufoff:] + decoded += 256 + // There must at least be 3 buffers left. + if len(out) < dstEvery*3 { + return nil, errors.New("corruption detected: stream overrun 2") + } + } + } + if off > 0 { + ioff := int(off) + if len(out) < dstEvery*3+ioff { + return nil, errors.New("corruption detected: stream overrun 3") + } + copy(out, buf[:off]) + copy(out[dstEvery:dstEvery+ioff], buf[bufoff:bufoff*2]) + copy(out[dstEvery*2:dstEvery*2+ioff], buf[bufoff*2:bufoff*3]) + copy(out[dstEvery*3:dstEvery*3+ioff], buf[bufoff*3:bufoff*4]) + decoded += int(off) * 4 + out = out[off:] + } + + // Decode remaining. + for i := range br { + offset := dstEvery * i + br := &br[i] + bitsLeft := br.off*8 + uint(64-br.bitsRead) + for bitsLeft > 0 { + br.fill() + if false && br.bitsRead >= 32 { + if br.off >= 4 { + v := br.in[br.off-4:] + v = v[:4] + low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) + br.value = (br.value << 32) | uint64(low) + br.bitsRead -= 32 + br.off -= 4 + } else { + for br.off > 0 { + br.value = (br.value << 8) | uint64(br.in[br.off-1]) + br.bitsRead -= 8 + br.off-- + } + } + } + // end inline... + if offset >= len(out) { + return nil, errors.New("corruption detected: stream overrun 4") + } + + // Read value and increment offset. + val := br.peekBitsFast(d.actualTableLog) + v := single[val&tlMask].entry + nBits := uint8(v) + br.advance(nBits) + bitsLeft -= uint(nBits) + out[offset] = uint8(v >> 8) + offset++ + } + decoded += offset - dstEvery*i + err = br.close() + if err != nil { + return nil, err + } + } + if dstSize != decoded { + return nil, errors.New("corruption detected: short output block") + } + return dst, nil +} + +// Decompress4X will decompress a 4X encoded stream. +// The length of the supplied input must match the end of a block exactly. +// The *capacity* of the dst slice must match the destination size of +// the uncompressed data exactly. +func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) { + if d.actualTableLog == 8 { + return d.decompress4X8bitExactly(dst, src) + } + + var br [4]bitReaderBytes + start := 6 + for i := 0; i < 3; i++ { + length := int(src[i*2]) | (int(src[i*2+1]) << 8) + if start+length >= len(src) { + return nil, errors.New("truncated input (or invalid offset)") + } + err := br[i].init(src[start : start+length]) + if err != nil { + return nil, err + } + start += length + } + err := br[3].init(src[start:]) + if err != nil { + return nil, err + } + + // destination, offset to match first output + dstSize := cap(dst) + dst = dst[:dstSize] + out := dst + dstEvery := (dstSize + 3) / 4 + + shift := (8 - d.actualTableLog) & 7 + + const tlSize = 1 << 8 + const tlMask = tlSize - 1 + single := d.dt.single[:tlSize] + + // Use temp table to avoid bound checks/append penalty. + var buf [256]byte + var off uint8 + var decoded int + + // Decode 4 values from each decoder/loop. + const bufoff = 256 / 4 + for { + if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 { + break + } + + { + // Interleave 2 decodes. + const stream = 0 + const stream2 = 1 + br[stream].fillFast() + br[stream2].fillFast() + + v := single[br[stream].peekByteFast()>>shift].entry + buf[off+bufoff*stream] = uint8(v >> 8) + br[stream].advance(uint8(v)) + + v2 := single[br[stream2].peekByteFast()>>shift].entry + buf[off+bufoff*stream2] = uint8(v2 >> 8) + br[stream2].advance(uint8(v2)) + + v = single[br[stream].peekByteFast()>>shift].entry + buf[off+bufoff*stream+1] = uint8(v >> 8) + br[stream].advance(uint8(v)) + + v2 = single[br[stream2].peekByteFast()>>shift].entry + buf[off+bufoff*stream2+1] = uint8(v2 >> 8) + br[stream2].advance(uint8(v2)) + + v = single[br[stream].peekByteFast()>>shift].entry + buf[off+bufoff*stream+2] = uint8(v >> 8) + br[stream].advance(uint8(v)) + + v2 = single[br[stream2].peekByteFast()>>shift].entry + buf[off+bufoff*stream2+2] = uint8(v2 >> 8) + br[stream2].advance(uint8(v2)) + + v = single[br[stream].peekByteFast()>>shift].entry + buf[off+bufoff*stream+3] = uint8(v >> 8) + br[stream].advance(uint8(v)) + + v2 = single[br[stream2].peekByteFast()>>shift].entry + buf[off+bufoff*stream2+3] = uint8(v2 >> 8) + br[stream2].advance(uint8(v2)) } { const stream = 2 - val := br[stream].peekBitsFast(s.actualTableLog) - v := single[val&tlMask] - br[stream].bitsRead += uint8(v.entry) + const stream2 = 3 + br[stream].fillFast() + br[stream2].fillFast() - val2 := br[stream].peekBitsFast(s.actualTableLog) - v2 := single[val2&tlMask] - tmp[off+bufoff*stream+1] = uint8(v2.entry >> 8) - tmp[off+bufoff*stream] = uint8(v.entry >> 8) - br[stream].bitsRead += uint8(v2.entry) + v := single[br[stream].peekByteFast()>>shift].entry + buf[off+bufoff*stream] = uint8(v >> 8) + br[stream].advance(uint8(v)) + + v2 := single[br[stream2].peekByteFast()>>shift].entry + buf[off+bufoff*stream2] = uint8(v2 >> 8) + br[stream2].advance(uint8(v2)) + + v = single[br[stream].peekByteFast()>>shift].entry + buf[off+bufoff*stream+1] = uint8(v >> 8) + br[stream].advance(uint8(v)) + + v2 = single[br[stream2].peekByteFast()>>shift].entry + buf[off+bufoff*stream2+1] = uint8(v2 >> 8) + br[stream2].advance(uint8(v2)) + + v = single[br[stream].peekByteFast()>>shift].entry + buf[off+bufoff*stream+2] = uint8(v >> 8) + br[stream].advance(uint8(v)) + + v2 = single[br[stream2].peekByteFast()>>shift].entry + buf[off+bufoff*stream2+2] = uint8(v2 >> 8) + br[stream2].advance(uint8(v2)) + + v = single[br[stream].peekByteFast()>>shift].entry + buf[off+bufoff*stream+3] = uint8(v >> 8) + br[stream].advance(uint8(v)) + + v2 = single[br[stream2].peekByteFast()>>shift].entry + buf[off+bufoff*stream2+3] = uint8(v2 >> 8) + br[stream2].advance(uint8(v2)) + } + + off += 4 + + if off == bufoff { + if bufoff > dstEvery { + return nil, errors.New("corruption detected: stream overrun 1") + } + copy(out, buf[:bufoff]) + copy(out[dstEvery:], buf[bufoff:bufoff*2]) + copy(out[dstEvery*2:], buf[bufoff*2:bufoff*3]) + copy(out[dstEvery*3:], buf[bufoff*3:bufoff*4]) + off = 0 + out = out[bufoff:] + decoded += 256 + // There must at least be 3 buffers left. + if len(out) < dstEvery*3 { + return nil, errors.New("corruption detected: stream overrun 2") + } + } + } + if off > 0 { + ioff := int(off) + if len(out) < dstEvery*3+ioff { + return nil, errors.New("corruption detected: stream overrun 3") + } + copy(out, buf[:off]) + copy(out[dstEvery:dstEvery+ioff], buf[bufoff:bufoff*2]) + copy(out[dstEvery*2:dstEvery*2+ioff], buf[bufoff*2:bufoff*3]) + copy(out[dstEvery*3:dstEvery*3+ioff], buf[bufoff*3:bufoff*4]) + decoded += int(off) * 4 + out = out[off:] + } + + // Decode remaining. + for i := range br { + offset := dstEvery * i + br := &br[i] + bitsLeft := int(br.off*8) + int(64-br.bitsRead) + for bitsLeft > 0 { + if br.finished() { + return nil, io.ErrUnexpectedEOF + } + if br.bitsRead >= 56 { + if br.off >= 4 { + v := br.in[br.off-4:] + v = v[:4] + low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) + br.value |= uint64(low) << (br.bitsRead - 32) + br.bitsRead -= 32 + br.off -= 4 + } else { + for br.off > 0 { + br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8) + br.bitsRead -= 8 + br.off-- + } + } + } + // end inline... + if offset >= len(out) { + return nil, errors.New("corruption detected: stream overrun 4") + } + + // Read value and increment offset. + v := single[br.peekByteFast()>>shift].entry + nBits := uint8(v) + br.advance(nBits) + bitsLeft -= int(nBits) + out[offset] = uint8(v >> 8) + offset++ + } + decoded += offset - dstEvery*i + err = br.close() + if err != nil { + return nil, err + } + } + if dstSize != decoded { + return nil, errors.New("corruption detected: short output block") + } + return dst, nil +} + +// Decompress4X will decompress a 4X encoded stream. +// The length of the supplied input must match the end of a block exactly. +// The *capacity* of the dst slice must match the destination size of +// the uncompressed data exactly. +func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) { + var br [4]bitReaderBytes + start := 6 + for i := 0; i < 3; i++ { + length := int(src[i*2]) | (int(src[i*2+1]) << 8) + if start+length >= len(src) { + return nil, errors.New("truncated input (or invalid offset)") + } + err := br[i].init(src[start : start+length]) + if err != nil { + return nil, err + } + start += length + } + err := br[3].init(src[start:]) + if err != nil { + return nil, err + } + + // destination, offset to match first output + dstSize := cap(dst) + dst = dst[:dstSize] + out := dst + dstEvery := (dstSize + 3) / 4 + + const shift = 0 + const tlSize = 1 << 8 + const tlMask = tlSize - 1 + single := d.dt.single[:tlSize] + + // Use temp table to avoid bound checks/append penalty. + var buf [256]byte + var off uint8 + var decoded int + + // Decode 4 values from each decoder/loop. + const bufoff = 256 / 4 + for { + if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 { + break } { - const stream = 3 - val := br[stream].peekBitsFast(s.actualTableLog) - v := single[val&tlMask] - br[stream].bitsRead += uint8(v.entry) + // Interleave 2 decodes. + const stream = 0 + const stream2 = 1 + br[stream].fillFast() + br[stream2].fillFast() - val2 := br[stream].peekBitsFast(s.actualTableLog) - v2 := single[val2&tlMask] - tmp[off+bufoff*stream+1] = uint8(v2.entry >> 8) - tmp[off+bufoff*stream] = uint8(v.entry >> 8) - br[stream].bitsRead += uint8(v2.entry) + v := single[br[stream].peekByteFast()>>shift].entry + buf[off+bufoff*stream] = uint8(v >> 8) + br[stream].advance(uint8(v)) + + v2 := single[br[stream2].peekByteFast()>>shift].entry + buf[off+bufoff*stream2] = uint8(v2 >> 8) + br[stream2].advance(uint8(v2)) + + v = single[br[stream].peekByteFast()>>shift].entry + buf[off+bufoff*stream+1] = uint8(v >> 8) + br[stream].advance(uint8(v)) + + v2 = single[br[stream2].peekByteFast()>>shift].entry + buf[off+bufoff*stream2+1] = uint8(v2 >> 8) + br[stream2].advance(uint8(v2)) + + v = single[br[stream].peekByteFast()>>shift].entry + buf[off+bufoff*stream+2] = uint8(v >> 8) + br[stream].advance(uint8(v)) + + v2 = single[br[stream2].peekByteFast()>>shift].entry + buf[off+bufoff*stream2+2] = uint8(v2 >> 8) + br[stream2].advance(uint8(v2)) + + v = single[br[stream].peekByteFast()>>shift].entry + buf[off+bufoff*stream+3] = uint8(v >> 8) + br[stream].advance(uint8(v)) + + v2 = single[br[stream2].peekByteFast()>>shift].entry + buf[off+bufoff*stream2+3] = uint8(v2 >> 8) + br[stream2].advance(uint8(v2)) } - off += 2 + { + const stream = 2 + const stream2 = 3 + br[stream].fillFast() + br[stream2].fillFast() + + v := single[br[stream].peekByteFast()>>shift].entry + buf[off+bufoff*stream] = uint8(v >> 8) + br[stream].advance(uint8(v)) + + v2 := single[br[stream2].peekByteFast()>>shift].entry + buf[off+bufoff*stream2] = uint8(v2 >> 8) + br[stream2].advance(uint8(v2)) + + v = single[br[stream].peekByteFast()>>shift].entry + buf[off+bufoff*stream+1] = uint8(v >> 8) + br[stream].advance(uint8(v)) + + v2 = single[br[stream2].peekByteFast()>>shift].entry + buf[off+bufoff*stream2+1] = uint8(v2 >> 8) + br[stream2].advance(uint8(v2)) + + v = single[br[stream].peekByteFast()>>shift].entry + buf[off+bufoff*stream+2] = uint8(v >> 8) + br[stream].advance(uint8(v)) + + v2 = single[br[stream2].peekByteFast()>>shift].entry + buf[off+bufoff*stream2+2] = uint8(v2 >> 8) + br[stream2].advance(uint8(v2)) + + v = single[br[stream].peekByteFast()>>shift].entry + buf[off+bufoff*stream+3] = uint8(v >> 8) + br[stream].advance(uint8(v)) + + v2 = single[br[stream2].peekByteFast()>>shift].entry + buf[off+bufoff*stream2+3] = uint8(v2 >> 8) + br[stream2].advance(uint8(v2)) + } + + off += 4 if off == bufoff { if bufoff > dstEvery { return nil, errors.New("corruption detected: stream overrun 1") } - copy(dstOut, tmp[:bufoff]) - copy(dstOut[dstEvery:], tmp[bufoff:bufoff*2]) - copy(dstOut[dstEvery*2:], tmp[bufoff*2:bufoff*3]) - copy(dstOut[dstEvery*3:], tmp[bufoff*3:bufoff*4]) + copy(out, buf[:bufoff]) + copy(out[dstEvery:], buf[bufoff:bufoff*2]) + copy(out[dstEvery*2:], buf[bufoff*2:bufoff*3]) + copy(out[dstEvery*3:], buf[bufoff*3:bufoff*4]) off = 0 - dstOut = dstOut[bufoff:] + out = out[bufoff:] decoded += 256 // There must at least be 3 buffers left. - if len(dstOut) < dstEvery*3 { + if len(out) < dstEvery*3 { return nil, errors.New("corruption detected: stream overrun 2") } } } if off > 0 { ioff := int(off) - if len(dstOut) < dstEvery*3+ioff { + if len(out) < dstEvery*3+ioff { return nil, errors.New("corruption detected: stream overrun 3") } - copy(dstOut, tmp[:off]) - copy(dstOut[dstEvery:dstEvery+ioff], tmp[bufoff:bufoff*2]) - copy(dstOut[dstEvery*2:dstEvery*2+ioff], tmp[bufoff*2:bufoff*3]) - copy(dstOut[dstEvery*3:dstEvery*3+ioff], tmp[bufoff*3:bufoff*4]) + copy(out, buf[:off]) + copy(out[dstEvery:dstEvery+ioff], buf[bufoff:bufoff*2]) + copy(out[dstEvery*2:dstEvery*2+ioff], buf[bufoff*2:bufoff*3]) + copy(out[dstEvery*3:dstEvery*3+ioff], buf[bufoff*3:bufoff*4]) decoded += int(off) * 4 - dstOut = dstOut[off:] + out = out[off:] } // Decode remaining. for i := range br { offset := dstEvery * i br := &br[i] - for !br.finished() { - br.fill() - if offset >= len(dstOut) { + bitsLeft := int(br.off*8) + int(64-br.bitsRead) + for bitsLeft > 0 { + if br.finished() { + return nil, io.ErrUnexpectedEOF + } + if br.bitsRead >= 56 { + if br.off >= 4 { + v := br.in[br.off-4:] + v = v[:4] + low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) + br.value |= uint64(low) << (br.bitsRead - 32) + br.bitsRead -= 32 + br.off -= 4 + } else { + for br.off > 0 { + br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8) + br.bitsRead -= 8 + br.off-- + } + } + } + // end inline... + if offset >= len(out) { return nil, errors.New("corruption detected: stream overrun 4") } - dstOut[offset] = decode(br) + + // Read value and increment offset. + v := single[br.peekByteFast()>>shift].entry + nBits := uint8(v) + br.advance(nBits) + bitsLeft -= int(nBits) + out[offset] = uint8(v >> 8) offset++ } decoded += offset - dstEvery*i @@ -397,7 +1089,7 @@ bigloop: if dstSize != decoded { return nil, errors.New("corruption detected: short output block") } - return s.Out, nil + return dst, nil } // matches will compare a decoding table to a coding table. diff --git a/vendor/github.com/klauspost/compress/huff0/huff0.go b/vendor/github.com/klauspost/compress/huff0/huff0.go index 53249df056..5dd66854b0 100644 --- a/vendor/github.com/klauspost/compress/huff0/huff0.go +++ b/vendor/github.com/klauspost/compress/huff0/huff0.go @@ -55,6 +55,9 @@ const ( // ReusePolicyNone will disable re-use of tables. // This is slightly faster than ReusePolicyAllow but may produce larger output. ReusePolicyNone + + // ReusePolicyMust must allow reuse and produce smaller output. + ReusePolicyMust ) type Scratch struct { @@ -79,6 +82,13 @@ type Scratch struct { // Slice of the returned data. OutData []byte + // MaxDecodedSize will set the maximum allowed output size. + // This value will automatically be set to BlockSizeMax if not set. + // Decoders will return ErrMaxDecodedSizeExceeded is this limit is exceeded. + MaxDecodedSize int + + br byteReader + // MaxSymbolValue will override the maximum symbol value of the next block. MaxSymbolValue uint8 @@ -95,12 +105,6 @@ type Scratch struct { // If WantLogLess == 0 any improvement will do. WantLogLess uint8 - // MaxDecodedSize will set the maximum allowed output size. - // This value will automatically be set to BlockSizeMax if not set. - // Decoders will return ErrMaxDecodedSizeExceeded is this limit is exceeded. - MaxDecodedSize int - - br byteReader symbolLen uint16 // Length of active part of the symbol table. maxCount int // count of the most probable symbol clearCount bool // clear count diff --git a/vendor/github.com/klauspost/compress/zstd/README.md b/vendor/github.com/klauspost/compress/zstd/README.md index bc977a3023..ac3640dc90 100644 --- a/vendor/github.com/klauspost/compress/zstd/README.md +++ b/vendor/github.com/klauspost/compress/zstd/README.md @@ -5,11 +5,9 @@ It offers a very wide range of compression / speed trade-off, while being backed A high performance compression algorithm is implemented. For now focused on speed. This package provides [compression](#Compressor) to and [decompression](#Decompressor) of Zstandard content. -Note that custom dictionaries are not supported yet, so if your code relies on that, -you cannot use the package as-is. +Note that custom dictionaries are only supported for decompression. This package is pure Go and without use of "unsafe". -If a significant speedup can be achieved using "unsafe", it may be added as an option later. The `zstd` package is provided as open source software using a Go standard license. @@ -142,80 +140,96 @@ Using the Encoder for both a stream and individual blocks concurrently is safe. I have collected some speed examples to compare speed and compression against other compressors. * `file` is the input file. -* `out` is the compressor used. `zskp` is this package. `gzstd` is gzip standard library. `zstd` is the Datadog cgo library. +* `out` is the compressor used. `zskp` is this package. `zstd` is the Datadog cgo library. `gzstd/gzkp` is gzip standard and this library. * `level` is the compression level used. For `zskp` level 1 is "fastest", level 2 is "default". * `insize`/`outsize` is the input/output size. * `millis` is the number of milliseconds used for compression. * `mb/s` is megabytes (2^20 bytes) per second. ``` -The test data for the Large Text Compression Benchmark is the first -10^9 bytes of the English Wikipedia dump on Mar. 3, 2006. -http://mattmahoney.net/dc/textdata.html - -file out level insize outsize millis mb/s -enwik9 zskp 1 1000000000 343833033 5840 163.30 -enwik9 zskp 2 1000000000 317822183 8449 112.87 -enwik9 gzstd 1 1000000000 382578136 13627 69.98 -enwik9 gzstd 3 1000000000 349139651 22344 42.68 -enwik9 zstd 1 1000000000 357416379 4838 197.12 -enwik9 zstd 3 1000000000 313734522 7556 126.21 +Silesia Corpus: +http://sun.aei.polsl.pl/~sdeor/corpus/silesia.zip -GOB stream of binary data. Highly compressible. -https://files.klauspost.com/compress/gob-stream.7z +This package: +file out level insize outsize millis mb/s +silesia.tar zskp 1 211947520 73101992 643 313.87 +silesia.tar zskp 2 211947520 67504318 969 208.38 +silesia.tar zskp 3 211947520 65177448 1899 106.44 -file out level insize outsize millis mb/s -gob-stream zskp 1 1911399616 234981983 5100 357.42 -gob-stream zskp 2 1911399616 208674003 6698 272.15 -gob-stream gzstd 1 1911399616 357382641 14727 123.78 -gob-stream gzstd 3 1911399616 327835097 17005 107.19 -gob-stream zstd 1 1911399616 250787165 4075 447.22 -gob-stream zstd 3 1911399616 208191888 5511 330.77 - -Highly compressible JSON file. Similar to logs in a lot of ways. -https://files.klauspost.com/compress/adresser.001.gz - -file out level insize outsize millis mb/s -adresser.001 zskp 1 1073741824 18510122 1477 692.83 -adresser.001 zskp 2 1073741824 19831697 1705 600.59 -adresser.001 gzstd 1 1073741824 47755503 3079 332.47 -adresser.001 gzstd 3 1073741824 40052381 3051 335.63 -adresser.001 zstd 1 1073741824 16135896 994 1030.18 -adresser.001 zstd 3 1073741824 17794465 905 1131.49 +cgo zstd: +silesia.tar zstd 1 211947520 73605392 543 371.56 +silesia.tar zstd 3 211947520 66793289 864 233.68 +silesia.tar zstd 6 211947520 62916450 1913 105.66 -VM Image, Linux mint with a few installed applications: -https://files.klauspost.com/compress/rawstudio-mint14.7z +gzip, stdlib/this package: +silesia.tar gzstd 1 211947520 80007735 1654 122.21 +silesia.tar gzkp 1 211947520 80369488 1168 173.06 -file out level insize outsize millis mb/s -rawstudio-mint14.tar zskp 1 8558382592 3648168838 33398 244.38 -rawstudio-mint14.tar zskp 2 8558382592 3376721436 50962 160.16 -rawstudio-mint14.tar gzstd 1 8558382592 3926257486 84712 96.35 -rawstudio-mint14.tar gzstd 3 8558382592 3740711978 176344 46.28 -rawstudio-mint14.tar zstd 1 8558382592 3607859742 27903 292.51 -rawstudio-mint14.tar zstd 3 8558382592 3341710879 46700 174.77 +GOB stream of binary data. Highly compressible. +https://files.klauspost.com/compress/gob-stream.7z +file out level insize outsize millis mb/s +gob-stream zskp 1 1911399616 235022249 3088 590.30 +gob-stream zskp 2 1911399616 205669791 3786 481.34 +gob-stream zskp 3 1911399616 185792019 9324 195.48 +gob-stream zstd 1 1911399616 249810424 2637 691.26 +gob-stream zstd 3 1911399616 208192146 3490 522.31 +gob-stream zstd 6 1911399616 193632038 6687 272.56 +gob-stream gzstd 1 1911399616 357382641 10251 177.82 +gob-stream gzkp 1 1911399616 362156523 5695 320.08 -The test data is designed to test archivers in realistic backup scenarios. -http://mattmahoney.net/dc/10gb.html +The test data for the Large Text Compression Benchmark is the first +10^9 bytes of the English Wikipedia dump on Mar. 3, 2006. +http://mattmahoney.net/dc/textdata.html -file out level insize outsize millis mb/s -10gb.tar zskp 1 10065157632 4883149814 45715 209.97 -10gb.tar zskp 2 10065157632 4638110010 60970 157.44 -10gb.tar gzstd 1 10065157632 5198296126 97769 98.18 -10gb.tar gzstd 3 10065157632 4932665487 313427 30.63 -10gb.tar zstd 1 10065157632 4940796535 40391 237.65 -10gb.tar zstd 3 10065157632 4638618579 52911 181.42 +file out level insize outsize millis mb/s +enwik9 zskp 1 1000000000 343848582 3609 264.18 +enwik9 zskp 2 1000000000 317276632 5746 165.97 +enwik9 zskp 3 1000000000 294540704 11725 81.34 +enwik9 zstd 1 1000000000 358072021 3110 306.65 +enwik9 zstd 3 1000000000 313734672 4784 199.35 +enwik9 zstd 6 1000000000 295138875 10290 92.68 +enwik9 gzstd 1 1000000000 382578136 9604 99.30 +enwik9 gzkp 1 1000000000 383825945 6544 145.73 + +Highly compressible JSON file. +https://files.klauspost.com/compress/github-june-2days-2019.json.zst + +file out level insize outsize millis mb/s +github-june-2days-2019.json zskp 1 6273951764 699045015 10620 563.40 +github-june-2days-2019.json zskp 2 6273951764 617881763 11687 511.96 +github-june-2days-2019.json zskp 3 6273951764 537511906 29252 204.54 +github-june-2days-2019.json zstd 1 6273951764 766284037 8450 708.00 +github-june-2days-2019.json zstd 3 6273951764 661889476 10927 547.57 +github-june-2days-2019.json zstd 6 6273951764 642756859 22996 260.18 +github-june-2days-2019.json gzstd 1 6273951764 1164400847 29948 199.79 +github-june-2days-2019.json gzkp 1 6273951764 1128755542 19236 311.03 -Silesia Corpus: -http://sun.aei.polsl.pl/~sdeor/corpus/silesia.zip +VM Image, Linux mint with a few installed applications: +https://files.klauspost.com/compress/rawstudio-mint14.7z -file out level insize outsize millis mb/s -silesia.tar zskp 1 211947520 73025800 1108 182.26 -silesia.tar zskp 2 211947520 67674684 1599 126.41 -silesia.tar gzstd 1 211947520 80007735 2515 80.37 -silesia.tar gzstd 3 211947520 73133380 4259 47.45 -silesia.tar zstd 1 211947520 73513991 933 216.64 -silesia.tar zstd 3 211947520 66793301 1377 146.79 +file out level insize outsize millis mb/s +rawstudio-mint14.tar zskp 1 8558382592 3667489370 20210 403.84 +rawstudio-mint14.tar zskp 2 8558382592 3364592300 31873 256.07 +rawstudio-mint14.tar zskp 3 8558382592 3224594213 71751 113.75 +rawstudio-mint14.tar zstd 1 8558382592 3609250104 17136 476.27 +rawstudio-mint14.tar zstd 3 8558382592 3341679997 29262 278.92 +rawstudio-mint14.tar zstd 6 8558382592 3235846406 77904 104.77 +rawstudio-mint14.tar gzstd 1 8558382592 3926257486 57722 141.40 +rawstudio-mint14.tar gzkp 1 8558382592 3970463184 41749 195.49 + +CSV data: +https://files.klauspost.com/compress/nyc-taxi-data-10M.csv.zst + +file out level insize outsize millis mb/s +nyc-taxi-data-10M.csv zskp 1 3325605752 641339945 8925 355.35 +nyc-taxi-data-10M.csv zskp 2 3325605752 591748091 11268 281.44 +nyc-taxi-data-10M.csv zskp 3 3325605752 538490114 19880 159.53 +nyc-taxi-data-10M.csv zstd 1 3325605752 687399637 8233 385.18 +nyc-taxi-data-10M.csv zstd 3 3325605752 598514411 10065 315.07 +nyc-taxi-data-10M.csv zstd 6 3325605752 570522953 20038 158.27 +nyc-taxi-data-10M.csv gzstd 1 3325605752 928656485 23876 132.83 +nyc-taxi-data-10M.csv gzkp 1 3325605752 924718719 16388 193.53 ``` ### Converters @@ -309,6 +323,20 @@ The decoder can be used for *concurrent* decompression of multiple buffers. It will only allow a certain number of concurrent operations to run. To tweak that yourself use the `WithDecoderConcurrency(n)` option when creating the decoder. +### Dictionaries + +Data compressed with [dictionaries](https://github.com/facebook/zstd#the-case-for-small-data-compression) can be decompressed. + +Dictionaries are added individually to Decoders. +Dictionaries are generated by the `zstd --train` command and contains an initial state for the decoder. +To add a dictionary use the `WithDecoderDicts(dicts ...[]byte)` option with the dictionary data. +Several dictionaries can be added at once. + +The dictionary will be used automatically for the data that specifies them. +A re-used Decoder will still contain the dictionaries registered. + +When registering multiple dictionaries with the same ID, the last one will be used. + ### Allocation-less operation The decoder has been designed to operate without allocations after a warmup. @@ -350,36 +378,42 @@ These are some examples of performance compared to [datadog cgo library](https:/ The first two are streaming decodes and the last are smaller inputs. ``` -BenchmarkDecoderSilesia-8 20 642550210 ns/op 329.85 MB/s 3101 B/op 8 allocs/op -BenchmarkDecoderSilesiaCgo-8 100 384930000 ns/op 550.61 MB/s 451878 B/op 9713 allocs/op - -BenchmarkDecoderEnwik9-2 10 3146000080 ns/op 317.86 MB/s 2649 B/op 9 allocs/op -BenchmarkDecoderEnwik9Cgo-2 20 1905900000 ns/op 524.69 MB/s 1125120 B/op 45785 allocs/op - -BenchmarkDecoder_DecodeAll/z000000.zst-8 200 7049994 ns/op 138.26 MB/s 40 B/op 2 allocs/op -BenchmarkDecoder_DecodeAll/z000001.zst-8 100000 19560 ns/op 97.49 MB/s 40 B/op 2 allocs/op -BenchmarkDecoder_DecodeAll/z000002.zst-8 5000 297599 ns/op 236.99 MB/s 40 B/op 2 allocs/op -BenchmarkDecoder_DecodeAll/z000003.zst-8 2000 725502 ns/op 141.17 MB/s 40 B/op 2 allocs/op -BenchmarkDecoder_DecodeAll/z000004.zst-8 200000 9314 ns/op 54.54 MB/s 40 B/op 2 allocs/op -BenchmarkDecoder_DecodeAll/z000005.zst-8 10000 137500 ns/op 104.72 MB/s 40 B/op 2 allocs/op -BenchmarkDecoder_DecodeAll/z000006.zst-8 500 2316009 ns/op 206.06 MB/s 40 B/op 2 allocs/op -BenchmarkDecoder_DecodeAll/z000007.zst-8 20000 64499 ns/op 344.90 MB/s 40 B/op 2 allocs/op -BenchmarkDecoder_DecodeAll/z000008.zst-8 50000 24900 ns/op 219.56 MB/s 40 B/op 2 allocs/op -BenchmarkDecoder_DecodeAll/z000009.zst-8 1000 2348999 ns/op 154.01 MB/s 40 B/op 2 allocs/op - -BenchmarkDecoder_DecodeAllCgo/z000000.zst-8 500 4268005 ns/op 228.38 MB/s 1228849 B/op 3 allocs/op -BenchmarkDecoder_DecodeAllCgo/z000001.zst-8 100000 15250 ns/op 125.05 MB/s 2096 B/op 3 allocs/op -BenchmarkDecoder_DecodeAllCgo/z000002.zst-8 10000 147399 ns/op 478.49 MB/s 73776 B/op 3 allocs/op -BenchmarkDecoder_DecodeAllCgo/z000003.zst-8 5000 320798 ns/op 319.27 MB/s 139312 B/op 3 allocs/op -BenchmarkDecoder_DecodeAllCgo/z000004.zst-8 200000 10004 ns/op 50.77 MB/s 560 B/op 3 allocs/op -BenchmarkDecoder_DecodeAllCgo/z000005.zst-8 20000 73599 ns/op 195.64 MB/s 19120 B/op 3 allocs/op -BenchmarkDecoder_DecodeAllCgo/z000006.zst-8 1000 1119003 ns/op 426.48 MB/s 557104 B/op 3 allocs/op -BenchmarkDecoder_DecodeAllCgo/z000007.zst-8 20000 103450 ns/op 215.04 MB/s 71296 B/op 9 allocs/op -BenchmarkDecoder_DecodeAllCgo/z000008.zst-8 100000 20130 ns/op 271.58 MB/s 6192 B/op 3 allocs/op -BenchmarkDecoder_DecodeAllCgo/z000009.zst-8 2000 1123500 ns/op 322.00 MB/s 368688 B/op 3 allocs/op +BenchmarkDecoderSilesia-8 3 385000067 ns/op 550.51 MB/s 5498 B/op 8 allocs/op +BenchmarkDecoderSilesiaCgo-8 6 197666567 ns/op 1072.25 MB/s 270672 B/op 8 allocs/op + +BenchmarkDecoderEnwik9-8 1 2027001600 ns/op 493.34 MB/s 10496 B/op 18 allocs/op +BenchmarkDecoderEnwik9Cgo-8 2 979499200 ns/op 1020.93 MB/s 270672 B/op 8 allocs/op + +Concurrent performance: + +BenchmarkDecoder_DecodeAllParallel/kppkn.gtb.zst-16 28915 42469 ns/op 4340.07 MB/s 114 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/geo.protodata.zst-16 116505 9965 ns/op 11900.16 MB/s 16 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/plrabn12.txt.zst-16 8952 134272 ns/op 3588.70 MB/s 915 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/lcet10.txt.zst-16 11820 102538 ns/op 4161.90 MB/s 594 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/asyoulik.txt.zst-16 34782 34184 ns/op 3661.88 MB/s 60 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/alice29.txt.zst-16 27712 43447 ns/op 3500.58 MB/s 99 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/html_x_4.zst-16 62826 18750 ns/op 21845.10 MB/s 104 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/paper-100k.pdf.zst-16 631545 1794 ns/op 57078.74 MB/s 2 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/fireworks.jpeg.zst-16 1690140 712 ns/op 172938.13 MB/s 1 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/urls.10K.zst-16 10432 113593 ns/op 6180.73 MB/s 1143 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/html.zst-16 113206 10671 ns/op 9596.27 MB/s 15 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/comp-data.bin.zst-16 1530615 779 ns/op 5229.49 MB/s 0 B/op 0 allocs/op + +BenchmarkDecoder_DecodeAllParallelCgo/kppkn.gtb.zst-16 65217 16192 ns/op 11383.34 MB/s 46 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallelCgo/geo.protodata.zst-16 292671 4039 ns/op 29363.19 MB/s 6 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallelCgo/plrabn12.txt.zst-16 26314 46021 ns/op 10470.43 MB/s 293 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallelCgo/lcet10.txt.zst-16 33897 34900 ns/op 12227.96 MB/s 205 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallelCgo/asyoulik.txt.zst-16 104348 11433 ns/op 10949.01 MB/s 20 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallelCgo/alice29.txt.zst-16 75949 15510 ns/op 9805.60 MB/s 32 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallelCgo/html_x_4.zst-16 173910 6756 ns/op 60624.29 MB/s 37 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallelCgo/paper-100k.pdf.zst-16 923076 1339 ns/op 76474.87 MB/s 1 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallelCgo/fireworks.jpeg.zst-16 922920 1351 ns/op 91102.57 MB/s 2 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallelCgo/urls.10K.zst-16 27649 43618 ns/op 16096.19 MB/s 407 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallelCgo/html.zst-16 279073 4160 ns/op 24614.18 MB/s 6 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallelCgo/comp-data.bin.zst-16 749938 1579 ns/op 2581.71 MB/s 0 B/op 0 allocs/op ``` -This reflects the performance around May 2019, but this may be out of date. +This reflects the performance around May 2020, but this may be out of date. # Contributions diff --git a/vendor/github.com/klauspost/compress/zstd/bitreader.go b/vendor/github.com/klauspost/compress/zstd/bitreader.go index 15d79d439f..8544585371 100644 --- a/vendor/github.com/klauspost/compress/zstd/bitreader.go +++ b/vendor/github.com/klauspost/compress/zstd/bitreader.go @@ -5,6 +5,7 @@ package zstd import ( + "encoding/binary" "errors" "io" "math/bits" @@ -34,8 +35,12 @@ func (b *bitReader) init(in []byte) error { } b.bitsRead = 64 b.value = 0 - b.fill() - b.fill() + if len(in) >= 8 { + b.fillFastStart() + } else { + b.fill() + b.fill() + } b.bitsRead += 8 - uint8(highBits(uint32(v))) return nil } @@ -63,21 +68,31 @@ func (b *bitReader) fillFast() { if b.bitsRead < 32 { return } - // Do single re-slice to avoid bounds checks. - v := b.in[b.off-4 : b.off] + // 2 bounds checks. + v := b.in[b.off-4:] + v = v[:4] low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) b.value = (b.value << 32) | uint64(low) b.bitsRead -= 32 b.off -= 4 } +// fillFastStart() assumes the bitreader is empty and there is at least 8 bytes to read. +func (b *bitReader) fillFastStart() { + // Do single re-slice to avoid bounds checks. + b.value = binary.LittleEndian.Uint64(b.in[b.off-8:]) + b.bitsRead = 0 + b.off -= 8 +} + // fill() will make sure at least 32 bits are available. func (b *bitReader) fill() { if b.bitsRead < 32 { return } if b.off >= 4 { - v := b.in[b.off-4 : b.off] + v := b.in[b.off-4:] + v = v[:4] low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) b.value = (b.value << 32) | uint64(low) b.bitsRead -= 32 diff --git a/vendor/github.com/klauspost/compress/zstd/blockdec.go b/vendor/github.com/klauspost/compress/zstd/blockdec.go index ed670bcc7a..c8ec6e3312 100644 --- a/vendor/github.com/klauspost/compress/zstd/blockdec.go +++ b/vendor/github.com/klauspost/compress/zstd/blockdec.go @@ -75,21 +75,29 @@ type blockDec struct { // Window size of the block. WindowSize uint64 - Type blockType - RLESize uint32 - // Is this the last block of a frame? - Last bool - - // Use less memory - lowMem bool history chan *history input chan struct{} result chan decodeOutput sequenceBuf []seq - tmp [4]byte err error decWG sync.WaitGroup + + // Frame to use for singlethreaded decoding. + // Should not be used by the decoder itself since parent may be another frame. + localFrame *frameDec + + // Block is RLE, this is the size. + RLESize uint32 + tmp [4]byte + + Type blockType + + // Is this the last block of a frame? + Last bool + + // Use less memory + lowMem bool } func (b *blockDec) String() string { @@ -127,25 +135,37 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error { b.Type = blockType((bh >> 1) & 3) // find size. cSize := int(bh >> 3) + maxSize := maxBlockSize switch b.Type { case blockTypeReserved: return ErrReservedBlockType case blockTypeRLE: b.RLESize = uint32(cSize) + if b.lowMem { + maxSize = cSize + } cSize = 1 case blockTypeCompressed: if debug { println("Data size on stream:", cSize) } b.RLESize = 0 + maxSize = maxCompressedBlockSize + if windowSize < maxCompressedBlockSize && b.lowMem { + maxSize = int(windowSize) + } if cSize > maxCompressedBlockSize || uint64(cSize) > b.WindowSize { if debug { printf("compressed block too big: csize:%d block: %+v\n", uint64(cSize), b) } return ErrCompressedSizeTooBig } - default: + case blockTypeRaw: b.RLESize = 0 + // We do not need a destination for raw blocks. + maxSize = -1 + default: + panic("Invalid block type") } // Read block data. @@ -156,8 +176,8 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error { b.dataStorage = make([]byte, 0, maxBlockSize) } } - if cap(b.dst) <= maxBlockSize { - b.dst = make([]byte, 0, maxBlockSize+1) + if cap(b.dst) <= maxSize { + b.dst = make([]byte, 0, maxSize+1) } var err error b.data, err = br.readBig(cSize, b.dataStorage) @@ -445,26 +465,22 @@ func (b *blockDec) decodeCompressed(hist *history) error { if huff == nil { huff = &huff0.Scratch{} } - huff.Out = b.literalBuf[:0] huff, literals, err = huff0.ReadTable(literals, huff) if err != nil { println("reading huffman table:", err) return err } // Use our out buffer. - huff.Out = b.literalBuf[:0] - huff.MaxDecodedSize = litRegenSize if fourStreams { - literals, err = huff.Decompress4X(literals, litRegenSize) + literals, err = huff.Decoder().Decompress4X(b.literalBuf[:0:litRegenSize], literals) } else { - literals, err = huff.Decompress1X(literals) + literals, err = huff.Decoder().Decompress1X(b.literalBuf[:0:litRegenSize], literals) } if err != nil { println("decoding compressed literals:", err) return err } // Make sure we don't leak our literals buffer - huff.Out = nil if len(literals) != litRegenSize { return fmt.Errorf("literal output size mismatch want %d, got %d", litRegenSize, len(literals)) } @@ -615,15 +631,12 @@ func (b *blockDec) decodeCompressed(hist *history) error { var err error // Use our out buffer. huff = hist.huffTree - huff.Out = b.literalBuf[:0] - huff.MaxDecodedSize = litRegenSize if fourStreams { - literals, err = huff.Decompress4X(literals, litRegenSize) + literals, err = huff.Decoder().Decompress4X(b.literalBuf[:0:litRegenSize], literals) } else { - literals, err = huff.Decompress1X(literals) + literals, err = huff.Decoder().Decompress1X(b.literalBuf[:0:litRegenSize], literals) } // Make sure we don't leak our literals buffer - huff.Out = nil if err != nil { println("decompressing literals:", err) return err @@ -633,12 +646,13 @@ func (b *blockDec) decodeCompressed(hist *history) error { } } else { if hist.huffTree != nil && huff != nil { - huffDecoderPool.Put(hist.huffTree) + if hist.dict == nil || hist.dict.litDec != hist.huffTree { + huffDecoderPool.Put(hist.huffTree) + } hist.huffTree = nil } } if huff != nil { - huff.Out = nil hist.huffTree = huff } if debug { @@ -671,12 +685,21 @@ func (b *blockDec) decodeCompressed(hist *history) error { // If only recent offsets were not transferred, this would be an obvious win. // Also, if first 3 sequences don't reference recent offsets, all sequences can be decoded. + hbytes := hist.b + if len(hbytes) > hist.windowSize { + hbytes = hbytes[len(hbytes)-hist.windowSize:] + // We do not need history any more. + if hist.dict != nil { + hist.dict.content = nil + } + } + if err := seqs.initialize(br, hist, literals, b.dst); err != nil { println("initializing sequences:", err) return err } - err = seqs.decode(nSeqs, br, hist.b) + err = seqs.decode(nSeqs, br, hbytes) if err != nil { return err } diff --git a/vendor/github.com/klauspost/compress/zstd/blockenc.go b/vendor/github.com/klauspost/compress/zstd/blockenc.go index 4f0eba22f0..be718afd43 100644 --- a/vendor/github.com/klauspost/compress/zstd/blockenc.go +++ b/vendor/github.com/klauspost/compress/zstd/blockenc.go @@ -295,7 +295,7 @@ func (b *blockEnc) encodeRaw(a []byte) { b.output = bh.appendTo(b.output[:0]) b.output = append(b.output, a...) if debug { - println("Adding RAW block, length", len(a)) + println("Adding RAW block, length", len(a), "last:", b.last) } } @@ -308,7 +308,7 @@ func (b *blockEnc) encodeRawTo(dst, src []byte) []byte { dst = bh.appendTo(dst) dst = append(dst, src...) if debug { - println("Adding RAW block, length", len(src)) + println("Adding RAW block, length", len(src), "last:", b.last) } return dst } @@ -322,7 +322,7 @@ func (b *blockEnc) encodeLits(raw bool) error { // Don't compress extremely small blocks if len(b.literals) < 32 || raw { if debug { - println("Adding RAW block, length", len(b.literals)) + println("Adding RAW block, length", len(b.literals), "last:", b.last) } bh.setType(blockTypeRaw) b.output = bh.appendTo(b.output) @@ -349,7 +349,7 @@ func (b *blockEnc) encodeLits(raw bool) error { switch err { case huff0.ErrIncompressible: if debug { - println("Adding RAW block, length", len(b.literals)) + println("Adding RAW block, length", len(b.literals), "last:", b.last) } bh.setType(blockTypeRaw) b.output = bh.appendTo(b.output) @@ -444,9 +444,9 @@ func fuzzFseEncoder(data []byte) int { } // encode will encode the block and append the output in b.output. -func (b *blockEnc) encode(raw bool) error { +func (b *blockEnc) encode(raw, rawAllLits bool) error { if len(b.sequences) == 0 { - return b.encodeLits(raw) + return b.encodeLits(rawAllLits) } // We want some difference if len(b.literals) > (b.size - (b.size >> 5)) { diff --git a/vendor/github.com/klauspost/compress/zstd/bytereader.go b/vendor/github.com/klauspost/compress/zstd/bytereader.go index dc4378b640..2c4fca17fa 100644 --- a/vendor/github.com/klauspost/compress/zstd/bytereader.go +++ b/vendor/github.com/klauspost/compress/zstd/bytereader.go @@ -31,7 +31,8 @@ func (b *byteReader) overread() bool { // Int32 returns a little endian int32 starting at current offset. func (b byteReader) Int32() int32 { - b2 := b.b[b.off : b.off+4 : b.off+4] + b2 := b.b[b.off:] + b2 = b2[:4] v3 := int32(b2[3]) v2 := int32(b2[2]) v1 := int32(b2[1]) @@ -55,7 +56,20 @@ func (b byteReader) Uint32() uint32 { } return v } - b2 := b.b[b.off : b.off+4 : b.off+4] + b2 := b.b[b.off:] + b2 = b2[:4] + v3 := uint32(b2[3]) + v2 := uint32(b2[2]) + v1 := uint32(b2[1]) + v0 := uint32(b2[0]) + return v0 | (v1 << 8) | (v2 << 16) | (v3 << 24) +} + +// Uint32NC returns a little endian uint32 starting at current offset. +// The caller must be sure if there are at least 4 bytes left. +func (b byteReader) Uint32NC() uint32 { + b2 := b.b[b.off:] + b2 = b2[:4] v3 := uint32(b2[3]) v2 := uint32(b2[2]) v1 := uint32(b2[1]) diff --git a/vendor/github.com/klauspost/compress/zstd/decoder.go b/vendor/github.com/klauspost/compress/zstd/decoder.go index 73ac3c630e..66b51bf2d3 100644 --- a/vendor/github.com/klauspost/compress/zstd/decoder.go +++ b/vendor/github.com/klauspost/compress/zstd/decoder.go @@ -23,17 +23,15 @@ type Decoder struct { // Unreferenced decoders, ready for use. decoders chan *blockDec - // Unreferenced decoders, ready for use. - frames chan *frameDec - // Streams ready to be decoded. stream chan decodeStream // Current read position used for Reader functionality. current decoderState - // Custom dictionaries - dicts map[uint32]struct{} + // Custom dictionaries. + // Always uses copies. + dicts map[uint32]dict // streamWg is the waitgroup for all streams streamWg sync.WaitGroup @@ -66,7 +64,7 @@ var ( // A Decoder can be used in two modes: // // 1) As a stream, or -// 2) For stateless decoding using DecodeAll or DecodeBuffer. +// 2) For stateless decoding using DecodeAll. // // Only a single stream can be decoded concurrently, but the same decoder // can run multiple concurrent stateless decodes. It is even possible to @@ -87,12 +85,19 @@ func NewReader(r io.Reader, opts ...DOption) (*Decoder, error) { d.current.output = make(chan decodeOutput, d.o.concurrent) d.current.flushed = true + // Transfer option dicts. + d.dicts = make(map[uint32]dict, len(d.o.dicts)) + for _, dc := range d.o.dicts { + d.dicts[dc.id] = dc + } + d.o.dicts = nil + // Create decoders d.decoders = make(chan *blockDec, d.o.concurrent) - d.frames = make(chan *frameDec, d.o.concurrent) for i := 0; i < d.o.concurrent; i++ { - d.frames <- newFrameDec(d.o) - d.decoders <- newBlockDec(d.o.lowMem) + dec := newBlockDec(d.o.lowMem) + dec.localFrame = newFrameDec(d.o) + d.decoders <- dec } if r == nil { @@ -169,7 +174,12 @@ func (d *Decoder) Reset(r io.Reader) error { println("*bytes.Buffer detected, doing sync decode, len:", bb.Len()) } b := bb.Bytes() - dst, err := d.DecodeAll(b, nil) + var dst []byte + if cap(d.current.b) > 0 { + dst = d.current.b + } + + dst, err := d.DecodeAll(b, dst[:0]) if err == nil { err = io.EOF } @@ -277,23 +287,31 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) { } // Grab a block decoder and frame decoder. - block, frame := <-d.decoders, <-d.frames + block := <-d.decoders + frame := block.localFrame defer func() { if debug { printf("re-adding decoder: %p", block) } - d.decoders <- block frame.rawInput = nil frame.bBuf = nil - d.frames <- frame + d.decoders <- block }() frame.bBuf = input for { + frame.history.reset() err := frame.reset(&frame.bBuf) if err == io.EOF { return dst, nil } + if frame.DictionaryID != nil { + dict, ok := d.dicts[*frame.DictionaryID] + if !ok { + return nil, ErrUnknownDictionary + } + frame.history.setDict(&dict) + } if err != nil { return dst, err } @@ -456,10 +474,19 @@ func (d *Decoder) startStreamDecoder(inStream chan decodeStream) { br := readerWrapper{r: stream.r} decodeStream: for { + frame.history.reset() err := frame.reset(&br) if debug && err != nil { println("Frame decoder returned", err) } + if err == nil && frame.DictionaryID != nil { + dict, ok := d.dicts[*frame.DictionaryID] + if !ok { + err = ErrUnknownDictionary + } else { + frame.history.setDict(&dict) + } + } if err != nil { stream.output <- decodeOutput{ err: err, diff --git a/vendor/github.com/klauspost/compress/zstd/decoder_options.go b/vendor/github.com/klauspost/compress/zstd/decoder_options.go index 2ac9cd2dd3..284d384492 100644 --- a/vendor/github.com/klauspost/compress/zstd/decoder_options.go +++ b/vendor/github.com/klauspost/compress/zstd/decoder_options.go @@ -18,6 +18,7 @@ type decoderOptions struct { lowMem bool concurrent int maxDecodedSize uint64 + dicts []dict } func (o *decoderOptions) setDefault() { @@ -66,3 +67,18 @@ func WithDecoderMaxMemory(n uint64) DOption { return nil } } + +// WithDecoderDicts allows to register one or more dictionaries for the decoder. +// If several dictionaries with the same ID is provided the last one will be used. +func WithDecoderDicts(dicts ...[]byte) DOption { + return func(o *decoderOptions) error { + for _, b := range dicts { + d, err := loadDict(b) + if err != nil { + return err + } + o.dicts = append(o.dicts, *d) + } + return nil + } +} diff --git a/vendor/github.com/klauspost/compress/zstd/dict.go b/vendor/github.com/klauspost/compress/zstd/dict.go new file mode 100644 index 0000000000..8eb6f6ba33 --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/dict.go @@ -0,0 +1,104 @@ +package zstd + +import ( + "bytes" + "encoding/binary" + "errors" + "fmt" + "io" + + "github.com/klauspost/compress/huff0" +) + +type dict struct { + id uint32 + + litDec *huff0.Scratch + llDec, ofDec, mlDec sequenceDec + offsets [3]int + content []byte +} + +var dictMagic = [4]byte{0x37, 0xa4, 0x30, 0xec} + +// Load a dictionary as described in +// https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format +func loadDict(b []byte) (*dict, error) { + // Check static field size. + if len(b) <= 8+(3*4) { + return nil, io.ErrUnexpectedEOF + } + d := dict{ + llDec: sequenceDec{fse: &fseDecoder{}}, + ofDec: sequenceDec{fse: &fseDecoder{}}, + mlDec: sequenceDec{fse: &fseDecoder{}}, + } + if !bytes.Equal(b[:4], dictMagic[:]) { + return nil, ErrMagicMismatch + } + d.id = binary.LittleEndian.Uint32(b[4:8]) + if d.id == 0 { + return nil, errors.New("dictionaries cannot have ID 0") + } + + // Read literal table + var err error + d.litDec, b, err = huff0.ReadTable(b[8:], nil) + if err != nil { + return nil, err + } + + br := byteReader{ + b: b, + off: 0, + } + readDec := func(i tableIndex, dec *fseDecoder) error { + if err := dec.readNCount(&br, uint16(maxTableSymbol[i])); err != nil { + return err + } + if br.overread() { + return io.ErrUnexpectedEOF + } + err = dec.transform(symbolTableX[i]) + if err != nil { + println("Transform table error:", err) + return err + } + if debug { + println("Read table ok", "symbolLen:", dec.symbolLen) + } + // Set decoders as predefined so they aren't reused. + dec.preDefined = true + return nil + } + + if err := readDec(tableOffsets, d.ofDec.fse); err != nil { + return nil, err + } + if err := readDec(tableMatchLengths, d.mlDec.fse); err != nil { + return nil, err + } + if err := readDec(tableLiteralLengths, d.llDec.fse); err != nil { + return nil, err + } + if br.remain() < 12 { + return nil, io.ErrUnexpectedEOF + } + + d.offsets[0] = int(br.Uint32()) + br.advance(4) + d.offsets[1] = int(br.Uint32()) + br.advance(4) + d.offsets[2] = int(br.Uint32()) + br.advance(4) + if d.offsets[0] <= 0 || d.offsets[1] <= 0 || d.offsets[2] <= 0 { + return nil, errors.New("invalid offset in dictionary") + } + d.content = make([]byte, br.remain()) + copy(d.content, br.unread()) + if d.offsets[0] > len(d.content) || d.offsets[1] > len(d.content) || d.offsets[2] > len(d.content) { + return nil, fmt.Errorf("initial offset bigger than dictionary content size %d, offsets: %v", len(d.content), d.offsets) + } + + return &d, nil +} diff --git a/vendor/github.com/klauspost/compress/zstd/enc_better.go b/vendor/github.com/klauspost/compress/zstd/enc_better.go new file mode 100644 index 0000000000..c120d90548 --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/enc_better.go @@ -0,0 +1,518 @@ +// Copyright 2019+ Klaus Post. All rights reserved. +// License information can be found in the LICENSE file. +// Based on work by Yann Collet, released under BSD License. + +package zstd + +import "fmt" + +const ( + betterLongTableBits = 19 // Bits used in the long match table + betterLongTableSize = 1 << betterLongTableBits // Size of the table + + // Note: Increasing the short table bits or making the hash shorter + // can actually lead to compression degradation since it will 'steal' more from the + // long match table and match offsets are quite big. + // This greatly depends on the type of input. + betterShortTableBits = 13 // Bits used in the short match table + betterShortTableSize = 1 << betterShortTableBits // Size of the table +) + +type prevEntry struct { + offset int32 + prev int32 +} + +// betterFastEncoder uses 2 tables, one for short matches (5 bytes) and one for long matches. +// The long match table contains the previous entry with the same hash, +// effectively making it a "chain" of length 2. +// When we find a long match we choose between the two values and select the longest. +// When we find a short match, after checking the long, we check if we can find a long at n+1 +// and that it is longer (lazy matching). +type betterFastEncoder struct { + fastBase + table [betterShortTableSize]tableEntry + longTable [betterLongTableSize]prevEntry +} + +// Encode improves compression... +func (e *betterFastEncoder) Encode(blk *blockEnc, src []byte) { + const ( + // Input margin is the number of bytes we read (8) + // and the maximum we will read ahead (2) + inputMargin = 8 + 2 + minNonLiteralBlockSize = 16 + ) + + // Protect against e.cur wraparound. + for e.cur >= bufferReset { + if len(e.hist) == 0 { + for i := range e.table[:] { + e.table[i] = tableEntry{} + } + for i := range e.longTable[:] { + e.longTable[i] = prevEntry{} + } + e.cur = e.maxMatchOff + break + } + // Shift down everything in the table that isn't already too far away. + minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff + for i := range e.table[:] { + v := e.table[i].offset + if v < minOff { + v = 0 + } else { + v = v - e.cur + e.maxMatchOff + } + e.table[i].offset = v + } + for i := range e.longTable[:] { + v := e.longTable[i].offset + v2 := e.longTable[i].prev + if v < minOff { + v = 0 + v2 = 0 + } else { + v = v - e.cur + e.maxMatchOff + if v2 < minOff { + v2 = 0 + } else { + v2 = v2 - e.cur + e.maxMatchOff + } + } + e.longTable[i] = prevEntry{ + offset: v, + prev: v2, + } + } + e.cur = e.maxMatchOff + break + } + + s := e.addBlock(src) + blk.size = len(src) + if len(src) < minNonLiteralBlockSize { + blk.extraLits = len(src) + blk.literals = blk.literals[:len(src)] + copy(blk.literals, src) + return + } + + // Override src + src = e.hist + sLimit := int32(len(src)) - inputMargin + // stepSize is the number of bytes to skip on every main loop iteration. + // It should be >= 1. + const stepSize = 1 + + const kSearchStrength = 9 + + // nextEmit is where in src the next emitLiteral should start from. + nextEmit := s + cv := load6432(src, s) + + // Relative offsets + offset1 := int32(blk.recentOffsets[0]) + offset2 := int32(blk.recentOffsets[1]) + + addLiterals := func(s *seq, until int32) { + if until == nextEmit { + return + } + blk.literals = append(blk.literals, src[nextEmit:until]...) + s.litLen = uint32(until - nextEmit) + } + if debug { + println("recent offsets:", blk.recentOffsets) + } + +encodeLoop: + for { + var t int32 + // We allow the encoder to optionally turn off repeat offsets across blocks + canRepeat := len(blk.sequences) > 2 + var matched int32 + + for { + if debugAsserts && canRepeat && offset1 == 0 { + panic("offset0 was 0") + } + + nextHashS := hash5(cv, betterShortTableBits) + nextHashL := hash8(cv, betterLongTableBits) + candidateL := e.longTable[nextHashL] + candidateS := e.table[nextHashS] + + const repOff = 1 + repIndex := s - offset1 + repOff + off := s + e.cur + e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset} + e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)} + + if canRepeat { + if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) { + // Consider history as well. + var seq seq + lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src) + + seq.matchLen = uint32(lenght - zstdMinMatch) + + // We might be able to match backwards. + // Extend as long as we can. + start := s + repOff + // We end the search early, so we don't risk 0 literals + // and have to do special offset treatment. + startLimit := nextEmit + 1 + + tMin := s - e.maxMatchOff + if tMin < 0 { + tMin = 0 + } + for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 { + repIndex-- + start-- + seq.matchLen++ + } + addLiterals(&seq, start) + + // rep 0 + seq.offset = 1 + if debugSequences { + println("repeat sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + + // Index match start+1 (long) -> s - 1 + index0 := s + repOff + s += lenght + repOff + + nextEmit = s + if s >= sLimit { + if debug { + println("repeat ended", s, lenght) + + } + break encodeLoop + } + // Index skipped... + for index0 < s-1 { + cv0 := load6432(src, index0) + cv1 := cv0 >> 8 + h0 := hash8(cv0, betterLongTableBits) + off := index0 + e.cur + e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} + e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)} + index0 += 2 + } + cv = load6432(src, s) + continue + } + const repOff2 = 1 + + // We deviate from the reference encoder and also check offset 2. + // Still slower and not much better, so disabled. + // repIndex = s - offset2 + repOff2 + if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) { + // Consider history as well. + var seq seq + lenght := 8 + e.matchlen(s+8+repOff2, repIndex+8, src) + + seq.matchLen = uint32(lenght - zstdMinMatch) + + // We might be able to match backwards. + // Extend as long as we can. + start := s + repOff2 + // We end the search early, so we don't risk 0 literals + // and have to do special offset treatment. + startLimit := nextEmit + 1 + + tMin := s - e.maxMatchOff + if tMin < 0 { + tMin = 0 + } + for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 { + repIndex-- + start-- + seq.matchLen++ + } + addLiterals(&seq, start) + + // rep 2 + seq.offset = 2 + if debugSequences { + println("repeat sequence 2", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + + index0 := s + repOff2 + s += lenght + repOff2 + nextEmit = s + if s >= sLimit { + if debug { + println("repeat ended", s, lenght) + + } + break encodeLoop + } + + // Index skipped... + for index0 < s-1 { + cv0 := load6432(src, index0) + cv1 := cv0 >> 8 + h0 := hash8(cv0, betterLongTableBits) + off := index0 + e.cur + e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} + e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)} + index0 += 2 + } + cv = load6432(src, s) + // Swap offsets + offset1, offset2 = offset2, offset1 + continue + } + } + // Find the offsets of our two matches. + coffsetL := candidateL.offset - e.cur + coffsetLP := candidateL.prev - e.cur + + // Check if we have a long match. + if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) { + // Found a long match, at least 8 bytes. + matched = e.matchlen(s+8, coffsetL+8, src) + 8 + t = coffsetL + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + if debugAsserts && s-t > e.maxMatchOff { + panic("s - t >e.maxMatchOff") + } + if debugMatches { + println("long match") + } + + if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) { + // Found a long match, at least 8 bytes. + prevMatch := e.matchlen(s+8, coffsetLP+8, src) + 8 + if prevMatch > matched { + matched = prevMatch + t = coffsetLP + } + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + if debugAsserts && s-t > e.maxMatchOff { + panic("s - t >e.maxMatchOff") + } + if debugMatches { + println("long match") + } + } + break + } + + // Check if we have a long match on prev. + if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) { + // Found a long match, at least 8 bytes. + matched = e.matchlen(s+8, coffsetLP+8, src) + 8 + t = coffsetLP + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + if debugAsserts && s-t > e.maxMatchOff { + panic("s - t >e.maxMatchOff") + } + if debugMatches { + println("long match") + } + break + } + + coffsetS := candidateS.offset - e.cur + + // Check if we have a short match. + if s-coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val { + // found a regular match + matched = e.matchlen(s+4, coffsetS+4, src) + 4 + + // See if we can find a long match at s+1 + const checkAt = 1 + cv := load6432(src, s+checkAt) + nextHashL = hash8(cv, betterLongTableBits) + candidateL = e.longTable[nextHashL] + coffsetL = candidateL.offset - e.cur + + // We can store it, since we have at least a 4 byte match. + e.longTable[nextHashL] = prevEntry{offset: s + checkAt + e.cur, prev: candidateL.offset} + if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) { + // Found a long match, at least 8 bytes. + matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8 + if matchedNext > matched { + t = coffsetL + s += checkAt + matched = matchedNext + if debugMatches { + println("long match (after short)") + } + break + } + } + + // Check prev long... + coffsetL = candidateL.prev - e.cur + if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) { + // Found a long match, at least 8 bytes. + matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8 + if matchedNext > matched { + t = coffsetL + s += checkAt + matched = matchedNext + if debugMatches { + println("prev long match (after short)") + } + break + } + } + t = coffsetS + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + if debugAsserts && s-t > e.maxMatchOff { + panic("s - t >e.maxMatchOff") + } + if debugAsserts && t < 0 { + panic("t<0") + } + if debugMatches { + println("short match") + } + break + } + + // No match found, move forward in input. + s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1)) + if s >= sLimit { + break encodeLoop + } + cv = load6432(src, s) + } + + // A 4-byte match has been found. Update recent offsets. + // We'll later see if more than 4 bytes. + offset2 = offset1 + offset1 = s - t + + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + + if debugAsserts && canRepeat && int(offset1) > len(src) { + panic("invalid offset") + } + + // Extend the n-byte match as long as possible. + l := matched + + // Extend backwards + tMin := s - e.maxMatchOff + if tMin < 0 { + tMin = 0 + } + for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength { + s-- + t-- + l++ + } + + // Write our sequence + var seq seq + seq.litLen = uint32(s - nextEmit) + seq.matchLen = uint32(l - zstdMinMatch) + if seq.litLen > 0 { + blk.literals = append(blk.literals, src[nextEmit:s]...) + } + seq.offset = uint32(s-t) + 3 + s += l + if debugSequences { + println("sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + nextEmit = s + if s >= sLimit { + break encodeLoop + } + + // Index match start+1 (long) -> s - 1 + index0 := s - l + 1 + for index0 < s-1 { + cv0 := load6432(src, index0) + cv1 := cv0 >> 8 + h0 := hash8(cv0, betterLongTableBits) + off := index0 + e.cur + e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} + e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)} + index0 += 2 + } + + cv = load6432(src, s) + if !canRepeat { + continue + } + + // Check offset 2 + for { + o2 := s - offset2 + if load3232(src, o2) != uint32(cv) { + // Do regular search + break + } + + // Store this, since we have it. + nextHashS := hash5(cv, betterShortTableBits) + nextHashL := hash8(cv, betterLongTableBits) + + // We have at least 4 byte match. + // No need to check backwards. We come straight from a match + l := 4 + e.matchlen(s+4, o2+4, src) + + e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset} + e.table[nextHashS] = tableEntry{offset: s + e.cur, val: uint32(cv)} + seq.matchLen = uint32(l) - zstdMinMatch + seq.litLen = 0 + + // Since litlen is always 0, this is offset 1. + seq.offset = 1 + s += l + nextEmit = s + if debugSequences { + println("sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + + // Swap offset 1 and 2. + offset1, offset2 = offset2, offset1 + if s >= sLimit { + // Finished + break encodeLoop + } + cv = load6432(src, s) + } + } + + if int(nextEmit) < len(src) { + blk.literals = append(blk.literals, src[nextEmit:]...) + blk.extraLits = len(src) - int(nextEmit) + } + blk.recentOffsets[0] = uint32(offset1) + blk.recentOffsets[1] = uint32(offset2) + if debug { + println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits) + } +} + +// EncodeNoHist will encode a block with no history and no following blocks. +// Most notable difference is that src will not be copied for history and +// we do not need to check for max match length. +func (e *betterFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) { + e.Encode(blk, src) +} diff --git a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go index 0ffea76554..50276bcde7 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go @@ -80,10 +80,7 @@ func (e *doubleFastEncoder) Encode(blk *blockEnc, src []byte) { sLimit := int32(len(src)) - inputMargin // stepSize is the number of bytes to skip on every main loop iteration. // It should be >= 1. - stepSize := int32(e.o.targetLength) - if stepSize == 0 { - stepSize++ - } + const stepSize = 1 const kSearchStrength = 8 @@ -172,55 +169,6 @@ encodeLoop: cv = load6432(src, s) continue } - const repOff2 = 1 - // We deviate from the reference encoder and also check offset 2. - // Slower and not consistently better, so disabled. - // repIndex = s - offset2 + repOff2 - if false && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff2*8)) { - // Consider history as well. - var seq seq - lenght := 4 + e.matchlen(s+4+repOff2, repIndex+4, src) - - seq.matchLen = uint32(lenght - zstdMinMatch) - - // We might be able to match backwards. - // Extend as long as we can. - start := s + repOff2 - // We end the search early, so we don't risk 0 literals - // and have to do special offset treatment. - startLimit := nextEmit + 1 - - tMin := s - e.maxMatchOff - if tMin < 0 { - tMin = 0 - } - for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 { - repIndex-- - start-- - seq.matchLen++ - } - addLiterals(&seq, start) - - // rep 2 - seq.offset = 2 - if debugSequences { - println("repeat sequence 2", seq, "next s:", s) - } - blk.sequences = append(blk.sequences, seq) - s += lenght + repOff2 - nextEmit = s - if s >= sLimit { - if debug { - println("repeat ended", s, lenght) - - } - break encodeLoop - } - cv = load6432(src, s) - // Swap offsets - offset1, offset2 = offset2, offset1 - continue - } } // Find the offsets of our two matches. coffsetL := s - (candidateL.offset - e.cur) @@ -372,7 +320,7 @@ encodeLoop: } // Store this, since we have it. - nextHashS := hash5(cv1>>8, dFastShortTableBits) + nextHashS := hash5(cv, dFastShortTableBits) nextHashL := hash8(cv, dFastLongTableBits) // We have at least 4 byte match. @@ -450,10 +398,7 @@ func (e *doubleFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) { sLimit := int32(len(src)) - inputMargin // stepSize is the number of bytes to skip on every main loop iteration. // It should be >= 1. - stepSize := int32(e.o.targetLength) - if stepSize == 0 { - stepSize++ - } + const stepSize = 1 const kSearchStrength = 8 @@ -726,4 +671,8 @@ encodeLoop: println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits) } + // We do not store history, so we must offset e.cur to avoid false matches for next user. + if e.cur < bufferReset { + e.cur += int32(len(src)) + } } diff --git a/vendor/github.com/klauspost/compress/zstd/enc_fast.go b/vendor/github.com/klauspost/compress/zstd/enc_fast.go index 28134b1589..4104b456ce 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_fast.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_fast.go @@ -6,6 +6,7 @@ package zstd import ( "fmt" + "math" "math/bits" "github.com/klauspost/compress/zstd/internal/xxhash" @@ -23,26 +24,29 @@ type tableEntry struct { offset int32 } -type fastEncoder struct { - o encParams +type fastBase struct { // cur is the offset at the start of hist cur int32 // maximum offset. Should be at least 2x block size. maxMatchOff int32 hist []byte crc *xxhash.Digest - table [tableSize]tableEntry tmp [8]byte blk *blockEnc } +type fastEncoder struct { + fastBase + table [tableSize]tableEntry +} + // CRC returns the underlying CRC writer. -func (e *fastEncoder) CRC() *xxhash.Digest { +func (e *fastBase) CRC() *xxhash.Digest { return e.crc } // AppendCRC will append the CRC to the destination slice and return it. -func (e *fastEncoder) AppendCRC(dst []byte) []byte { +func (e *fastBase) AppendCRC(dst []byte) []byte { crc := e.crc.Sum(e.tmp[:0]) dst = append(dst, crc[7], crc[6], crc[5], crc[4]) return dst @@ -50,7 +54,7 @@ func (e *fastEncoder) AppendCRC(dst []byte) []byte { // WindowSize returns the window size of the encoder, // or a window size small enough to contain the input size, if > 0. -func (e *fastEncoder) WindowSize(size int) int32 { +func (e *fastBase) WindowSize(size int) int32 { if size > 0 && size < int(e.maxMatchOff) { b := int32(1) << uint(bits.Len(uint(size))) // Keep minimum window. @@ -63,7 +67,7 @@ func (e *fastEncoder) WindowSize(size int) int32 { } // Block returns the current block. -func (e *fastEncoder) Block() *blockEnc { +func (e *fastBase) Block() *blockEnc { return e.blk } @@ -112,11 +116,7 @@ func (e *fastEncoder) Encode(blk *blockEnc, src []byte) { sLimit := int32(len(src)) - inputMargin // stepSize is the number of bytes to skip on every main loop iteration. // It should be >= 2. - stepSize := int32(e.o.targetLength) - if stepSize == 0 { - stepSize++ - } - stepSize++ + const stepSize = 2 // TEMPLATE const hashLog = tableBits @@ -169,9 +169,22 @@ encodeLoop: if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) { // Consider history as well. var seq seq - lenght := 4 + e.matchlen(s+6, repIndex+4, src) + var length int32 + // length = 4 + e.matchlen(s+6, repIndex+4, src) + { + a := src[s+6:] + b := src[repIndex+4:] + endI := len(a) & (math.MaxInt32 - 7) + length = int32(endI) + 4 + for i := 0; i < endI; i += 8 { + if diff := load64(a, i) ^ load64(b, i); diff != 0 { + length = int32(i+bits.TrailingZeros64(diff)>>3) + 4 + break + } + } + } - seq.matchLen = uint32(lenght - zstdMinMatch) + seq.matchLen = uint32(length - zstdMinMatch) // We might be able to match backwards. // Extend as long as we can. @@ -197,11 +210,11 @@ encodeLoop: println("repeat sequence", seq, "next s:", s) } blk.sequences = append(blk.sequences, seq) - s += lenght + 2 + s += length + 2 nextEmit = s if s >= sLimit { if debug { - println("repeat ended", s, lenght) + println("repeat ended", s, length) } break encodeLoop @@ -257,7 +270,20 @@ encodeLoop: } // Extend the 4-byte match as long as possible. - l := e.matchlen(s+4, t+4, src) + 4 + //l := e.matchlen(s+4, t+4, src) + 4 + var l int32 + { + a := src[s+4:] + b := src[t+4:] + endI := len(a) & (math.MaxInt32 - 7) + l = int32(endI) + 4 + for i := 0; i < endI; i += 8 { + if diff := load64(a, i) ^ load64(b, i); diff != 0 { + l = int32(i+bits.TrailingZeros64(diff)>>3) + 4 + break + } + } + } // Extend backwards tMin := s - e.maxMatchOff @@ -294,7 +320,20 @@ encodeLoop: if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) { // We have at least 4 byte match. // No need to check backwards. We come straight from a match - l := 4 + e.matchlen(s+4, o2+4, src) + //l := 4 + e.matchlen(s+4, o2+4, src) + var l int32 + { + a := src[s+4:] + b := src[o2+4:] + endI := len(a) & (math.MaxInt32 - 7) + l = int32(endI) + 4 + for i := 0; i < endI; i += 8 { + if diff := load64(a, i) ^ load64(b, i); diff != 0 { + l = int32(i+bits.TrailingZeros64(diff)>>3) + 4 + break + } + } + } // Store this, since we have it. nextHash := hash6(cv, hashLog) @@ -344,6 +383,7 @@ func (e *fastEncoder) EncodeNoHist(blk *blockEnc, src []byte) { panic("src too big") } } + // Protect against e.cur wraparound. if e.cur >= bufferReset { for i := range e.table[:] { @@ -412,10 +452,23 @@ encodeLoop: if len(blk.sequences) > 2 && load3232(src, repIndex) == uint32(cv>>16) { // Consider history as well. var seq seq - // lenght := 4 + e.matchlen(s+6, repIndex+4, src) - lenght := 4 + int32(matchLen(src[s+6:], src[repIndex+4:])) + // length := 4 + e.matchlen(s+6, repIndex+4, src) + // length := 4 + int32(matchLen(src[s+6:], src[repIndex+4:])) + var length int32 + { + a := src[s+6:] + b := src[repIndex+4:] + endI := len(a) & (math.MaxInt32 - 7) + length = int32(endI) + 4 + for i := 0; i < endI; i += 8 { + if diff := load64(a, i) ^ load64(b, i); diff != 0 { + length = int32(i+bits.TrailingZeros64(diff)>>3) + 4 + break + } + } + } - seq.matchLen = uint32(lenght - zstdMinMatch) + seq.matchLen = uint32(length - zstdMinMatch) // We might be able to match backwards. // Extend as long as we can. @@ -441,11 +494,11 @@ encodeLoop: println("repeat sequence", seq, "next s:", s) } blk.sequences = append(blk.sequences, seq) - s += lenght + 2 + s += length + 2 nextEmit = s if s >= sLimit { if debug { - println("repeat ended", s, lenght) + println("repeat ended", s, length) } break encodeLoop @@ -464,6 +517,9 @@ encodeLoop: if debugAsserts && s-t > e.maxMatchOff { panic("s - t >e.maxMatchOff") } + if debugAsserts && t < 0 { + panic(fmt.Sprintf("t (%d) < 0, candidate.offset: %d, e.cur: %d, coffset0: %d, e.maxMatchOff: %d", t, candidate.offset, e.cur, coffset0, e.maxMatchOff)) + } break } @@ -496,9 +552,25 @@ encodeLoop: panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) } + if debugAsserts && t < 0 { + panic(fmt.Sprintf("t (%d) < 0 ", t)) + } // Extend the 4-byte match as long as possible. //l := e.matchlenNoHist(s+4, t+4, src) + 4 - l := int32(matchLen(src[s+4:], src[t+4:])) + 4 + // l := int32(matchLen(src[s+4:], src[t+4:])) + 4 + var l int32 + { + a := src[s+4:] + b := src[t+4:] + endI := len(a) & (math.MaxInt32 - 7) + l = int32(endI) + 4 + for i := 0; i < endI; i += 8 { + if diff := load64(a, i) ^ load64(b, i); diff != 0 { + l = int32(i+bits.TrailingZeros64(diff)>>3) + 4 + break + } + } + } // Extend backwards tMin := s - e.maxMatchOff @@ -536,7 +608,20 @@ encodeLoop: // We have at least 4 byte match. // No need to check backwards. We come straight from a match //l := 4 + e.matchlenNoHist(s+4, o2+4, src) - l := 4 + int32(matchLen(src[s+4:], src[o2+4:])) + // l := 4 + int32(matchLen(src[s+4:], src[o2+4:])) + var l int32 + { + a := src[s+4:] + b := src[o2+4:] + endI := len(a) & (math.MaxInt32 - 7) + l = int32(endI) + 4 + for i := 0; i < endI; i += 8 { + if diff := load64(a, i) ^ load64(b, i); diff != 0 { + l = int32(i+bits.TrailingZeros64(diff)>>3) + 4 + break + } + } + } // Store this, since we have it. nextHash := hash6(cv, hashLog) @@ -569,9 +654,13 @@ encodeLoop: if debug { println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits) } + // We do not store history, so we must offset e.cur to avoid false matches for next user. + if e.cur < bufferReset { + e.cur += int32(len(src)) + } } -func (e *fastEncoder) addBlock(src []byte) int32 { +func (e *fastBase) addBlock(src []byte) int32 { if debugAsserts && e.cur > bufferReset { panic(fmt.Sprintf("ecur (%d) > buffer reset (%d)", e.cur, bufferReset)) } @@ -602,17 +691,17 @@ func (e *fastEncoder) addBlock(src []byte) int32 { // useBlock will replace the block with the provided one, // but transfer recent offsets from the previous. -func (e *fastEncoder) UseBlock(enc *blockEnc) { +func (e *fastBase) UseBlock(enc *blockEnc) { enc.reset(e.blk) e.blk = enc } -func (e *fastEncoder) matchlenNoHist(s, t int32, src []byte) int32 { +func (e *fastBase) matchlenNoHist(s, t int32, src []byte) int32 { // Extend the match to be as long as possible. return int32(matchLen(src[s:], src[t:])) } -func (e *fastEncoder) matchlen(s, t int32, src []byte) int32 { +func (e *fastBase) matchlen(s, t int32, src []byte) int32 { if debugAsserts { if s < 0 { err := fmt.Sprintf("s (%d) < 0", s) @@ -626,18 +715,17 @@ func (e *fastEncoder) matchlen(s, t int32, src []byte) int32 { err := fmt.Sprintf("s (%d) - t (%d) > maxMatchOff (%d)", s, t, e.maxMatchOff) panic(err) } - } - s1 := int(s) + maxMatchLength - 4 - if s1 > len(src) { - s1 = len(src) + if len(src)-int(s) > maxCompressedBlockSize { + panic(fmt.Sprintf("len(src)-s (%d) > maxCompressedBlockSize (%d)", len(src)-int(s), maxCompressedBlockSize)) + } } // Extend the match to be as long as possible. - return int32(matchLen(src[s:s1], src[t:])) + return int32(matchLen(src[s:], src[t:])) } // Reset the encoding table. -func (e *fastEncoder) Reset() { +func (e *fastBase) Reset(singleBlock bool) { if e.blk == nil { e.blk = &blockEnc{} e.blk.init() @@ -650,7 +738,7 @@ func (e *fastEncoder) Reset() { } else { e.crc.Reset() } - if cap(e.hist) < int(e.maxMatchOff*2) { + if !singleBlock && cap(e.hist) < int(e.maxMatchOff*2) { l := e.maxMatchOff * 2 // Make it at least 1MB. if l < 1<<20 { diff --git a/vendor/github.com/klauspost/compress/zstd/enc_params.go b/vendor/github.com/klauspost/compress/zstd/enc_params.go index b6779ecb6d..d874116f71 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_params.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_params.go @@ -4,6 +4,8 @@ package zstd +/* +// encParams are not really used, just here for reference. type encParams struct { // largest match distance : larger == more compression, more memory needed during decompression windowLog uint8 @@ -152,3 +154,4 @@ var defEncParams = [4][]encParams{ {14, 15, 15, 10, 3, 999, strategyBtultra2}, // level 22. }, } +*/ diff --git a/vendor/github.com/klauspost/compress/zstd/encoder.go b/vendor/github.com/klauspost/compress/zstd/encoder.go index 4032fb9fc8..95ebc3d84e 100644 --- a/vendor/github.com/klauspost/compress/zstd/encoder.go +++ b/vendor/github.com/klauspost/compress/zstd/encoder.go @@ -35,21 +35,22 @@ type encoder interface { AppendCRC([]byte) []byte WindowSize(size int) int32 UseBlock(*blockEnc) - Reset() + Reset(singleBlock bool) } type encoderState struct { - w io.Writer - filling []byte - current []byte - previous []byte - encoder encoder - writing *blockEnc - err error - writeErr error - nWritten int64 - headerWritten bool - eofWritten bool + w io.Writer + filling []byte + current []byte + previous []byte + encoder encoder + writing *blockEnc + err error + writeErr error + nWritten int64 + headerWritten bool + eofWritten bool + fullFrameWritten bool // This waitgroup indicates an encode is running. wg sync.WaitGroup @@ -71,27 +72,26 @@ func NewWriter(w io.Writer, opts ...EOption) (*Encoder, error) { } if w != nil { e.Reset(w) - } else { - e.init.Do(func() { - e.initialize() - }) } return &e, nil } func (e *Encoder) initialize() { + if e.o.concurrent == 0 { + e.o.setDefault() + } e.encoders = make(chan encoder, e.o.concurrent) for i := 0; i < e.o.concurrent; i++ { - e.encoders <- e.o.encoder() + enc := e.o.encoder() + // If not single block, history will be allocated on first use. + enc.Reset(true) + e.encoders <- enc } } // Reset will re-initialize the writer and new writes will encode to the supplied writer // as a new, independent stream. func (e *Encoder) Reset(w io.Writer) { - e.init.Do(func() { - e.initialize() - }) s := &e.state s.wg.Wait() s.wWg.Wait() @@ -115,9 +115,10 @@ func (e *Encoder) Reset(w io.Writer) { s.filling = s.filling[:0] s.current = s.current[:0] s.previous = s.previous[:0] - s.encoder.Reset() + s.encoder.Reset(false) s.headerWritten = false s.eofWritten = false + s.fullFrameWritten = false s.w = w s.err = nil s.nWritten = 0 @@ -176,6 +177,23 @@ func (e *Encoder) nextBlock(final bool) error { return fmt.Errorf("block > maxStoreBlockSize") } if !s.headerWritten { + // If we have a single block encode, do a sync compression. + if final && len(s.filling) > 0 { + s.current = e.EncodeAll(s.filling, s.current[:0]) + var n2 int + n2, s.err = s.w.Write(s.current) + if s.err != nil { + return s.err + } + s.nWritten += int64(n2) + s.current = s.current[:0] + s.filling = s.filling[:0] + s.headerWritten = true + s.fullFrameWritten = true + s.eofWritten = true + return nil + } + var tmp [maxHeaderSize]byte fh := frameHeader{ ContentSize: 0, @@ -263,7 +281,7 @@ func (e *Encoder) nextBlock(final bool) error { // If we got the exact same number of literals as input, // assume the literals cannot be compressed. if len(src) != len(blk.literals) || len(src) != e.o.blockSize { - err = blk.encode(e.o.noEntropy) + err = blk.encode(e.o.noEntropy, !e.o.allLitEntropy) } switch err { case errIncompressible: @@ -298,7 +316,9 @@ func (e *Encoder) ReadFrom(r io.Reader) (n int64, err error) { src := e.state.filling for { n2, err := r.Read(src) - _, _ = e.state.encoder.CRC().Write(src[:n2]) + if e.o.crc { + _, _ = e.state.encoder.CRC().Write(src[:n2]) + } // src is now the unfilled part... src = src[n2:] n += int64(n2) @@ -363,6 +383,9 @@ func (e *Encoder) Close() error { if err != nil { return err } + if e.state.fullFrameWritten { + return s.err + } s.wg.Wait() s.wWg.Wait() @@ -422,18 +445,14 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte { } return dst } - e.init.Do(func() { - e.o.setDefault() - e.initialize() - }) + e.init.Do(e.initialize) enc := <-e.encoders defer func() { // Release encoder reference to last block. - enc.Reset() + // If a non-single block is needed the encoder will reset again. + enc.Reset(true) e.encoders <- enc }() - enc.Reset() - blk := enc.Block() // Use single segments when above minimum window and below 1MB. single := len(src) < 1<<20 && len(src) > MinWindowSize if e.o.single != nil { @@ -456,12 +475,13 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte { panic(err) } - if len(src) <= e.o.blockSize && len(src) <= maxBlockSize { + // If we can do everything in one block, prefer that. + if len(src) <= maxCompressedBlockSize { // Slightly faster with no history and everything in one block. if e.o.crc { _, _ = enc.CRC().Write(src) } - blk.reset(nil) + blk := enc.Block() blk.last = true enc.EncodeNoHist(blk, src) @@ -472,7 +492,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte { if len(blk.literals) != len(src) || len(src) != e.o.blockSize { // Output directly to dst blk.output = dst - err = blk.encode(e.o.noEntropy) + err = blk.encode(e.o.noEntropy, !e.o.allLitEntropy) } switch err { @@ -488,6 +508,8 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte { } blk.output = oldout } else { + enc.Reset(false) + blk := enc.Block() for len(src) > 0 { todo := src if len(todo) > e.o.blockSize { @@ -507,7 +529,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte { // If we got the exact same number of literals as input, // assume the literals cannot be compressed. if len(blk.literals) != len(todo) || len(todo) != e.o.blockSize { - err = blk.encode(e.o.noEntropy) + err = blk.encode(e.o.noEntropy, !e.o.allLitEntropy) } switch err { diff --git a/vendor/github.com/klauspost/compress/zstd/encoder_options.go b/vendor/github.com/klauspost/compress/zstd/encoder_options.go index 40eb457331..dfac14ddde 100644 --- a/vendor/github.com/klauspost/compress/zstd/encoder_options.go +++ b/vendor/github.com/klauspost/compress/zstd/encoder_options.go @@ -12,15 +12,18 @@ type EOption func(*encoderOptions) error // options retains accumulated state of multiple options. type encoderOptions struct { - concurrent int - crc bool - single *bool - pad int - blockSize int - windowSize int - level EncoderLevel - fullZero bool - noEntropy bool + concurrent int + level EncoderLevel + single *bool + pad int + blockSize int + windowSize int + crc bool + fullZero bool + noEntropy bool + allLitEntropy bool + customWindow bool + customALEntropy bool } func (o *encoderOptions) setDefault() { @@ -30,7 +33,7 @@ func (o *encoderOptions) setDefault() { crc: true, single: nil, blockSize: 1 << 16, - windowSize: 1 << 22, + windowSize: 8 << 20, level: SpeedDefault, } } @@ -39,9 +42,11 @@ func (o *encoderOptions) setDefault() { func (o encoderOptions) encoder() encoder { switch o.level { case SpeedDefault: - return &doubleFastEncoder{fastEncoder: fastEncoder{maxMatchOff: int32(o.windowSize)}} + return &doubleFastEncoder{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize)}}} + case SpeedBetterCompression: + return &betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize)}} case SpeedFastest: - return &fastEncoder{maxMatchOff: int32(o.windowSize)} + return &fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize)}} } panic("unknown compression level") } @@ -67,7 +72,7 @@ func WithEncoderConcurrency(n int) EOption { } // WithWindowSize will set the maximum allowed back-reference distance. -// The value must be a power of two between WindowSizeMin and WindowSizeMax. +// The value must be a power of two between MinWindowSize and MaxWindowSize. // A larger value will enable better compression but allocate more memory and, // for above-default values, take considerably longer. // The default value is determined by the compression level. @@ -83,6 +88,7 @@ func WithWindowSize(n int) EOption { } o.windowSize = n + o.customWindow = true if o.blockSize > o.windowSize { o.blockSize = o.windowSize } @@ -130,18 +136,18 @@ const ( // This is roughly equivalent to the default Zstandard mode (level 3). SpeedDefault + // SpeedBetterCompression will yield better compression than the default. + // Currently it is about zstd level 7-8 with ~ 2x-3x the default CPU usage. + // By using this, notice that CPU usage may go up in the future. + SpeedBetterCompression + // speedLast should be kept as the last actual compression option. // The is not for external usage, but is used to keep track of the valid options. speedLast - // SpeedBetterCompression will (in the future) yield better compression than the default, - // but at approximately 4x the CPU usage of the default. - // For now this is not implemented. - SpeedBetterCompression = SpeedDefault - // SpeedBestCompression will choose the best available compression option. // For now this is not implemented. - SpeedBestCompression = SpeedDefault + SpeedBestCompression = SpeedBetterCompression ) // EncoderLevelFromString will convert a string representation of an encoding level back @@ -163,8 +169,10 @@ func EncoderLevelFromZstd(level int) EncoderLevel { switch { case level < 3: return SpeedFastest - case level >= 3: + case level >= 3 && level < 6: return SpeedDefault + case level > 5: + return SpeedBetterCompression } return SpeedDefault } @@ -176,6 +184,8 @@ func (e EncoderLevel) String() string { return "fastest" case SpeedDefault: return "default" + case SpeedBetterCompression: + return "better" default: return "invalid" } @@ -189,6 +199,20 @@ func WithEncoderLevel(l EncoderLevel) EOption { return fmt.Errorf("unknown encoder level") } o.level = l + if !o.customWindow { + switch o.level { + case SpeedFastest: + o.windowSize = 4 << 20 + case SpeedDefault: + o.windowSize = 8 << 20 + case SpeedBetterCompression: + o.windowSize = 16 << 20 + } + } + if !o.customALEntropy { + o.allLitEntropy = l > SpeedFastest + } + return nil } } @@ -203,6 +227,18 @@ func WithZeroFrames(b bool) EOption { } } +// WithAllLitEntropyCompression will apply entropy compression if no matches are found. +// Disabling this will skip incompressible data faster, but in cases with no matches but +// skewed character distribution compression is lost. +// Default value depends on the compression level selected. +func WithAllLitEntropyCompression(b bool) EOption { + return func(o *encoderOptions) error { + o.customALEntropy = true + o.allLitEntropy = b + return nil + } +} + // WithNoEntropyCompression will always skip entropy compression of literals. // This can be useful if content has matches, but unlikely to benefit from entropy // compression. Usually the slight speed improvement is not worth enabling this. diff --git a/vendor/github.com/klauspost/compress/zstd/framedec.go b/vendor/github.com/klauspost/compress/zstd/framedec.go index cda590b5f7..fc4a566d39 100644 --- a/vendor/github.com/klauspost/compress/zstd/framedec.go +++ b/vendor/github.com/klauspost/compress/zstd/framedec.go @@ -16,16 +16,11 @@ import ( ) type frameDec struct { - o decoderOptions - crc hash.Hash64 - frameDone sync.WaitGroup - offset int64 + o decoderOptions + crc hash.Hash64 + offset int64 - WindowSize uint64 - DictionaryID uint32 - FrameContentSize uint64 - HasCheckSum bool - SingleSegment bool + WindowSize uint64 // maxWindowSize is the maximum windows size to support. // should never be bigger than max-int. @@ -42,9 +37,16 @@ type frameDec struct { // Byte buffer that can be reused for small input blocks. bBuf byteBuf + FrameContentSize uint64 + frameDone sync.WaitGroup + + DictionaryID *uint32 + HasCheckSum bool + SingleSegment bool + // asyncRunning indicates whether the async routine processes input on 'decoding'. - asyncRunning bool asyncRunningMu sync.Mutex + asyncRunning bool } const ( @@ -140,7 +142,7 @@ func (d *frameDec) reset(br byteBuffer) error { // Read Dictionary_ID // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary_id - d.DictionaryID = 0 + d.DictionaryID = nil if size := fhd & 3; size != 0 { if size == 3 { size = 4 @@ -152,19 +154,22 @@ func (d *frameDec) reset(br byteBuffer) error { } return io.ErrUnexpectedEOF } + var id uint32 switch size { case 1: - d.DictionaryID = uint32(b[0]) + id = uint32(b[0]) case 2: - d.DictionaryID = uint32(b[0]) | (uint32(b[1]) << 8) + id = uint32(b[0]) | (uint32(b[1]) << 8) case 4: - d.DictionaryID = uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24) + id = uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24) } if debug { - println("Dict size", size, "ID:", d.DictionaryID) + println("Dict size", size, "ID:", id) } - if d.DictionaryID != 0 { - return ErrUnknownDictionary + if id > 0 { + // ID 0 means "sorry, no dictionary anyway". + // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary-format + d.DictionaryID = &id } } @@ -231,7 +236,11 @@ func (d *frameDec) reset(br byteBuffer) error { return ErrWindowSizeTooSmall } d.history.windowSize = int(d.WindowSize) - d.history.maxSize = d.history.windowSize + maxBlockSize + if d.o.lowMem && d.history.windowSize < maxBlockSize { + d.history.maxSize = d.history.windowSize * 2 + } else { + d.history.maxSize = d.history.windowSize + maxBlockSize + } // history contains input - maybe we do something d.rawInput = br return nil @@ -318,8 +327,8 @@ func (d *frameDec) checkCRC() error { func (d *frameDec) initAsync() { if !d.o.lowMem && !d.SingleSegment { - // set max extra size history to 20MB. - d.history.maxSize = d.history.windowSize + maxBlockSize*10 + // set max extra size history to 10MB. + d.history.maxSize = d.history.windowSize + maxBlockSize*5 } // re-alloc if more than one extra block size. if d.o.lowMem && cap(d.history.b) > d.history.maxSize+maxBlockSize { @@ -345,8 +354,6 @@ func (d *frameDec) initAsync() { // When the frame has finished decoding the *bufio.Reader // containing the remaining input will be sent on frameDec.frameDone. func (d *frameDec) startDecoder(output chan decodeOutput) { - // TODO: Init to dictionary - d.history.reset() written := int64(0) defer func() { @@ -439,8 +446,6 @@ func (d *frameDec) startDecoder(output chan decodeOutput) { // runDecoder will create a sync decoder that will decode a block of data. func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) { - // TODO: Init to dictionary - d.history.reset() saved := d.history.b // We use the history for output to avoid copying it. diff --git a/vendor/github.com/klauspost/compress/zstd/fse_decoder.go b/vendor/github.com/klauspost/compress/zstd/fse_decoder.go index e002be98b9..e6d3d49b39 100644 --- a/vendor/github.com/klauspost/compress/zstd/fse_decoder.go +++ b/vendor/github.com/klauspost/compress/zstd/fse_decoder.go @@ -19,7 +19,7 @@ const ( * Increasing memory usage improves compression ratio * Reduced memory usage can improve speed, due to cache effect * Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */ - maxMemoryUsage = 11 + maxMemoryUsage = tablelogAbsoluteMax + 2 maxTableLog = maxMemoryUsage - 2 maxTablesize = 1 << maxTableLog @@ -55,7 +55,7 @@ func (s *fseDecoder) readNCount(b *byteReader, maxSymbol uint16) error { if b.remain() < 4 { return errors.New("input too small") } - bitStream := b.Uint32() + bitStream := b.Uint32NC() nbBits := uint((bitStream & 0xF) + minTablelog) // extract tableLog if nbBits > tablelogAbsoluteMax { println("Invalid tablelog:", nbBits) @@ -79,7 +79,8 @@ func (s *fseDecoder) readNCount(b *byteReader, maxSymbol uint16) error { n0 += 24 if r := b.remain(); r > 5 { b.advance(2) - bitStream = b.Uint32() >> bitCount + // The check above should make sure we can read 32 bits + bitStream = b.Uint32NC() >> bitCount } else { // end of bit stream bitStream >>= 16 @@ -104,10 +105,11 @@ func (s *fseDecoder) readNCount(b *byteReader, maxSymbol uint16) error { charnum++ } - if r := b.remain(); r >= 7 || r+int(bitCount>>3) >= 4 { + if r := b.remain(); r >= 7 || r-int(bitCount>>3) >= 4 { b.advance(bitCount >> 3) bitCount &= 7 - bitStream = b.Uint32() >> bitCount + // The check above should make sure we can read 32 bits + bitStream = b.Uint32NC() >> bitCount } else { bitStream >>= 2 } @@ -148,17 +150,16 @@ func (s *fseDecoder) readNCount(b *byteReader, maxSymbol uint16) error { threshold >>= 1 } - //println("b.off:", b.off, "len:", len(b.b), "bc:", bitCount, "remain:", b.remain()) - if r := b.remain(); r >= 7 || r+int(bitCount>>3) >= 4 { + if r := b.remain(); r >= 7 || r-int(bitCount>>3) >= 4 { b.advance(bitCount >> 3) bitCount &= 7 + // The check above should make sure we can read 32 bits + bitStream = b.Uint32NC() >> (bitCount & 31) } else { bitCount -= (uint)(8 * (len(b.b) - 4 - b.off)) b.off = len(b.b) - 4 - //println("b.off:", b.off, "len:", len(b.b), "bc:", bitCount, "iend", iend) + bitStream = b.Uint32() >> (bitCount & 31) } - bitStream = b.Uint32() >> (bitCount & 31) - //printf("bitstream is now: 0b%b", bitStream) } s.symbolLen = charnum if s.symbolLen <= 1 { diff --git a/vendor/github.com/klauspost/compress/zstd/history.go b/vendor/github.com/klauspost/compress/zstd/history.go index e8c419bd53..f418f50fcd 100644 --- a/vendor/github.com/klauspost/compress/zstd/history.go +++ b/vendor/github.com/klauspost/compress/zstd/history.go @@ -17,6 +17,7 @@ type history struct { windowSize int maxSize int error bool + dict *dict } // reset will reset the history to initial state of a frame. @@ -36,12 +37,27 @@ func (h *history) reset() { } h.decoders = sequenceDecs{} if h.huffTree != nil { - huffDecoderPool.Put(h.huffTree) + if h.dict == nil || h.dict.litDec != h.huffTree { + huffDecoderPool.Put(h.huffTree) + } } h.huffTree = nil + h.dict = nil //printf("history created: %+v (l: %d, c: %d)", *h, len(h.b), cap(h.b)) } +func (h *history) setDict(dict *dict) { + if dict == nil { + return + } + h.dict = dict + h.decoders.litLengths = dict.llDec + h.decoders.offsets = dict.ofDec + h.decoders.matchLengths = dict.mlDec + h.recentOffsets = dict.offsets + h.huffTree = dict.litDec +} + // append bytes to history. // This function will make sure there is space for it, // if the buffer has been allocated with enough extra space. diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec.go b/vendor/github.com/klauspost/compress/zstd/seqdec.go index 15a45f7b50..7ff870400d 100644 --- a/vendor/github.com/klauspost/compress/zstd/seqdec.go +++ b/vendor/github.com/klauspost/compress/zstd/seqdec.go @@ -62,8 +62,10 @@ type sequenceDecs struct { matchLengths sequenceDec prevOffset [3]int hist []byte + dict []byte literals []byte out []byte + windowSize int maxBits uint8 } @@ -82,7 +84,12 @@ func (s *sequenceDecs) initialize(br *bitReader, hist *history, literals, out [] s.hist = hist.b s.prevOffset = hist.recentOffsets s.maxBits = s.litLengths.fse.maxBits + s.offsets.fse.maxBits + s.matchLengths.fse.maxBits + s.windowSize = hist.windowSize s.out = out + s.dict = nil + if hist.dict != nil { + s.dict = hist.dict.content + } return nil } @@ -98,23 +105,78 @@ func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error { printf("reading sequence %d, exceeded available data\n", seqs-i) return io.ErrUnexpectedEOF } - var litLen, matchOff, matchLen int + var ll, mo, ml int if br.off > 4+((maxOffsetBits+16+16)>>3) { - litLen, matchOff, matchLen = s.nextFast(br, llState, mlState, ofState) + // inlined function: + // ll, mo, ml = s.nextFast(br, llState, mlState, ofState) + + // Final will not read from stream. + var llB, mlB, moB uint8 + ll, llB = llState.final() + ml, mlB = mlState.final() + mo, moB = ofState.final() + + // extra bits are stored in reverse order. + br.fillFast() + mo += br.getBits(moB) + if s.maxBits > 32 { + br.fillFast() + } + ml += br.getBits(mlB) + ll += br.getBits(llB) + + if moB > 1 { + s.prevOffset[2] = s.prevOffset[1] + s.prevOffset[1] = s.prevOffset[0] + s.prevOffset[0] = mo + } else { + // mo = s.adjustOffset(mo, ll, moB) + // Inlined for rather big speedup + if ll == 0 { + // There is an exception though, when current sequence's literals_length = 0. + // In this case, repeated offsets are shifted by one, so an offset_value of 1 means Repeated_Offset2, + // an offset_value of 2 means Repeated_Offset3, and an offset_value of 3 means Repeated_Offset1 - 1_byte. + mo++ + } + + if mo == 0 { + mo = s.prevOffset[0] + } else { + var temp int + if mo == 3 { + temp = s.prevOffset[0] - 1 + } else { + temp = s.prevOffset[mo] + } + + if temp == 0 { + // 0 is not valid; input is corrupted; force offset to 1 + println("temp was 0") + temp = 1 + } + + if mo != 1 { + s.prevOffset[2] = s.prevOffset[1] + } + s.prevOffset[1] = s.prevOffset[0] + s.prevOffset[0] = temp + mo = temp + } + } br.fillFast() } else { - litLen, matchOff, matchLen = s.next(br, llState, mlState, ofState) + ll, mo, ml = s.next(br, llState, mlState, ofState) br.fill() } if debugSequences { - println("Seq", seqs-i-1, "Litlen:", litLen, "matchOff:", matchOff, "(abs) matchLen:", matchLen) + println("Seq", seqs-i-1, "Litlen:", ll, "mo:", mo, "(abs) ml:", ml) } - if litLen > len(s.literals) { - return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", litLen, len(s.literals)) + if ll > len(s.literals) { + return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", ll, len(s.literals)) } - size := litLen + matchLen + len(s.out) + size := ll + ml + len(s.out) if size-startSize > maxBlockSize { return fmt.Errorf("output (%d) bigger than max block size", size) } @@ -125,49 +187,70 @@ func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error { s.out = append(s.out, make([]byte, maxBlockSize)...) s.out = s.out[:len(s.out)-maxBlockSize] } - if matchLen > maxMatchLen { - return fmt.Errorf("match len (%d) bigger than max allowed length", matchLen) - } - if matchOff > len(s.out)+len(hist)+litLen { - return fmt.Errorf("match offset (%d) bigger than current history (%d)", matchOff, len(s.out)+len(hist)+litLen) - } - if matchOff == 0 && matchLen > 0 { - return fmt.Errorf("zero matchoff and matchlen > 0") + if ml > maxMatchLen { + return fmt.Errorf("match len (%d) bigger than max allowed length", ml) } - s.out = append(s.out, s.literals[:litLen]...) - s.literals = s.literals[litLen:] + // Add literals + s.out = append(s.out, s.literals[:ll]...) + s.literals = s.literals[ll:] out := s.out + if mo > len(s.out)+len(hist) || mo > s.windowSize { + if len(s.dict) == 0 { + return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(s.out)+len(hist)) + } + + // we may be in dictionary. + dictO := len(s.dict) - (mo - (len(s.out) + len(hist))) + if dictO < 0 || dictO >= len(s.dict) { + return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(s.out)+len(hist)) + } + end := dictO + ml + if end > len(s.dict) { + out = append(out, s.dict[dictO:]...) + mo -= len(s.dict) - dictO + ml -= len(s.dict) - dictO + } else { + out = append(out, s.dict[dictO:end]...) + mo = 0 + ml = 0 + } + } + + if mo == 0 && ml > 0 { + return fmt.Errorf("zero matchoff and matchlen (%d) > 0", ml) + } + // Copy from history. // TODO: Blocks without history could be made to ignore this completely. - if v := matchOff - len(s.out); v > 0 { + if v := mo - len(s.out); v > 0 { // v is the start position in history from end. start := len(s.hist) - v - if matchLen > v { + if ml > v { // Some goes into current block. // Copy remainder of history out = append(out, s.hist[start:]...) - matchOff -= v - matchLen -= v + mo -= v + ml -= v } else { - out = append(out, s.hist[start:start+matchLen]...) - matchLen = 0 + out = append(out, s.hist[start:start+ml]...) + ml = 0 } } // We must be in current buffer now - if matchLen > 0 { - start := len(s.out) - matchOff - if matchLen <= len(s.out)-start { + if ml > 0 { + start := len(s.out) - mo + if ml <= len(s.out)-start { // No overlap - out = append(out, s.out[start:start+matchLen]...) + out = append(out, s.out[start:start+ml]...) } else { // Overlapping copy // Extend destination slice and copy one byte at the time. - out = out[:len(out)+matchLen] - src := out[start : start+matchLen] + out = out[:len(out)+ml] + src := out[start : start+ml] // Destination is the space we just added. - dst := out[len(out)-matchLen:] + dst := out[len(out)-ml:] dst = dst[:len(src)] for i := range src { dst[i] = src[i] diff --git a/vendor/github.com/klauspost/compress/zstd/snappy.go b/vendor/github.com/klauspost/compress/zstd/snappy.go index 356956ba25..690428cd24 100644 --- a/vendor/github.com/klauspost/compress/zstd/snappy.go +++ b/vendor/github.com/klauspost/compress/zstd/snappy.go @@ -178,7 +178,7 @@ func (r *SnappyConverter) Convert(in io.Reader, w io.Writer) (int64, error) { r.err = ErrSnappyCorrupt return written, r.err } - err = r.block.encode(false) + err = r.block.encode(false, false) switch err { case errIncompressible: r.block.popOffsets() diff --git a/vendor/github.com/klauspost/compress/zstd/zstd.go b/vendor/github.com/klauspost/compress/zstd/zstd.go index 5e0b64cccf..0807719c8b 100644 --- a/vendor/github.com/klauspost/compress/zstd/zstd.go +++ b/vendor/github.com/klauspost/compress/zstd/zstd.go @@ -87,6 +87,17 @@ func printf(format string, a ...interface{}) { } } +// matchLenFast does matching, but will not match the last up to 7 bytes. +func matchLenFast(a, b []byte) int { + endI := len(a) & (math.MaxInt32 - 7) + for i := 0; i < endI; i += 8 { + if diff := load64(a, i) ^ load64(b, i); diff != 0 { + return i + bits.TrailingZeros64(diff)>>3 + } + } + return endI +} + // matchLen returns the maximum length. // a must be the shortest of the two. // The function also returns whether all bytes matched. @@ -97,33 +108,18 @@ func matchLen(a, b []byte) int { return i + (bits.TrailingZeros64(diff) >> 3) } } + checked := (len(a) >> 3) << 3 a = a[checked:] b = b[checked:] - // TODO: We could do a 4 check. for i := range a { if a[i] != b[i] { - return int(i) + checked + return i + checked } } return len(a) + checked } -// matchLen returns a match length in src between index s and t -func matchLenIn(src []byte, s, t int32) int32 { - s1 := len(src) - b := src[t:] - a := src[s:s1] - b = b[:len(a)] - // Extend the match to be as long as possible. - for i := range a { - if a[i] != b[i] { - return int32(i) - } - } - return int32(len(a)) -} - func load3232(b []byte, i int32) uint32 { // Help the compiler eliminate bounds checks on the read so it can be done in a single read. b = b[i:] |