diff options
author | 6543 <6543@obermui.de> | 2021-06-10 16:44:25 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-06-10 16:44:25 +0200 |
commit | 86e2789960439ca786c6e7a74f85f076c223d148 (patch) | |
tree | c63064d4258bcf2bb7a2e5e60acc0a5d5c8216da /vendor/github.com/klauspost/compress | |
parent | f088dc4ea14ae6304c2e06fca06ffa2f7bc273f3 (diff) | |
download | gitea-86e2789960439ca786c6e7a74f85f076c223d148.tar.gz gitea-86e2789960439ca786c6e7a74f85f076c223d148.zip |
Vendor Update (#16121)
* update github.com/PuerkitoBio/goquery
* update github.com/alecthomas/chroma
* update github.com/blevesearch/bleve/v2
* update github.com/caddyserver/certmagic
* update github.com/go-enry/go-enry/v2
* update github.com/go-git/go-billy/v5
* update github.com/go-git/go-git/v5
* update github.com/go-redis/redis/v8
* update github.com/go-testfixtures/testfixtures/v3
* update github.com/jaytaylor/html2text
* update github.com/json-iterator/go
* update github.com/klauspost/compress
* update github.com/markbates/goth
* update github.com/mattn/go-isatty
* update github.com/mholt/archiver/v3
* update github.com/microcosm-cc/bluemonday
* update github.com/minio/minio-go/v7
* update github.com/prometheus/client_golang
* update github.com/unrolled/render
* update github.com/xanzy/go-gitlab
* update github.com/yuin/goldmark
* update github.com/yuin/goldmark-highlighting
Co-authored-by: techknowlogick <techknowlogick@gitea.io>
Diffstat (limited to 'vendor/github.com/klauspost/compress')
24 files changed, 722 insertions, 306 deletions
diff --git a/vendor/github.com/klauspost/compress/flate/deflate.go b/vendor/github.com/klauspost/compress/flate/deflate.go index 40b5802deb..5283ac5a53 100644 --- a/vendor/github.com/klauspost/compress/flate/deflate.go +++ b/vendor/github.com/klauspost/compress/flate/deflate.go @@ -644,7 +644,7 @@ func (d *compressor) init(w io.Writer, level int) (err error) { d.fill = (*compressor).fillBlock d.step = (*compressor).store case level == ConstantCompression: - d.w.logNewTablePenalty = 8 + d.w.logNewTablePenalty = 10 d.window = make([]byte, 32<<10) d.fill = (*compressor).fillBlock d.step = (*compressor).storeHuff diff --git a/vendor/github.com/klauspost/compress/flate/fast_encoder.go b/vendor/github.com/klauspost/compress/flate/fast_encoder.go index 678f081052..347ac2c902 100644 --- a/vendor/github.com/klauspost/compress/flate/fast_encoder.go +++ b/vendor/github.com/klauspost/compress/flate/fast_encoder.go @@ -45,7 +45,7 @@ const ( bTableBits = 17 // Bits used in the big tables bTableSize = 1 << bTableBits // Size of the table - allocHistory = maxStoreBlockSize * 10 // Size to preallocate for history. + allocHistory = maxStoreBlockSize * 5 // Size to preallocate for history. bufferReset = (1 << 31) - allocHistory - maxStoreBlockSize - 1 // Reset the buffer offset when reaching this. ) diff --git a/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go b/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go index db54be1398..3ad5e98072 100644 --- a/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go +++ b/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go @@ -6,6 +6,7 @@ package flate import ( "encoding/binary" + "fmt" "io" ) @@ -27,7 +28,7 @@ const ( // after which bytes are flushed to the writer. // Should preferably be a multiple of 6, since // we accumulate 6 bytes between writes to the buffer. - bufferFlushSize = 240 + bufferFlushSize = 246 // bufferSize is the actual output byte buffer size. // It must have additional headroom for a flush @@ -59,19 +60,31 @@ var offsetExtraBits = [64]int8{ 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, } -var offsetBase = [64]uint32{ - /* normal deflate */ - 0x000000, 0x000001, 0x000002, 0x000003, 0x000004, - 0x000006, 0x000008, 0x00000c, 0x000010, 0x000018, - 0x000020, 0x000030, 0x000040, 0x000060, 0x000080, - 0x0000c0, 0x000100, 0x000180, 0x000200, 0x000300, - 0x000400, 0x000600, 0x000800, 0x000c00, 0x001000, - 0x001800, 0x002000, 0x003000, 0x004000, 0x006000, +var offsetCombined = [32]uint32{} - /* extended window */ - 0x008000, 0x00c000, 0x010000, 0x018000, 0x020000, - 0x030000, 0x040000, 0x060000, 0x080000, 0x0c0000, - 0x100000, 0x180000, 0x200000, 0x300000, +func init() { + var offsetBase = [64]uint32{ + /* normal deflate */ + 0x000000, 0x000001, 0x000002, 0x000003, 0x000004, + 0x000006, 0x000008, 0x00000c, 0x000010, 0x000018, + 0x000020, 0x000030, 0x000040, 0x000060, 0x000080, + 0x0000c0, 0x000100, 0x000180, 0x000200, 0x000300, + 0x000400, 0x000600, 0x000800, 0x000c00, 0x001000, + 0x001800, 0x002000, 0x003000, 0x004000, 0x006000, + + /* extended window */ + 0x008000, 0x00c000, 0x010000, 0x018000, 0x020000, + 0x030000, 0x040000, 0x060000, 0x080000, 0x0c0000, + 0x100000, 0x180000, 0x200000, 0x300000, + } + + for i := range offsetCombined[:] { + // Don't use extended window values... + if offsetBase[i] > 0x006000 { + continue + } + offsetCombined[i] = uint32(offsetExtraBits[i])<<16 | (offsetBase[i]) + } } // The odd order in which the codegen code sizes are written. @@ -88,15 +101,16 @@ type huffmanBitWriter struct { bits uint64 nbits uint16 nbytes uint8 + lastHuffMan bool literalEncoding *huffmanEncoder + tmpLitEncoding *huffmanEncoder offsetEncoding *huffmanEncoder codegenEncoding *huffmanEncoder err error lastHeader int // Set between 0 (reused block can be up to 2x the size) logNewTablePenalty uint - lastHuffMan bool - bytes [256]byte + bytes [256 + 8]byte literalFreq [lengthCodesStart + 32]uint16 offsetFreq [32]uint16 codegenFreq [codegenCodeCount]uint16 @@ -128,6 +142,7 @@ func newHuffmanBitWriter(w io.Writer) *huffmanBitWriter { return &huffmanBitWriter{ writer: w, literalEncoding: newHuffmanEncoder(literalCount), + tmpLitEncoding: newHuffmanEncoder(literalCount), codegenEncoding: newHuffmanEncoder(codegenCodeCount), offsetEncoding: newHuffmanEncoder(offsetCodeCount), } @@ -745,9 +760,31 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode) offs := oeCodes[:32] lengths := leCodes[lengthCodesStart:] lengths = lengths[:32] + + // Go 1.16 LOVES having these on stack. + bits, nbits, nbytes := w.bits, w.nbits, w.nbytes + for _, t := range tokens { if t < matchType { - w.writeCode(lits[t.literal()]) + //w.writeCode(lits[t.literal()]) + c := lits[t.literal()] + bits |= uint64(c.code) << nbits + nbits += c.len + if nbits >= 48 { + binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits) + //*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits + bits >>= 48 + nbits -= 48 + nbytes += 6 + if nbytes >= bufferFlushSize { + if w.err != nil { + nbytes = 0 + return + } + _, w.err = w.writer.Write(w.bytes[:nbytes]) + nbytes = 0 + } + } continue } @@ -759,38 +796,99 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode) } else { // inlined c := lengths[lengthCode&31] - w.bits |= uint64(c.code) << w.nbits - w.nbits += c.len - if w.nbits >= 48 { - w.writeOutBits() + bits |= uint64(c.code) << nbits + nbits += c.len + if nbits >= 48 { + binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits) + //*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits + bits >>= 48 + nbits -= 48 + nbytes += 6 + if nbytes >= bufferFlushSize { + if w.err != nil { + nbytes = 0 + return + } + _, w.err = w.writer.Write(w.bytes[:nbytes]) + nbytes = 0 + } } } extraLengthBits := uint16(lengthExtraBits[lengthCode&31]) if extraLengthBits > 0 { + //w.writeBits(extraLength, extraLengthBits) extraLength := int32(length - lengthBase[lengthCode&31]) - w.writeBits(extraLength, extraLengthBits) + bits |= uint64(extraLength) << nbits + nbits += extraLengthBits + if nbits >= 48 { + binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits) + //*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits + bits >>= 48 + nbits -= 48 + nbytes += 6 + if nbytes >= bufferFlushSize { + if w.err != nil { + nbytes = 0 + return + } + _, w.err = w.writer.Write(w.bytes[:nbytes]) + nbytes = 0 + } + } } // Write the offset offset := t.offset() - offsetCode := offsetCode(offset) + offsetCode := offset >> 16 + offset &= matchOffsetOnlyMask if false { w.writeCode(offs[offsetCode&31]) } else { // inlined - c := offs[offsetCode&31] - w.bits |= uint64(c.code) << w.nbits - w.nbits += c.len - if w.nbits >= 48 { - w.writeOutBits() + c := offs[offsetCode] + bits |= uint64(c.code) << nbits + nbits += c.len + if nbits >= 48 { + binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits) + //*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits + bits >>= 48 + nbits -= 48 + nbytes += 6 + if nbytes >= bufferFlushSize { + if w.err != nil { + nbytes = 0 + return + } + _, w.err = w.writer.Write(w.bytes[:nbytes]) + nbytes = 0 + } } } - extraOffsetBits := uint16(offsetExtraBits[offsetCode&63]) - if extraOffsetBits > 0 { - extraOffset := int32(offset - offsetBase[offsetCode&63]) - w.writeBits(extraOffset, extraOffsetBits) + offsetComb := offsetCombined[offsetCode] + if offsetComb > 1<<16 { + //w.writeBits(extraOffset, extraOffsetBits) + bits |= uint64(offset&matchOffsetOnlyMask-(offsetComb&0xffff)) << nbits + nbits += uint16(offsetComb >> 16) + if nbits >= 48 { + binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits) + //*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits + bits >>= 48 + nbits -= 48 + nbytes += 6 + if nbytes >= bufferFlushSize { + if w.err != nil { + nbytes = 0 + return + } + _, w.err = w.writer.Write(w.bytes[:nbytes]) + nbytes = 0 + } + } } } + // Restore... + w.bits, w.nbits, w.nbytes = bits, nbits, nbytes + if deferEOB { w.writeCode(leCodes[endBlockMarker]) } @@ -825,13 +923,28 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) { } } + // Fill is rarely better... + const fill = false + const numLiterals = endBlockMarker + 1 + const numOffsets = 1 + // Add everything as literals // We have to estimate the header size. // Assume header is around 70 bytes: // https://stackoverflow.com/a/25454430 const guessHeaderSizeBits = 70 * 8 - estBits := histogramSize(input, w.literalFreq[:], !eof && !sync) - estBits += w.lastHeader + len(input)/32 + histogram(input, w.literalFreq[:numLiterals], fill) + w.literalFreq[endBlockMarker] = 1 + w.tmpLitEncoding.generate(w.literalFreq[:numLiterals], 15) + if fill { + // Clear fill... + for i := range w.literalFreq[:numLiterals] { + w.literalFreq[i] = 0 + } + histogram(input, w.literalFreq[:numLiterals], false) + } + estBits := w.tmpLitEncoding.canReuseBits(w.literalFreq[:numLiterals]) + estBits += w.lastHeader if w.lastHeader == 0 { estBits += guessHeaderSizeBits } @@ -839,33 +952,31 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) { // Store bytes, if we don't get a reasonable improvement. ssize, storable := w.storedSize(input) - if storable && ssize < estBits { + if storable && ssize <= estBits { w.writeStoredHeader(len(input), eof) w.writeBytes(input) return } - reuseSize := 0 if w.lastHeader > 0 { - reuseSize = w.literalEncoding.bitLength(w.literalFreq[:256]) + reuseSize := w.literalEncoding.canReuseBits(w.literalFreq[:256]) if estBits < reuseSize { + if debugDeflate { + //fmt.Println("not reusing, reuse:", reuseSize/8, "> new:", estBits/8, "- header est:", w.lastHeader/8) + } // We owe an EOB w.writeCode(w.literalEncoding.codes[endBlockMarker]) w.lastHeader = 0 + } else if debugDeflate { + fmt.Println("reusing, reuse:", reuseSize/8, "> new:", estBits/8, "- header est:", w.lastHeader/8) } } - const numLiterals = endBlockMarker + 1 - const numOffsets = 1 + count := 0 if w.lastHeader == 0 { - if !eof && !sync { - // Generate a slightly suboptimal tree that can be used for all. - fillHist(w.literalFreq[:numLiterals]) - } - w.literalFreq[endBlockMarker] = 1 - w.literalEncoding.generate(w.literalFreq[:numLiterals], 15) - + // Use the temp encoding, so swap. + w.literalEncoding, w.tmpLitEncoding = w.tmpLitEncoding, w.literalEncoding // Generate codegen and codegenFrequencies, which indicates how to encode // the literalEncoding and the offsetEncoding. w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, huffOffset) @@ -876,34 +987,47 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) { w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof) w.lastHuffMan = true w.lastHeader, _ = w.headerSize() + if debugDeflate { + count += w.lastHeader + fmt.Println("header:", count/8) + } } - encoding := w.literalEncoding.codes[:257] + encoding := w.literalEncoding.codes[:256] + // Go 1.16 LOVES having these on stack. At least 1.5x the speed. + bits, nbits, nbytes := w.bits, w.nbits, w.nbytes for _, t := range input { // Bitwriting inlined, ~30% speedup c := encoding[t] - w.bits |= uint64(c.code) << w.nbits - w.nbits += c.len - if w.nbits >= 48 { - bits := w.bits - w.bits >>= 48 - w.nbits -= 48 - n := w.nbytes - binary.LittleEndian.PutUint64(w.bytes[n:], bits) - n += 6 - if n >= bufferFlushSize { + bits |= uint64(c.code) << nbits + nbits += c.len + if debugDeflate { + count += int(c.len) + } + if nbits >= 48 { + binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits) + //*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits + bits >>= 48 + nbits -= 48 + nbytes += 6 + if nbytes >= bufferFlushSize { if w.err != nil { - n = 0 + nbytes = 0 return } - w.write(w.bytes[:n]) - n = 0 + _, w.err = w.writer.Write(w.bytes[:nbytes]) + nbytes = 0 } - w.nbytes = n } } + // Restore... + w.bits, w.nbits, w.nbytes = bits, nbits, nbytes + + if debugDeflate { + fmt.Println("wrote", count/8, "bytes") + } if eof || sync { - w.writeCode(encoding[endBlockMarker]) + w.writeCode(w.literalEncoding.codes[endBlockMarker]) w.lastHeader = 0 w.lastHuffMan = false } diff --git a/vendor/github.com/klauspost/compress/flate/huffman_code.go b/vendor/github.com/klauspost/compress/flate/huffman_code.go index 0d3445a1cc..67b2b38728 100644 --- a/vendor/github.com/klauspost/compress/flate/huffman_code.go +++ b/vendor/github.com/klauspost/compress/flate/huffman_code.go @@ -21,9 +21,13 @@ type hcode struct { } type huffmanEncoder struct { - codes []hcode - freqcache []literalNode - bitCount [17]int32 + codes []hcode + bitCount [17]int32 + + // Allocate a reusable buffer with the longest possible frequency table. + // Possible lengths are codegenCodeCount, offsetCodeCount and literalCount. + // The largest of these is literalCount, so we allocate for that case. + freqcache [literalCount + 1]literalNode } type literalNode struct { @@ -132,6 +136,21 @@ func (h *huffmanEncoder) bitLengthRaw(b []byte) int { return total } +// canReuseBits returns the number of bits or math.MaxInt32 if the encoder cannot be reused. +func (h *huffmanEncoder) canReuseBits(freq []uint16) int { + var total int + for i, f := range freq { + if f != 0 { + code := h.codes[i] + if code.len == 0 { + return math.MaxInt32 + } + total += int(f) * int(code.len) + } + } + return total +} + // Return the number of literals assigned to each bit size in the Huffman encoding // // This method is only called when list.length >= 3 @@ -291,12 +310,6 @@ func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalN // freq An array of frequencies, in which frequency[i] gives the frequency of literal i. // maxBits The maximum number of bits to use for any literal. func (h *huffmanEncoder) generate(freq []uint16, maxBits int32) { - if h.freqcache == nil { - // Allocate a reusable buffer with the longest possible frequency table. - // Possible lengths are codegenCodeCount, offsetCodeCount and literalCount. - // The largest of these is literalCount, so we allocate for that case. - h.freqcache = make([]literalNode, literalCount+1) - } list := h.freqcache[:len(freq)+1] // Number of non-zero literals count := 0 @@ -330,10 +343,14 @@ func (h *huffmanEncoder) generate(freq []uint16, maxBits int32) { h.assignEncodingAndSize(bitCount, list) } +// atLeastOne clamps the result between 1 and 15. func atLeastOne(v float32) float32 { if v < 1 { return 1 } + if v > 15 { + return 15 + } return v } @@ -346,31 +363,12 @@ func fillHist(b []uint16) { } } -// histogramSize accumulates a histogram of b in h. -// An estimated size in bits is returned. -// len(h) must be >= 256, and h's elements must be all zeroes. -func histogramSize(b []byte, h []uint16, fill bool) (bits int) { +func histogram(b []byte, h []uint16, fill bool) { h = h[:256] for _, t := range b { h[t]++ } - total := len(b) if fill { - for _, v := range h { - if v == 0 { - total++ - } - } + fillHist(h) } - - invTotal := 1.0 / float32(total) - shannon := float32(0.0) - for _, v := range h { - if v > 0 { - n := float32(v) - shannon += atLeastOne(-mFastLog2(n*invTotal)) * n - } - } - - return int(shannon + 0.99) } diff --git a/vendor/github.com/klauspost/compress/flate/level5.go b/vendor/github.com/klauspost/compress/flate/level5.go index d513f1ffd3..293a3a320b 100644 --- a/vendor/github.com/klauspost/compress/flate/level5.go +++ b/vendor/github.com/klauspost/compress/flate/level5.go @@ -182,12 +182,27 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) { // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit // them as literal bytes. - // Extend the 4-byte match as long as possible. if l == 0 { + // Extend the 4-byte match as long as possible. l = e.matchlenLong(s+4, t+4, src) + 4 } else if l == maxMatchLength { l += e.matchlenLong(s+l, t+l, src) } + + // Try to locate a better match by checking the end of best match... + if sAt := s + l; l < 30 && sAt < sLimit { + eLong := e.bTable[hash7(load6432(src, sAt), tableBits)].Cur.offset + // Test current + t2 := eLong - e.cur - l + off := s - t2 + if t2 >= 0 && off < maxMatchOffset && off > 0 { + if l2 := e.matchlenLong(s, t2, src); l2 > l { + t = t2 + l = l2 + } + } + } + // Extend backwards for t > 0 && s > nextEmit && src[t-1] == src[s-1] { s-- diff --git a/vendor/github.com/klauspost/compress/flate/level6.go b/vendor/github.com/klauspost/compress/flate/level6.go index a52c80ea45..a709977ec4 100644 --- a/vendor/github.com/klauspost/compress/flate/level6.go +++ b/vendor/github.com/klauspost/compress/flate/level6.go @@ -211,6 +211,31 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) { l += e.matchlenLong(s+l, t+l, src) } + // Try to locate a better match by checking the end-of-match... + if sAt := s + l; sAt < sLimit { + eLong := &e.bTable[hash7(load6432(src, sAt), tableBits)] + // Test current + t2 := eLong.Cur.offset - e.cur - l + off := s - t2 + if off < maxMatchOffset { + if off > 0 && t2 >= 0 { + if l2 := e.matchlenLong(s, t2, src); l2 > l { + t = t2 + l = l2 + } + } + // Test next: + t2 = eLong.Prev.offset - e.cur - l + off := s - t2 + if off > 0 && off < maxMatchOffset && t2 >= 0 { + if l2 := e.matchlenLong(s, t2, src); l2 > l { + t = t2 + l = l2 + } + } + } + } + // Extend backwards for t > 0 && s > nextEmit && src[t-1] == src[s-1] { s-- diff --git a/vendor/github.com/klauspost/compress/flate/token.go b/vendor/github.com/klauspost/compress/flate/token.go index f9abf606d6..eb862d7a92 100644 --- a/vendor/github.com/klauspost/compress/flate/token.go +++ b/vendor/github.com/klauspost/compress/flate/token.go @@ -13,14 +13,17 @@ import ( ) const ( + // From top // 2 bits: type 0 = literal 1=EOF 2=Match 3=Unused // 8 bits: xlength = length - MIN_MATCH_LENGTH - // 22 bits xoffset = offset - MIN_OFFSET_SIZE, or literal - lengthShift = 22 - offsetMask = 1<<lengthShift - 1 - typeMask = 3 << 30 - literalType = 0 << 30 - matchType = 1 << 30 + // 5 bits offsetcode + // 16 bits xoffset = offset - MIN_OFFSET_SIZE, or literal + lengthShift = 22 + offsetMask = 1<<lengthShift - 1 + typeMask = 3 << 30 + literalType = 0 << 30 + matchType = 1 << 30 + matchOffsetOnlyMask = 0xffff ) // The length code for length X (MIN_MATCH_LENGTH <= X <= MAX_MATCH_LENGTH) @@ -187,7 +190,7 @@ func (t *tokens) indexTokens(in []token) { t.AddLiteral(tok.literal()) continue } - t.AddMatch(uint32(tok.length()), tok.offset()) + t.AddMatch(uint32(tok.length()), tok.offset()&matchOffsetOnlyMask) } } @@ -232,7 +235,7 @@ func (t *tokens) EstimatedBits() int { for _, v := range t.litHist[:] { if v > 0 { n := float32(v) - shannon += -mFastLog2(n*invTotal) * n + shannon += atLeastOne(-mFastLog2(n*invTotal)) * n } } // Just add 15 for EOB @@ -240,7 +243,7 @@ func (t *tokens) EstimatedBits() int { for i, v := range t.extraHist[1 : literalCount-256] { if v > 0 { n := float32(v) - shannon += -mFastLog2(n*invTotal) * n + shannon += atLeastOne(-mFastLog2(n*invTotal)) * n bits += int(lengthExtraBits[i&31]) * int(v) nMatches += int(v) } @@ -251,7 +254,7 @@ func (t *tokens) EstimatedBits() int { for i, v := range t.offHist[:offsetCodeCount] { if v > 0 { n := float32(v) - shannon += -mFastLog2(n*invTotal) * n + shannon += atLeastOne(-mFastLog2(n*invTotal)) * n bits += int(offsetExtraBits[i&31]) * int(v) } } @@ -270,11 +273,13 @@ func (t *tokens) AddMatch(xlength uint32, xoffset uint32) { panic(fmt.Errorf("invalid offset: %v", xoffset)) } } + oCode := offsetCode(xoffset) + xoffset |= oCode << 16 t.nLits++ - lengthCode := lengthCodes1[uint8(xlength)] & 31 + + t.extraHist[lengthCodes1[uint8(xlength)]]++ + t.offHist[oCode]++ t.tokens[t.n] = token(matchType | xlength<<lengthShift | xoffset) - t.extraHist[lengthCode]++ - t.offHist[offsetCode(xoffset)&31]++ t.n++ } @@ -286,7 +291,8 @@ func (t *tokens) AddMatchLong(xlength int32, xoffset uint32) { panic(fmt.Errorf("invalid offset: %v", xoffset)) } } - oc := offsetCode(xoffset) & 31 + oc := offsetCode(xoffset) + xoffset |= oc << 16 for xlength > 0 { xl := xlength if xl > 258 { @@ -294,12 +300,11 @@ func (t *tokens) AddMatchLong(xlength int32, xoffset uint32) { xl = 258 - baseMatchLength } xlength -= xl - xl -= 3 + xl -= baseMatchLength t.nLits++ - lengthCode := lengthCodes1[uint8(xl)] & 31 - t.tokens[t.n] = token(matchType | uint32(xl)<<lengthShift | xoffset) - t.extraHist[lengthCode]++ + t.extraHist[lengthCodes1[uint8(xl)]]++ t.offHist[oc]++ + t.tokens[t.n] = token(matchType | uint32(xl)<<lengthShift | xoffset) t.n++ } } diff --git a/vendor/github.com/klauspost/compress/gzip/gunzip.go b/vendor/github.com/klauspost/compress/gzip/gunzip.go index 568b5d4fb8..21e768b360 100644 --- a/vendor/github.com/klauspost/compress/gzip/gunzip.go +++ b/vendor/github.com/klauspost/compress/gzip/gunzip.go @@ -75,6 +75,7 @@ type Header struct { type Reader struct { Header // valid after NewReader or Reader.Reset r flate.Reader + br *bufio.Reader decompressor io.ReadCloser digest uint32 // CRC-32, IEEE polynomial (section 8) size uint32 // Uncompressed size (section 2.3.1) @@ -109,7 +110,13 @@ func (z *Reader) Reset(r io.Reader) error { if rr, ok := r.(flate.Reader); ok { z.r = rr } else { - z.r = bufio.NewReader(r) + // Reuse if we can. + if z.br != nil { + z.br.Reset(r) + } else { + z.br = bufio.NewReader(r) + } + z.r = z.br } z.Header, z.err = z.readHeader() return z.err diff --git a/vendor/github.com/klauspost/compress/zstd/README.md b/vendor/github.com/klauspost/compress/zstd/README.md index 7680bfe1dd..787813fa9e 100644 --- a/vendor/github.com/klauspost/compress/zstd/README.md +++ b/vendor/github.com/klauspost/compress/zstd/README.md @@ -16,8 +16,7 @@ Currently the package is heavily optimized for 64 bit processors and will be sig Install using `go get -u github.com/klauspost/compress`. The package is located in `github.com/klauspost/compress/zstd`. -Godoc Documentation: https://godoc.org/github.com/klauspost/compress/zstd - +[![Go Reference](https://pkg.go.dev/badge/github.com/klauspost/compress/zstd.svg)](https://pkg.go.dev/github.com/klauspost/compress/zstd) ## Compressor @@ -152,8 +151,8 @@ This package: file out level insize outsize millis mb/s silesia.tar zskp 1 211947520 73101992 643 313.87 silesia.tar zskp 2 211947520 67504318 969 208.38 -silesia.tar zskp 3 211947520 65177448 1899 106.44 -silesia.tar zskp 4 211947520 61381950 8115 24.91 +silesia.tar zskp 3 211947520 64595893 2007 100.68 +silesia.tar zskp 4 211947520 60995370 7691 26.28 cgo zstd: silesia.tar zstd 1 211947520 73605392 543 371.56 @@ -171,8 +170,8 @@ https://files.klauspost.com/compress/gob-stream.7z file out level insize outsize millis mb/s gob-stream zskp 1 1911399616 235022249 3088 590.30 gob-stream zskp 2 1911399616 205669791 3786 481.34 -gob-stream zskp 3 1911399616 185792019 9324 195.48 -gob-stream zskp 4 1911399616 171537212 32113 56.76 +gob-stream zskp 3 1911399616 175034659 9636 189.17 +gob-stream zskp 4 1911399616 167273881 29337 62.13 gob-stream zstd 1 1911399616 249810424 2637 691.26 gob-stream zstd 3 1911399616 208192146 3490 522.31 gob-stream zstd 6 1911399616 193632038 6687 272.56 @@ -187,8 +186,8 @@ http://mattmahoney.net/dc/textdata.html file out level insize outsize millis mb/s enwik9 zskp 1 1000000000 343848582 3609 264.18 enwik9 zskp 2 1000000000 317276632 5746 165.97 -enwik9 zskp 3 1000000000 294540704 11725 81.34 -enwik9 zskp 4 1000000000 276609671 44029 21.66 +enwik9 zskp 3 1000000000 292243069 12162 78.41 +enwik9 zskp 4 1000000000 275241169 36430 26.18 enwik9 zstd 1 1000000000 358072021 3110 306.65 enwik9 zstd 3 1000000000 313734672 4784 199.35 enwik9 zstd 6 1000000000 295138875 10290 92.68 @@ -202,8 +201,8 @@ https://files.klauspost.com/compress/github-june-2days-2019.json.zst file out level insize outsize millis mb/s github-june-2days-2019.json zskp 1 6273951764 699045015 10620 563.40 github-june-2days-2019.json zskp 2 6273951764 617881763 11687 511.96 -github-june-2days-2019.json zskp 3 6273951764 537511906 29252 204.54 -github-june-2days-2019.json zskp 4 6273951764 512796117 97791 61.18 +github-june-2days-2019.json zskp 3 6273951764 524340691 34043 175.75 +github-june-2days-2019.json zskp 4 6273951764 503314661 93811 63.78 github-june-2days-2019.json zstd 1 6273951764 766284037 8450 708.00 github-june-2days-2019.json zstd 3 6273951764 661889476 10927 547.57 github-june-2days-2019.json zstd 6 6273951764 642756859 22996 260.18 @@ -217,8 +216,8 @@ https://files.klauspost.com/compress/rawstudio-mint14.7z file out level insize outsize millis mb/s rawstudio-mint14.tar zskp 1 8558382592 3667489370 20210 403.84 rawstudio-mint14.tar zskp 2 8558382592 3364592300 31873 256.07 -rawstudio-mint14.tar zskp 3 8558382592 3224594213 71751 113.75 -rawstudio-mint14.tar zskp 4 8558382592 3027332295 486243 16.79 +rawstudio-mint14.tar zskp 3 8558382592 3158085214 77675 105.08 +rawstudio-mint14.tar zskp 4 8558382592 3020370044 404956 20.16 rawstudio-mint14.tar zstd 1 8558382592 3609250104 17136 476.27 rawstudio-mint14.tar zstd 3 8558382592 3341679997 29262 278.92 rawstudio-mint14.tar zstd 6 8558382592 3235846406 77904 104.77 @@ -232,8 +231,8 @@ https://files.klauspost.com/compress/nyc-taxi-data-10M.csv.zst file out level insize outsize millis mb/s nyc-taxi-data-10M.csv zskp 1 3325605752 641339945 8925 355.35 nyc-taxi-data-10M.csv zskp 2 3325605752 591748091 11268 281.44 -nyc-taxi-data-10M.csv zskp 3 3325605752 538490114 19880 159.53 -nyc-taxi-data-10M.csv zskp 4 3325605752 495986829 89368 35.49 +nyc-taxi-data-10M.csv zskp 3 3325605752 530289687 25239 125.66 +nyc-taxi-data-10M.csv zskp 4 3325605752 490907191 65939 48.10 nyc-taxi-data-10M.csv zstd 1 3325605752 687399637 8233 385.18 nyc-taxi-data-10M.csv zstd 3 3325605752 598514411 10065 315.07 nyc-taxi-data-10M.csv zstd 6 3325605752 570522953 20038 158.27 @@ -405,13 +404,28 @@ BenchmarkDecoder_DecodeAllParallelCgo/comp-data.bin.zst-16 749938 This reflects the performance around May 2020, but this may be out of date. +## Zstd inside ZIP files + +It is possible to use zstandard to compress individual files inside zip archives. +While this isn't widely supported it can be useful for internal files. + +To support the compression and decompression of these files you must register a compressor and decompressor. + +It is highly recommended registering the (de)compressors on individual zip Reader/Writer and NOT +use the global registration functions. The main reason for this is that 2 registrations from +different packages will result in a panic. + +It is a good idea to only have a single compressor and decompressor, since they can be used for multiple zip +files concurrently, and using a single instance will allow reusing some resources. + +See [this example](https://pkg.go.dev/github.com/klauspost/compress/zstd#example-ZipCompressor) for +how to compress and decompress files inside zip archives. + # Contributions Contributions are always welcome. For new features/fixes, remember to add tests and for performance enhancements include benchmarks. -For sending files for reproducing errors use a service like [goobox](https://goobox.io/#/upload) or similar to share your files. - For general feedback and experience reports, feel free to open an issue or write me on [Twitter](https://twitter.com/sh0dan). This package includes the excellent [`github.com/cespare/xxhash`](https://github.com/cespare/xxhash) package Copyright (c) 2016 Caleb Spare. diff --git a/vendor/github.com/klauspost/compress/zstd/blockdec.go b/vendor/github.com/klauspost/compress/zstd/blockdec.go index b51d922bda..e30af505ca 100644 --- a/vendor/github.com/klauspost/compress/zstd/blockdec.go +++ b/vendor/github.com/klauspost/compress/zstd/blockdec.go @@ -123,12 +123,10 @@ func newBlockDec(lowMem bool) *blockDec { // Input must be a start of a block and will be at the end of the block when returned. func (b *blockDec) reset(br byteBuffer, windowSize uint64) error { b.WindowSize = windowSize - tmp := br.readSmall(3) - if tmp == nil { - if debug { - println("Reading block header:", io.ErrUnexpectedEOF) - } - return io.ErrUnexpectedEOF + tmp, err := br.readSmall(3) + if err != nil { + println("Reading block header:", err) + return err } bh := uint32(tmp[0]) | (uint32(tmp[1]) << 8) | (uint32(tmp[2]) << 16) b.Last = bh&1 != 0 @@ -146,7 +144,7 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error { } cSize = 1 case blockTypeCompressed: - if debug { + if debugDecoder { println("Data size on stream:", cSize) } b.RLESize = 0 @@ -155,7 +153,7 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error { maxSize = int(windowSize) } if cSize > maxCompressedBlockSize || uint64(cSize) > b.WindowSize { - if debug { + if debugDecoder { printf("compressed block too big: csize:%d block: %+v\n", uint64(cSize), b) } return ErrCompressedSizeTooBig @@ -179,10 +177,9 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error { if cap(b.dst) <= maxSize { b.dst = make([]byte, 0, maxSize+1) } - var err error b.data, err = br.readBig(cSize, b.dataStorage) if err != nil { - if debug { + if debugDecoder { println("Reading block:", err, "(", cSize, ")", len(b.data)) printf("%T", br) } @@ -252,7 +249,7 @@ func (b *blockDec) startDecoder() { b: b.dst, err: err, } - if debug { + if debugDecoder { println("Decompressed to", len(b.dst), "bytes, error:", err) } b.result <- o @@ -267,7 +264,7 @@ func (b *blockDec) startDecoder() { default: panic("Invalid block type") } - if debug { + if debugDecoder { println("blockDec: Finished block") } } @@ -300,7 +297,7 @@ func (b *blockDec) decodeBuf(hist *history) error { b.dst = hist.b hist.b = nil err := b.decodeCompressed(hist) - if debug { + if debugDecoder { println("Decompressed to total", len(b.dst), "bytes, hash:", xxhash.Sum64(b.dst), "error:", err) } hist.b = b.dst @@ -393,7 +390,7 @@ func (b *blockDec) decodeCompressed(hist *history) error { in = in[5:] } } - if debug { + if debugDecoder { println("literals type:", litType, "litRegenSize:", litRegenSize, "litCompSize:", litCompSize, "sizeFormat:", sizeFormat, "4X:", fourStreams) } var literals []byte @@ -431,7 +428,7 @@ func (b *blockDec) decodeCompressed(hist *history) error { literals[i] = v } in = in[1:] - if debug { + if debugDecoder { printf("Found %d RLE compressed literals\n", litRegenSize) } case literalsBlockTreeless: @@ -442,7 +439,7 @@ func (b *blockDec) decodeCompressed(hist *history) error { // Store compressed literals, so we defer decoding until we get history. literals = in[:litCompSize] in = in[litCompSize:] - if debug { + if debugDecoder { printf("Found %d compressed literals\n", litCompSize) } case literalsBlockCompressed: @@ -484,7 +481,7 @@ func (b *blockDec) decodeCompressed(hist *history) error { if len(literals) != litRegenSize { return fmt.Errorf("literal output size mismatch want %d, got %d", litRegenSize, len(literals)) } - if debug { + if debugDecoder { printf("Decompressed %d literals into %d bytes\n", litCompSize, litRegenSize) } } @@ -535,12 +532,12 @@ func (b *blockDec) decodeCompressed(hist *history) error { br := byteReader{b: in, off: 0} compMode := br.Uint8() br.advance(1) - if debug { + if debugDecoder { printf("Compression modes: 0b%b", compMode) } for i := uint(0); i < 3; i++ { mode := seqCompMode((compMode >> (6 - i*2)) & 3) - if debug { + if debugDecoder { println("Table", tableIndex(i), "is", mode) } var seq *sequenceDec @@ -571,7 +568,7 @@ func (b *blockDec) decodeCompressed(hist *history) error { } dec.setRLE(symb) seq.fse = dec - if debug { + if debugDecoder { printf("RLE set to %+v, code: %v", symb, v) } case compModeFSE: @@ -587,7 +584,7 @@ func (b *blockDec) decodeCompressed(hist *history) error { println("Transform table error:", err) return err } - if debug { + if debugDecoder { println("Read table ok", "symbolLen:", dec.symbolLen) } seq.fse = dec @@ -655,7 +652,7 @@ func (b *blockDec) decodeCompressed(hist *history) error { if huff != nil { hist.huffTree = huff } - if debug { + if debugDecoder { println("Final literals:", len(literals), "hash:", xxhash.Sum64(literals), "and", nSeqs, "sequences.") } @@ -672,7 +669,7 @@ func (b *blockDec) decodeCompressed(hist *history) error { if err != nil { return err } - if debug { + if debugDecoder { println("History merged ok") } br := &bitReader{} @@ -731,7 +728,7 @@ func (b *blockDec) decodeCompressed(hist *history) error { } hist.append(b.dst) hist.recentOffsets = seqs.prevOffset - if debug { + if debugDecoder { println("Finished block with literals:", len(literals), "and", nSeqs, "sequences.") } diff --git a/vendor/github.com/klauspost/compress/zstd/blockenc.go b/vendor/github.com/klauspost/compress/zstd/blockenc.go index e1be092f32..3df185ee46 100644 --- a/vendor/github.com/klauspost/compress/zstd/blockenc.go +++ b/vendor/github.com/klauspost/compress/zstd/blockenc.go @@ -156,7 +156,7 @@ func (h *literalsHeader) setSize(regenLen int) { switch { case inBits < 5: lh |= (uint64(regenLen) << 3) | (1 << 60) - if debug { + if debugEncoder { got := int(lh>>3) & 0xff if got != regenLen { panic(fmt.Sprint("litRegenSize = ", regenLen, "(want) != ", got, "(got)")) @@ -184,7 +184,7 @@ func (h *literalsHeader) setSizes(compLen, inLen int, single bool) { lh |= 1 << 2 } lh |= (uint64(inLen) << 4) | (uint64(compLen) << (10 + 4)) | (3 << 60) - if debug { + if debugEncoder { const mmask = (1 << 24) - 1 n := (lh >> 4) & mmask if int(n&1023) != inLen { @@ -312,7 +312,7 @@ func (b *blockEnc) encodeRaw(a []byte) { bh.setType(blockTypeRaw) b.output = bh.appendTo(b.output[:0]) b.output = append(b.output, a...) - if debug { + if debugEncoder { println("Adding RAW block, length", len(a), "last:", b.last) } } @@ -325,7 +325,7 @@ func (b *blockEnc) encodeRawTo(dst, src []byte) []byte { bh.setType(blockTypeRaw) dst = bh.appendTo(dst) dst = append(dst, src...) - if debug { + if debugEncoder { println("Adding RAW block, length", len(src), "last:", b.last) } return dst @@ -339,7 +339,7 @@ func (b *blockEnc) encodeLits(lits []byte, raw bool) error { // Don't compress extremely small blocks if len(lits) < 8 || (len(lits) < 32 && b.dictLitEnc == nil) || raw { - if debug { + if debugEncoder { println("Adding RAW block, length", len(lits), "last:", b.last) } bh.setType(blockTypeRaw) @@ -371,7 +371,7 @@ func (b *blockEnc) encodeLits(lits []byte, raw bool) error { switch err { case huff0.ErrIncompressible: - if debug { + if debugEncoder { println("Adding RAW block, length", len(lits), "last:", b.last) } bh.setType(blockTypeRaw) @@ -379,7 +379,7 @@ func (b *blockEnc) encodeLits(lits []byte, raw bool) error { b.output = append(b.output, lits...) return nil case huff0.ErrUseRLE: - if debug { + if debugEncoder { println("Adding RLE block, length", len(lits)) } bh.setType(blockTypeRLE) @@ -396,12 +396,12 @@ func (b *blockEnc) encodeLits(lits []byte, raw bool) error { bh.setType(blockTypeCompressed) var lh literalsHeader if reUsed { - if debug { + if debugEncoder { println("Reused tree, compressed to", len(out)) } lh.setType(literalsBlockTreeless) } else { - if debug { + if debugEncoder { println("New tree, compressed to", len(out), "tree size:", len(b.litEnc.OutTable)) } lh.setType(literalsBlockCompressed) @@ -517,7 +517,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error { lh.setSize(len(b.literals)) b.output = lh.appendTo(b.output) b.output = append(b.output, b.literals...) - if debug { + if debugEncoder { println("Adding literals RAW, length", len(b.literals)) } case huff0.ErrUseRLE: @@ -525,22 +525,22 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error { lh.setSize(len(b.literals)) b.output = lh.appendTo(b.output) b.output = append(b.output, b.literals[0]) - if debug { + if debugEncoder { println("Adding literals RLE") } case nil: // Compressed litLen... if reUsed { - if debug { + if debugEncoder { println("reused tree") } lh.setType(literalsBlockTreeless) } else { - if debug { + if debugEncoder { println("new tree, size:", len(b.litEnc.OutTable)) } lh.setType(literalsBlockCompressed) - if debug { + if debugEncoder { _, _, err := huff0.ReadTable(out, nil) if err != nil { panic(err) @@ -548,18 +548,18 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error { } } lh.setSizes(len(out), len(b.literals), single) - if debug { + if debugEncoder { printf("Compressed %d literals to %d bytes", len(b.literals), len(out)) println("Adding literal header:", lh) } b.output = lh.appendTo(b.output) b.output = append(b.output, out...) b.litEnc.Reuse = huff0.ReusePolicyAllow - if debug { + if debugEncoder { println("Adding literals compressed") } default: - if debug { + if debugEncoder { println("Adding literals ERROR:", err) } return err @@ -577,7 +577,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error { n := len(b.sequences) - 0x7f00 b.output = append(b.output, 255, uint8(n), uint8(n>>8)) } - if debug { + if debugEncoder { println("Encoding", len(b.sequences), "sequences") } b.genCodes() @@ -611,17 +611,17 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error { nSize = nSize + (nSize+2*8*16)>>4 switch { case predefSize <= prevSize && predefSize <= nSize || forcePreDef: - if debug { + if debugEncoder { println("Using predefined", predefSize>>3, "<=", nSize>>3) } return preDef, compModePredefined case prevSize <= nSize: - if debug { + if debugEncoder { println("Using previous", prevSize>>3, "<=", nSize>>3) } return prev, compModeRepeat default: - if debug { + if debugEncoder { println("Using new, predef", predefSize>>3, ". previous:", prevSize>>3, ">", nSize>>3, "header max:", cur.maxHeaderSize()>>3, "bytes") println("tl:", cur.actualTableLog, "symbolLen:", cur.symbolLen, "norm:", cur.norm[:cur.symbolLen], "hist", cur.count[:cur.symbolLen]) } @@ -634,7 +634,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error { if llEnc.useRLE { mode |= uint8(compModeRLE) << 6 llEnc.setRLE(b.sequences[0].llCode) - if debug { + if debugEncoder { println("llEnc.useRLE") } } else { @@ -645,7 +645,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error { if ofEnc.useRLE { mode |= uint8(compModeRLE) << 4 ofEnc.setRLE(b.sequences[0].ofCode) - if debug { + if debugEncoder { println("ofEnc.useRLE") } } else { @@ -657,7 +657,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error { if mlEnc.useRLE { mode |= uint8(compModeRLE) << 2 mlEnc.setRLE(b.sequences[0].mlCode) - if debug { + if debugEncoder { println("mlEnc.useRLE, code: ", b.sequences[0].mlCode, "value", b.sequences[0].matchLen) } } else { @@ -666,7 +666,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error { mode |= uint8(m) << 2 } b.output = append(b.output, mode) - if debug { + if debugEncoder { printf("Compression modes: 0b%b", mode) } b.output, err = llEnc.writeCount(b.output) @@ -786,7 +786,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error { // Size is output minus block header. bh.setSize(uint32(len(b.output)-bhOffset) - 3) - if debug { + if debugEncoder { println("Rewriting block header", bh) } _ = bh.appendTo(b.output[bhOffset:bhOffset]) diff --git a/vendor/github.com/klauspost/compress/zstd/bytebuf.go b/vendor/github.com/klauspost/compress/zstd/bytebuf.go index 658ef78380..aab71c6cf8 100644 --- a/vendor/github.com/klauspost/compress/zstd/bytebuf.go +++ b/vendor/github.com/klauspost/compress/zstd/bytebuf.go @@ -12,8 +12,8 @@ import ( type byteBuffer interface { // Read up to 8 bytes. - // Returns nil if no more input is available. - readSmall(n int) []byte + // Returns io.ErrUnexpectedEOF if this cannot be satisfied. + readSmall(n int) ([]byte, error) // Read >8 bytes. // MAY use the destination slice. @@ -29,17 +29,17 @@ type byteBuffer interface { // in-memory buffer type byteBuf []byte -func (b *byteBuf) readSmall(n int) []byte { +func (b *byteBuf) readSmall(n int) ([]byte, error) { if debugAsserts && n > 8 { panic(fmt.Errorf("small read > 8 (%d). use readBig", n)) } bb := *b if len(bb) < n { - return nil + return nil, io.ErrUnexpectedEOF } r := bb[:n] *b = bb[n:] - return r + return r, nil } func (b *byteBuf) readBig(n int, dst []byte) ([]byte, error) { @@ -81,19 +81,22 @@ type readerWrapper struct { tmp [8]byte } -func (r *readerWrapper) readSmall(n int) []byte { +func (r *readerWrapper) readSmall(n int) ([]byte, error) { if debugAsserts && n > 8 { panic(fmt.Errorf("small read > 8 (%d). use readBig", n)) } n2, err := io.ReadFull(r.r, r.tmp[:n]) // We only really care about the actual bytes read. - if n2 != n { - if debug { + if err != nil { + if err == io.EOF { + return nil, io.ErrUnexpectedEOF + } + if debugDecoder { println("readSmall: got", n2, "want", n, "err", err) } - return nil + return nil, err } - return r.tmp[:n] + return r.tmp[:n], nil } func (r *readerWrapper) readBig(n int, dst []byte) ([]byte, error) { diff --git a/vendor/github.com/klauspost/compress/zstd/decoder.go b/vendor/github.com/klauspost/compress/zstd/decoder.go index f593e464b6..4d984c3b26 100644 --- a/vendor/github.com/klauspost/compress/zstd/decoder.go +++ b/vendor/github.com/klauspost/compress/zstd/decoder.go @@ -113,9 +113,6 @@ func NewReader(r io.Reader, opts ...DOption) (*Decoder, error) { // Returns the number of bytes written and any error that occurred. // When the stream is done, io.EOF will be returned. func (d *Decoder) Read(p []byte) (int, error) { - if d.stream == nil { - return 0, ErrDecoderNilInput - } var n int for { if len(d.current.b) > 0 { @@ -138,7 +135,7 @@ func (d *Decoder) Read(p []byte) (int, error) { } } if len(d.current.b) > 0 { - if debug { + if debugDecoder { println("returning", n, "still bytes left:", len(d.current.b)) } // Only return error at end of block @@ -147,7 +144,7 @@ func (d *Decoder) Read(p []byte) (int, error) { if d.current.err != nil { d.drainOutput() } - if debug { + if debugDecoder { println("returning", n, d.current.err, len(d.decoders)) } return n, d.current.err @@ -167,20 +164,17 @@ func (d *Decoder) Reset(r io.Reader) error { if r == nil { d.current.err = ErrDecoderNilInput + if len(d.current.b) > 0 { + d.current.b = d.current.b[:0] + } d.current.flushed = true return nil } - if d.stream == nil { - d.stream = make(chan decodeStream, 1) - d.streamWg.Add(1) - go d.startStreamDecoder(d.stream) - } - - // If bytes buffer and < 1MB, do sync decoding anyway. - if bb, ok := r.(byter); ok && bb.Len() < 1<<20 { + // If bytes buffer and < 5MB, do sync decoding anyway. + if bb, ok := r.(byter); ok && bb.Len() < 5<<20 { bb2 := bb - if debug { + if debugDecoder { println("*bytes.Buffer detected, doing sync decode, len:", bb.Len()) } b := bb2.Bytes() @@ -196,12 +190,18 @@ func (d *Decoder) Reset(r io.Reader) error { d.current.b = dst d.current.err = err d.current.flushed = true - if debug { + if debugDecoder { println("sync decode to", len(dst), "bytes, err:", err) } return nil } + if d.stream == nil { + d.stream = make(chan decodeStream, 1) + d.streamWg.Add(1) + go d.startStreamDecoder(d.stream) + } + // Remove current block. d.current.decodeOutput = decodeOutput{} d.current.err = nil @@ -225,7 +225,7 @@ func (d *Decoder) drainOutput() { d.current.cancel = nil } if d.current.d != nil { - if debug { + if debugDecoder { printf("re-adding current decoder %p, decoders: %d", d.current.d, len(d.decoders)) } d.decoders <- d.current.d @@ -238,7 +238,7 @@ func (d *Decoder) drainOutput() { } for v := range d.current.output { if v.d != nil { - if debug { + if debugDecoder { printf("re-adding decoder %p", v.d) } d.decoders <- v.d @@ -255,9 +255,6 @@ func (d *Decoder) drainOutput() { // The return value n is the number of bytes written. // Any error encountered during the write is also returned. func (d *Decoder) WriteTo(w io.Writer) (int64, error) { - if d.stream == nil { - return 0, ErrDecoderNilInput - } var n int64 for { if len(d.current.b) > 0 { @@ -297,7 +294,7 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) { block := <-d.decoders frame := block.localFrame defer func() { - if debug { + if debugDecoder { printf("re-adding decoder: %p", block) } frame.rawInput = nil @@ -310,7 +307,7 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) { frame.history.reset() err := frame.reset(&frame.bBuf) if err == io.EOF { - if debug { + if debugDecoder { println("frame reset return EOF") } return dst, nil @@ -355,7 +352,7 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) { return dst, err } if len(frame.bBuf) == 0 { - if debug { + if debugDecoder { println("frame dbuf empty") } break @@ -371,7 +368,7 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) { // if no data was available without blocking. func (d *Decoder) nextBlock(blocking bool) (ok bool) { if d.current.d != nil { - if debug { + if debugDecoder { printf("re-adding current decoder %p", d.current.d) } d.decoders <- d.current.d @@ -391,7 +388,7 @@ func (d *Decoder) nextBlock(blocking bool) (ok bool) { return false } } - if debug { + if debugDecoder { println("got", len(d.current.b), "bytes, error:", d.current.err) } return true @@ -485,7 +482,7 @@ func (d *Decoder) startStreamDecoder(inStream chan decodeStream) { defer d.streamWg.Done() frame := newFrameDec(d.o) for stream := range inStream { - if debug { + if debugDecoder { println("got new stream") } br := readerWrapper{r: stream.r} @@ -493,7 +490,7 @@ func (d *Decoder) startStreamDecoder(inStream chan decodeStream) { for { frame.history.reset() err := frame.reset(&br) - if debug && err != nil { + if debugDecoder && err != nil { println("Frame decoder returned", err) } if err == nil && frame.DictionaryID != nil { @@ -510,7 +507,7 @@ func (d *Decoder) startStreamDecoder(inStream chan decodeStream) { } break } - if debug { + if debugDecoder { println("starting frame decoder") } diff --git a/vendor/github.com/klauspost/compress/zstd/dict.go b/vendor/github.com/klauspost/compress/zstd/dict.go index fa25a18d86..a36ae83ef5 100644 --- a/vendor/github.com/klauspost/compress/zstd/dict.go +++ b/vendor/github.com/klauspost/compress/zstd/dict.go @@ -82,7 +82,7 @@ func loadDict(b []byte) (*dict, error) { println("Transform table error:", err) return err } - if debug { + if debugDecoder || debugEncoder { println("Read table ok", "symbolLen:", dec.symbolLen) } // Set decoders as predefined so they aren't reused. diff --git a/vendor/github.com/klauspost/compress/zstd/enc_best.go b/vendor/github.com/klauspost/compress/zstd/enc_best.go index fe3625c5f5..b7d4b90047 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_best.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_best.go @@ -132,7 +132,7 @@ func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) { } _ = addLiterals - if debug { + if debugEncoder { println("recent offsets:", blk.recentOffsets) } @@ -220,6 +220,20 @@ encodeLoop: best = bestOf(best, matchAt(candidateL.prev-e.cur, s, uint32(cv), -1)) best = bestOf(best, matchAt(candidateL2.offset-e.cur, s+1, uint32(cv2), -1)) best = bestOf(best, matchAt(candidateL2.prev-e.cur, s+1, uint32(cv2), -1)) + + // See if we can find a better match by checking where the current best ends. + // Use that offset to see if we can find a better full match. + if sAt := best.s + best.length; sAt < sLimit { + nextHashL := hash8(load6432(src, sAt), bestLongTableBits) + candidateEnd := e.longTable[nextHashL] + if pos := candidateEnd.offset - e.cur - best.length; pos >= 0 { + bestEnd := bestOf(best, matchAt(pos, best.s, load3232(src, best.s), -1)) + if pos := candidateEnd.prev - e.cur - best.length; pos >= 0 { + bestEnd = bestOf(bestEnd, matchAt(pos, best.s, load3232(src, best.s), -1)) + } + best = bestEnd + } + } } // We have a match, we can store the forward value @@ -260,7 +274,7 @@ encodeLoop: nextEmit = s if s >= sLimit { - if debug { + if debugEncoder { println("repeat ended", s, best.length) } @@ -398,7 +412,7 @@ encodeLoop: blk.recentOffsets[0] = uint32(offset1) blk.recentOffsets[1] = uint32(offset2) blk.recentOffsets[2] = uint32(offset3) - if debug { + if debugEncoder { println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits) } } diff --git a/vendor/github.com/klauspost/compress/zstd/enc_better.go b/vendor/github.com/klauspost/compress/zstd/enc_better.go index c2ce4a2bac..eab7b5083e 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_better.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_better.go @@ -138,7 +138,7 @@ func (e *betterFastEncoder) Encode(blk *blockEnc, src []byte) { blk.literals = append(blk.literals, src[nextEmit:until]...) s.litLen = uint32(until - nextEmit) } - if debug { + if debugEncoder { println("recent offsets:", blk.recentOffsets) } @@ -204,7 +204,7 @@ encodeLoop: nextEmit = s if s >= sLimit { - if debug { + if debugEncoder { println("repeat ended", s, lenght) } @@ -264,7 +264,7 @@ encodeLoop: s += lenght + repOff2 nextEmit = s if s >= sLimit { - if debug { + if debugEncoder { println("repeat ended", s, lenght) } @@ -412,8 +412,41 @@ encodeLoop: cv = load6432(src, s) } - // A 4-byte match has been found. Update recent offsets. - // We'll later see if more than 4 bytes. + // Try to find a better match by searching for a long match at the end of the current best match + if true && s+matched < sLimit { + nextHashL := hash8(load6432(src, s+matched), betterLongTableBits) + cv := load3232(src, s) + candidateL := e.longTable[nextHashL] + coffsetL := candidateL.offset - e.cur - matched + if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) { + // Found a long match, at least 4 bytes. + matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4 + if matchedNext > matched { + t = coffsetL + matched = matchedNext + if debugMatches { + println("long match at end-of-match") + } + } + } + + // Check prev long... + if true { + coffsetL = candidateL.prev - e.cur - matched + if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) { + // Found a long match, at least 4 bytes. + matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4 + if matchedNext > matched { + t = coffsetL + matched = matchedNext + if debugMatches { + println("prev long match at end-of-match") + } + } + } + } + } + // A match has been found. Update recent offsets. offset2 = offset1 offset1 = s - t @@ -520,7 +553,7 @@ encodeLoop: } blk.recentOffsets[0] = uint32(offset1) blk.recentOffsets[1] = uint32(offset2) - if debug { + if debugEncoder { println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits) } } @@ -623,7 +656,7 @@ func (e *betterFastEncoderDict) Encode(blk *blockEnc, src []byte) { blk.literals = append(blk.literals, src[nextEmit:until]...) s.litLen = uint32(until - nextEmit) } - if debug { + if debugEncoder { println("recent offsets:", blk.recentOffsets) } @@ -691,7 +724,7 @@ encodeLoop: nextEmit = s if s >= sLimit { - if debug { + if debugEncoder { println("repeat ended", s, lenght) } @@ -754,7 +787,7 @@ encodeLoop: s += lenght + repOff2 nextEmit = s if s >= sLimit { - if debug { + if debugEncoder { println("repeat ended", s, lenght) } @@ -905,9 +938,41 @@ encodeLoop: } cv = load6432(src, s) } + // Try to find a better match by searching for a long match at the end of the current best match + if s+matched < sLimit { + nextHashL := hash8(load6432(src, s+matched), betterLongTableBits) + cv := load3232(src, s) + candidateL := e.longTable[nextHashL] + coffsetL := candidateL.offset - e.cur - matched + if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) { + // Found a long match, at least 4 bytes. + matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4 + if matchedNext > matched { + t = coffsetL + matched = matchedNext + if debugMatches { + println("long match at end-of-match") + } + } + } - // A 4-byte match has been found. Update recent offsets. - // We'll later see if more than 4 bytes. + // Check prev long... + if true { + coffsetL = candidateL.prev - e.cur - matched + if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) { + // Found a long match, at least 4 bytes. + matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4 + if matchedNext > matched { + t = coffsetL + matched = matchedNext + if debugMatches { + println("prev long match at end-of-match") + } + } + } + } + } + // A match has been found. Update recent offsets. offset2 = offset1 offset1 = s - t @@ -1019,7 +1084,7 @@ encodeLoop: } blk.recentOffsets[0] = uint32(offset1) blk.recentOffsets[1] = uint32(offset2) - if debug { + if debugEncoder { println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits) } } diff --git a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go index 8629d43d86..96b21b90e8 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go @@ -109,7 +109,7 @@ func (e *doubleFastEncoder) Encode(blk *blockEnc, src []byte) { blk.literals = append(blk.literals, src[nextEmit:until]...) s.litLen = uint32(until - nextEmit) } - if debug { + if debugEncoder { println("recent offsets:", blk.recentOffsets) } @@ -170,7 +170,7 @@ encodeLoop: s += lenght + repOff nextEmit = s if s >= sLimit { - if debug { + if debugEncoder { println("repeat ended", s, lenght) } @@ -368,7 +368,7 @@ encodeLoop: } blk.recentOffsets[0] = uint32(offset1) blk.recentOffsets[1] = uint32(offset2) - if debug { + if debugEncoder { println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits) } } @@ -427,7 +427,7 @@ func (e *doubleFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) { blk.literals = append(blk.literals, src[nextEmit:until]...) s.litLen = uint32(until - nextEmit) } - if debug { + if debugEncoder { println("recent offsets:", blk.recentOffsets) } @@ -483,7 +483,7 @@ encodeLoop: s += length + repOff nextEmit = s if s >= sLimit { - if debug { + if debugEncoder { println("repeat ended", s, length) } @@ -677,7 +677,7 @@ encodeLoop: blk.literals = append(blk.literals, src[nextEmit:]...) blk.extraLits = len(src) - int(nextEmit) } - if debug { + if debugEncoder { println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits) } @@ -767,7 +767,7 @@ func (e *doubleFastEncoderDict) Encode(blk *blockEnc, src []byte) { blk.literals = append(blk.literals, src[nextEmit:until]...) s.litLen = uint32(until - nextEmit) } - if debug { + if debugEncoder { println("recent offsets:", blk.recentOffsets) } @@ -830,7 +830,7 @@ encodeLoop: s += lenght + repOff nextEmit = s if s >= sLimit { - if debug { + if debugEncoder { println("repeat ended", s, lenght) } @@ -1039,7 +1039,7 @@ encodeLoop: } blk.recentOffsets[0] = uint32(offset1) blk.recentOffsets[1] = uint32(offset2) - if debug { + if debugEncoder { println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits) } // If we encoded more than 64K mark all dirty. diff --git a/vendor/github.com/klauspost/compress/zstd/enc_fast.go b/vendor/github.com/klauspost/compress/zstd/enc_fast.go index ba4a17e106..2246d286dc 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_fast.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_fast.go @@ -103,7 +103,7 @@ func (e *fastEncoder) Encode(blk *blockEnc, src []byte) { blk.literals = append(blk.literals, src[nextEmit:until]...) s.litLen = uint32(until - nextEmit) } - if debug { + if debugEncoder { println("recent offsets:", blk.recentOffsets) } @@ -178,7 +178,7 @@ encodeLoop: s += length + 2 nextEmit = s if s >= sLimit { - if debug { + if debugEncoder { println("repeat ended", s, length) } @@ -330,7 +330,7 @@ encodeLoop: } blk.recentOffsets[0] = uint32(offset1) blk.recentOffsets[1] = uint32(offset2) - if debug { + if debugEncoder { println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits) } } @@ -343,7 +343,7 @@ func (e *fastEncoder) EncodeNoHist(blk *blockEnc, src []byte) { inputMargin = 8 minNonLiteralBlockSize = 1 + 1 + inputMargin ) - if debug { + if debugEncoder { if len(src) > maxBlockSize { panic("src too big") } @@ -391,7 +391,7 @@ func (e *fastEncoder) EncodeNoHist(blk *blockEnc, src []byte) { blk.literals = append(blk.literals, src[nextEmit:until]...) s.litLen = uint32(until - nextEmit) } - if debug { + if debugEncoder { println("recent offsets:", blk.recentOffsets) } @@ -462,7 +462,7 @@ encodeLoop: s += length + 2 nextEmit = s if s >= sLimit { - if debug { + if debugEncoder { println("repeat ended", s, length) } @@ -616,7 +616,7 @@ encodeLoop: blk.literals = append(blk.literals, src[nextEmit:]...) blk.extraLits = len(src) - int(nextEmit) } - if debug { + if debugEncoder { println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits) } // We do not store history, so we must offset e.cur to avoid false matches for next user. @@ -696,7 +696,7 @@ func (e *fastEncoderDict) Encode(blk *blockEnc, src []byte) { blk.literals = append(blk.literals, src[nextEmit:until]...) s.litLen = uint32(until - nextEmit) } - if debug { + if debugEncoder { println("recent offsets:", blk.recentOffsets) } @@ -773,7 +773,7 @@ encodeLoop: s += length + 2 nextEmit = s if s >= sLimit { - if debug { + if debugEncoder { println("repeat ended", s, length) } @@ -926,7 +926,7 @@ encodeLoop: } blk.recentOffsets[0] = uint32(offset1) blk.recentOffsets[1] = uint32(offset2) - if debug { + if debugEncoder { println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits) } } diff --git a/vendor/github.com/klauspost/compress/zstd/encoder.go b/vendor/github.com/klauspost/compress/zstd/encoder.go index 4871dd03af..ea85548fc9 100644 --- a/vendor/github.com/klauspost/compress/zstd/encoder.go +++ b/vendor/github.com/klauspost/compress/zstd/encoder.go @@ -245,7 +245,7 @@ func (e *Encoder) nextBlock(final bool) error { s.filling, s.current, s.previous = s.previous[:0], s.filling, s.current s.wg.Add(1) go func(src []byte) { - if debug { + if debugEncoder { println("Adding block,", len(src), "bytes, final:", final) } defer func() { @@ -290,7 +290,7 @@ func (e *Encoder) nextBlock(final bool) error { } switch err { case errIncompressible: - if debug { + if debugEncoder { println("Storing incompressible block as raw") } blk.encodeRaw(src) @@ -313,7 +313,7 @@ func (e *Encoder) nextBlock(final bool) error { // // The Copy function uses ReaderFrom if available. func (e *Encoder) ReadFrom(r io.Reader) (n int64, err error) { - if debug { + if debugEncoder { println("Using ReadFrom") } @@ -336,20 +336,20 @@ func (e *Encoder) ReadFrom(r io.Reader) (n int64, err error) { switch err { case io.EOF: e.state.filling = e.state.filling[:len(e.state.filling)-len(src)] - if debug { + if debugEncoder { println("ReadFrom: got EOF final block:", len(e.state.filling)) } return n, nil case nil: default: - if debug { + if debugEncoder { println("ReadFrom: got error:", err) } e.state.err = err return n, err } if len(src) > 0 { - if debug { + if debugEncoder { println("ReadFrom: got space left in source:", len(src)) } continue @@ -512,7 +512,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte { switch err { case errIncompressible: - if debug { + if debugEncoder { println("Storing incompressible block as raw") } dst = blk.encodeRawTo(dst, src) @@ -548,7 +548,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte { switch err { case errIncompressible: - if debug { + if debugEncoder { println("Storing incompressible block as raw") } dst = blk.encodeRawTo(dst, todo) diff --git a/vendor/github.com/klauspost/compress/zstd/framedec.go b/vendor/github.com/klauspost/compress/zstd/framedec.go index 693c5f05d2..e8cc9a2c22 100644 --- a/vendor/github.com/klauspost/compress/zstd/framedec.go +++ b/vendor/github.com/klauspost/compress/zstd/framedec.go @@ -78,44 +78,68 @@ func newFrameDec(o decoderOptions) *frameDec { func (d *frameDec) reset(br byteBuffer) error { d.HasCheckSum = false d.WindowSize = 0 - var b []byte + var signature [4]byte for { - b = br.readSmall(4) - if b == nil { + var err error + // Check if we can read more... + b, err := br.readSmall(1) + switch err { + case io.EOF, io.ErrUnexpectedEOF: return io.EOF + default: + return err + case nil: + signature[0] = b[0] } - if !bytes.Equal(b[1:4], skippableFrameMagic) || b[0]&0xf0 != 0x50 { - if debug { - println("Not skippable", hex.EncodeToString(b), hex.EncodeToString(skippableFrameMagic)) + // Read the rest, don't allow io.ErrUnexpectedEOF + b, err = br.readSmall(3) + switch err { + case io.EOF: + return io.EOF + default: + return err + case nil: + copy(signature[1:], b) + } + + if !bytes.Equal(signature[1:4], skippableFrameMagic) || signature[0]&0xf0 != 0x50 { + if debugDecoder { + println("Not skippable", hex.EncodeToString(signature[:]), hex.EncodeToString(skippableFrameMagic)) } // Break if not skippable frame. break } // Read size to skip - b = br.readSmall(4) - if b == nil { - println("Reading Frame Size EOF") - return io.ErrUnexpectedEOF + b, err = br.readSmall(4) + if err != nil { + if debugDecoder { + println("Reading Frame Size", err) + } + return err } n := uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24) println("Skipping frame with", n, "bytes.") - err := br.skipN(int(n)) + err = br.skipN(int(n)) if err != nil { - if debug { + if debugDecoder { println("Reading discarded frame", err) } return err } } - if !bytes.Equal(b, frameMagic) { - println("Got magic numbers: ", b, "want:", frameMagic) + if !bytes.Equal(signature[:], frameMagic) { + if debugDecoder { + println("Got magic numbers: ", signature, "want:", frameMagic) + } return ErrMagicMismatch } // Read Frame_Header_Descriptor fhd, err := br.readByte() if err != nil { - println("Reading Frame_Header_Descriptor", err) + if debugDecoder { + println("Reading Frame_Header_Descriptor", err) + } return err } d.SingleSegment = fhd&(1<<5) != 0 @@ -130,7 +154,9 @@ func (d *frameDec) reset(br byteBuffer) error { if !d.SingleSegment { wd, err := br.readByte() if err != nil { - println("Reading Window_Descriptor", err) + if debugDecoder { + println("Reading Window_Descriptor", err) + } return err } printf("raw: %x, mantissa: %d, exponent: %d\n", wd, wd&7, wd>>3) @@ -147,12 +173,11 @@ func (d *frameDec) reset(br byteBuffer) error { if size == 3 { size = 4 } - b = br.readSmall(int(size)) - if b == nil { - if debug { - println("Reading Dictionary_ID", io.ErrUnexpectedEOF) - } - return io.ErrUnexpectedEOF + + b, err := br.readSmall(int(size)) + if err != nil { + println("Reading Dictionary_ID", err) + return err } var id uint32 switch size { @@ -163,7 +188,7 @@ func (d *frameDec) reset(br byteBuffer) error { case 4: id = uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24) } - if debug { + if debugDecoder { println("Dict size", size, "ID:", id) } if id > 0 { @@ -187,10 +212,10 @@ func (d *frameDec) reset(br byteBuffer) error { } d.FrameContentSize = 0 if fcsSize > 0 { - b := br.readSmall(fcsSize) - if b == nil { - println("Reading Frame content", io.ErrUnexpectedEOF) - return io.ErrUnexpectedEOF + b, err := br.readSmall(fcsSize) + if err != nil { + println("Reading Frame content", err) + return err } switch fcsSize { case 1: @@ -205,7 +230,7 @@ func (d *frameDec) reset(br byteBuffer) error { d2 := uint32(b[4]) | (uint32(b[5]) << 8) | (uint32(b[6]) << 16) | (uint32(b[7]) << 24) d.FrameContentSize = uint64(d1) | (uint64(d2) << 32) } - if debug { + if debugDecoder { println("field size bits:", v, "fcsSize:", fcsSize, "FrameContentSize:", d.FrameContentSize, hex.EncodeToString(b[:fcsSize]), "singleseg:", d.SingleSegment, "window:", d.WindowSize) } } @@ -248,7 +273,7 @@ func (d *frameDec) reset(br byteBuffer) error { // next will start decoding the next block from stream. func (d *frameDec) next(block *blockDec) error { - if debug { + if debugDecoder { printf("decoding new block %p:%p", block, block.data) } err := block.reset(d.rawInput, d.WindowSize) @@ -259,7 +284,7 @@ func (d *frameDec) next(block *blockDec) error { return err } block.input <- struct{}{} - if debug { + if debugDecoder { println("next block:", block) } d.asyncRunningMu.Lock() @@ -307,19 +332,19 @@ func (d *frameDec) checkCRC() error { tmp[3] = byte(got >> 24) // We can overwrite upper tmp now - want := d.rawInput.readSmall(4) - if want == nil { - println("CRC missing?") - return io.ErrUnexpectedEOF + want, err := d.rawInput.readSmall(4) + if err != nil { + println("CRC missing?", err) + return err } if !bytes.Equal(tmp[:], want) { - if debug { + if debugDecoder { println("CRC Check Failed:", tmp[:], "!=", want) } return ErrCRCMismatch } - if debug { + if debugDecoder { println("CRC ok", tmp[:]) } return nil @@ -340,7 +365,7 @@ func (d *frameDec) initAsync() { if cap(d.decoding) < d.o.concurrent { d.decoding = make(chan *blockDec, d.o.concurrent) } - if debug { + if debugDecoder { h := d.history printf("history init. len: %d, cap: %d", len(h.b), cap(h.b)) } @@ -388,7 +413,7 @@ func (d *frameDec) startDecoder(output chan decodeOutput) { output <- r return } - if debug { + if debugDecoder { println("got result, from ", d.offset, "to", d.offset+int64(len(r.b))) d.offset += int64(len(r.b)) } @@ -396,7 +421,7 @@ func (d *frameDec) startDecoder(output chan decodeOutput) { // Send history to next block select { case next = <-d.decoding: - if debug { + if debugDecoder { println("Sending ", len(d.history.b), "bytes as history") } next.history <- &d.history @@ -434,7 +459,7 @@ func (d *frameDec) startDecoder(output chan decodeOutput) { output <- r if next == nil { // There was no decoder available, we wait for one now that we have sent to the writer. - if debug { + if debugDecoder { println("Sending ", len(d.history.b), " bytes as history") } next = <-d.decoding @@ -458,7 +483,7 @@ func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) { if err != nil { break } - if debug { + if debugDecoder { println("next block:", dec) } err = dec.decodeBuf(&d.history) diff --git a/vendor/github.com/klauspost/compress/zstd/fse_encoder.go b/vendor/github.com/klauspost/compress/zstd/fse_encoder.go index c74681b999..b4757ee3f0 100644 --- a/vendor/github.com/klauspost/compress/zstd/fse_encoder.go +++ b/vendor/github.com/klauspost/compress/zstd/fse_encoder.go @@ -229,7 +229,7 @@ func (s *fseEncoder) setRLE(val byte) { deltaFindState: 0, deltaNbBits: 0, } - if debug { + if debugEncoder { println("setRLE: val", val, "symbolTT", s.ct.symbolTT[val]) } s.rleVal = val diff --git a/vendor/github.com/klauspost/compress/zstd/snappy.go b/vendor/github.com/klauspost/compress/zstd/snappy.go index 9d9d1d567e..0372b1714a 100644 --- a/vendor/github.com/klauspost/compress/zstd/snappy.go +++ b/vendor/github.com/klauspost/compress/zstd/snappy.go @@ -203,7 +203,7 @@ func (r *SnappyConverter) Convert(in io.Reader, w io.Writer) (int64, error) { written += int64(n) continue case chunkTypeUncompressedData: - if debug { + if debugEncoder { println("Uncompressed, chunklen", chunkLen) } // Section 4.3. Uncompressed data (chunk type 0x01). @@ -246,7 +246,7 @@ func (r *SnappyConverter) Convert(in io.Reader, w io.Writer) (int64, error) { continue case chunkTypeStreamIdentifier: - if debug { + if debugEncoder { println("stream id", chunkLen, len(snappyMagicBody)) } // Section 4.1. Stream identifier (chunk type 0xff). diff --git a/vendor/github.com/klauspost/compress/zstd/zip.go b/vendor/github.com/klauspost/compress/zstd/zip.go new file mode 100644 index 0000000000..9325b928ae --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/zip.go @@ -0,0 +1,121 @@ +// Copyright 2019+ Klaus Post. All rights reserved. +// License information can be found in the LICENSE file. + +package zstd + +import ( + "errors" + "io" + "sync" +) + +// ZipMethodWinZip is the method for Zstandard compressed data inside Zip files for WinZip. +// See https://www.winzip.com/win/en/comp_info.html +const ZipMethodWinZip = 93 + +// ZipMethodPKWare is the original method number used by PKWARE to indicate Zstandard compression. +// Deprecated: This has been deprecated by PKWARE, use ZipMethodWinZip instead for compression. +// See https://pkware.cachefly.net/webdocs/APPNOTE/APPNOTE-6.3.9.TXT +const ZipMethodPKWare = 20 + +var zipReaderPool sync.Pool + +// newZipReader cannot be used since we would leak goroutines... +func newZipReader(r io.Reader) io.ReadCloser { + dec, ok := zipReaderPool.Get().(*Decoder) + if ok { + dec.Reset(r) + } else { + d, err := NewReader(r, WithDecoderConcurrency(1), WithDecoderLowmem(true)) + if err != nil { + panic(err) + } + dec = d + } + return &pooledZipReader{dec: dec} +} + +type pooledZipReader struct { + mu sync.Mutex // guards Close and Read + dec *Decoder +} + +func (r *pooledZipReader) Read(p []byte) (n int, err error) { + r.mu.Lock() + defer r.mu.Unlock() + if r.dec == nil { + return 0, errors.New("Read after Close") + } + dec, err := r.dec.Read(p) + + return dec, err +} + +func (r *pooledZipReader) Close() error { + r.mu.Lock() + defer r.mu.Unlock() + var err error + if r.dec != nil { + err = r.dec.Reset(nil) + zipReaderPool.Put(r.dec) + r.dec = nil + } + return err +} + +type pooledZipWriter struct { + mu sync.Mutex // guards Close and Read + enc *Encoder +} + +func (w *pooledZipWriter) Write(p []byte) (n int, err error) { + w.mu.Lock() + defer w.mu.Unlock() + if w.enc == nil { + return 0, errors.New("Write after Close") + } + return w.enc.Write(p) +} + +func (w *pooledZipWriter) Close() error { + w.mu.Lock() + defer w.mu.Unlock() + var err error + if w.enc != nil { + err = w.enc.Close() + zipReaderPool.Put(w.enc) + w.enc = nil + } + return err +} + +// ZipCompressor returns a compressor that can be registered with zip libraries. +// The provided encoder options will be used on all encodes. +func ZipCompressor(opts ...EOption) func(w io.Writer) (io.WriteCloser, error) { + var pool sync.Pool + return func(w io.Writer) (io.WriteCloser, error) { + enc, ok := pool.Get().(*Encoder) + if ok { + enc.Reset(w) + } else { + var err error + enc, err = NewWriter(w, opts...) + if err != nil { + return nil, err + } + } + return &pooledZipWriter{enc: enc}, nil + } +} + +// ZipDecompressor returns a decompressor that can be registered with zip libraries. +// See ZipCompressor for example. +func ZipDecompressor() func(r io.Reader) io.ReadCloser { + return func(r io.Reader) io.ReadCloser { + d, err := NewReader(r, WithDecoderConcurrency(1), WithDecoderLowmem(true)) + if err != nil { + panic(err) + } + return d.IOReadCloser() + } +} diff --git a/vendor/github.com/klauspost/compress/zstd/zstd.go b/vendor/github.com/klauspost/compress/zstd/zstd.go index 1ba308c8bf..ef1d49a009 100644 --- a/vendor/github.com/klauspost/compress/zstd/zstd.go +++ b/vendor/github.com/klauspost/compress/zstd/zstd.go @@ -15,6 +15,12 @@ import ( // enable debug printing const debug = false +// enable encoding debug printing +const debugEncoder = debug + +// enable decoding debug printing +const debugDecoder = debug + // Enable extra assertions. const debugAsserts = debug || false @@ -82,13 +88,13 @@ var ( ) func println(a ...interface{}) { - if debug { + if debug || debugDecoder || debugEncoder { log.Println(a...) } } func printf(format string, a ...interface{}) { - if debug { + if debug || debugDecoder || debugEncoder { log.Printf(format, a...) } } |