From 8d2059a20184842d9a7a573d285956dd998d42c4 Mon Sep 17 00:00:00 2001 From: 6543 <6543@obermui.de> Date: Fri, 28 Feb 2020 10:51:18 +0100 Subject: update: macaron cores,gzip,session (#10522) Co-authored-by: zeripath --- .../github.com/klauspost/compress/flate/deflate.go | 45 +- .../klauspost/compress/flate/fast_encoder.go | 27 +- .../klauspost/compress/flate/gen_inflate.go | 274 ++++++ .../klauspost/compress/flate/huffman_bit_writer.go | 78 +- .../klauspost/compress/flate/huffman_code.go | 83 +- .../klauspost/compress/flate/huffman_sortByFreq.go | 178 ++++ .../compress/flate/huffman_sortByLiteral.go | 201 +++++ .../github.com/klauspost/compress/flate/inflate.go | 113 ++- .../klauspost/compress/flate/inflate_gen.go | 922 +++++++++++++++++++++ .../github.com/klauspost/compress/flate/level1.go | 25 +- .../github.com/klauspost/compress/flate/level2.go | 32 +- .../github.com/klauspost/compress/flate/level3.go | 58 +- .../github.com/klauspost/compress/flate/level4.go | 34 +- .../github.com/klauspost/compress/flate/level5.go | 47 +- .../github.com/klauspost/compress/flate/level6.go | 37 +- .../klauspost/compress/flate/stateless.go | 83 +- .../github.com/klauspost/compress/flate/token.go | 46 +- vendor/github.com/klauspost/compress/gzip/gzip.go | 4 +- 18 files changed, 2000 insertions(+), 287 deletions(-) create mode 100644 vendor/github.com/klauspost/compress/flate/gen_inflate.go create mode 100644 vendor/github.com/klauspost/compress/flate/huffman_sortByFreq.go create mode 100644 vendor/github.com/klauspost/compress/flate/huffman_sortByLiteral.go create mode 100644 vendor/github.com/klauspost/compress/flate/inflate_gen.go (limited to 'vendor/github.com/klauspost') diff --git a/vendor/github.com/klauspost/compress/flate/deflate.go b/vendor/github.com/klauspost/compress/flate/deflate.go index 20c94f5968..2b101d26b2 100644 --- a/vendor/github.com/klauspost/compress/flate/deflate.go +++ b/vendor/github.com/klauspost/compress/flate/deflate.go @@ -48,6 +48,8 @@ const ( 
maxHashOffset = 1 << 24 skipNever = math.MaxInt32 + + debugDeflate = false ) type compressionLevel struct { @@ -59,15 +61,13 @@ type compressionLevel struct { // See https://blog.klauspost.com/rebalancing-deflate-compression-levels/ var levels = []compressionLevel{ {}, // 0 - // Level 1-4 uses specialized algorithm - values not used + // Level 1-6 uses specialized algorithm - values not used {0, 0, 0, 0, 0, 1}, {0, 0, 0, 0, 0, 2}, {0, 0, 0, 0, 0, 3}, {0, 0, 0, 0, 0, 4}, - // For levels 5-6 we don't bother trying with lazy matches. - // Lazy matching is at least 30% slower, with 1.5% increase. - {6, 0, 12, 8, 12, 5}, - {8, 0, 24, 16, 16, 6}, + {0, 0, 0, 0, 0, 5}, + {0, 0, 0, 0, 0, 6}, // Levels 7-9 use increasingly more lazy matching // and increasingly stringent conditions for "good enough". {8, 8, 24, 16, skipNever, 7}, @@ -203,9 +203,8 @@ func (d *compressor) writeBlockSkip(tok *tokens, index int, eof bool) error { // This is much faster than doing a full encode. // Should only be used after a start/reset. func (d *compressor) fillWindow(b []byte) { - // Do not fill window if we are in store-only mode, - // use constant or Snappy compression. - if d.level == 0 { + // Do not fill window if we are in store-only or huffman mode. + if d.level <= 0 { return } if d.fast != nil { @@ -368,7 +367,7 @@ func (d *compressor) deflateLazy() { // Sanity enables additional runtime tests. // It's intended to be used during development // to supplement the currently ad-hoc unit tests. 
- const sanity = false + const sanity = debugDeflate if d.windowEnd-s.index < minMatchLength+maxMatchLength && !d.sync { return @@ -644,7 +643,7 @@ func (d *compressor) init(w io.Writer, level int) (err error) { d.fill = (*compressor).fillBlock d.step = (*compressor).store case level == ConstantCompression: - d.w.logReusePenalty = uint(4) + d.w.logNewTablePenalty = 4 d.window = make([]byte, maxStoreBlockSize) d.fill = (*compressor).fillBlock d.step = (*compressor).storeHuff @@ -652,13 +651,13 @@ func (d *compressor) init(w io.Writer, level int) (err error) { level = 5 fallthrough case level >= 1 && level <= 6: - d.w.logReusePenalty = uint(level + 1) + d.w.logNewTablePenalty = 6 d.fast = newFastEnc(level) d.window = make([]byte, maxStoreBlockSize) d.fill = (*compressor).fillBlock d.step = (*compressor).storeFast case 7 <= level && level <= 9: - d.w.logReusePenalty = uint(level) + d.w.logNewTablePenalty = 10 d.state = &advancedState{} d.compressionLevel = levels[level] d.initDeflate() @@ -667,6 +666,7 @@ func (d *compressor) init(w io.Writer, level int) (err error) { default: return fmt.Errorf("flate: invalid compression level %d: want value in range [-2, 9]", level) } + d.level = level return nil } @@ -720,6 +720,7 @@ func (d *compressor) close() error { return d.w.err } d.w.flush() + d.w.reset(nil) return d.w.err } @@ -750,8 +751,7 @@ func NewWriter(w io.Writer, level int) (*Writer, error) { // can only be decompressed by a Reader initialized with the // same dictionary. 
func NewWriterDict(w io.Writer, level int, dict []byte) (*Writer, error) { - dw := &dictWriter{w} - zw, err := NewWriter(dw, level) + zw, err := NewWriter(w, level) if err != nil { return nil, err } @@ -760,14 +760,6 @@ func NewWriterDict(w io.Writer, level int, dict []byte) (*Writer, error) { return zw, err } -type dictWriter struct { - w io.Writer -} - -func (w *dictWriter) Write(b []byte) (n int, err error) { - return w.w.Write(b) -} - // A Writer takes data written to it and writes the compressed // form of that data to an underlying writer (see NewWriter). type Writer struct { @@ -805,11 +797,12 @@ func (w *Writer) Close() error { // the result of NewWriter or NewWriterDict called with dst // and w's level and dictionary. func (w *Writer) Reset(dst io.Writer) { - if dw, ok := w.d.w.writer.(*dictWriter); ok { + if len(w.dict) > 0 { // w was created with NewWriterDict - dw.w = dst - w.d.reset(dw) - w.d.fillWindow(w.dict) + w.d.reset(dst) + if dst != nil { + w.d.fillWindow(w.dict) + } } else { // w was created with NewWriter w.d.reset(dst) diff --git a/vendor/github.com/klauspost/compress/flate/fast_encoder.go b/vendor/github.com/klauspost/compress/flate/fast_encoder.go index b0a470f92e..6d4c1e98bc 100644 --- a/vendor/github.com/klauspost/compress/flate/fast_encoder.go +++ b/vendor/github.com/klauspost/compress/flate/fast_encoder.go @@ -35,17 +35,17 @@ func newFastEnc(level int) fastEnc { } const ( - tableBits = 16 // Bits used in the table + tableBits = 15 // Bits used in the table tableSize = 1 << tableBits // Size of the table tableShift = 32 - tableBits // Right-shift to get the tableBits most significant bits of a uint32. 
baseMatchOffset = 1 // The smallest match offset baseMatchLength = 3 // The smallest match length per the RFC section 3.2.5 maxMatchOffset = 1 << 15 // The largest match offset - bTableBits = 18 // Bits used in the big tables - bTableSize = 1 << bTableBits // Size of the table - allocHistory = maxMatchOffset * 10 // Size to preallocate for history. - bufferReset = (1 << 31) - allocHistory - maxStoreBlockSize // Reset the buffer offset when reaching this. + bTableBits = 17 // Bits used in the big tables + bTableSize = 1 << bTableBits // Size of the table + allocHistory = maxStoreBlockSize * 10 // Size to preallocate for history. + bufferReset = (1 << 31) - allocHistory - maxStoreBlockSize - 1 // Reset the buffer offset when reaching this. ) const ( @@ -92,7 +92,6 @@ func hash(u uint32) uint32 { } type tableEntry struct { - val uint32 offset int32 } @@ -210,16 +209,14 @@ func (e *fastGen) matchlenLong(s, t int32, src []byte) int32 { // Reset the encoding table. func (e *fastGen) Reset() { - if cap(e.hist) < int(maxMatchOffset*8) { - l := maxMatchOffset * 8 - // Make it at least 1MB. - if l < 1<<20 { - l = 1 << 20 - } - e.hist = make([]byte, 0, l) + if cap(e.hist) < allocHistory { + e.hist = make([]byte, 0, allocHistory) + } + // We offset current position so everything will be out of reach. + // If we are above the buffer reset it will be cleared anyway since len(hist) == 0. 
+ if e.cur <= bufferReset { + e.cur += maxMatchOffset + int32(len(e.hist)) } - // We offset current position so everything will be out of reach - e.cur += maxMatchOffset + int32(len(e.hist)) e.hist = e.hist[:0] } diff --git a/vendor/github.com/klauspost/compress/flate/gen_inflate.go b/vendor/github.com/klauspost/compress/flate/gen_inflate.go new file mode 100644 index 0000000000..c74a95fe7f --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/gen_inflate.go @@ -0,0 +1,274 @@ +// +build generate + +//go:generate go run $GOFILE && gofmt -w inflate_gen.go + +package main + +import ( + "os" + "strings" +) + +func main() { + f, err := os.Create("inflate_gen.go") + if err != nil { + panic(err) + } + defer f.Close() + types := []string{"*bytes.Buffer", "*bytes.Reader", "*bufio.Reader", "*strings.Reader"} + names := []string{"BytesBuffer", "BytesReader", "BufioReader", "StringsReader"} + imports := []string{"bytes", "bufio", "io", "strings", "math/bits"} + f.WriteString(`// Code generated by go generate gen_inflate.go. DO NOT EDIT. + +package flate + +import ( +`) + + for _, imp := range imports { + f.WriteString("\t\"" + imp + "\"\n") + } + f.WriteString(")\n\n") + + template := ` + +// Decode a single Huffman block from f. +// hl and hd are the Huffman states for the lit/length values +// and the distance values, respectively. If hd == nil, using the +// fixed distance encoding associated with fixed Huffman blocks. +func (f *decompressor) $FUNCNAME$() { + const ( + stateInit = iota // Zero value must be stateInit + stateDict + ) + fr := f.r.($TYPE$) + moreBits := func() error { + c, err := fr.ReadByte() + if err != nil { + return noEOF(err) + } + f.roffset++ + f.b |= uint32(c) << f.nb + f.nb += 8 + return nil + } + + switch f.stepState { + case stateInit: + goto readLiteral + case stateDict: + goto copyHistory + } + +readLiteral: + // Read literal and/or (length, distance) according to RFC section 3.2.3. 
+ { + var v int + { + // Inlined v, err := f.huffSym(f.hl) + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(f.hl.maxRead) + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + nb, b := f.nb, f.b + for { + for nb < n { + c, err := fr.ReadByte() + if err != nil { + f.b = b + f.nb = nb + f.err = noEOF(err) + return + } + f.roffset++ + b |= uint32(c) << (nb & 31) + nb += 8 + } + chunk := f.hl.chunks[b&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= nb { + if n == 0 { + f.b = b + f.nb = nb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return + } + f.b = b >> (n & 31) + f.nb = nb - n + v = int(chunk >> huffmanValueShift) + break + } + } + } + + var n uint // number of bits extra + var length int + var err error + switch { + case v < 256: + f.dict.writeByte(byte(v)) + if f.dict.availWrite() == 0 { + f.toRead = f.dict.readFlush() + f.step = (*decompressor).$FUNCNAME$ + f.stepState = stateInit + return + } + goto readLiteral + case v == 256: + f.finishBlock() + return + // otherwise, reference to older data + case v < 265: + length = v - (257 - 3) + n = 0 + case v < 269: + length = v*2 - (265*2 - 11) + n = 1 + case v < 273: + length = v*4 - (269*4 - 19) + n = 2 + case v < 277: + length = v*8 - (273*8 - 35) + n = 3 + case v < 281: + length = v*16 - (277*16 - 67) + n = 4 + case v < 285: + length = v*32 - (281*32 - 131) + n = 5 + case v < maxNumLit: + length = 
258 + n = 0 + default: + if debugDecode { + fmt.Println(v, ">= maxNumLit") + } + f.err = CorruptInputError(f.roffset) + return + } + if n > 0 { + for f.nb < n { + if err = moreBits(); err != nil { + if debugDecode { + fmt.Println("morebits n>0:", err) + } + f.err = err + return + } + } + length += int(f.b & uint32(1<<n-1)) + f.b >>= n + f.nb -= n + } + + var dist int + if f.hd == nil { + for f.nb < 5 { + if err = moreBits(); err != nil { + if debugDecode { + fmt.Println("morebits f.nb<5:", err) + } + f.err = err + return + } + } + dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3))) + f.b >>= 5 + f.nb -= 5 + } else { + if dist, err = f.huffSym(f.hd); err != nil { + if debugDecode { + fmt.Println("huffsym:", err) + } + f.err = err + return + } + } + + switch { + case dist < 4: + dist++ + case dist < maxNumDist: + nb := uint(dist-2) >> 1 + // have 1 bit in bottom of dist, need nb more. + extra := (dist & 1) << nb + for f.nb < nb { + if err = moreBits(); err != nil { + if debugDecode { + fmt.Println("morebits f.nb<nb:", err) + } + f.err = err + return + } + } + extra |= int(f.b & uint32(1<<nb-1)) + f.b >>= nb + f.nb -= nb + dist = 1<<(nb+1) + 1 + extra + default: + if debugDecode { + fmt.Println("dist too big:", dist, maxNumDist) + } + f.err = CorruptInputError(f.roffset) + return + } + + // No check on length; encoding can be prescient. + if dist > f.dict.histSize() { + if debugDecode { + fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize()) + } + f.err = CorruptInputError(f.roffset) + return + } + + f.copyLen, f.copyDist = length, dist + goto copyHistory + } + +copyHistory: + // Perform a backwards copy according to RFC section 3.2.3. 
+ { + cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen) + if cnt == 0 { + cnt = f.dict.writeCopy(f.copyDist, f.copyLen) + } + f.copyLen -= cnt + + if f.dict.availWrite() == 0 || f.copyLen > 0 { + f.toRead = f.dict.readFlush() + f.step = (*decompressor).$FUNCNAME$ // We need to continue this work + f.stepState = stateDict + return + } + goto readLiteral + } +} + +` + for i, t := range types { + s := strings.Replace(template, "$FUNCNAME$", "huffman"+names[i], -1) + s = strings.Replace(s, "$TYPE$", t, -1) + f.WriteString(s) + } + f.WriteString("func (f *decompressor) huffmanBlockDecoder() func() {\n") + f.WriteString("\tswitch f.r.(type) {\n") + for i, t := range types { + f.WriteString("\t\tcase " + t + ":\n") + f.WriteString("\t\t\treturn f.huffman" + names[i] + "\n") + } + f.WriteString("\t\tdefault:\n") + f.WriteString("\t\t\treturn f.huffmanBlockGeneric") + f.WriteString("\t}\n}\n") +} diff --git a/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go b/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go index 5ed476aa0d..53fe1d06e2 100644 --- a/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go +++ b/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go @@ -93,12 +93,12 @@ type huffmanBitWriter struct { err error lastHeader int // Set between 0 (reused block can be up to 2x the size) - logReusePenalty uint - lastHuffMan bool - bytes [256]byte - literalFreq [lengthCodesStart + 32]uint16 - offsetFreq [32]uint16 - codegenFreq [codegenCodeCount]uint16 + logNewTablePenalty uint + lastHuffMan bool + bytes [256]byte + literalFreq [lengthCodesStart + 32]uint16 + offsetFreq [32]uint16 + codegenFreq [codegenCodeCount]uint16 // codegen must have an extra space for the final symbol. codegen [literalCount + offsetCodeCount + 1]uint8 @@ -119,7 +119,7 @@ type huffmanBitWriter struct { // If lastHuffMan is set, a table for outputting literals has been generated and offsets are invalid. 
// // An incoming block estimates the output size of a new table using a 'fresh' by calculating the -// optimal size and adding a penalty in 'logReusePenalty'. +// optimal size and adding a penalty in 'logNewTablePenalty'. // A Huffman table is not optimal, which is why we add a penalty, and generating a new table // is slower both for compression and decompression. @@ -135,7 +135,6 @@ func newHuffmanBitWriter(w io.Writer) *huffmanBitWriter { func (w *huffmanBitWriter) reset(writer io.Writer) { w.writer = writer w.bits, w.nbits, w.nbytes, w.err = 0, 0, 0, nil - w.bytes = [256]byte{} w.lastHeader = 0 w.lastHuffMan = false } @@ -178,6 +177,11 @@ func (w *huffmanBitWriter) flush() { w.nbits = 0 return } + if w.lastHeader > 0 { + // We owe an EOB + w.writeCode(w.literalEncoding.codes[endBlockMarker]) + w.lastHeader = 0 + } n := w.nbytes for w.nbits != 0 { w.bytes[n] = byte(w.bits) @@ -350,6 +354,13 @@ func (w *huffmanBitWriter) headerSize() (size, numCodegens int) { int(w.codegenFreq[18])*7, numCodegens } +// dynamicSize returns the size of dynamically encoded data in bits. +func (w *huffmanBitWriter) dynamicReuseSize(litEnc, offEnc *huffmanEncoder) (size int) { + size = litEnc.bitLength(w.literalFreq[:]) + + offEnc.bitLength(w.offsetFreq[:]) + return size +} + // dynamicSize returns the size of dynamically encoded data in bits. 
func (w *huffmanBitWriter) dynamicSize(litEnc, offEnc *huffmanEncoder, extraBits int) (size, numCodegens int) { header, numCodegens := w.headerSize() @@ -452,30 +463,30 @@ func (w *huffmanBitWriter) writeDynamicHeader(numLiterals int, numOffsets int, n i := 0 for { - var codeWord int = int(w.codegen[i]) + var codeWord = uint32(w.codegen[i]) i++ if codeWord == badCode { break } - w.writeCode(w.codegenEncoding.codes[uint32(codeWord)]) + w.writeCode(w.codegenEncoding.codes[codeWord]) switch codeWord { case 16: w.writeBits(int32(w.codegen[i]), 2) i++ - break case 17: w.writeBits(int32(w.codegen[i]), 3) i++ - break case 18: w.writeBits(int32(w.codegen[i]), 7) i++ - break } } } +// writeStoredHeader will write a stored header. +// If the stored block is only used for EOF, +// it is replaced with a fixed huffman block. func (w *huffmanBitWriter) writeStoredHeader(length int, isEof bool) { if w.err != nil { return @@ -485,6 +496,16 @@ func (w *huffmanBitWriter) writeStoredHeader(length int, isEof bool) { w.writeCode(w.literalEncoding.codes[endBlockMarker]) w.lastHeader = 0 } + + // To write EOF, use a fixed encoding block. 10 bits instead of 5 bytes. + if length == 0 && isEof { + w.writeFixedHeader(isEof) + // EOB: 7 bits, value: 0 + w.writeBits(0, 7) + w.flush() + return + } + var flag int32 if isEof { flag = 1 @@ -591,8 +612,8 @@ func (w *huffmanBitWriter) writeBlockDynamic(tokens *tokens, eof bool, input []b tokens.AddEOB() } - // We cannot reuse pure huffman table. - if w.lastHuffMan && w.lastHeader > 0 { + // We cannot reuse pure huffman table, and must mark as EOF. + if (w.lastHuffMan || eof) && w.lastHeader > 0 { // We will not try to reuse. w.writeCode(w.literalEncoding.codes[endBlockMarker]) w.lastHeader = 0 @@ -606,14 +627,14 @@ func (w *huffmanBitWriter) writeBlockDynamic(tokens *tokens, eof bool, input []b var size int // Check if we should reuse. if w.lastHeader > 0 { - // Estimate size for using a new table + // Estimate size for using a new table. 
+ // Use the previous header size as the best estimate. newSize := w.lastHeader + tokens.EstimatedBits() + newSize += newSize >> w.logNewTablePenalty // The estimated size is calculated as an optimal table. // We add a penalty to make it more realistic and re-use a bit more. - newSize += newSize >> (w.logReusePenalty & 31) - extra := w.extraBitSize() - reuseSize, _ := w.dynamicSize(w.literalEncoding, w.offsetEncoding, extra) + reuseSize := w.dynamicReuseSize(w.literalEncoding, w.offsetEncoding) + w.extraBitSize() // Check if a new table is better. if newSize < reuseSize { @@ -805,21 +826,30 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) { } // Add everything as literals - estBits := histogramSize(input, w.literalFreq[:], !eof && !sync) + 15 + // We have to estimate the header size. + // Assume header is around 70 bytes: + // https://stackoverflow.com/a/25454430 + const guessHeaderSizeBits = 70 * 8 + estBits, estExtra := histogramSize(input, w.literalFreq[:], !eof && !sync) + estBits += w.lastHeader + 15 + if w.lastHeader == 0 { + estBits += guessHeaderSizeBits + } + estBits += estBits >> w.logNewTablePenalty // Store bytes, if we don't get a reasonable improvement. 
ssize, storable := w.storedSize(input) - if storable && ssize < (estBits+estBits>>4) { + if storable && ssize < estBits { w.writeStoredHeader(len(input), eof) w.writeBytes(input) return } if w.lastHeader > 0 { - size, _ := w.dynamicSize(w.literalEncoding, huffOffset, w.lastHeader) - estBits += estBits >> (w.logReusePenalty) + reuseSize := w.literalEncoding.bitLength(w.literalFreq[:256]) + estBits += estExtra - if estBits < size { + if estBits < reuseSize { // We owe an EOB w.writeCode(w.literalEncoding.codes[endBlockMarker]) w.lastHeader = 0 diff --git a/vendor/github.com/klauspost/compress/flate/huffman_code.go b/vendor/github.com/klauspost/compress/flate/huffman_code.go index d0099599c5..4c39a30187 100644 --- a/vendor/github.com/klauspost/compress/flate/huffman_code.go +++ b/vendor/github.com/klauspost/compress/flate/huffman_code.go @@ -7,7 +7,6 @@ package flate import ( "math" "math/bits" - "sort" ) const ( @@ -25,8 +24,6 @@ type huffmanEncoder struct { codes []hcode freqcache []literalNode bitCount [17]int32 - lns byLiteral // stored to avoid repeated allocation in generate - lfs byFreq // stored to avoid repeated allocation in generate } type literalNode struct { @@ -85,17 +82,14 @@ func generateFixedLiteralEncoding() *huffmanEncoder { // size 8, 000110000 .. 10111111 bits = ch + 48 size = 8 - break case ch < 256: // size 9, 110010000 .. 111111111 bits = ch + 400 - 144 size = 9 - break case ch < 280: // size 7, 0000000 .. 0010111 bits = ch - 256 size = 7 - break default: // size 8, 11000000 .. 
11000111 bits = ch + 192 - 280 @@ -115,8 +109,8 @@ func generateFixedOffsetEncoding() *huffmanEncoder { return h } -var fixedLiteralEncoding *huffmanEncoder = generateFixedLiteralEncoding() -var fixedOffsetEncoding *huffmanEncoder = generateFixedOffsetEncoding() +var fixedLiteralEncoding = generateFixedLiteralEncoding() +var fixedOffsetEncoding = generateFixedOffsetEncoding() func (h *huffmanEncoder) bitLength(freq []uint16) int { var total int @@ -273,7 +267,7 @@ func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalN // assigned in literal order (not frequency order). chunk := list[len(list)-int(bits):] - h.lns.sort(chunk) + sortByLiteral(chunk) for _, node := range chunk { h.codes[node.literal] = hcode{code: reverseBits(code, uint8(n)), len: uint16(n)} code++ @@ -318,7 +312,7 @@ func (h *huffmanEncoder) generate(freq []uint16, maxBits int32) { } return } - h.lfs.sort(list) + sortByFreq(list) // Get the number of literals for each bit count bitCount := h.bitCounts(list, maxBits) @@ -326,59 +320,44 @@ func (h *huffmanEncoder) generate(freq []uint16, maxBits int32) { h.assignEncodingAndSize(bitCount, list) } -type byLiteral []literalNode - -func (s *byLiteral) sort(a []literalNode) { - *s = byLiteral(a) - sort.Sort(s) -} - -func (s byLiteral) Len() int { return len(s) } - -func (s byLiteral) Less(i, j int) bool { - return s[i].literal < s[j].literal -} - -func (s byLiteral) Swap(i, j int) { s[i], s[j] = s[j], s[i] } - -type byFreq []literalNode - -func (s *byFreq) sort(a []literalNode) { - *s = byFreq(a) - sort.Sort(s) -} - -func (s byFreq) Len() int { return len(s) } - -func (s byFreq) Less(i, j int) bool { - if s[i].freq == s[j].freq { - return s[i].literal < s[j].literal +func atLeastOne(v float32) float32 { + if v < 1 { + return 1 } - return s[i].freq < s[j].freq + return v } -func (s byFreq) Swap(i, j int) { s[i], s[j] = s[j], s[i] } - // histogramSize accumulates a histogram of b in h. // An estimated size in bits is returned. 
// Unassigned values are assigned '1' in the histogram. // len(h) must be >= 256, and h's elements must be all zeroes. -func histogramSize(b []byte, h []uint16, fill bool) int { +func histogramSize(b []byte, h []uint16, fill bool) (int, int) { h = h[:256] for _, t := range b { h[t]++ } - invTotal := 1.0 / float64(len(b)) - shannon := 0.0 - single := math.Ceil(-math.Log2(invTotal)) - for i, v := range h[:] { - if v > 0 { - n := float64(v) - shannon += math.Ceil(-math.Log2(n*invTotal) * n) - } else if fill { - shannon += single - h[i] = 1 + invTotal := 1.0 / float32(len(b)) + shannon := float32(0.0) + var extra float32 + if fill { + oneBits := atLeastOne(-mFastLog2(invTotal)) + for i, v := range h[:] { + if v > 0 { + n := float32(v) + shannon += atLeastOne(-mFastLog2(n*invTotal)) * n + } else { + h[i] = 1 + extra += oneBits + } + } + } else { + for _, v := range h[:] { + if v > 0 { + n := float32(v) + shannon += atLeastOne(-mFastLog2(n*invTotal)) * n + } } } - return int(shannon + 0.99) + + return int(shannon + 0.99), int(extra + 0.99) } diff --git a/vendor/github.com/klauspost/compress/flate/huffman_sortByFreq.go b/vendor/github.com/klauspost/compress/flate/huffman_sortByFreq.go new file mode 100644 index 0000000000..2077802990 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/huffman_sortByFreq.go @@ -0,0 +1,178 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +// Sort sorts data. +// It makes one call to data.Len to determine n, and O(n*log(n)) calls to +// data.Less and data.Swap. The sort is not guaranteed to be stable. 
+func sortByFreq(data []literalNode) { + n := len(data) + quickSortByFreq(data, 0, n, maxDepth(n)) +} + +func quickSortByFreq(data []literalNode, a, b, maxDepth int) { + for b-a > 12 { // Use ShellSort for slices <= 12 elements + if maxDepth == 0 { + heapSort(data, a, b) + return + } + maxDepth-- + mlo, mhi := doPivotByFreq(data, a, b) + // Avoiding recursion on the larger subproblem guarantees + // a stack depth of at most lg(b-a). + if mlo-a < b-mhi { + quickSortByFreq(data, a, mlo, maxDepth) + a = mhi // i.e., quickSortByFreq(data, mhi, b) + } else { + quickSortByFreq(data, mhi, b, maxDepth) + b = mlo // i.e., quickSortByFreq(data, a, mlo) + } + } + if b-a > 1 { + // Do ShellSort pass with gap 6 + // It could be written in this simplified form cause b-a <= 12 + for i := a + 6; i < b; i++ { + if data[i].freq == data[i-6].freq && data[i].literal < data[i-6].literal || data[i].freq < data[i-6].freq { + data[i], data[i-6] = data[i-6], data[i] + } + } + insertionSortByFreq(data, a, b) + } +} + +// siftDownByFreq implements the heap property on data[lo, hi). +// first is an offset into the array where the root of the heap lies. +func siftDownByFreq(data []literalNode, lo, hi, first int) { + root := lo + for { + child := 2*root + 1 + if child >= hi { + break + } + if child+1 < hi && (data[first+child].freq == data[first+child+1].freq && data[first+child].literal < data[first+child+1].literal || data[first+child].freq < data[first+child+1].freq) { + child++ + } + if data[first+root].freq == data[first+child].freq && data[first+root].literal > data[first+child].literal || data[first+root].freq > data[first+child].freq { + return + } + data[first+root], data[first+child] = data[first+child], data[first+root] + root = child + } +} +func doPivotByFreq(data []literalNode, lo, hi int) (midlo, midhi int) { + m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow. + if hi-lo > 40 { + // Tukey's ``Ninther,'' median of three medians of three. 
+ s := (hi - lo) / 8 + medianOfThreeSortByFreq(data, lo, lo+s, lo+2*s) + medianOfThreeSortByFreq(data, m, m-s, m+s) + medianOfThreeSortByFreq(data, hi-1, hi-1-s, hi-1-2*s) + } + medianOfThreeSortByFreq(data, lo, m, hi-1) + + // Invariants are: + // data[lo] = pivot (set up by ChoosePivot) + // data[lo < i < a] < pivot + // data[a <= i < b] <= pivot + // data[b <= i < c] unexamined + // data[c <= i < hi-1] > pivot + // data[hi-1] >= pivot + pivot := lo + a, c := lo+1, hi-1 + + for ; a < c && (data[a].freq == data[pivot].freq && data[a].literal < data[pivot].literal || data[a].freq < data[pivot].freq); a++ { + } + b := a + for { + for ; b < c && (data[pivot].freq == data[b].freq && data[pivot].literal > data[b].literal || data[pivot].freq > data[b].freq); b++ { // data[b] <= pivot + } + for ; b < c && (data[pivot].freq == data[c-1].freq && data[pivot].literal < data[c-1].literal || data[pivot].freq < data[c-1].freq); c-- { // data[c-1] > pivot + } + if b >= c { + break + } + // data[b] > pivot; data[c-1] <= pivot + data[b], data[c-1] = data[c-1], data[b] + b++ + c-- + } + // If hi-c<3 then there are duplicates (by property of median of nine). + // Let's be a bit more conservative, and set border to 5. 
+ protect := hi-c < 5 + if !protect && hi-c < (hi-lo)/4 { + // Lets test some points for equality to pivot + dups := 0 + if data[pivot].freq == data[hi-1].freq && data[pivot].literal > data[hi-1].literal || data[pivot].freq > data[hi-1].freq { // data[hi-1] = pivot + data[c], data[hi-1] = data[hi-1], data[c] + c++ + dups++ + } + if data[b-1].freq == data[pivot].freq && data[b-1].literal > data[pivot].literal || data[b-1].freq > data[pivot].freq { // data[b-1] = pivot + b-- + dups++ + } + // m-lo = (hi-lo)/2 > 6 + // b-lo > (hi-lo)*3/4-1 > 8 + // ==> m < b ==> data[m] <= pivot + if data[m].freq == data[pivot].freq && data[m].literal > data[pivot].literal || data[m].freq > data[pivot].freq { // data[m] = pivot + data[m], data[b-1] = data[b-1], data[m] + b-- + dups++ + } + // if at least 2 points are equal to pivot, assume skewed distribution + protect = dups > 1 + } + if protect { + // Protect against a lot of duplicates + // Add invariant: + // data[a <= i < b] unexamined + // data[b <= i < c] = pivot + for { + for ; a < b && (data[b-1].freq == data[pivot].freq && data[b-1].literal > data[pivot].literal || data[b-1].freq > data[pivot].freq); b-- { // data[b] == pivot + } + for ; a < b && (data[a].freq == data[pivot].freq && data[a].literal < data[pivot].literal || data[a].freq < data[pivot].freq); a++ { // data[a] < pivot + } + if a >= b { + break + } + // data[a] == pivot; data[b-1] < pivot + data[a], data[b-1] = data[b-1], data[a] + a++ + b-- + } + } + // Swap pivot into middle + data[pivot], data[b-1] = data[b-1], data[pivot] + return b - 1, c +} + +// Insertion sort +func insertionSortByFreq(data []literalNode, a, b int) { + for i := a + 1; i < b; i++ { + for j := i; j > a && (data[j].freq == data[j-1].freq && data[j].literal < data[j-1].literal || data[j].freq < data[j-1].freq); j-- { + data[j], data[j-1] = data[j-1], data[j] + } + } +} + +// quickSortByFreq, loosely following Bentley and McIlroy, +// ``Engineering a Sort Function,'' SP&E November 1993. 
+ +// medianOfThreeSortByFreq moves the median of the three values data[m0], data[m1], data[m2] into data[m1]. +func medianOfThreeSortByFreq(data []literalNode, m1, m0, m2 int) { + // sort 3 elements + if data[m1].freq == data[m0].freq && data[m1].literal < data[m0].literal || data[m1].freq < data[m0].freq { + data[m1], data[m0] = data[m0], data[m1] + } + // data[m0] <= data[m1] + if data[m2].freq == data[m1].freq && data[m2].literal < data[m1].literal || data[m2].freq < data[m1].freq { + data[m2], data[m1] = data[m1], data[m2] + // data[m0] <= data[m2] && data[m1] < data[m2] + if data[m1].freq == data[m0].freq && data[m1].literal < data[m0].literal || data[m1].freq < data[m0].freq { + data[m1], data[m0] = data[m0], data[m1] + } + } + // now data[m0] <= data[m1] <= data[m2] +} diff --git a/vendor/github.com/klauspost/compress/flate/huffman_sortByLiteral.go b/vendor/github.com/klauspost/compress/flate/huffman_sortByLiteral.go new file mode 100644 index 0000000000..93f1aea109 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/huffman_sortByLiteral.go @@ -0,0 +1,201 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +// Sort sorts data. +// It makes one call to data.Len to determine n, and O(n*log(n)) calls to +// data.Less and data.Swap. The sort is not guaranteed to be stable. +func sortByLiteral(data []literalNode) { + n := len(data) + quickSort(data, 0, n, maxDepth(n)) +} + +func quickSort(data []literalNode, a, b, maxDepth int) { + for b-a > 12 { // Use ShellSort for slices <= 12 elements + if maxDepth == 0 { + heapSort(data, a, b) + return + } + maxDepth-- + mlo, mhi := doPivot(data, a, b) + // Avoiding recursion on the larger subproblem guarantees + // a stack depth of at most lg(b-a). 
+ if mlo-a < b-mhi { + quickSort(data, a, mlo, maxDepth) + a = mhi // i.e., quickSort(data, mhi, b) + } else { + quickSort(data, mhi, b, maxDepth) + b = mlo // i.e., quickSort(data, a, mlo) + } + } + if b-a > 1 { + // Do ShellSort pass with gap 6 + // It could be written in this simplified form cause b-a <= 12 + for i := a + 6; i < b; i++ { + if data[i].literal < data[i-6].literal { + data[i], data[i-6] = data[i-6], data[i] + } + } + insertionSort(data, a, b) + } +} +func heapSort(data []literalNode, a, b int) { + first := a + lo := 0 + hi := b - a + + // Build heap with greatest element at top. + for i := (hi - 1) / 2; i >= 0; i-- { + siftDown(data, i, hi, first) + } + + // Pop elements, largest first, into end of data. + for i := hi - 1; i >= 0; i-- { + data[first], data[first+i] = data[first+i], data[first] + siftDown(data, lo, i, first) + } +} + +// siftDown implements the heap property on data[lo, hi). +// first is an offset into the array where the root of the heap lies. +func siftDown(data []literalNode, lo, hi, first int) { + root := lo + for { + child := 2*root + 1 + if child >= hi { + break + } + if child+1 < hi && data[first+child].literal < data[first+child+1].literal { + child++ + } + if data[first+root].literal > data[first+child].literal { + return + } + data[first+root], data[first+child] = data[first+child], data[first+root] + root = child + } +} +func doPivot(data []literalNode, lo, hi int) (midlo, midhi int) { + m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow. + if hi-lo > 40 { + // Tukey's ``Ninther,'' median of three medians of three. 
+ s := (hi - lo) / 8 + medianOfThree(data, lo, lo+s, lo+2*s) + medianOfThree(data, m, m-s, m+s) + medianOfThree(data, hi-1, hi-1-s, hi-1-2*s) + } + medianOfThree(data, lo, m, hi-1) + + // Invariants are: + // data[lo] = pivot (set up by ChoosePivot) + // data[lo < i < a] < pivot + // data[a <= i < b] <= pivot + // data[b <= i < c] unexamined + // data[c <= i < hi-1] > pivot + // data[hi-1] >= pivot + pivot := lo + a, c := lo+1, hi-1 + + for ; a < c && data[a].literal < data[pivot].literal; a++ { + } + b := a + for { + for ; b < c && data[pivot].literal > data[b].literal; b++ { // data[b] <= pivot + } + for ; b < c && data[pivot].literal < data[c-1].literal; c-- { // data[c-1] > pivot + } + if b >= c { + break + } + // data[b] > pivot; data[c-1] <= pivot + data[b], data[c-1] = data[c-1], data[b] + b++ + c-- + } + // If hi-c<3 then there are duplicates (by property of median of nine). + // Let's be a bit more conservative, and set border to 5. + protect := hi-c < 5 + if !protect && hi-c < (hi-lo)/4 { + // Lets test some points for equality to pivot + dups := 0 + if data[pivot].literal > data[hi-1].literal { // data[hi-1] = pivot + data[c], data[hi-1] = data[hi-1], data[c] + c++ + dups++ + } + if data[b-1].literal > data[pivot].literal { // data[b-1] = pivot + b-- + dups++ + } + // m-lo = (hi-lo)/2 > 6 + // b-lo > (hi-lo)*3/4-1 > 8 + // ==> m < b ==> data[m] <= pivot + if data[m].literal > data[pivot].literal { // data[m] = pivot + data[m], data[b-1] = data[b-1], data[m] + b-- + dups++ + } + // if at least 2 points are equal to pivot, assume skewed distribution + protect = dups > 1 + } + if protect { + // Protect against a lot of duplicates + // Add invariant: + // data[a <= i < b] unexamined + // data[b <= i < c] = pivot + for { + for ; a < b && data[b-1].literal > data[pivot].literal; b-- { // data[b] == pivot + } + for ; a < b && data[a].literal < data[pivot].literal; a++ { // data[a] < pivot + } + if a >= b { + break + } + // data[a] == pivot; data[b-1] < pivot + 
data[a], data[b-1] = data[b-1], data[a] + a++ + b-- + } + } + // Swap pivot into middle + data[pivot], data[b-1] = data[b-1], data[pivot] + return b - 1, c +} + +// Insertion sort +func insertionSort(data []literalNode, a, b int) { + for i := a + 1; i < b; i++ { + for j := i; j > a && data[j].literal < data[j-1].literal; j-- { + data[j], data[j-1] = data[j-1], data[j] + } + } +} + +// maxDepth returns a threshold at which quicksort should switch +// to heapsort. It returns 2*ceil(lg(n+1)). +func maxDepth(n int) int { + var depth int + for i := n; i > 0; i >>= 1 { + depth++ + } + return depth * 2 +} + +// medianOfThree moves the median of the three values data[m0], data[m1], data[m2] into data[m1]. +func medianOfThree(data []literalNode, m1, m0, m2 int) { + // sort 3 elements + if data[m1].literal < data[m0].literal { + data[m1], data[m0] = data[m0], data[m1] + } + // data[m0] <= data[m1] + if data[m2].literal < data[m1].literal { + data[m2], data[m1] = data[m1], data[m2] + // data[m0] <= data[m2] && data[m1] < data[m2] + if data[m1].literal < data[m0].literal { + data[m1], data[m0] = data[m0], data[m1] + } + } + // now data[m0] <= data[m1] <= data[m2] +} diff --git a/vendor/github.com/klauspost/compress/flate/inflate.go b/vendor/github.com/klauspost/compress/flate/inflate.go index 6dc5b5d06e..7f175a4ec2 100644 --- a/vendor/github.com/klauspost/compress/flate/inflate.go +++ b/vendor/github.com/klauspost/compress/flate/inflate.go @@ -106,7 +106,7 @@ const ( ) type huffmanDecoder struct { - min int // the minimum code length + maxRead int // the maximum number of bits we can read and not overread chunks *[huffmanNumChunks]uint16 // chunks as described above links [][]uint16 // overflow links linkMask uint32 // mask the width of the link table @@ -126,12 +126,12 @@ func (h *huffmanDecoder) init(lengths []int) bool { if h.chunks == nil { h.chunks = &[huffmanNumChunks]uint16{} } - if h.min != 0 { + if h.maxRead != 0 { *h = huffmanDecoder{chunks: h.chunks, links: h.links} 
} // Count number of codes of each length, - // compute min and max length. + // compute maxRead and max length. var count [maxCodeLen]int var min, max int for _, n := range lengths { @@ -178,7 +178,7 @@ func (h *huffmanDecoder) init(lengths []int) bool { return false } - h.min = min + h.maxRead = min chunks := h.chunks[:] for i := range chunks { chunks[i] = 0 @@ -342,7 +342,7 @@ func (f *decompressor) nextBlock() { // compressed, fixed Huffman tables f.hl = &fixedHuffmanDecoder f.hd = nil - f.huffmanBlock() + f.huffmanBlockDecoder()() case 2: // compressed, dynamic Huffman tables if f.err = f.readHuffman(); f.err != nil { @@ -350,7 +350,7 @@ func (f *decompressor) nextBlock() { } f.hl = &f.h1 f.hd = &f.h2 - f.huffmanBlock() + f.huffmanBlockDecoder()() default: // 3 is reserved. if debugDecode { @@ -543,12 +543,18 @@ func (f *decompressor) readHuffman() error { return CorruptInputError(f.roffset) } - // As an optimization, we can initialize the min bits to read at a time + // As an optimization, we can initialize the maxRead bits to read at a time // for the HLIT tree to the length of the EOB marker since we know that // every block must terminate with one. This preserves the property that // we never read any extra bytes after the end of the DEFLATE stream. - if f.h1.min < f.bits[endBlockMarker] { - f.h1.min = f.bits[endBlockMarker] + if f.h1.maxRead < f.bits[endBlockMarker] { + f.h1.maxRead = f.bits[endBlockMarker] + } + if !f.final { + // If not the final block, the smallest block possible is + // a predefined table, BTYPE=01, with a single EOB marker. + // This will take up 3 + 7 bits. + f.h1.maxRead += 10 } return nil @@ -558,7 +564,7 @@ func (f *decompressor) readHuffman() error { // hl and hd are the Huffman states for the lit/length values // and the distance values, respectively. If hd == nil, using the // fixed distance encoding associated with fixed Huffman blocks. 
-func (f *decompressor) huffmanBlock() { +func (f *decompressor) huffmanBlockGeneric() { const ( stateInit = iota // Zero value must be stateInit stateDict @@ -574,19 +580,64 @@ func (f *decompressor) huffmanBlock() { readLiteral: // Read literal and/or (length, distance) according to RFC section 3.2.3. { - v, err := f.huffSym(f.hl) - if err != nil { - f.err = err - return + var v int + { + // Inlined v, err := f.huffSym(f.hl) + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(f.hl.maxRead) + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + nb, b := f.nb, f.b + for { + for nb < n { + c, err := f.r.ReadByte() + if err != nil { + f.b = b + f.nb = nb + f.err = noEOF(err) + return + } + f.roffset++ + b |= uint32(c) << (nb & 31) + nb += 8 + } + chunk := f.hl.chunks[b&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= nb { + if n == 0 { + f.b = b + f.nb = nb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return + } + f.b = b >> (n & 31) + f.nb = nb - n + v = int(chunk >> huffmanValueShift) + break + } + } } + var n uint // number of bits extra var length int + var err error switch { case v < 256: f.dict.writeByte(byte(v)) if f.dict.availWrite() == 0 { f.toRead = f.dict.readFlush() - f.step = (*decompressor).huffmanBlock + f.step = (*decompressor).huffmanBlockGeneric f.stepState = stateInit return } @@ -714,7 +765,7 @@ copyHistory: if f.dict.availWrite() == 0 
|| f.copyLen > 0 { f.toRead = f.dict.readFlush() - f.step = (*decompressor).huffmanBlock // We need to continue this work + f.step = (*decompressor).huffmanBlockGeneric // We need to continue this work f.stepState = stateDict return } @@ -726,21 +777,33 @@ copyHistory: func (f *decompressor) dataBlock() { // Uncompressed. // Discard current half-byte. - f.nb = 0 - f.b = 0 + left := (f.nb) & 7 + f.nb -= left + f.b >>= left + + offBytes := f.nb >> 3 + // Unfilled values will be overwritten. + f.buf[0] = uint8(f.b) + f.buf[1] = uint8(f.b >> 8) + f.buf[2] = uint8(f.b >> 16) + f.buf[3] = uint8(f.b >> 24) + + f.roffset += int64(offBytes) + f.nb, f.b = 0, 0 // Length then ones-complement of length. - nr, err := io.ReadFull(f.r, f.buf[0:4]) + nr, err := io.ReadFull(f.r, f.buf[offBytes:4]) f.roffset += int64(nr) if err != nil { f.err = noEOF(err) return } - n := int(f.buf[0]) | int(f.buf[1])<<8 - nn := int(f.buf[2]) | int(f.buf[3])<<8 - if uint16(nn) != uint16(^n) { + n := uint16(f.buf[0]) | uint16(f.buf[1])<<8 + nn := uint16(f.buf[2]) | uint16(f.buf[3])<<8 + if nn != ^n { if debugDecode { - fmt.Println("uint16(nn) != uint16(^n)", nn, ^n) + ncomp := ^n + fmt.Println("uint16(nn) != uint16(^n)", nn, ncomp) } f.err = CorruptInputError(f.roffset) return @@ -752,7 +815,7 @@ func (f *decompressor) dataBlock() { return } - f.copyLen = n + f.copyLen = int(n) f.copyData() } @@ -816,7 +879,7 @@ func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) { // with single element, huffSym must error on these two edge cases. In both // cases, the chunks slice will be 0 for the invalid sequence, leading it // satisfy the n == 0 check below. - n := uint(h.min) + n := uint(h.maxRead) // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, // but is smart enough to keep local variables in registers, so use nb and b, // inline call to moreBits and reassign b,nb back to f on return. 
diff --git a/vendor/github.com/klauspost/compress/flate/inflate_gen.go b/vendor/github.com/klauspost/compress/flate/inflate_gen.go new file mode 100644 index 0000000000..397dc1b1a1 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/inflate_gen.go @@ -0,0 +1,922 @@ +// Code generated by go generate gen_inflate.go. DO NOT EDIT. + +package flate + +import ( + "bufio" + "bytes" + "fmt" + "math/bits" + "strings" +) + +// Decode a single Huffman block from f. +// hl and hd are the Huffman states for the lit/length values +// and the distance values, respectively. If hd == nil, using the +// fixed distance encoding associated with fixed Huffman blocks. +func (f *decompressor) huffmanBytesBuffer() { + const ( + stateInit = iota // Zero value must be stateInit + stateDict + ) + fr := f.r.(*bytes.Buffer) + moreBits := func() error { + c, err := fr.ReadByte() + if err != nil { + return noEOF(err) + } + f.roffset++ + f.b |= uint32(c) << f.nb + f.nb += 8 + return nil + } + + switch f.stepState { + case stateInit: + goto readLiteral + case stateDict: + goto copyHistory + } + +readLiteral: + // Read literal and/or (length, distance) according to RFC section 3.2.3. + { + var v int + { + // Inlined v, err := f.huffSym(f.hl) + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(f.hl.maxRead) + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. 
+ nb, b := f.nb, f.b + for { + for nb < n { + c, err := fr.ReadByte() + if err != nil { + f.b = b + f.nb = nb + f.err = noEOF(err) + return + } + f.roffset++ + b |= uint32(c) << (nb & 31) + nb += 8 + } + chunk := f.hl.chunks[b&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= nb { + if n == 0 { + f.b = b + f.nb = nb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return + } + f.b = b >> (n & 31) + f.nb = nb - n + v = int(chunk >> huffmanValueShift) + break + } + } + } + + var n uint // number of bits extra + var length int + var err error + switch { + case v < 256: + f.dict.writeByte(byte(v)) + if f.dict.availWrite() == 0 { + f.toRead = f.dict.readFlush() + f.step = (*decompressor).huffmanBytesBuffer + f.stepState = stateInit + return + } + goto readLiteral + case v == 256: + f.finishBlock() + return + // otherwise, reference to older data + case v < 265: + length = v - (257 - 3) + n = 0 + case v < 269: + length = v*2 - (265*2 - 11) + n = 1 + case v < 273: + length = v*4 - (269*4 - 19) + n = 2 + case v < 277: + length = v*8 - (273*8 - 35) + n = 3 + case v < 281: + length = v*16 - (277*16 - 67) + n = 4 + case v < 285: + length = v*32 - (281*32 - 131) + n = 5 + case v < maxNumLit: + length = 258 + n = 0 + default: + if debugDecode { + fmt.Println(v, ">= maxNumLit") + } + f.err = CorruptInputError(f.roffset) + return + } + if n > 0 { + for f.nb < n { + if err = moreBits(); err != nil { + if debugDecode { + fmt.Println("morebits n>0:", err) + } + f.err = err + return + } + } + length += int(f.b & uint32(1<<n-1)) + f.b >>= n + f.nb -= n + } + + var dist int + if f.hd == nil { + for f.nb < 5 { + if err = moreBits(); err != nil { + if debugDecode { + fmt.Println("morebits f.nb<5:", err) + } + f.err = err + return + } + } + dist = int(bits.Reverse8(uint8(f.b & 0x1F <<
3))) + f.b >>= 5 + f.nb -= 5 + } else { + if dist, err = f.huffSym(f.hd); err != nil { + if debugDecode { + fmt.Println("huffsym:", err) + } + f.err = err + return + } + } + + switch { + case dist < 4: + dist++ + case dist < maxNumDist: + nb := uint(dist-2) >> 1 + // have 1 bit in bottom of dist, need nb more. + extra := (dist & 1) << nb + for f.nb < nb { + if err = moreBits(); err != nil { + if debugDecode { + fmt.Println("morebits f.nb<nb:", err) + } + f.err = err + return + } + } + extra |= int(f.b & uint32(1<<nb-1)) + f.b >>= nb + f.nb -= nb + dist = 1<<(nb+1) + 1 + extra + default: + if debugDecode { + fmt.Println("dist too big:", dist, maxNumDist) + } + f.err = CorruptInputError(f.roffset) + return + } + + // No check on length; encoding can be prescient. + if dist > f.dict.histSize() { + if debugDecode { + fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize()) + } + f.err = CorruptInputError(f.roffset) + return + } + + f.copyLen, f.copyDist = length, dist + goto copyHistory + } + +copyHistory: + // Perform a backwards copy according to RFC section 3.2.3. + { + cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen) + if cnt == 0 { + cnt = f.dict.writeCopy(f.copyDist, f.copyLen) + } + f.copyLen -= cnt + + if f.dict.availWrite() == 0 || f.copyLen > 0 { + f.toRead = f.dict.readFlush() + f.step = (*decompressor).huffmanBytesBuffer // We need to continue this work + f.stepState = stateDict + return + } + goto readLiteral + } +} + +// Decode a single Huffman block from f. +// hl and hd are the Huffman states for the lit/length values +// and the distance values, respectively. If hd == nil, using the +// fixed distance encoding associated with fixed Huffman blocks.
+func (f *decompressor) huffmanBytesReader() { + const ( + stateInit = iota // Zero value must be stateInit + stateDict + ) + fr := f.r.(*bytes.Reader) + moreBits := func() error { + c, err := fr.ReadByte() + if err != nil { + return noEOF(err) + } + f.roffset++ + f.b |= uint32(c) << f.nb + f.nb += 8 + return nil + } + + switch f.stepState { + case stateInit: + goto readLiteral + case stateDict: + goto copyHistory + } + +readLiteral: + // Read literal and/or (length, distance) according to RFC section 3.2.3. + { + var v int + { + // Inlined v, err := f.huffSym(f.hl) + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(f.hl.maxRead) + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. 
+ nb, b := f.nb, f.b + for { + for nb < n { + c, err := fr.ReadByte() + if err != nil { + f.b = b + f.nb = nb + f.err = noEOF(err) + return + } + f.roffset++ + b |= uint32(c) << (nb & 31) + nb += 8 + } + chunk := f.hl.chunks[b&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= nb { + if n == 0 { + f.b = b + f.nb = nb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return + } + f.b = b >> (n & 31) + f.nb = nb - n + v = int(chunk >> huffmanValueShift) + break + } + } + } + + var n uint // number of bits extra + var length int + var err error + switch { + case v < 256: + f.dict.writeByte(byte(v)) + if f.dict.availWrite() == 0 { + f.toRead = f.dict.readFlush() + f.step = (*decompressor).huffmanBytesReader + f.stepState = stateInit + return + } + goto readLiteral + case v == 256: + f.finishBlock() + return + // otherwise, reference to older data + case v < 265: + length = v - (257 - 3) + n = 0 + case v < 269: + length = v*2 - (265*2 - 11) + n = 1 + case v < 273: + length = v*4 - (269*4 - 19) + n = 2 + case v < 277: + length = v*8 - (273*8 - 35) + n = 3 + case v < 281: + length = v*16 - (277*16 - 67) + n = 4 + case v < 285: + length = v*32 - (281*32 - 131) + n = 5 + case v < maxNumLit: + length = 258 + n = 0 + default: + if debugDecode { + fmt.Println(v, ">= maxNumLit") + } + f.err = CorruptInputError(f.roffset) + return + } + if n > 0 { + for f.nb < n { + if err = moreBits(); err != nil { + if debugDecode { + fmt.Println("morebits n>0:", err) + } + f.err = err + return + } + } + length += int(f.b & uint32(1<<n-1)) + f.b >>= n + f.nb -= n + } + + var dist int + if f.hd == nil { + for f.nb < 5 { + if err = moreBits(); err != nil { + if debugDecode { + fmt.Println("morebits f.nb<5:", err) + } + f.err = err + return + } + } + dist = int(bits.Reverse8(uint8(f.b & 0x1F <<
3))) + f.b >>= 5 + f.nb -= 5 + } else { + if dist, err = f.huffSym(f.hd); err != nil { + if debugDecode { + fmt.Println("huffsym:", err) + } + f.err = err + return + } + } + + switch { + case dist < 4: + dist++ + case dist < maxNumDist: + nb := uint(dist-2) >> 1 + // have 1 bit in bottom of dist, need nb more. + extra := (dist & 1) << nb + for f.nb < nb { + if err = moreBits(); err != nil { + if debugDecode { + fmt.Println("morebits f.nb<nb:", err) + } + f.err = err + return + } + } + extra |= int(f.b & uint32(1<<nb-1)) + f.b >>= nb + f.nb -= nb + dist = 1<<(nb+1) + 1 + extra + default: + if debugDecode { + fmt.Println("dist too big:", dist, maxNumDist) + } + f.err = CorruptInputError(f.roffset) + return + } + + // No check on length; encoding can be prescient. + if dist > f.dict.histSize() { + if debugDecode { + fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize()) + } + f.err = CorruptInputError(f.roffset) + return + } + + f.copyLen, f.copyDist = length, dist + goto copyHistory + } + +copyHistory: + // Perform a backwards copy according to RFC section 3.2.3. + { + cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen) + if cnt == 0 { + cnt = f.dict.writeCopy(f.copyDist, f.copyLen) + } + f.copyLen -= cnt + + if f.dict.availWrite() == 0 || f.copyLen > 0 { + f.toRead = f.dict.readFlush() + f.step = (*decompressor).huffmanBytesReader // We need to continue this work + f.stepState = stateDict + return + } + goto readLiteral + } +} + +// Decode a single Huffman block from f. +// hl and hd are the Huffman states for the lit/length values +// and the distance values, respectively. If hd == nil, using the +// fixed distance encoding associated with fixed Huffman blocks.
+func (f *decompressor) huffmanBufioReader() { + const ( + stateInit = iota // Zero value must be stateInit + stateDict + ) + fr := f.r.(*bufio.Reader) + moreBits := func() error { + c, err := fr.ReadByte() + if err != nil { + return noEOF(err) + } + f.roffset++ + f.b |= uint32(c) << f.nb + f.nb += 8 + return nil + } + + switch f.stepState { + case stateInit: + goto readLiteral + case stateDict: + goto copyHistory + } + +readLiteral: + // Read literal and/or (length, distance) according to RFC section 3.2.3. + { + var v int + { + // Inlined v, err := f.huffSym(f.hl) + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(f.hl.maxRead) + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. 
+ nb, b := f.nb, f.b + for { + for nb < n { + c, err := fr.ReadByte() + if err != nil { + f.b = b + f.nb = nb + f.err = noEOF(err) + return + } + f.roffset++ + b |= uint32(c) << (nb & 31) + nb += 8 + } + chunk := f.hl.chunks[b&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= nb { + if n == 0 { + f.b = b + f.nb = nb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return + } + f.b = b >> (n & 31) + f.nb = nb - n + v = int(chunk >> huffmanValueShift) + break + } + } + } + + var n uint // number of bits extra + var length int + var err error + switch { + case v < 256: + f.dict.writeByte(byte(v)) + if f.dict.availWrite() == 0 { + f.toRead = f.dict.readFlush() + f.step = (*decompressor).huffmanBufioReader + f.stepState = stateInit + return + } + goto readLiteral + case v == 256: + f.finishBlock() + return + // otherwise, reference to older data + case v < 265: + length = v - (257 - 3) + n = 0 + case v < 269: + length = v*2 - (265*2 - 11) + n = 1 + case v < 273: + length = v*4 - (269*4 - 19) + n = 2 + case v < 277: + length = v*8 - (273*8 - 35) + n = 3 + case v < 281: + length = v*16 - (277*16 - 67) + n = 4 + case v < 285: + length = v*32 - (281*32 - 131) + n = 5 + case v < maxNumLit: + length = 258 + n = 0 + default: + if debugDecode { + fmt.Println(v, ">= maxNumLit") + } + f.err = CorruptInputError(f.roffset) + return + } + if n > 0 { + for f.nb < n { + if err = moreBits(); err != nil { + if debugDecode { + fmt.Println("morebits n>0:", err) + } + f.err = err + return + } + } + length += int(f.b & uint32(1<<n-1)) + f.b >>= n + f.nb -= n + } + + var dist int + if f.hd == nil { + for f.nb < 5 { + if err = moreBits(); err != nil { + if debugDecode { + fmt.Println("morebits f.nb<5:", err) + } + f.err = err + return + } + } + dist = int(bits.Reverse8(uint8(f.b & 0x1F <<
3))) + f.b >>= 5 + f.nb -= 5 + } else { + if dist, err = f.huffSym(f.hd); err != nil { + if debugDecode { + fmt.Println("huffsym:", err) + } + f.err = err + return + } + } + + switch { + case dist < 4: + dist++ + case dist < maxNumDist: + nb := uint(dist-2) >> 1 + // have 1 bit in bottom of dist, need nb more. + extra := (dist & 1) << nb + for f.nb < nb { + if err = moreBits(); err != nil { + if debugDecode { + fmt.Println("morebits f.nb<nb:", err) + } + f.err = err + return + } + } + extra |= int(f.b & uint32(1<<nb-1)) + f.b >>= nb + f.nb -= nb + dist = 1<<(nb+1) + 1 + extra + default: + if debugDecode { + fmt.Println("dist too big:", dist, maxNumDist) + } + f.err = CorruptInputError(f.roffset) + return + } + + // No check on length; encoding can be prescient. + if dist > f.dict.histSize() { + if debugDecode { + fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize()) + } + f.err = CorruptInputError(f.roffset) + return + } + + f.copyLen, f.copyDist = length, dist + goto copyHistory + } + +copyHistory: + // Perform a backwards copy according to RFC section 3.2.3. + { + cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen) + if cnt == 0 { + cnt = f.dict.writeCopy(f.copyDist, f.copyLen) + } + f.copyLen -= cnt + + if f.dict.availWrite() == 0 || f.copyLen > 0 { + f.toRead = f.dict.readFlush() + f.step = (*decompressor).huffmanBufioReader // We need to continue this work + f.stepState = stateDict + return + } + goto readLiteral + } +} + +// Decode a single Huffman block from f. +// hl and hd are the Huffman states for the lit/length values +// and the distance values, respectively. If hd == nil, using the +// fixed distance encoding associated with fixed Huffman blocks.
+func (f *decompressor) huffmanStringsReader() { + const ( + stateInit = iota // Zero value must be stateInit + stateDict + ) + fr := f.r.(*strings.Reader) + moreBits := func() error { + c, err := fr.ReadByte() + if err != nil { + return noEOF(err) + } + f.roffset++ + f.b |= uint32(c) << f.nb + f.nb += 8 + return nil + } + + switch f.stepState { + case stateInit: + goto readLiteral + case stateDict: + goto copyHistory + } + +readLiteral: + // Read literal and/or (length, distance) according to RFC section 3.2.3. + { + var v int + { + // Inlined v, err := f.huffSym(f.hl) + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(f.hl.maxRead) + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. 
+ nb, b := f.nb, f.b + for { + for nb < n { + c, err := fr.ReadByte() + if err != nil { + f.b = b + f.nb = nb + f.err = noEOF(err) + return + } + f.roffset++ + b |= uint32(c) << (nb & 31) + nb += 8 + } + chunk := f.hl.chunks[b&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= nb { + if n == 0 { + f.b = b + f.nb = nb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return + } + f.b = b >> (n & 31) + f.nb = nb - n + v = int(chunk >> huffmanValueShift) + break + } + } + } + + var n uint // number of bits extra + var length int + var err error + switch { + case v < 256: + f.dict.writeByte(byte(v)) + if f.dict.availWrite() == 0 { + f.toRead = f.dict.readFlush() + f.step = (*decompressor).huffmanStringsReader + f.stepState = stateInit + return + } + goto readLiteral + case v == 256: + f.finishBlock() + return + // otherwise, reference to older data + case v < 265: + length = v - (257 - 3) + n = 0 + case v < 269: + length = v*2 - (265*2 - 11) + n = 1 + case v < 273: + length = v*4 - (269*4 - 19) + n = 2 + case v < 277: + length = v*8 - (273*8 - 35) + n = 3 + case v < 281: + length = v*16 - (277*16 - 67) + n = 4 + case v < 285: + length = v*32 - (281*32 - 131) + n = 5 + case v < maxNumLit: + length = 258 + n = 0 + default: + if debugDecode { + fmt.Println(v, ">= maxNumLit") + } + f.err = CorruptInputError(f.roffset) + return + } + if n > 0 { + for f.nb < n { + if err = moreBits(); err != nil { + if debugDecode { + fmt.Println("morebits n>0:", err) + } + f.err = err + return + } + } + length += int(f.b & uint32(1<<n-1)) + f.b >>= n + f.nb -= n + } + + var dist int + if f.hd == nil { + for f.nb < 5 { + if err = moreBits(); err != nil { + if debugDecode { + fmt.Println("morebits f.nb<5:", err) + } + f.err = err + return + } + } + dist = int(bits.Reverse8(uint8(f.b & 0x1F <<
3))) + f.b >>= 5 + f.nb -= 5 + } else { + if dist, err = f.huffSym(f.hd); err != nil { + if debugDecode { + fmt.Println("huffsym:", err) + } + f.err = err + return + } + } + + switch { + case dist < 4: + dist++ + case dist < maxNumDist: + nb := uint(dist-2) >> 1 + // have 1 bit in bottom of dist, need nb more. + extra := (dist & 1) << nb + for f.nb < nb { + if err = moreBits(); err != nil { + if debugDecode { + fmt.Println("morebits f.nb<nb:", err) + } + f.err = err + return + } + } + extra |= int(f.b & uint32(1<<nb-1)) + f.b >>= nb + f.nb -= nb + dist = 1<<(nb+1) + 1 + extra + default: + if debugDecode { + fmt.Println("dist too big:", dist, maxNumDist) + } + f.err = CorruptInputError(f.roffset) + return + } + + // No check on length; encoding can be prescient. + if dist > f.dict.histSize() { + if debugDecode { + fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize()) + } + f.err = CorruptInputError(f.roffset) + return + } + + f.copyLen, f.copyDist = length, dist + goto copyHistory + } + +copyHistory: + // Perform a backwards copy according to RFC section 3.2.3. + { + cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen) + if cnt == 0 { + cnt = f.dict.writeCopy(f.copyDist, f.copyLen) + } + f.copyLen -= cnt + + if f.dict.availWrite() == 0 || f.copyLen > 0 { + f.toRead = f.dict.readFlush() + f.step = (*decompressor).huffmanStringsReader // We need to continue this work + f.stepState = stateDict + return + } + goto readLiteral + } +} + +func (f *decompressor) huffmanBlockDecoder() func() { + switch f.r.(type) { + case *bytes.Buffer: + return f.huffmanBytesBuffer + case *bytes.Reader: + return f.huffmanBytesReader + case *bufio.Reader: + return f.huffmanBufioReader + case *strings.Reader: + return f.huffmanStringsReader + default: + return f.huffmanBlockGeneric + } +} diff --git a/vendor/github.com/klauspost/compress/flate/level1.go b/vendor/github.com/klauspost/compress/flate/level1.go index 20de8f11f4..1e5eea3968 100644 --- a/vendor/github.com/klauspost/compress/flate/level1.go +++ b/vendor/github.com/klauspost/compress/flate/level1.go @@ -1,5 +1,7 @@
package flate +import "fmt" + // fastGen maintains the table for matches, // and the previous byte block for level 2. // This is the generic implementation. @@ -14,6 +16,9 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) { inputMargin = 12 - 1 minNonLiteralBlockSize = 1 + 1 + inputMargin ) + if debugDeflate && e.cur < 0 { + panic(fmt.Sprint("e.cur < 0: ", e.cur)) + } // Protect against e.cur wraparound. for e.cur >= bufferReset { @@ -76,12 +81,12 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) { } now := load6432(src, nextS) - e.table[nextHash] = tableEntry{offset: s + e.cur, val: cv} + e.table[nextHash] = tableEntry{offset: s + e.cur} nextHash = hash(uint32(now)) offset := s - (candidate.offset - e.cur) - if offset < maxMatchOffset && cv == candidate.val { - e.table[nextHash] = tableEntry{offset: nextS + e.cur, val: uint32(now)} + if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) { + e.table[nextHash] = tableEntry{offset: nextS + e.cur} break } @@ -91,11 +96,11 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) { nextS++ candidate = e.table[nextHash] now >>= 8 - e.table[nextHash] = tableEntry{offset: s + e.cur, val: cv} + e.table[nextHash] = tableEntry{offset: s + e.cur} offset = s - (candidate.offset - e.cur) - if offset < maxMatchOffset && cv == candidate.val { - e.table[nextHash] = tableEntry{offset: nextS + e.cur, val: uint32(now)} + if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) { + e.table[nextHash] = tableEntry{offset: nextS + e.cur} break } cv = uint32(now) @@ -134,7 +139,7 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) { // Index first pair after match end. 
if int(s+l+4) < len(src) { cv := load3232(src, s) - e.table[hash(cv)] = tableEntry{offset: s + e.cur, val: cv} + e.table[hash(cv)] = tableEntry{offset: s + e.cur} } goto emitRemainder } @@ -148,14 +153,14 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) { x := load6432(src, s-2) o := e.cur + s - 2 prevHash := hash(uint32(x)) - e.table[prevHash] = tableEntry{offset: o, val: uint32(x)} + e.table[prevHash] = tableEntry{offset: o} x >>= 16 currHash := hash(uint32(x)) candidate = e.table[currHash] - e.table[currHash] = tableEntry{offset: o + 2, val: uint32(x)} + e.table[currHash] = tableEntry{offset: o + 2} offset := s - (candidate.offset - e.cur) - if offset > maxMatchOffset || uint32(x) != candidate.val { + if offset > maxMatchOffset || uint32(x) != load3232(src, candidate.offset-e.cur) { cv = uint32(x >> 8) s++ break diff --git a/vendor/github.com/klauspost/compress/flate/level2.go b/vendor/github.com/klauspost/compress/flate/level2.go index 7c824431e6..5b986a1944 100644 --- a/vendor/github.com/klauspost/compress/flate/level2.go +++ b/vendor/github.com/klauspost/compress/flate/level2.go @@ -1,5 +1,7 @@ package flate +import "fmt" + // fastGen maintains the table for matches, // and the previous byte block for level 2. // This is the generic implementation. @@ -16,6 +18,10 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) { minNonLiteralBlockSize = 1 + 1 + inputMargin ) + if debugDeflate && e.cur < 0 { + panic(fmt.Sprint("e.cur < 0: ", e.cur)) + } + // Protect against e.cur wraparound. 
for e.cur >= bufferReset { if len(e.hist) == 0 { @@ -77,12 +83,12 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) { } candidate = e.table[nextHash] now := load6432(src, nextS) - e.table[nextHash] = tableEntry{offset: s + e.cur, val: cv} + e.table[nextHash] = tableEntry{offset: s + e.cur} nextHash = hash4u(uint32(now), bTableBits) offset := s - (candidate.offset - e.cur) - if offset < maxMatchOffset && cv == candidate.val { - e.table[nextHash] = tableEntry{offset: nextS + e.cur, val: uint32(now)} + if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) { + e.table[nextHash] = tableEntry{offset: nextS + e.cur} break } @@ -92,10 +98,10 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) { nextS++ candidate = e.table[nextHash] now >>= 8 - e.table[nextHash] = tableEntry{offset: s + e.cur, val: cv} + e.table[nextHash] = tableEntry{offset: s + e.cur} offset = s - (candidate.offset - e.cur) - if offset < maxMatchOffset && cv == candidate.val { + if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) { break } cv = uint32(now) @@ -142,7 +148,7 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) { // Index first pair after match end. 
if int(s+l+4) < len(src) { cv := load3232(src, s) - e.table[hash4u(cv, bTableBits)] = tableEntry{offset: s + e.cur, val: cv} + e.table[hash4u(cv, bTableBits)] = tableEntry{offset: s + e.cur} } goto emitRemainder } @@ -151,15 +157,15 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) { for i := s - l + 2; i < s-5; i += 7 { x := load6432(src, int32(i)) nextHash := hash4u(uint32(x), bTableBits) - e.table[nextHash] = tableEntry{offset: e.cur + i, val: uint32(x)} + e.table[nextHash] = tableEntry{offset: e.cur + i} // Skip one x >>= 16 nextHash = hash4u(uint32(x), bTableBits) - e.table[nextHash] = tableEntry{offset: e.cur + i + 2, val: uint32(x)} + e.table[nextHash] = tableEntry{offset: e.cur + i + 2} // Skip one x >>= 16 nextHash = hash4u(uint32(x), bTableBits) - e.table[nextHash] = tableEntry{offset: e.cur + i + 4, val: uint32(x)} + e.table[nextHash] = tableEntry{offset: e.cur + i + 4} } // We could immediately start working at s now, but to improve @@ -172,14 +178,14 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) { o := e.cur + s - 2 prevHash := hash4u(uint32(x), bTableBits) prevHash2 := hash4u(uint32(x>>8), bTableBits) - e.table[prevHash] = tableEntry{offset: o, val: uint32(x)} - e.table[prevHash2] = tableEntry{offset: o + 1, val: uint32(x >> 8)} + e.table[prevHash] = tableEntry{offset: o} + e.table[prevHash2] = tableEntry{offset: o + 1} currHash := hash4u(uint32(x>>16), bTableBits) candidate = e.table[currHash] - e.table[currHash] = tableEntry{offset: o + 2, val: uint32(x >> 16)} + e.table[currHash] = tableEntry{offset: o + 2} offset := s - (candidate.offset - e.cur) - if offset > maxMatchOffset || uint32(x>>16) != candidate.val { + if offset > maxMatchOffset || uint32(x>>16) != load3232(src, candidate.offset-e.cur) { cv = uint32(x >> 24) s++ break diff --git a/vendor/github.com/klauspost/compress/flate/level3.go b/vendor/github.com/klauspost/compress/flate/level3.go index 4153d24c95..c22b4244a5 100644 --- 
a/vendor/github.com/klauspost/compress/flate/level3.go +++ b/vendor/github.com/klauspost/compress/flate/level3.go @@ -1,5 +1,7 @@ package flate +import "fmt" + // fastEncL3 type fastEncL3 struct { fastGen @@ -13,6 +15,10 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) { minNonLiteralBlockSize = 1 + 1 + inputMargin ) + if debugDeflate && e.cur < 0 { + panic(fmt.Sprint("e.cur < 0: ", e.cur)) + } + // Protect against e.cur wraparound. for e.cur >= bufferReset { if len(e.hist) == 0 { @@ -75,22 +81,26 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) { } candidates := e.table[nextHash] now := load3232(src, nextS) - e.table[nextHash] = tableEntryPrev{Prev: candidates.Cur, Cur: tableEntry{offset: s + e.cur, val: cv}} + + // Safe offset distance until s + 4... + minOffset := e.cur + s - (maxMatchOffset - 4) + e.table[nextHash] = tableEntryPrev{Prev: candidates.Cur, Cur: tableEntry{offset: s + e.cur}} // Check both candidates candidate = candidates.Cur - offset := s - (candidate.offset - e.cur) - if cv == candidate.val { - if offset > maxMatchOffset { - cv = now - // Previous will also be invalid, we have nothing. - continue - } - o2 := s - (candidates.Prev.offset - e.cur) - if cv != candidates.Prev.val || o2 > maxMatchOffset { + if candidate.offset < minOffset { + cv = now + // Previous will also be invalid, we have nothing. + continue + } + + if cv == load3232(src, candidate.offset-e.cur) { + if candidates.Prev.offset < minOffset || cv != load3232(src, candidates.Prev.offset-e.cur) { break } // Both match and are valid, pick longest. + offset := s - (candidate.offset - e.cur) + o2 := s - (candidates.Prev.offset - e.cur) l1, l2 := matchLen(src[s+4:], src[s-offset+4:]), matchLen(src[s+4:], src[s-o2+4:]) if l2 > l1 { candidate = candidates.Prev @@ -100,11 +110,8 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) { // We only check if value mismatches. // Offset will always be invalid in other cases. 
candidate = candidates.Prev - if cv == candidate.val { - offset := s - (candidate.offset - e.cur) - if offset <= maxMatchOffset { - break - } + if candidate.offset > minOffset && cv == load3232(src, candidate.offset-e.cur) { + break } } cv = now @@ -152,7 +159,7 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) { nextHash := hash(cv) e.table[nextHash] = tableEntryPrev{ Prev: e.table[nextHash].Cur, - Cur: tableEntry{offset: e.cur + t, val: cv}, + Cur: tableEntry{offset: e.cur + t}, } } goto emitRemainder @@ -164,21 +171,21 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) { prevHash := hash(uint32(x)) e.table[prevHash] = tableEntryPrev{ Prev: e.table[prevHash].Cur, - Cur: tableEntry{offset: e.cur + s - 3, val: uint32(x)}, + Cur: tableEntry{offset: e.cur + s - 3}, } x >>= 8 prevHash = hash(uint32(x)) e.table[prevHash] = tableEntryPrev{ Prev: e.table[prevHash].Cur, - Cur: tableEntry{offset: e.cur + s - 2, val: uint32(x)}, + Cur: tableEntry{offset: e.cur + s - 2}, } x >>= 8 prevHash = hash(uint32(x)) e.table[prevHash] = tableEntryPrev{ Prev: e.table[prevHash].Cur, - Cur: tableEntry{offset: e.cur + s - 1, val: uint32(x)}, + Cur: tableEntry{offset: e.cur + s - 1}, } x >>= 8 currHash := hash(uint32(x)) @@ -186,21 +193,18 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) { cv = uint32(x) e.table[currHash] = tableEntryPrev{ Prev: candidates.Cur, - Cur: tableEntry{offset: s + e.cur, val: cv}, + Cur: tableEntry{offset: s + e.cur}, } // Check both candidates candidate = candidates.Cur - if cv == candidate.val { - offset := s - (candidate.offset - e.cur) - if offset <= maxMatchOffset { - continue - } - } else { + minOffset := e.cur + s - (maxMatchOffset - 4) + + if candidate.offset > minOffset && cv != load3232(src, candidate.offset-e.cur) { // We only check if value mismatches. // Offset will always be invalid in other cases. 
candidate = candidates.Prev - if cv == candidate.val { + if candidate.offset > minOffset && cv == load3232(src, candidate.offset-e.cur) { offset := s - (candidate.offset - e.cur) if offset <= maxMatchOffset { continue diff --git a/vendor/github.com/klauspost/compress/flate/level4.go b/vendor/github.com/klauspost/compress/flate/level4.go index c689ac771b..e62f0c02b1 100644 --- a/vendor/github.com/klauspost/compress/flate/level4.go +++ b/vendor/github.com/klauspost/compress/flate/level4.go @@ -13,7 +13,9 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) { inputMargin = 12 - 1 minNonLiteralBlockSize = 1 + 1 + inputMargin ) - + if debugDeflate && e.cur < 0 { + panic(fmt.Sprint("e.cur < 0: ", e.cur)) + } // Protect against e.cur wraparound. for e.cur >= bufferReset { if len(e.hist) == 0 { @@ -90,24 +92,24 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) { sCandidate := e.table[nextHashS] lCandidate := e.bTable[nextHashL] next := load6432(src, nextS) - entry := tableEntry{offset: s + e.cur, val: uint32(cv)} + entry := tableEntry{offset: s + e.cur} e.table[nextHashS] = entry e.bTable[nextHashL] = entry t = lCandidate.offset - e.cur - if s-t < maxMatchOffset && uint32(cv) == lCandidate.val { + if s-t < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.offset-e.cur) { // We got a long match. Use that. break } t = sCandidate.offset - e.cur - if s-t < maxMatchOffset && uint32(cv) == sCandidate.val { + if s-t < maxMatchOffset && uint32(cv) == load3232(src, sCandidate.offset-e.cur) { // Found a 4 match... lCandidate = e.bTable[hash7(next, tableBits)] // If the next long is a candidate, check if we should use that instead... 
lOff := nextS - (lCandidate.offset - e.cur) - if lOff < maxMatchOffset && lCandidate.val == uint32(next) { + if lOff < maxMatchOffset && load3232(src, lCandidate.offset-e.cur) == uint32(next) { l1, l2 := matchLen(src[s+4:], src[t+4:]), matchLen(src[nextS+4:], src[nextS-lOff+4:]) if l2 > l1 { s = nextS @@ -135,7 +137,7 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) { if nextEmit < s { emitLiteral(dst, src[nextEmit:s]) } - if false { + if debugDeflate { if t >= s { panic("s-t") } @@ -158,8 +160,8 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) { // Index first pair after match end. if int(s+8) < len(src) { cv := load6432(src, s) - e.table[hash4x64(cv, tableBits)] = tableEntry{offset: s + e.cur, val: uint32(cv)} - e.bTable[hash7(cv, tableBits)] = tableEntry{offset: s + e.cur, val: uint32(cv)} + e.table[hash4x64(cv, tableBits)] = tableEntry{offset: s + e.cur} + e.bTable[hash7(cv, tableBits)] = tableEntry{offset: s + e.cur} } goto emitRemainder } @@ -169,20 +171,20 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) { i := nextS if i < s-1 { cv := load6432(src, i) - t := tableEntry{offset: i + e.cur, val: uint32(cv)} - t2 := tableEntry{val: uint32(cv >> 8), offset: t.offset + 1} + t := tableEntry{offset: i + e.cur} + t2 := tableEntry{offset: t.offset + 1} e.bTable[hash7(cv, tableBits)] = t e.bTable[hash7(cv>>8, tableBits)] = t2 - e.table[hash4u(t2.val, tableBits)] = t2 + e.table[hash4u(uint32(cv>>8), tableBits)] = t2 i += 3 for ; i < s-1; i += 3 { cv := load6432(src, i) - t := tableEntry{offset: i + e.cur, val: uint32(cv)} - t2 := tableEntry{val: uint32(cv >> 8), offset: t.offset + 1} + t := tableEntry{offset: i + e.cur} + t2 := tableEntry{offset: t.offset + 1} e.bTable[hash7(cv, tableBits)] = t e.bTable[hash7(cv>>8, tableBits)] = t2 - e.table[hash4u(t2.val, tableBits)] = t2 + e.table[hash4u(uint32(cv>>8), tableBits)] = t2 } } } @@ -193,8 +195,8 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) { o := e.cur + s - 1 prevHashS := hash4x64(x, 
tableBits) prevHashL := hash7(x, tableBits) - e.table[prevHashS] = tableEntry{offset: o, val: uint32(x)} - e.bTable[prevHashL] = tableEntry{offset: o, val: uint32(x)} + e.table[prevHashS] = tableEntry{offset: o} + e.bTable[prevHashL] = tableEntry{offset: o} cv = x >> 8 } diff --git a/vendor/github.com/klauspost/compress/flate/level5.go b/vendor/github.com/klauspost/compress/flate/level5.go index 14a2356126..d513f1ffd3 100644 --- a/vendor/github.com/klauspost/compress/flate/level5.go +++ b/vendor/github.com/klauspost/compress/flate/level5.go @@ -13,6 +13,9 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) { inputMargin = 12 - 1 minNonLiteralBlockSize = 1 + 1 + inputMargin ) + if debugDeflate && e.cur < 0 { + panic(fmt.Sprint("e.cur < 0: ", e.cur)) + } // Protect against e.cur wraparound. for e.cur >= bufferReset { @@ -97,7 +100,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) { sCandidate := e.table[nextHashS] lCandidate := e.bTable[nextHashL] next := load6432(src, nextS) - entry := tableEntry{offset: s + e.cur, val: uint32(cv)} + entry := tableEntry{offset: s + e.cur} e.table[nextHashS] = entry eLong := &e.bTable[nextHashL] eLong.Cur, eLong.Prev = entry, eLong.Cur @@ -107,14 +110,14 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) { t = lCandidate.Cur.offset - e.cur if s-t < maxMatchOffset { - if uint32(cv) == lCandidate.Cur.val { + if uint32(cv) == load3232(src, lCandidate.Cur.offset-e.cur) { // Store the next match - e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)} + e.table[nextHashS] = tableEntry{offset: nextS + e.cur} eLong := &e.bTable[nextHashL] - eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur t2 := lCandidate.Prev.offset - e.cur - if s-t2 < maxMatchOffset && uint32(cv) == lCandidate.Prev.val { + if s-t2 < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) { l = e.matchlen(s+4, 
t+4, src) + 4 ml1 := e.matchlen(s+4, t2+4, src) + 4 if ml1 > l { @@ -126,30 +129,30 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) { break } t = lCandidate.Prev.offset - e.cur - if s-t < maxMatchOffset && uint32(cv) == lCandidate.Prev.val { + if s-t < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) { // Store the next match - e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)} + e.table[nextHashS] = tableEntry{offset: nextS + e.cur} eLong := &e.bTable[nextHashL] - eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur break } } t = sCandidate.offset - e.cur - if s-t < maxMatchOffset && uint32(cv) == sCandidate.val { + if s-t < maxMatchOffset && uint32(cv) == load3232(src, sCandidate.offset-e.cur) { // Found a 4 match... l = e.matchlen(s+4, t+4, src) + 4 lCandidate = e.bTable[nextHashL] // Store the next match - e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)} + e.table[nextHashS] = tableEntry{offset: nextS + e.cur} eLong := &e.bTable[nextHashL] - eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur // If the next long is a candidate, use that... t2 := lCandidate.Cur.offset - e.cur if nextS-t2 < maxMatchOffset { - if lCandidate.Cur.val == uint32(next) { + if load3232(src, lCandidate.Cur.offset-e.cur) == uint32(next) { ml := e.matchlen(nextS+4, t2+4, src) + 4 if ml > l { t = t2 @@ -160,7 +163,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) { } // If the previous long is a candidate, use that... 
t2 = lCandidate.Prev.offset - e.cur - if nextS-t2 < maxMatchOffset && lCandidate.Prev.val == uint32(next) { + if nextS-t2 < maxMatchOffset && load3232(src, lCandidate.Prev.offset-e.cur) == uint32(next) { ml := e.matchlen(nextS+4, t2+4, src) + 4 if ml > l { t = t2 @@ -194,7 +197,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) { if nextEmit < s { emitLiteral(dst, src[nextEmit:s]) } - if false { + if debugDeflate { if t >= s { panic(fmt.Sprintln("s-t", s, t)) } @@ -223,31 +226,31 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) { i := s - l + 1 if i < s-1 { cv := load6432(src, i) - t := tableEntry{offset: i + e.cur, val: uint32(cv)} + t := tableEntry{offset: i + e.cur} e.table[hash4x64(cv, tableBits)] = t eLong := &e.bTable[hash7(cv, tableBits)] eLong.Cur, eLong.Prev = t, eLong.Cur // Do an long at i+1 cv >>= 8 - t = tableEntry{offset: t.offset + 1, val: uint32(cv)} + t = tableEntry{offset: t.offset + 1} eLong = &e.bTable[hash7(cv, tableBits)] eLong.Cur, eLong.Prev = t, eLong.Cur // We only have enough bits for a short entry at i+2 cv >>= 8 - t = tableEntry{offset: t.offset + 1, val: uint32(cv)} + t = tableEntry{offset: t.offset + 1} e.table[hash4x64(cv, tableBits)] = t // Skip one - otherwise we risk hitting 's' i += 4 for ; i < s-1; i += hashEvery { cv := load6432(src, i) - t := tableEntry{offset: i + e.cur, val: uint32(cv)} - t2 := tableEntry{offset: t.offset + 1, val: uint32(cv >> 8)} + t := tableEntry{offset: i + e.cur} + t2 := tableEntry{offset: t.offset + 1} eLong := &e.bTable[hash7(cv, tableBits)] eLong.Cur, eLong.Prev = t, eLong.Cur - e.table[hash4u(t2.val, tableBits)] = t2 + e.table[hash4u(uint32(cv>>8), tableBits)] = t2 } } } @@ -258,9 +261,9 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) { o := e.cur + s - 1 prevHashS := hash4x64(x, tableBits) prevHashL := hash7(x, tableBits) - e.table[prevHashS] = tableEntry{offset: o, val: uint32(x)} + e.table[prevHashS] = tableEntry{offset: o} eLong := &e.bTable[prevHashL] - eLong.Cur, eLong.Prev 
= tableEntry{offset: o, val: uint32(x)}, eLong.Cur + eLong.Cur, eLong.Prev = tableEntry{offset: o}, eLong.Cur cv = x >> 8 } diff --git a/vendor/github.com/klauspost/compress/flate/level6.go b/vendor/github.com/klauspost/compress/flate/level6.go index cad0c7df7f..a52c80ea45 100644 --- a/vendor/github.com/klauspost/compress/flate/level6.go +++ b/vendor/github.com/klauspost/compress/flate/level6.go @@ -13,6 +13,9 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) { inputMargin = 12 - 1 minNonLiteralBlockSize = 1 + 1 + inputMargin ) + if debugDeflate && e.cur < 0 { + panic(fmt.Sprint("e.cur < 0: ", e.cur)) + } // Protect against e.cur wraparound. for e.cur >= bufferReset { @@ -98,7 +101,7 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) { sCandidate := e.table[nextHashS] lCandidate := e.bTable[nextHashL] next := load6432(src, nextS) - entry := tableEntry{offset: s + e.cur, val: uint32(cv)} + entry := tableEntry{offset: s + e.cur} e.table[nextHashS] = entry eLong := &e.bTable[nextHashL] eLong.Cur, eLong.Prev = entry, eLong.Cur @@ -109,17 +112,17 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) { t = lCandidate.Cur.offset - e.cur if s-t < maxMatchOffset { - if uint32(cv) == lCandidate.Cur.val { + if uint32(cv) == load3232(src, lCandidate.Cur.offset-e.cur) { // Long candidate matches at least 4 bytes. // Store the next match - e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)} + e.table[nextHashS] = tableEntry{offset: nextS + e.cur} eLong := &e.bTable[nextHashL] - eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur // Check the previous long candidate as well. 
t2 := lCandidate.Prev.offset - e.cur - if s-t2 < maxMatchOffset && uint32(cv) == lCandidate.Prev.val { + if s-t2 < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) { l = e.matchlen(s+4, t+4, src) + 4 ml1 := e.matchlen(s+4, t2+4, src) + 4 if ml1 > l { @@ -132,17 +135,17 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) { } // Current value did not match, but check if previous long value does. t = lCandidate.Prev.offset - e.cur - if s-t < maxMatchOffset && uint32(cv) == lCandidate.Prev.val { + if s-t < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) { // Store the next match - e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)} + e.table[nextHashS] = tableEntry{offset: nextS + e.cur} eLong := &e.bTable[nextHashL] - eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur break } } t = sCandidate.offset - e.cur - if s-t < maxMatchOffset && uint32(cv) == sCandidate.val { + if s-t < maxMatchOffset && uint32(cv) == load3232(src, sCandidate.offset-e.cur) { // Found a 4 match... l = e.matchlen(s+4, t+4, src) + 4 @@ -150,9 +153,9 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) { lCandidate = e.bTable[nextHashL] // Store the next match - e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)} + e.table[nextHashS] = tableEntry{offset: nextS + e.cur} eLong := &e.bTable[nextHashL] - eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur // Check repeat at s + repOff const repOff = 1 @@ -171,7 +174,7 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) { // If the next long is a candidate, use that... 
t2 = lCandidate.Cur.offset - e.cur if nextS-t2 < maxMatchOffset { - if lCandidate.Cur.val == uint32(next) { + if load3232(src, lCandidate.Cur.offset-e.cur) == uint32(next) { ml := e.matchlen(nextS+4, t2+4, src) + 4 if ml > l { t = t2 @@ -182,7 +185,7 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) { } // If the previous long is a candidate, use that... t2 = lCandidate.Prev.offset - e.cur - if nextS-t2 < maxMatchOffset && lCandidate.Prev.val == uint32(next) { + if nextS-t2 < maxMatchOffset && load3232(src, lCandidate.Prev.offset-e.cur) == uint32(next) { ml := e.matchlen(nextS+4, t2+4, src) + 4 if ml > l { t = t2 @@ -241,9 +244,9 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) { // Index after match end. for i := nextS + 1; i < int32(len(src))-8; i += 2 { cv := load6432(src, i) - e.table[hash4x64(cv, tableBits)] = tableEntry{offset: i + e.cur, val: uint32(cv)} + e.table[hash4x64(cv, tableBits)] = tableEntry{offset: i + e.cur} eLong := &e.bTable[hash7(cv, tableBits)] - eLong.Cur, eLong.Prev = tableEntry{offset: i + e.cur, val: uint32(cv)}, eLong.Cur + eLong.Cur, eLong.Prev = tableEntry{offset: i + e.cur}, eLong.Cur } goto emitRemainder } @@ -252,8 +255,8 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) { if true { for i := nextS + 1; i < s-1; i += 2 { cv := load6432(src, i) - t := tableEntry{offset: i + e.cur, val: uint32(cv)} - t2 := tableEntry{offset: t.offset + 1, val: uint32(cv >> 8)} + t := tableEntry{offset: i + e.cur} + t2 := tableEntry{offset: t.offset + 1} eLong := &e.bTable[hash7(cv, tableBits)] eLong2 := &e.bTable[hash7(cv>>8, tableBits)] e.table[hash4x64(cv, tableBits)] = t diff --git a/vendor/github.com/klauspost/compress/flate/stateless.go b/vendor/github.com/klauspost/compress/flate/stateless.go index 524ee0ae37..53e8991246 100644 --- a/vendor/github.com/klauspost/compress/flate/stateless.go +++ b/vendor/github.com/klauspost/compress/flate/stateless.go @@ -3,10 +3,13 @@ package flate import ( "io" "math" + "sync" ) const ( 
maxStatelessBlock = math.MaxInt16 + // dictionary will be taken from maxStatelessBlock, so limit it. + maxStatelessDict = 8 << 10 slTableBits = 13 slTableSize = 1 << slTableBits @@ -24,11 +27,11 @@ func (s *statelessWriter) Close() error { } s.closed = true // Emit EOF block - return StatelessDeflate(s.dst, nil, true) + return StatelessDeflate(s.dst, nil, true, nil) } func (s *statelessWriter) Write(p []byte) (n int, err error) { - err = StatelessDeflate(s.dst, p, false) + err = StatelessDeflate(s.dst, p, false, nil) if err != nil { return 0, err } @@ -49,11 +52,27 @@ func NewStatelessWriter(dst io.Writer) io.WriteCloser { return &statelessWriter{dst: dst} } +// bitWriterPool contains bit writers that can be reused. +var bitWriterPool = sync.Pool{ + New: func() interface{} { + return newHuffmanBitWriter(nil) + }, +} + // StatelessDeflate allows to compress directly to a Writer without retaining state. // When returning everything will be flushed. -func StatelessDeflate(out io.Writer, in []byte, eof bool) error { +// Up to 8KB of an optional dictionary can be given which is presumed to presumed to precede the block. +// Longer dictionaries will be truncated and will still produce valid output. +// Sending nil dictionary is perfectly fine. +func StatelessDeflate(out io.Writer, in []byte, eof bool, dict []byte) error { var dst tokens - bw := newHuffmanBitWriter(out) + bw := bitWriterPool.Get().(*huffmanBitWriter) + bw.reset(out) + defer func() { + // don't keep a reference to our output + bw.reset(nil) + bitWriterPool.Put(bw) + }() if eof && len(in) == 0 { // Just write an EOF block. // Could be faster... 
@@ -62,35 +81,53 @@ func StatelessDeflate(out io.Writer, in []byte, eof bool) error { return bw.err } + // Truncate dict + if len(dict) > maxStatelessDict { + dict = dict[len(dict)-maxStatelessDict:] + } + for len(in) > 0 { todo := in - if len(todo) > maxStatelessBlock { - todo = todo[:maxStatelessBlock] + if len(todo) > maxStatelessBlock-len(dict) { + todo = todo[:maxStatelessBlock-len(dict)] } in = in[len(todo):] + uncompressed := todo + if len(dict) > 0 { + // combine dict and source + bufLen := len(todo) + len(dict) + combined := make([]byte, bufLen) + copy(combined, dict) + copy(combined[len(dict):], todo) + todo = combined + } // Compress - statelessEnc(&dst, todo) + statelessEnc(&dst, todo, int16(len(dict))) isEof := eof && len(in) == 0 if dst.n == 0 { - bw.writeStoredHeader(len(todo), isEof) + bw.writeStoredHeader(len(uncompressed), isEof) if bw.err != nil { return bw.err } - bw.writeBytes(todo) - } else if int(dst.n) > len(todo)-len(todo)>>4 { + bw.writeBytes(uncompressed) + } else if int(dst.n) > len(uncompressed)-len(uncompressed)>>4 { // If we removed less than 1/16th, huffman compress the block. - bw.writeBlockHuff(isEof, todo, false) + bw.writeBlockHuff(isEof, uncompressed, len(in) == 0) } else { - bw.writeBlockDynamic(&dst, isEof, todo, false) + bw.writeBlockDynamic(&dst, isEof, uncompressed, len(in) == 0) + } + if len(in) > 0 { + // Retain a dict if we have more + dict = todo[len(todo)-maxStatelessDict:] + dst.Reset() } if bw.err != nil { return bw.err } - dst.Reset() } if !eof { - // Align. + // Align, only a stored block can do that. 
bw.writeStoredHeader(0, false) } bw.flush() @@ -116,7 +153,7 @@ func load6416(b []byte, i int16) uint64 { uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56 } -func statelessEnc(dst *tokens, src []byte) { +func statelessEnc(dst *tokens, src []byte, startAt int16) { const ( inputMargin = 12 - 1 minNonLiteralBlockSize = 1 + 1 + inputMargin @@ -130,15 +167,23 @@ func statelessEnc(dst *tokens, src []byte) { // This check isn't in the Snappy implementation, but there, the caller // instead of the callee handles this case. - if len(src) < minNonLiteralBlockSize { + if len(src)-int(startAt) < minNonLiteralBlockSize { // We do not fill the token table. // This will be picked up by caller. - dst.n = uint16(len(src)) + dst.n = 0 return } + // Index until startAt + if startAt > 0 { + cv := load3232(src, 0) + for i := int16(0); i < startAt; i++ { + table[hashSL(cv)] = tableEntry{offset: i} + cv = (cv >> 8) | (uint32(src[i+4]) << 24) + } + } - s := int16(1) - nextEmit := int16(0) + s := startAt + 1 + nextEmit := startAt // sLimit is when to stop looking for offset/length copies. The inputMargin // lets us use a fast path for emitLiteral in the main loop, while we are // looking for copies. 
diff --git a/vendor/github.com/klauspost/compress/flate/token.go b/vendor/github.com/klauspost/compress/flate/token.go index b3df0d8941..f9abf606d6 100644 --- a/vendor/github.com/klauspost/compress/flate/token.go +++ b/vendor/github.com/klauspost/compress/flate/token.go @@ -184,9 +184,7 @@ func (t *tokens) indexTokens(in []token) { t.Reset() for _, tok := range in { if tok < matchType { - t.tokens[t.n] = tok - t.litHist[tok]++ - t.n++ + t.AddLiteral(tok.literal()) continue } t.AddMatch(uint32(tok.length()), tok.offset()) @@ -211,50 +209,60 @@ func (t *tokens) AddLiteral(lit byte) { t.nLits++ } +// from https://stackoverflow.com/a/28730362 +func mFastLog2(val float32) float32 { + ux := int32(math.Float32bits(val)) + log2 := (float32)(((ux >> 23) & 255) - 128) + ux &= -0x7f800001 + ux += 127 << 23 + uval := math.Float32frombits(uint32(ux)) + log2 += ((-0.34484843)*uval+2.02466578)*uval - 0.67487759 + return log2 +} + // EstimatedBits will return an minimum size estimated by an *optimal* // compression of the block. 
// The size of the block func (t *tokens) EstimatedBits() int { - shannon := float64(0) + shannon := float32(0) bits := int(0) nMatches := 0 if t.nLits > 0 { - invTotal := 1.0 / float64(t.nLits) + invTotal := 1.0 / float32(t.nLits) for _, v := range t.litHist[:] { if v > 0 { - n := float64(v) - shannon += math.Ceil(-math.Log2(n*invTotal) * n) + n := float32(v) + shannon += -mFastLog2(n*invTotal) * n } } // Just add 15 for EOB shannon += 15 - for _, v := range t.extraHist[1 : literalCount-256] { + for i, v := range t.extraHist[1 : literalCount-256] { if v > 0 { - n := float64(v) - shannon += math.Ceil(-math.Log2(n*invTotal) * n) - bits += int(lengthExtraBits[v&31]) * int(v) + n := float32(v) + shannon += -mFastLog2(n*invTotal) * n + bits += int(lengthExtraBits[i&31]) * int(v) nMatches += int(v) } } } if nMatches > 0 { - invTotal := 1.0 / float64(nMatches) - for _, v := range t.offHist[:offsetCodeCount] { + invTotal := 1.0 / float32(nMatches) + for i, v := range t.offHist[:offsetCodeCount] { if v > 0 { - n := float64(v) - shannon += math.Ceil(-math.Log2(n*invTotal) * n) - bits += int(offsetExtraBits[v&31]) * int(n) + n := float32(v) + shannon += -mFastLog2(n*invTotal) * n + bits += int(offsetExtraBits[i&31]) * int(v) } } } - return int(shannon) + bits } // AddMatch adds a match to the tokens. // This function is very sensitive to inlining and right on the border. func (t *tokens) AddMatch(xlength uint32, xoffset uint32) { - if debugDecode { + if debugDeflate { if xlength >= maxMatchLength+baseMatchLength { panic(fmt.Errorf("invalid length: %v", xlength)) } @@ -273,7 +281,7 @@ func (t *tokens) AddMatch(xlength uint32, xoffset uint32) { // AddMatchLong adds a match to the tokens, potentially longer than max match length. // Length should NOT have the base subtracted, only offset should. 
func (t *tokens) AddMatchLong(xlength int32, xoffset uint32) { - if debugDecode { + if debugDeflate { if xoffset >= maxMatchOffset+baseMatchOffset { panic(fmt.Errorf("invalid offset: %v", xoffset)) } diff --git a/vendor/github.com/klauspost/compress/gzip/gzip.go b/vendor/github.com/klauspost/compress/gzip/gzip.go index ed0cc148f8..6794cf48f4 100644 --- a/vendor/github.com/klauspost/compress/gzip/gzip.go +++ b/vendor/github.com/klauspost/compress/gzip/gzip.go @@ -207,7 +207,7 @@ func (z *Writer) Write(p []byte) (int, error) { z.size += uint32(len(p)) z.digest = crc32.Update(z.digest, crc32.IEEETable, p) if z.level == StatelessCompression { - return len(p), flate.StatelessDeflate(z.w, p, false) + return len(p), flate.StatelessDeflate(z.w, p, false, nil) } n, z.err = z.compressor.Write(p) return n, z.err @@ -255,7 +255,7 @@ func (z *Writer) Close() error { } } if z.level == StatelessCompression { - z.err = flate.StatelessDeflate(z.w, nil, true) + z.err = flate.StatelessDeflate(z.w, nil, true, nil) } else { z.err = z.compressor.Close() } -- cgit v1.2.3