1 file changed, 185 insertions, 710 deletions
diff --git a/vendor/github.com/klauspost/compress/flate/deflate.go b/vendor/github.com/klauspost/compress/flate/deflate.go
index 76e9682f7e..20c94f5968 100644
--- a/vendor/github.com/klauspost/compress/flate/deflate.go
+++ b/vendor/github.com/klauspost/compress/flate/deflate.go
@@ -50,8 +50,6 @@ const (
 	skipNever = math.MaxInt32
 )
 
-var useSSE42 bool
-
 type compressionLevel struct {
 	good, lazy, nice, chain, fastSkipHashing, level int
 }
@@ -77,16 +75,14 @@ var levels = []compressionLevel{
 	{32, 258, 258, 4096, skipNever, 9},
 }
 
-type compressor struct {
-	compressionLevel
-
-	w          *huffmanBitWriter
-	bulkHasher func([]byte, []uint32)
-
-	// compression algorithm
-	fill func(*compressor, []byte) int // copy data to window
-	step func(*compressor)             // process window
-	sync bool                          // requesting flush
+// advancedState contains state for the advanced levels, with bigger hash tables, etc.
+type advancedState struct {
+	// deflate state
+	length         int
+	offset         int
+	hash           uint32
+	maxInsertIndex int
+	ii             uint16 // position of last match, intended to overflow to reset.
 
 	// Input hash chains
 	// hashHead[hashValue] contains the largest inputIndex with the specified hash value
@@ -99,55 +95,63 @@ type compressor struct {
 	hashOffset int
 
 	// input window: unprocessed data is window[index:windowEnd]
-	index         int
+	index     int
+	hashMatch [maxMatchLength + minMatchLength]uint32
+}
+
+type compressor struct {
+	compressionLevel
+
+	w *huffmanBitWriter
+
+	// compression algorithm
+	fill func(*compressor, []byte) int // copy data to window
+	step func(*compressor)             // process window
+	sync bool                          // requesting flush
+
 	window        []byte
 	windowEnd     int
 	blockStart    int  // window index where current tokens start
 	byteAvailable bool // if true, still need to process window[index-1].
+	err           error
 
 	// queued output tokens
 	tokens tokens
-
-	// deflate state
-	length         int
-	offset         int
-	hash           uint32
-	maxInsertIndex int
-	err            error
-	ii             uint16 // position of last match, intended to overflow to reset.
-
-	snap      snappyEnc
-	hashMatch [maxMatchLength + minMatchLength]uint32
+	fast   fastEnc
+	state  *advancedState
 }
 
 func (d *compressor) fillDeflate(b []byte) int {
-	if d.index >= 2*windowSize-(minMatchLength+maxMatchLength) {
+	s := d.state
+	if s.index >= 2*windowSize-(minMatchLength+maxMatchLength) {
 		// shift the window by windowSize
 		copy(d.window[:], d.window[windowSize:2*windowSize])
-		d.index -= windowSize
+		s.index -= windowSize
 		d.windowEnd -= windowSize
 		if d.blockStart >= windowSize {
 			d.blockStart -= windowSize
 		} else {
 			d.blockStart = math.MaxInt32
 		}
-		d.hashOffset += windowSize
-		if d.hashOffset > maxHashOffset {
-			delta := d.hashOffset - 1
-			d.hashOffset -= delta
-			d.chainHead -= delta
-			for i, v := range d.hashPrev {
+		s.hashOffset += windowSize
+		if s.hashOffset > maxHashOffset {
+			delta := s.hashOffset - 1
+			s.hashOffset -= delta
+			s.chainHead -= delta
+			// Iterate over slices instead of arrays to avoid copying
+			// the entire table onto the stack (Issue #18625).
+			for i, v := range s.hashPrev[:] {
 				if int(v) > delta {
-					d.hashPrev[i] = uint32(int(v) - delta)
+					s.hashPrev[i] = uint32(int(v) - delta)
 				} else {
-					d.hashPrev[i] = 0
+					s.hashPrev[i] = 0
 				}
 			}
-			for i, v := range d.hashHead {
+			for i, v := range s.hashHead[:] {
 				if int(v) > delta {
-					d.hashHead[i] = uint32(int(v) - delta)
+					s.hashHead[i] = uint32(int(v) - delta)
 				} else {
-					d.hashHead[i] = 0
+					s.hashHead[i] = 0
 				}
 			}
 		}
@@ -157,14 +161,14 @@ func (d *compressor) fillDeflate(b []byte) int {
 	return n
 }
 
-func (d *compressor) writeBlock(tok tokens, index int, eof bool) error {
+func (d *compressor) writeBlock(tok *tokens, index int, eof bool) error {
 	if index > 0 || eof {
 		var window []byte
 		if d.blockStart <= index {
 			window = d.window[d.blockStart:index]
 		}
 		d.blockStart = index
-		d.w.writeBlock(tok.tokens[:tok.n], eof, window)
+		d.w.writeBlock(tok, eof, window)
 		return d.w.err
 	}
 	return nil
@@ -173,20 +177,20 @@ func (d *compressor) writeBlock(tok tokens, index int, eof bool) error {
 // writeBlockSkip writes the current block and uses the number of tokens
 // to determine if the block should be stored on no matches, or
 // only huffman encoded.
-func (d *compressor) writeBlockSkip(tok tokens, index int, eof bool) error {
+func (d *compressor) writeBlockSkip(tok *tokens, index int, eof bool) error {
 	if index > 0 || eof {
 		if d.blockStart <= index {
 			window := d.window[d.blockStart:index]
 			// If we removed less than a 64th of all literals
 			// we huffman compress the block.
 			if int(tok.n) > len(window)-int(tok.n>>6) {
-				d.w.writeBlockHuff(eof, window)
+				d.w.writeBlockHuff(eof, window, d.sync)
 			} else {
 				// Write a dynamic huffman block.
-				d.w.writeBlockDynamic(tok.tokens[:tok.n], eof, window)
+				d.w.writeBlockDynamic(tok, eof, window, d.sync)
 			}
 		} else {
-			d.w.writeBlock(tok.tokens[:tok.n], eof, nil)
+			d.w.writeBlock(tok, eof, nil)
 		}
 		d.blockStart = index
 		return d.w.err
@@ -201,10 +205,19 @@ func (d *compressor) writeBlockSkip(tok tokens, index int, eof bool) error {
 func (d *compressor) fillWindow(b []byte) {
 	// Do not fill window if we are in store-only mode,
 	// use constant or Snappy compression.
-	switch d.compressionLevel.level {
-	case 0, 1, 2:
+	if d.level == 0 {
+		return
+	}
+	if d.fast != nil {
+		// encode the last data, but discard the result
+		if len(b) > maxMatchOffset {
+			b = b[len(b)-maxMatchOffset:]
+		}
+		d.fast.Encode(&d.tokens, b)
+		d.tokens.Reset()
 		return
 	}
+	s := d.state
 	// If we are given too much, cut it.
 	if len(b) > windowSize {
 		b = b[len(b)-windowSize:]
@@ -227,28 +240,28 @@ func (d *compressor) fillWindow(b []byte) {
 			continue
 		}
 
-		dst := d.hashMatch[:dstSize]
-		d.bulkHasher(tocheck, dst)
+		dst := s.hashMatch[:dstSize]
+		bulkHash4(tocheck, dst)
 		var newH uint32
 		for i, val := range dst {
 			di := i + startindex
 			newH = val & hashMask
 			// Get previous value with the same hash.
 			// Our chain should point to the previous value.
-			d.hashPrev[di&windowMask] = d.hashHead[newH]
+			s.hashPrev[di&windowMask] = s.hashHead[newH]
 			// Set the head of the hash chain to us.
-			d.hashHead[newH] = uint32(di + d.hashOffset)
+			s.hashHead[newH] = uint32(di + s.hashOffset)
 		}
-		d.hash = newH
+		s.hash = newH
 	}
 	// Update window information.
 	d.windowEnd += n
-	d.index = n
+	s.index = n
 }
 
 // Try to find a match starting at index whose length is greater than prevSize.
 // We only look at chainCount possibilities before giving up.
-// pos = d.index, prevHead = d.chainHead-d.hashOffset, prevLength=minMatchLength-1, lookahead
+// pos = s.index, prevHead = s.chainHead-s.hashOffset, prevLength=minMatchLength-1, lookahead
 func (d *compressor) findMatch(pos int, prevHead int, prevLength int, lookahead int) (length, offset int, ok bool) {
 	minMatchLook := maxMatchLength
 	if lookahead < minMatchLook {
@@ -276,7 +289,7 @@ func (d *compressor) findMatch(pos int, prevHead int, prevLength int, lookahead
 
 	for i := prevHead; tries > 0; tries-- {
 		if wEnd == win[i+length] {
-			n := matchLen(win[i:], wPos, minMatchLook)
+			n := matchLen(win[i:i+minMatchLook], wPos)
 
 			if n > length && (n > minMatchLength || pos-i <= 4096) {
 				length = n
@@ -293,62 +306,7 @@ func (d *compressor) findMatch(pos int, prevHead int, prevLength int, lookahead
 			// hashPrev[i & windowMask] has already been overwritten, so stop now.
 			break
 		}
-		i = int(d.hashPrev[i&windowMask]) - d.hashOffset
-		if i < minIndex || i < 0 {
-			break
-		}
-	}
-	return
-}
-
-// Try to find a match starting at index whose length is greater than prevSize.
-// We only look at chainCount possibilities before giving up.
-// pos = d.index, prevHead = d.chainHead-d.hashOffset, prevLength=minMatchLength-1, lookahead
-func (d *compressor) findMatchSSE(pos int, prevHead int, prevLength int, lookahead int) (length, offset int, ok bool) {
-	minMatchLook := maxMatchLength
-	if lookahead < minMatchLook {
-		minMatchLook = lookahead
-	}
-
-	win := d.window[0 : pos+minMatchLook]
-
-	// We quit when we get a match that's at least nice long
-	nice := len(win) - pos
-	if d.nice < nice {
-		nice = d.nice
-	}
-
-	// If we've got a match that's good enough, only look in 1/4 the chain.
-	tries := d.chain
-	length = prevLength
-	if length >= d.good {
-		tries >>= 2
-	}
-
-	wEnd := win[pos+length]
-	wPos := win[pos:]
-	minIndex := pos - windowSize
-
-	for i := prevHead; tries > 0; tries-- {
-		if wEnd == win[i+length] {
-			n := matchLenSSE4(win[i:], wPos, minMatchLook)
-
-			if n > length && (n > minMatchLength || pos-i <= 4096) {
-				length = n
-				offset = pos - i
-				ok = true
-				if n >= nice {
-					// The match is good enough that we don't try to find a better one.
-					break
-				}
-				wEnd = win[pos+n]
-			}
-		}
-		if i == minIndex {
-			// hashPrev[i & windowMask] has already been overwritten, so stop now.
-			break
-		}
-		i = int(d.hashPrev[i&windowMask]) - d.hashOffset
+		i = int(d.state.hashPrev[i&windowMask]) - d.state.hashOffset
 		if i < minIndex || i < 0 {
 			break
 		}
@@ -364,293 +322,139 @@ func (d *compressor) writeStoredBlock(buf []byte) error {
 	return d.w.err
 }
 
-const hashmul = 0x1e35a7bd
-
 // hash4 returns a hash representation of the first 4 bytes
 // of the supplied slice.
 // The caller must ensure that len(b) >= 4.
 func hash4(b []byte) uint32 {
-	return ((uint32(b[3]) | uint32(b[2])<<8 | uint32(b[1])<<16 | uint32(b[0])<<24) * hashmul) >> (32 - hashBits)
+	b = b[:4]
+	return hash4u(uint32(b[3])|uint32(b[2])<<8|uint32(b[1])<<16|uint32(b[0])<<24, hashBits)
 }
 
 // bulkHash4 will compute hashes using the same
 // algorithm as hash4
 func bulkHash4(b []byte, dst []uint32) {
-	if len(b) < minMatchLength {
+	if len(b) < 4 {
 		return
 	}
 	hb := uint32(b[3]) | uint32(b[2])<<8 | uint32(b[1])<<16 | uint32(b[0])<<24
-	dst[0] = (hb * hashmul) >> (32 - hashBits)
-	end := len(b) - minMatchLength + 1
+	dst[0] = hash4u(hb, hashBits)
+	end := len(b) - 4 + 1
 	for i := 1; i < end; i++ {
 		hb = (hb << 8) | uint32(b[i+3])
-		dst[i] = (hb * hashmul) >> (32 - hashBits)
+		dst[i] = hash4u(hb, hashBits)
 	}
 }
 
-// matchLen returns the number of matching bytes in a and b
-// up to length 'max'. Both slices must be at least 'max'
-// bytes in size.
-func matchLen(a, b []byte, max int) int {
-	a = a[:max]
-	b = b[:len(a)]
-	for i, av := range a {
-		if b[i] != av {
-			return i
-		}
-	}
-	return max
-}
-
 func (d *compressor) initDeflate() {
 	d.window = make([]byte, 2*windowSize)
-	d.hashOffset = 1
-	d.length = minMatchLength - 1
-	d.offset = 0
 	d.byteAvailable = false
-	d.index = 0
-	d.hash = 0
-	d.chainHead = -1
-	d.bulkHasher = bulkHash4
-	if useSSE42 {
-		d.bulkHasher = crc32sseAll
-	}
-}
-
-// Assumes that d.fastSkipHashing != skipNever,
-// otherwise use deflateLazy
-func (d *compressor) deflate() {
-
-	// Sanity enables additional runtime tests.
-	// It's intended to be used during development
-	// to supplement the currently ad-hoc unit tests.
-	const sanity = false
-
-	if d.windowEnd-d.index < minMatchLength+maxMatchLength && !d.sync {
+	d.err = nil
+	if d.state == nil {
 		return
 	}
-
-	d.maxInsertIndex = d.windowEnd - (minMatchLength - 1)
-	if d.index < d.maxInsertIndex {
-		d.hash = hash4(d.window[d.index : d.index+minMatchLength])
-	}
-
-	for {
-		if sanity && d.index > d.windowEnd {
-			panic("index > windowEnd")
-		}
-		lookahead := d.windowEnd - d.index
-		if lookahead < minMatchLength+maxMatchLength {
-			if !d.sync {
-				return
-			}
-			if sanity && d.index > d.windowEnd {
-				panic("index > windowEnd")
-			}
-			if lookahead == 0 {
-				if d.tokens.n > 0 {
-					if d.err = d.writeBlockSkip(d.tokens, d.index, false); d.err != nil {
-						return
-					}
-					d.tokens.n = 0
-				}
-				return
-			}
-		}
-		if d.index < d.maxInsertIndex {
-			// Update the hash
-			d.hash = hash4(d.window[d.index : d.index+minMatchLength])
-			ch := d.hashHead[d.hash&hashMask]
-			d.chainHead = int(ch)
-			d.hashPrev[d.index&windowMask] = ch
-			d.hashHead[d.hash&hashMask] = uint32(d.index + d.hashOffset)
-		}
-		d.length = minMatchLength - 1
-		d.offset = 0
-		minIndex := d.index - windowSize
-		if minIndex < 0 {
-			minIndex = 0
-		}
-
-		if d.chainHead-d.hashOffset >= minIndex && lookahead > minMatchLength-1 {
-			if newLength, newOffset, ok := d.findMatch(d.index, d.chainHead-d.hashOffset, minMatchLength-1, lookahead); ok {
-				d.length = newLength
-				d.offset = newOffset
-			}
-		}
-		if d.length >= minMatchLength {
-			d.ii = 0
-			// There was a match at the previous step, and the current match is
-			// not better. Output the previous match.
-			// "d.length-3" should NOT be "d.length-minMatchLength", since the format always assume 3
-			d.tokens.tokens[d.tokens.n] = matchToken(uint32(d.length-3), uint32(d.offset-minOffsetSize))
-			d.tokens.n++
-			// Insert in the hash table all strings up to the end of the match.
-			// index and index-1 are already inserted. If there is not enough
-			// lookahead, the last two strings are not inserted into the hash
-			// table.
-			if d.length <= d.fastSkipHashing {
-				var newIndex int
-				newIndex = d.index + d.length
-				// Calculate missing hashes
-				end := newIndex
-				if end > d.maxInsertIndex {
-					end = d.maxInsertIndex
-				}
-				end += minMatchLength - 1
-				startindex := d.index + 1
-				if startindex > d.maxInsertIndex {
-					startindex = d.maxInsertIndex
-				}
-				tocheck := d.window[startindex:end]
-				dstSize := len(tocheck) - minMatchLength + 1
-				if dstSize > 0 {
-					dst := d.hashMatch[:dstSize]
-					bulkHash4(tocheck, dst)
-					var newH uint32
-					for i, val := range dst {
-						di := i + startindex
-						newH = val & hashMask
-						// Get previous value with the same hash.
-						// Our chain should point to the previous value.
-						d.hashPrev[di&windowMask] = d.hashHead[newH]
-						// Set the head of the hash chain to us.
-						d.hashHead[newH] = uint32(di + d.hashOffset)
-					}
-					d.hash = newH
-				}
-				d.index = newIndex
-			} else {
-				// For matches this long, we don't bother inserting each individual
-				// item into the table.
-				d.index += d.length
-				if d.index < d.maxInsertIndex {
-					d.hash = hash4(d.window[d.index : d.index+minMatchLength])
-				}
-			}
-			if d.tokens.n == maxFlateBlockTokens {
-				// The block includes the current character
-				if d.err = d.writeBlockSkip(d.tokens, d.index, false); d.err != nil {
-					return
-				}
-				d.tokens.n = 0
-			}
-		} else {
-			d.ii++
-			end := d.index + int(d.ii>>uint(d.fastSkipHashing)) + 1
-			if end > d.windowEnd {
-				end = d.windowEnd
-			}
-			for i := d.index; i < end; i++ {
-				d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[i]))
-				d.tokens.n++
-				if d.tokens.n == maxFlateBlockTokens {
-					if d.err = d.writeBlockSkip(d.tokens, i+1, false); d.err != nil {
-						return
-					}
-					d.tokens.n = 0
-				}
-			}
-			d.index = end
-		}
-	}
+	s := d.state
+	s.index = 0
+	s.hashOffset = 1
+	s.length = minMatchLength - 1
+	s.offset = 0
+	s.hash = 0
+	s.chainHead = -1
 }
 
 // deflateLazy is the same as deflate, but with d.fastSkipHashing == skipNever,
 // meaning it always has lazy matching on.
 func (d *compressor) deflateLazy() {
+	s := d.state
 	// Sanity enables additional runtime tests.
 	// It's intended to be used during development
 	// to supplement the currently ad-hoc unit tests.
 	const sanity = false
 
-	if d.windowEnd-d.index < minMatchLength+maxMatchLength && !d.sync {
+	if d.windowEnd-s.index < minMatchLength+maxMatchLength && !d.sync {
 		return
 	}
 
-	d.maxInsertIndex = d.windowEnd - (minMatchLength - 1)
-	if d.index < d.maxInsertIndex {
-		d.hash = hash4(d.window[d.index : d.index+minMatchLength])
+	s.maxInsertIndex = d.windowEnd - (minMatchLength - 1)
+	if s.index < s.maxInsertIndex {
+		s.hash = hash4(d.window[s.index : s.index+minMatchLength])
 	}
 
 	for {
-		if sanity && d.index > d.windowEnd {
+		if sanity && s.index > d.windowEnd {
 			panic("index > windowEnd")
 		}
-		lookahead := d.windowEnd - d.index
+		lookahead := d.windowEnd - s.index
 		if lookahead < minMatchLength+maxMatchLength {
 			if !d.sync {
 				return
 			}
-			if sanity && d.index > d.windowEnd {
+			if sanity && s.index > d.windowEnd {
 				panic("index > windowEnd")
 			}
 			if lookahead == 0 {
 				// Flush current output block if any.
 				if d.byteAvailable {
 					// There is still one pending token that needs to be flushed
-					d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[d.index-1]))
-					d.tokens.n++
+					d.tokens.AddLiteral(d.window[s.index-1])
 					d.byteAvailable = false
 				}
 				if d.tokens.n > 0 {
-					if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil {
+					if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil {
 						return
 					}
-					d.tokens.n = 0
+					d.tokens.Reset()
 				}
 				return
 			}
 		}
-		if d.index < d.maxInsertIndex {
+		if s.index < s.maxInsertIndex {
 			// Update the hash
-			d.hash = hash4(d.window[d.index : d.index+minMatchLength])
-			ch := d.hashHead[d.hash&hashMask]
-			d.chainHead = int(ch)
-			d.hashPrev[d.index&windowMask] = ch
-			d.hashHead[d.hash&hashMask] = uint32(d.index + d.hashOffset)
-		}
-		prevLength := d.length
-		prevOffset := d.offset
-		d.length = minMatchLength - 1
-		d.offset = 0
-		minIndex := d.index - windowSize
+			s.hash = hash4(d.window[s.index : s.index+minMatchLength])
+			ch := s.hashHead[s.hash&hashMask]
+			s.chainHead = int(ch)
+			s.hashPrev[s.index&windowMask] = ch
+			s.hashHead[s.hash&hashMask] = uint32(s.index + s.hashOffset)
+		}
+		prevLength := s.length
+		prevOffset := s.offset
+		s.length = minMatchLength - 1
+		s.offset = 0
+		minIndex := s.index - windowSize
 		if minIndex < 0 {
 			minIndex = 0
 		}
 
-		if d.chainHead-d.hashOffset >= minIndex && lookahead > prevLength && prevLength < d.lazy {
-			if newLength, newOffset, ok := d.findMatch(d.index, d.chainHead-d.hashOffset, minMatchLength-1, lookahead); ok {
-				d.length = newLength
-				d.offset = newOffset
+		if s.chainHead-s.hashOffset >= minIndex && lookahead > prevLength && prevLength < d.lazy {
+			if newLength, newOffset, ok := d.findMatch(s.index, s.chainHead-s.hashOffset, minMatchLength-1, lookahead); ok {
+				s.length = newLength
+				s.offset = newOffset
 			}
 		}
-		if prevLength >= minMatchLength && d.length <= prevLength {
+		if prevLength >= minMatchLength && s.length <= prevLength {
 			// There was a match at the previous step, and the current match is
 			// not better. Output the previous match.
-			d.tokens.tokens[d.tokens.n] = matchToken(uint32(prevLength-3), uint32(prevOffset-minOffsetSize))
-			d.tokens.n++
+			d.tokens.AddMatch(uint32(prevLength-3), uint32(prevOffset-minOffsetSize))
 
 			// Insert in the hash table all strings up to the end of the match.
 			// index and index-1 are already inserted. If there is not enough
 			// lookahead, the last two strings are not inserted into the hash
 			// table.
 			var newIndex int
-			newIndex = d.index + prevLength - 1
+			newIndex = s.index + prevLength - 1
 			// Calculate missing hashes
 			end := newIndex
-			if end > d.maxInsertIndex {
-				end = d.maxInsertIndex
+			if end > s.maxInsertIndex {
+				end = s.maxInsertIndex
 			}
 			end += minMatchLength - 1
-			startindex := d.index + 1
-			if startindex > d.maxInsertIndex {
-				startindex = d.maxInsertIndex
+			startindex := s.index + 1
+			if startindex > s.maxInsertIndex {
+				startindex = s.maxInsertIndex
 			}
 			tocheck := d.window[startindex:end]
 			dstSize := len(tocheck) - minMatchLength + 1
 			if dstSize > 0 {
-				dst := d.hashMatch[:dstSize]
+				dst := s.hashMatch[:dstSize]
 				bulkHash4(tocheck, dst)
 				var newH uint32
 				for i, val := range dst {
@@ -658,390 +462,71 @@ func (d *compressor) deflateLazy() {
 					newH = val & hashMask
 					// Get previous value with the same hash.
 					// Our chain should point to the previous value.
-					d.hashPrev[di&windowMask] = d.hashHead[newH]
-					// Set the head of the hash chain to us.
-					d.hashHead[newH] = uint32(di + d.hashOffset)
-				}
-				d.hash = newH
-			}
-
-			d.index = newIndex
-			d.byteAvailable = false
-			d.length = minMatchLength - 1
-			if d.tokens.n == maxFlateBlockTokens {
-				// The block includes the current character
-				if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil {
-					return
-				}
-				d.tokens.n = 0
-			}
-		} else {
-			// Reset, if we got a match this run.
-			if d.length >= minMatchLength {
-				d.ii = 0
-			}
-			// We have a byte waiting. Emit it.
-			if d.byteAvailable {
-				d.ii++
-				d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[d.index-1]))
-				d.tokens.n++
-				if d.tokens.n == maxFlateBlockTokens {
-					if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil {
-						return
-					}
-					d.tokens.n = 0
-				}
-				d.index++
-
-				// If we have a long run of no matches, skip additional bytes
-				// Resets when d.ii overflows after 64KB.
-				if d.ii > 31 {
-					n := int(d.ii >> 5)
-					for j := 0; j < n; j++ {
-						if d.index >= d.windowEnd-1 {
-							break
-						}
-
-						d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[d.index-1]))
-						d.tokens.n++
-						if d.tokens.n == maxFlateBlockTokens {
-							if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil {
-								return
-							}
-							d.tokens.n = 0
-						}
-						d.index++
-					}
-					// Flush last byte
-					d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[d.index-1]))
-					d.tokens.n++
-					d.byteAvailable = false
-					// d.length = minMatchLength - 1 // not needed, since d.ii is reset above, so it should never be > minMatchLength
-					if d.tokens.n == maxFlateBlockTokens {
-						if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil {
-							return
-						}
-						d.tokens.n = 0
-					}
-				}
-			} else {
-				d.index++
-				d.byteAvailable = true
-			}
-		}
-	}
-}
-
-// Assumes that d.fastSkipHashing != skipNever,
-// otherwise use deflateLazySSE
-func (d *compressor) deflateSSE() {
-
-	// Sanity enables additional runtime tests.
-	// It's intended to be used during development
-	// to supplement the currently ad-hoc unit tests.
-	const sanity = false
-
-	if d.windowEnd-d.index < minMatchLength+maxMatchLength && !d.sync {
-		return
-	}
-
-	d.maxInsertIndex = d.windowEnd - (minMatchLength - 1)
-	if d.index < d.maxInsertIndex {
-		d.hash = crc32sse(d.window[d.index:d.index+minMatchLength]) & hashMask
-	}
-
-	for {
-		if sanity && d.index > d.windowEnd {
-			panic("index > windowEnd")
-		}
-		lookahead := d.windowEnd - d.index
-		if lookahead < minMatchLength+maxMatchLength {
-			if !d.sync {
-				return
-			}
-			if sanity && d.index > d.windowEnd {
-				panic("index > windowEnd")
-			}
-			if lookahead == 0 {
-				if d.tokens.n > 0 {
-					if d.err = d.writeBlockSkip(d.tokens, d.index, false); d.err != nil {
-						return
-					}
-					d.tokens.n = 0
-				}
-				return
-			}
-		}
-		if d.index < d.maxInsertIndex {
-			// Update the hash
-			d.hash = crc32sse(d.window[d.index:d.index+minMatchLength]) & hashMask
-			ch := d.hashHead[d.hash]
-			d.chainHead = int(ch)
-			d.hashPrev[d.index&windowMask] = ch
-			d.hashHead[d.hash] = uint32(d.index + d.hashOffset)
-		}
-		d.length = minMatchLength - 1
-		d.offset = 0
-		minIndex := d.index - windowSize
-		if minIndex < 0 {
-			minIndex = 0
-		}
-
-		if d.chainHead-d.hashOffset >= minIndex && lookahead > minMatchLength-1 {
-			if newLength, newOffset, ok := d.findMatchSSE(d.index, d.chainHead-d.hashOffset, minMatchLength-1, lookahead); ok {
-				d.length = newLength
-				d.offset = newOffset
-			}
-		}
-		if d.length >= minMatchLength {
-			d.ii = 0
-			// There was a match at the previous step, and the current match is
-			// not better. Output the previous match.
-			// "d.length-3" should NOT be "d.length-minMatchLength", since the format always assume 3
-			d.tokens.tokens[d.tokens.n] = matchToken(uint32(d.length-3), uint32(d.offset-minOffsetSize))
-			d.tokens.n++
-			// Insert in the hash table all strings up to the end of the match.
-			// index and index-1 are already inserted. If there is not enough
-			// lookahead, the last two strings are not inserted into the hash
-			// table.
-			if d.length <= d.fastSkipHashing {
-				var newIndex int
-				newIndex = d.index + d.length
-				// Calculate missing hashes
-				end := newIndex
-				if end > d.maxInsertIndex {
-					end = d.maxInsertIndex
-				}
-				end += minMatchLength - 1
-				startindex := d.index + 1
-				if startindex > d.maxInsertIndex {
-					startindex = d.maxInsertIndex
-				}
-				tocheck := d.window[startindex:end]
-				dstSize := len(tocheck) - minMatchLength + 1
-				if dstSize > 0 {
-					dst := d.hashMatch[:dstSize]
-
-					crc32sseAll(tocheck, dst)
-					var newH uint32
-					for i, val := range dst {
-						di := i + startindex
-						newH = val & hashMask
-						// Get previous value with the same hash.
-						// Our chain should point to the previous value.
-						d.hashPrev[di&windowMask] = d.hashHead[newH]
-						// Set the head of the hash chain to us.
-						d.hashHead[newH] = uint32(di + d.hashOffset)
-					}
-					d.hash = newH
-				}
-				d.index = newIndex
-			} else {
-				// For matches this long, we don't bother inserting each individual
-				// item into the table.
-				d.index += d.length
-				if d.index < d.maxInsertIndex {
-					d.hash = crc32sse(d.window[d.index:d.index+minMatchLength]) & hashMask
-				}
-			}
-			if d.tokens.n == maxFlateBlockTokens {
-				// The block includes the current character
-				if d.err = d.writeBlockSkip(d.tokens, d.index, false); d.err != nil {
-					return
-				}
-				d.tokens.n = 0
-			}
-		} else {
-			d.ii++
-			end := d.index + int(d.ii>>5) + 1
-			if end > d.windowEnd {
-				end = d.windowEnd
-			}
-			for i := d.index; i < end; i++ {
-				d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[i]))
-				d.tokens.n++
-				if d.tokens.n == maxFlateBlockTokens {
-					if d.err = d.writeBlockSkip(d.tokens, i+1, false); d.err != nil {
-						return
-					}
-					d.tokens.n = 0
-				}
-			}
-			d.index = end
-		}
-	}
-}
-
-// deflateLazy is the same as deflate, but with d.fastSkipHashing == skipNever,
-// meaning it always has lazy matching on.
-func (d *compressor) deflateLazySSE() {
-	// Sanity enables additional runtime tests.
-	// It's intended to be used during development
-	// to supplement the currently ad-hoc unit tests.
-	const sanity = false
-
-	if d.windowEnd-d.index < minMatchLength+maxMatchLength && !d.sync {
-		return
-	}
-
-	d.maxInsertIndex = d.windowEnd - (minMatchLength - 1)
-	if d.index < d.maxInsertIndex {
-		d.hash = crc32sse(d.window[d.index:d.index+minMatchLength]) & hashMask
-	}
-
-	for {
-		if sanity && d.index > d.windowEnd {
-			panic("index > windowEnd")
-		}
-		lookahead := d.windowEnd - d.index
-		if lookahead < minMatchLength+maxMatchLength {
-			if !d.sync {
-				return
-			}
-			if sanity && d.index > d.windowEnd {
-				panic("index > windowEnd")
-			}
-			if lookahead == 0 {
-				// Flush current output block if any.
-				if d.byteAvailable {
-					// There is still one pending token that needs to be flushed
-					d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[d.index-1]))
-					d.tokens.n++
-					d.byteAvailable = false
-				}
-				if d.tokens.n > 0 {
-					if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil {
-						return
-					}
-					d.tokens.n = 0
-				}
-				return
-			}
-		}
-		if d.index < d.maxInsertIndex {
-			// Update the hash
-			d.hash = crc32sse(d.window[d.index:d.index+minMatchLength]) & hashMask
-			ch := d.hashHead[d.hash]
-			d.chainHead = int(ch)
-			d.hashPrev[d.index&windowMask] = ch
-			d.hashHead[d.hash] = uint32(d.index + d.hashOffset)
-		}
-		prevLength := d.length
-		prevOffset := d.offset
-		d.length = minMatchLength - 1
-		d.offset = 0
-		minIndex := d.index - windowSize
-		if minIndex < 0 {
-			minIndex = 0
-		}
-
-		if d.chainHead-d.hashOffset >= minIndex && lookahead > prevLength && prevLength < d.lazy {
-			if newLength, newOffset, ok := d.findMatchSSE(d.index, d.chainHead-d.hashOffset, minMatchLength-1, lookahead); ok {
-				d.length = newLength
-				d.offset = newOffset
-			}
-		}
-		if prevLength >= minMatchLength && d.length <= prevLength {
-			// There was a match at the previous step, and the current match is
-			// not better. Output the previous match.
-			d.tokens.tokens[d.tokens.n] = matchToken(uint32(prevLength-3), uint32(prevOffset-minOffsetSize))
-			d.tokens.n++
-
-			// Insert in the hash table all strings up to the end of the match.
-			// index and index-1 are already inserted. If there is not enough
-			// lookahead, the last two strings are not inserted into the hash
-			// table.
-			var newIndex int
-			newIndex = d.index + prevLength - 1
-			// Calculate missing hashes
-			end := newIndex
-			if end > d.maxInsertIndex {
-				end = d.maxInsertIndex
-			}
-			end += minMatchLength - 1
-			startindex := d.index + 1
-			if startindex > d.maxInsertIndex {
-				startindex = d.maxInsertIndex
-			}
-			tocheck := d.window[startindex:end]
-			dstSize := len(tocheck) - minMatchLength + 1
-			if dstSize > 0 {
-				dst := d.hashMatch[:dstSize]
-				crc32sseAll(tocheck, dst)
-				var newH uint32
-				for i, val := range dst {
-					di := i + startindex
-					newH = val & hashMask
-					// Get previous value with the same hash.
-					// Our chain should point to the previous value.
-					d.hashPrev[di&windowMask] = d.hashHead[newH]
+					s.hashPrev[di&windowMask] = s.hashHead[newH]
 					// Set the head of the hash chain to us.
-					d.hashHead[newH] = uint32(di + d.hashOffset)
+					s.hashHead[newH] = uint32(di + s.hashOffset)
 				}
-				d.hash = newH
+				s.hash = newH
 			}
 
-			d.index = newIndex
+			s.index = newIndex
 			d.byteAvailable = false
-			d.length = minMatchLength - 1
+			s.length = minMatchLength - 1
 			if d.tokens.n == maxFlateBlockTokens {
 				// The block includes the current character
-				if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil {
+				if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil {
 					return
 				}
-				d.tokens.n = 0
+				d.tokens.Reset()
 			}
 		} else {
 			// Reset, if we got a match this run.
-			if d.length >= minMatchLength {
-				d.ii = 0
+			if s.length >= minMatchLength {
+				s.ii = 0
 			}
 			// We have a byte waiting. Emit it.
 			if d.byteAvailable {
-				d.ii++
-				d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[d.index-1]))
-				d.tokens.n++
+				s.ii++
+				d.tokens.AddLiteral(d.window[s.index-1])
 				if d.tokens.n == maxFlateBlockTokens {
-					if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil {
+					if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil {
 						return
 					}
-					d.tokens.n = 0
+					d.tokens.Reset()
 				}
-				d.index++
+				s.index++
 
 				// If we have a long run of no matches, skip additional bytes
-				// Resets when d.ii overflows after 64KB.
-				if d.ii > 31 {
-					n := int(d.ii >> 6)
+				// Resets when s.ii overflows after 64KB.
+				if s.ii > 31 {
+					n := int(s.ii >> 5)
 					for j := 0; j < n; j++ {
-						if d.index >= d.windowEnd-1 {
+						if s.index >= d.windowEnd-1 {
 							break
 						}
 
-						d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[d.index-1]))
-						d.tokens.n++
+						d.tokens.AddLiteral(d.window[s.index-1])
 						if d.tokens.n == maxFlateBlockTokens {
-							if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil {
+							if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil {
 								return
 							}
-							d.tokens.n = 0
+							d.tokens.Reset()
 						}
-						d.index++
+						s.index++
 					}
 					// Flush last byte
-					d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[d.index-1]))
-					d.tokens.n++
+					d.tokens.AddLiteral(d.window[s.index-1])
 					d.byteAvailable = false
-					// d.length = minMatchLength - 1 // not needed, since d.ii is reset above, so it should never be > minMatchLength
+					// s.length = minMatchLength - 1 // not needed, since s.ii is reset above, so it should never be > minMatchLength
 					if d.tokens.n == maxFlateBlockTokens {
-						if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil {
+						if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil {
 							return
 						}
-						d.tokens.n = 0
+						d.tokens.Reset()
 					}
 				}
 			} else {
-				d.index++
+				s.index++
 				d.byteAvailable = true
 			}
 		}
@@ -1070,17 +555,17 @@ func (d *compressor) storeHuff() {
 	if d.windowEnd < len(d.window) && !d.sync || d.windowEnd == 0 {
 		return
 	}
-	d.w.writeBlockHuff(false, d.window[:d.windowEnd])
+	d.w.writeBlockHuff(false, d.window[:d.windowEnd], d.sync)
 	d.err = d.w.err
 	d.windowEnd = 0
 }
 
-// storeHuff will compress and store the currently added data,
+// storeFast will compress and store the currently added data,
 // if enough has been accumulated or we at the end of the stream.
 // Any error that occurred will be in d.err
-func (d *compressor) storeSnappy() {
+func (d *compressor) storeFast() {
 	// We only compress if we have maxStoreBlockSize.
-	if d.windowEnd < maxStoreBlockSize {
+	if d.windowEnd < len(d.window) {
 		if !d.sync {
 			return
 		}
@@ -1091,32 +576,30 @@ func (d *compressor) storeSnappy() {
 			}
 			if d.windowEnd <= 32 {
 				d.err = d.writeStoredBlock(d.window[:d.windowEnd])
-				d.tokens.n = 0
-				d.windowEnd = 0
 			} else {
-				d.w.writeBlockHuff(false, d.window[:d.windowEnd])
+				d.w.writeBlockHuff(false, d.window[:d.windowEnd], true)
 				d.err = d.w.err
 			}
-			d.tokens.n = 0
+			d.tokens.Reset()
 			d.windowEnd = 0
-			d.snap.Reset()
+			d.fast.Reset()
 			return
 		}
 	}
 
-	d.snap.Encode(&d.tokens, d.window[:d.windowEnd])
+	d.fast.Encode(&d.tokens, d.window[:d.windowEnd])
 	// If we made zero matches, store the block as is.
-	if int(d.tokens.n) == d.windowEnd {
+	if d.tokens.n == 0 {
 		d.err = d.writeStoredBlock(d.window[:d.windowEnd])
 		// If we removed less than 1/16th, huffman compress the block.
 	} else if int(d.tokens.n) > d.windowEnd-(d.windowEnd>>4) {
-		d.w.writeBlockHuff(false, d.window[:d.windowEnd])
+		d.w.writeBlockHuff(false, d.window[:d.windowEnd], d.sync)
 		d.err = d.w.err
 	} else {
-		d.w.writeBlockDynamic(d.tokens.tokens[:d.tokens.n], false, d.window[:d.windowEnd])
+		d.w.writeBlockDynamic(&d.tokens, false, d.window[:d.windowEnd], d.sync)
 		d.err = d.w.err
 	}
-	d.tokens.n = 0
+	d.tokens.Reset()
 	d.windowEnd = 0
 }
 
@@ -1161,35 +644,26 @@ func (d *compressor) init(w io.Writer, level int) (err error) {
 		d.fill = (*compressor).fillBlock
 		d.step = (*compressor).store
 	case level == ConstantCompression:
+		d.w.logReusePenalty = uint(4)
 		d.window = make([]byte, maxStoreBlockSize)
 		d.fill = (*compressor).fillBlock
 		d.step = (*compressor).storeHuff
-	case level >= 1 && level <= 4:
-		d.snap = newSnappy(level)
-		d.window = make([]byte, maxStoreBlockSize)
-		d.fill = (*compressor).fillBlock
-		d.step = (*compressor).storeSnappy
 	case level == DefaultCompression:
 		level = 5
 		fallthrough
-	case 5 <= level && level <= 9:
+	case level >= 1 && level <= 6:
+		d.w.logReusePenalty = uint(level + 1)
+		d.fast = newFastEnc(level)
+		d.window = make([]byte, maxStoreBlockSize)
+		d.fill = (*compressor).fillBlock
+		d.step = (*compressor).storeFast
+	case 7 <= level && level <= 9:
+		d.w.logReusePenalty = uint(level)
+		d.state = &advancedState{}
 		d.compressionLevel = levels[level]
 		d.initDeflate()
 		d.fill = (*compressor).fillDeflate
-		if d.fastSkipHashing == skipNever {
-			if useSSE42 {
-				d.step = (*compressor).deflateLazySSE
-			} else {
-				d.step = (*compressor).deflateLazy
-			}
-		} else {
-			if useSSE42 {
-				d.step = (*compressor).deflateSSE
-			} else {
-				d.step = (*compressor).deflate
-
-			}
-		}
+		d.step = (*compressor).deflateLazy
 	default:
 		return fmt.Errorf("flate: invalid compression level %d: want value in range [-2, 9]", level)
 	}
@@ -1202,10 +676,10 @@ func (d *compressor) reset(w io.Writer) {
 	d.sync = false
 	d.err = nil
 	// We only need to reset a few things for Snappy.
-	if d.snap != nil {
-		d.snap.Reset()
+	if d.fast != nil {
+		d.fast.Reset()
 		d.windowEnd = 0
-		d.tokens.n = 0
+		d.tokens.Reset()
 		return
 	}
 	switch d.compressionLevel.chain {
@@ -1213,22 +687,23 @@ func (d *compressor) reset(w io.Writer) {
 		// level was NoCompression or ConstantCompresssion.
 		d.windowEnd = 0
 	default:
-		d.chainHead = -1
-		for i := range d.hashHead {
-			d.hashHead[i] = 0
+		s := d.state
+		s.chainHead = -1
+		for i := range s.hashHead {
+			s.hashHead[i] = 0
 		}
-		for i := range d.hashPrev {
-			d.hashPrev[i] = 0
+		for i := range s.hashPrev {
+			s.hashPrev[i] = 0
 		}
-		d.hashOffset = 1
-		d.index, d.windowEnd = 0, 0
+		s.hashOffset = 1
+		s.index, d.windowEnd = 0, 0
 		d.blockStart, d.byteAvailable = 0, false
-		d.tokens.n = 0
-		d.length = minMatchLength - 1
-		d.offset = 0
-		d.hash = 0
-		d.ii = 0
-		d.maxInsertIndex = 0
+		d.tokens.Reset()
+		s.length = minMatchLength - 1
+		s.offset = 0
+		s.hash = 0
+		s.ii = 0
+		s.maxInsertIndex = 0
 	}
 }