aboutsummaryrefslogtreecommitdiffstats
path: root/vendor/github.com/golang
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/golang')
-rw-r--r--vendor/github.com/golang/snappy/.gitignore16
-rw-r--r--vendor/github.com/golang/snappy/README107
-rw-r--r--vendor/github.com/golang/snappy/decode.go24
-rw-r--r--vendor/github.com/golang/snappy/decode_amd64.go4
-rw-r--r--vendor/github.com/golang/snappy/decode_amd64.s38
-rw-r--r--vendor/github.com/golang/snappy/decode_other.go13
-rw-r--r--vendor/github.com/golang/snappy/encode.go186
-rw-r--r--vendor/github.com/golang/snappy/encode_amd64.go29
-rw-r--r--vendor/github.com/golang/snappy/encode_amd64.s730
-rw-r--r--vendor/github.com/golang/snappy/encode_other.go238
-rw-r--r--vendor/github.com/golang/snappy/snappy.go22
11 files changed, 1225 insertions, 182 deletions
diff --git a/vendor/github.com/golang/snappy/.gitignore b/vendor/github.com/golang/snappy/.gitignore
new file mode 100644
index 0000000000..042091d9b3
--- /dev/null
+++ b/vendor/github.com/golang/snappy/.gitignore
@@ -0,0 +1,16 @@
+cmd/snappytool/snappytool
+testdata/bench
+
+# These explicitly listed benchmark data files are for an obsolete version of
+# snappy_test.go.
+testdata/alice29.txt
+testdata/asyoulik.txt
+testdata/fireworks.jpeg
+testdata/geo.protodata
+testdata/html
+testdata/html_x_4
+testdata/kppkn.gtb
+testdata/lcet10.txt
+testdata/paper-100k.pdf
+testdata/plrabn12.txt
+testdata/urls.10K
diff --git a/vendor/github.com/golang/snappy/README b/vendor/github.com/golang/snappy/README
new file mode 100644
index 0000000000..cea12879a0
--- /dev/null
+++ b/vendor/github.com/golang/snappy/README
@@ -0,0 +1,107 @@
+The Snappy compression format in the Go programming language.
+
+To download and install from source:
+$ go get github.com/golang/snappy
+
+Unless otherwise noted, the Snappy-Go source files are distributed
+under the BSD-style license found in the LICENSE file.
+
+
+
+Benchmarks.
+
+The golang/snappy benchmarks include compressing (Z) and decompressing (U) ten
+or so files, the same set used by the C++ Snappy code (github.com/google/snappy
+and note the "google", not "golang"). On an "Intel(R) Core(TM) i7-3770 CPU @
+3.40GHz", Go's GOARCH=amd64 numbers as of 2016-05-29:
+
+"go test -test.bench=."
+
+_UFlat0-8 2.19GB/s ± 0% html
+_UFlat1-8 1.41GB/s ± 0% urls
+_UFlat2-8 23.5GB/s ± 2% jpg
+_UFlat3-8 1.91GB/s ± 0% jpg_200
+_UFlat4-8 14.0GB/s ± 1% pdf
+_UFlat5-8 1.97GB/s ± 0% html4
+_UFlat6-8 814MB/s ± 0% txt1
+_UFlat7-8 785MB/s ± 0% txt2
+_UFlat8-8 857MB/s ± 0% txt3
+_UFlat9-8 719MB/s ± 1% txt4
+_UFlat10-8 2.84GB/s ± 0% pb
+_UFlat11-8 1.05GB/s ± 0% gaviota
+
+_ZFlat0-8 1.04GB/s ± 0% html
+_ZFlat1-8 534MB/s ± 0% urls
+_ZFlat2-8 15.7GB/s ± 1% jpg
+_ZFlat3-8 740MB/s ± 3% jpg_200
+_ZFlat4-8 9.20GB/s ± 1% pdf
+_ZFlat5-8 991MB/s ± 0% html4
+_ZFlat6-8 379MB/s ± 0% txt1
+_ZFlat7-8 352MB/s ± 0% txt2
+_ZFlat8-8 396MB/s ± 1% txt3
+_ZFlat9-8 327MB/s ± 1% txt4
+_ZFlat10-8 1.33GB/s ± 1% pb
+_ZFlat11-8 605MB/s ± 1% gaviota
+
+
+
+"go test -test.bench=. -tags=noasm"
+
+_UFlat0-8 621MB/s ± 2% html
+_UFlat1-8 494MB/s ± 1% urls
+_UFlat2-8 23.2GB/s ± 1% jpg
+_UFlat3-8 1.12GB/s ± 1% jpg_200
+_UFlat4-8 4.35GB/s ± 1% pdf
+_UFlat5-8 609MB/s ± 0% html4
+_UFlat6-8 296MB/s ± 0% txt1
+_UFlat7-8 288MB/s ± 0% txt2
+_UFlat8-8 309MB/s ± 1% txt3
+_UFlat9-8 280MB/s ± 1% txt4
+_UFlat10-8 753MB/s ± 0% pb
+_UFlat11-8 400MB/s ± 0% gaviota
+
+_ZFlat0-8 409MB/s ± 1% html
+_ZFlat1-8 250MB/s ± 1% urls
+_ZFlat2-8 12.3GB/s ± 1% jpg
+_ZFlat3-8 132MB/s ± 0% jpg_200
+_ZFlat4-8 2.92GB/s ± 0% pdf
+_ZFlat5-8 405MB/s ± 1% html4
+_ZFlat6-8 179MB/s ± 1% txt1
+_ZFlat7-8 170MB/s ± 1% txt2
+_ZFlat8-8 189MB/s ± 1% txt3
+_ZFlat9-8 164MB/s ± 1% txt4
+_ZFlat10-8 479MB/s ± 1% pb
+_ZFlat11-8 270MB/s ± 1% gaviota
+
+
+
+For comparison (Go's encoded output is byte-for-byte identical to C++'s), here
+are the numbers from C++ Snappy's
+
+make CXXFLAGS="-O2 -DNDEBUG -g" clean snappy_unittest.log && cat snappy_unittest.log
+
+BM_UFlat/0 2.4GB/s html
+BM_UFlat/1 1.4GB/s urls
+BM_UFlat/2 21.8GB/s jpg
+BM_UFlat/3 1.5GB/s jpg_200
+BM_UFlat/4 13.3GB/s pdf
+BM_UFlat/5 2.1GB/s html4
+BM_UFlat/6 1.0GB/s txt1
+BM_UFlat/7 959.4MB/s txt2
+BM_UFlat/8 1.0GB/s txt3
+BM_UFlat/9 864.5MB/s txt4
+BM_UFlat/10 2.9GB/s pb
+BM_UFlat/11 1.2GB/s gaviota
+
+BM_ZFlat/0 944.3MB/s html (22.31 %)
+BM_ZFlat/1 501.6MB/s urls (47.78 %)
+BM_ZFlat/2 14.3GB/s jpg (99.95 %)
+BM_ZFlat/3 538.3MB/s jpg_200 (73.00 %)
+BM_ZFlat/4 8.3GB/s pdf (83.30 %)
+BM_ZFlat/5 903.5MB/s html4 (22.52 %)
+BM_ZFlat/6 336.0MB/s txt1 (57.88 %)
+BM_ZFlat/7 312.3MB/s txt2 (61.91 %)
+BM_ZFlat/8 353.1MB/s txt3 (54.99 %)
+BM_ZFlat/9 289.9MB/s txt4 (66.26 %)
+BM_ZFlat/10 1.2GB/s pb (19.68 %)
+BM_ZFlat/11 527.4MB/s gaviota (37.72 %)
diff --git a/vendor/github.com/golang/snappy/decode.go b/vendor/github.com/golang/snappy/decode.go
index 7be590cee7..72efb0353d 100644
--- a/vendor/github.com/golang/snappy/decode.go
+++ b/vendor/github.com/golang/snappy/decode.go
@@ -18,7 +18,6 @@ var (
// ErrUnsupported reports that the input isn't supported.
ErrUnsupported = errors.New("snappy: unsupported input")
- errUnsupportedCopy4Tag = errors.New("snappy: unsupported COPY_4 tag")
errUnsupportedLiteralLength = errors.New("snappy: unsupported literal length")
)
@@ -46,7 +45,6 @@ func decodedLen(src []byte) (blockLen, headerLen int, err error) {
const (
decodeErrCodeCorrupt = 1
decodeErrCodeUnsupportedLiteralLength = 2
- decodeErrCodeUnsupportedCopy4Tag = 3
)
// Decode returns the decoded form of src. The returned slice may be a sub-
@@ -69,8 +67,6 @@ func Decode(dst, src []byte) ([]byte, error) {
return dst, nil
case decodeErrCodeUnsupportedLiteralLength:
return nil, errUnsupportedLiteralLength
- case decodeErrCodeUnsupportedCopy4Tag:
- return nil, errUnsupportedCopy4Tag
}
return nil, ErrCorrupt
}
@@ -108,9 +104,9 @@ func (r *Reader) Reset(reader io.Reader) {
r.readHeader = false
}
-func (r *Reader) readFull(p []byte) (ok bool) {
+func (r *Reader) readFull(p []byte, allowEOF bool) (ok bool) {
if _, r.err = io.ReadFull(r.r, p); r.err != nil {
- if r.err == io.ErrUnexpectedEOF {
+ if r.err == io.ErrUnexpectedEOF || (r.err == io.EOF && !allowEOF) {
r.err = ErrCorrupt
}
return false
@@ -129,7 +125,7 @@ func (r *Reader) Read(p []byte) (int, error) {
r.i += n
return n, nil
}
- if !r.readFull(r.buf[:4]) {
+ if !r.readFull(r.buf[:4], true) {
return 0, r.err
}
chunkType := r.buf[0]
@@ -156,7 +152,7 @@ func (r *Reader) Read(p []byte) (int, error) {
return 0, r.err
}
buf := r.buf[:chunkLen]
- if !r.readFull(buf) {
+ if !r.readFull(buf, false) {
return 0, r.err
}
checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24
@@ -189,13 +185,17 @@ func (r *Reader) Read(p []byte) (int, error) {
return 0, r.err
}
buf := r.buf[:checksumSize]
- if !r.readFull(buf) {
+ if !r.readFull(buf, false) {
return 0, r.err
}
checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24
// Read directly into r.decoded instead of via r.buf.
n := chunkLen - checksumSize
- if !r.readFull(r.decoded[:n]) {
+ if n > len(r.decoded) {
+ r.err = ErrCorrupt
+ return 0, r.err
+ }
+ if !r.readFull(r.decoded[:n], false) {
return 0, r.err
}
if crc(r.decoded[:n]) != checksum {
@@ -211,7 +211,7 @@ func (r *Reader) Read(p []byte) (int, error) {
r.err = ErrCorrupt
return 0, r.err
}
- if !r.readFull(r.buf[:len(magicBody)]) {
+ if !r.readFull(r.buf[:len(magicBody)], false) {
return 0, r.err
}
for i := 0; i < len(magicBody); i++ {
@@ -230,7 +230,7 @@ func (r *Reader) Read(p []byte) (int, error) {
}
// Section 4.4 Padding (chunk type 0xfe).
// Section 4.6. Reserved skippable chunks (chunk types 0x80-0xfd).
- if !r.readFull(r.buf[:chunkLen]) {
+ if !r.readFull(r.buf[:chunkLen], false) {
return 0, r.err
}
}
diff --git a/vendor/github.com/golang/snappy/decode_amd64.go b/vendor/github.com/golang/snappy/decode_amd64.go
index 32bce47093..fcd192b849 100644
--- a/vendor/github.com/golang/snappy/decode_amd64.go
+++ b/vendor/github.com/golang/snappy/decode_amd64.go
@@ -2,6 +2,10 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+// +build !appengine
+// +build gc
+// +build !noasm
+
package snappy
// decode has the same semantics as in decode_other.go.
diff --git a/vendor/github.com/golang/snappy/decode_amd64.s b/vendor/github.com/golang/snappy/decode_amd64.s
index c33f5bf97b..e6179f65e3 100644
--- a/vendor/github.com/golang/snappy/decode_amd64.s
+++ b/vendor/github.com/golang/snappy/decode_amd64.s
@@ -2,12 +2,16 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+// +build !appengine
+// +build gc
+// +build !noasm
+
#include "textflag.h"
-// func decode(dst, src []byte) int
-//
// The asm code generally follows the pure Go code in decode_other.go, except
// where marked with a "!!!".
+
+// func decode(dst, src []byte) int
//
// All local variables fit into registers. The non-zero stack size is only to
// spill registers and push args when issuing a CALL. The register allocation:
@@ -222,6 +226,25 @@ tagLit63:
// ----------------------------------------
// The code below handles copy tags.
+tagCopy4:
+ // case tagCopy4:
+ // s += 5
+ ADDQ $5, SI
+
+ // if uint(s) > uint(len(src)) { etc }
+ MOVQ SI, BX
+ SUBQ R11, BX
+ CMPQ BX, R12
+ JA errCorrupt
+
+ // length = 1 + int(src[s-5])>>2
+ SHRQ $2, CX
+ INCQ CX
+
+ // offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
+ MOVLQZX -4(SI), DX
+ JMP doCopy
+
tagCopy2:
// case tagCopy2:
// s += 3
@@ -237,7 +260,7 @@ tagCopy2:
SHRQ $2, CX
INCQ CX
- // offset = int(src[s-2]) | int(src[s-1])<<8
+ // offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
MOVWQZX -2(SI), DX
JMP doCopy
@@ -247,7 +270,7 @@ tagCopy:
// - CX == src[s]
CMPQ BX, $2
JEQ tagCopy2
- JA errUC4T
+ JA tagCopy4
// case tagCopy1:
// s += 2
@@ -259,7 +282,7 @@ tagCopy:
CMPQ BX, R12
JA errCorrupt
- // offset = int(src[s-2])&0xe0<<3 | int(src[s-1])
+ // offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
MOVQ CX, DX
ANDQ $0xe0, DX
SHLQ $3, DX
@@ -465,8 +488,3 @@ errCorrupt:
// return decodeErrCodeCorrupt
MOVQ $1, ret+48(FP)
RET
-
-errUC4T:
- // return decodeErrCodeUnsupportedCopy4Tag
- MOVQ $3, ret+48(FP)
- RET
diff --git a/vendor/github.com/golang/snappy/decode_other.go b/vendor/github.com/golang/snappy/decode_other.go
index 1a8114ab1f..8c9f2049bc 100644
--- a/vendor/github.com/golang/snappy/decode_other.go
+++ b/vendor/github.com/golang/snappy/decode_other.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build !amd64
+// +build !amd64 appengine !gc noasm
package snappy
@@ -63,7 +63,7 @@ func decode(dst, src []byte) int {
return decodeErrCodeCorrupt
}
length = 4 + int(src[s-2])>>2&0x7
- offset = int(src[s-2])&0xe0<<3 | int(src[s-1])
+ offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
case tagCopy2:
s += 3
@@ -71,10 +71,15 @@ func decode(dst, src []byte) int {
return decodeErrCodeCorrupt
}
length = 1 + int(src[s-3])>>2
- offset = int(src[s-2]) | int(src[s-1])<<8
+ offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
case tagCopy4:
- return decodeErrCodeUnsupportedCopy4Tag
+ s += 5
+ if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
+ return decodeErrCodeCorrupt
+ }
+ length = 1 + int(src[s-5])>>2
+ offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
}
if offset <= 0 || d < offset || length > len(dst)-d {
diff --git a/vendor/github.com/golang/snappy/encode.go b/vendor/github.com/golang/snappy/encode.go
index 38ebe952e0..8d393e904b 100644
--- a/vendor/github.com/golang/snappy/encode.go
+++ b/vendor/github.com/golang/snappy/encode.go
@@ -10,78 +10,11 @@ import (
"io"
)
-// maxOffset limits how far copy back-references can go, the same as the C++
-// code.
-const maxOffset = 1 << 15
-
-// emitLiteral writes a literal chunk and returns the number of bytes written.
-func emitLiteral(dst, lit []byte) int {
- i, n := 0, uint(len(lit)-1)
- switch {
- case n < 60:
- dst[0] = uint8(n)<<2 | tagLiteral
- i = 1
- case n < 1<<8:
- dst[0] = 60<<2 | tagLiteral
- dst[1] = uint8(n)
- i = 2
- case n < 1<<16:
- dst[0] = 61<<2 | tagLiteral
- dst[1] = uint8(n)
- dst[2] = uint8(n >> 8)
- i = 3
- case n < 1<<24:
- dst[0] = 62<<2 | tagLiteral
- dst[1] = uint8(n)
- dst[2] = uint8(n >> 8)
- dst[3] = uint8(n >> 16)
- i = 4
- case int64(n) < 1<<32:
- dst[0] = 63<<2 | tagLiteral
- dst[1] = uint8(n)
- dst[2] = uint8(n >> 8)
- dst[3] = uint8(n >> 16)
- dst[4] = uint8(n >> 24)
- i = 5
- default:
- panic("snappy: source buffer is too long")
- }
- if copy(dst[i:], lit) != len(lit) {
- panic("snappy: destination buffer is too short")
- }
- return i + len(lit)
-}
-
-// emitCopy writes a copy chunk and returns the number of bytes written.
-func emitCopy(dst []byte, offset, length int32) int {
- i := 0
- for length > 0 {
- x := length - 4
- if 0 <= x && x < 1<<3 && offset < 1<<11 {
- dst[i+0] = uint8(offset>>8)&0x07<<5 | uint8(x)<<2 | tagCopy1
- dst[i+1] = uint8(offset)
- i += 2
- break
- }
-
- x = length
- if x > 1<<6 {
- x = 1 << 6
- }
- dst[i+0] = uint8(x-1)<<2 | tagCopy2
- dst[i+1] = uint8(offset)
- dst[i+2] = uint8(offset >> 8)
- i += 3
- length -= x
- }
- return i
-}
-
// Encode returns the encoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire encoded block.
// Otherwise, a newly allocated slice will be returned.
//
-// It is valid to pass a nil dst.
+// The dst and src must not overlap. It is valid to pass a nil dst.
func Encode(dst, src []byte) []byte {
if n := MaxEncodedLen(len(src)); n < 0 {
panic(ErrTooLarge)
@@ -98,94 +31,43 @@ func Encode(dst, src []byte) []byte {
if len(p) > maxBlockSize {
p, src = p[:maxBlockSize], p[maxBlockSize:]
}
- d += encodeBlock(dst[d:], p)
+ if len(p) < minNonLiteralBlockSize {
+ d += emitLiteral(dst[d:], p)
+ } else {
+ d += encodeBlock(dst[d:], p)
+ }
}
return dst[:d]
}
-// encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It
-// assumes that the varint-encoded length of the decompressed bytes has already
-// been written.
+// inputMargin is the minimum number of extra input bytes to keep, inside
+// encodeBlock's inner loop. On some architectures, this margin lets us
+// implement a fast path for emitLiteral, where the copy of short (<= 16 byte)
+// literals can be implemented as a single load to and store from a 16-byte
+// register. That literal's actual length can be as short as 1 byte, so this
+// can copy up to 15 bytes too much, but that's OK as subsequent iterations of
+// the encoding loop will fix up the copy overrun, and this inputMargin ensures
+// that we don't overrun the dst and src buffers.
+const inputMargin = 16 - 1
+
+// minNonLiteralBlockSize is the minimum size of the input to encodeBlock that
+// could be encoded with a copy tag. This is the minimum with respect to the
+// algorithm used by encodeBlock, not a minimum enforced by the file format.
//
-// It also assumes that:
-// len(dst) >= MaxEncodedLen(len(src)) &&
-// 0 < len(src) && len(src) <= maxBlockSize
-func encodeBlock(dst, src []byte) (d int) {
- // Return early if src is short.
- if len(src) <= 4 {
- return emitLiteral(dst, src)
- }
-
- // Initialize the hash table. Its size ranges from 1<<8 to 1<<14 inclusive.
- const maxTableSize = 1 << 14
- shift, tableSize := uint(32-8), 1<<8
- for tableSize < maxTableSize && tableSize < len(src) {
- shift--
- tableSize *= 2
- }
- var table [maxTableSize]int32
-
- // Iterate over the source bytes.
- var (
- s int32 // The iterator position.
- t int32 // The last position with the same hash as s.
- lit int32 // The start position of any pending literal bytes.
-
- // Copied from the C++ snappy implementation:
- //
- // Heuristic match skipping: If 32 bytes are scanned with no matches
- // found, start looking only at every other byte. If 32 more bytes are
- // scanned, look at every third byte, etc.. When a match is found,
- // immediately go back to looking at every byte. This is a small loss
- // (~5% performance, ~0.1% density) for compressible data due to more
- // bookkeeping, but for non-compressible data (such as JPEG) it's a
- // huge win since the compressor quickly "realizes" the data is
- // incompressible and doesn't bother looking for matches everywhere.
- //
- // The "skip" variable keeps track of how many bytes there are since
- // the last match; dividing it by 32 (ie. right-shifting by five) gives
- // the number of bytes to move ahead for each iteration.
- skip uint32 = 32
- )
- for uint32(s+3) < uint32(len(src)) { // The uint32 conversions catch overflow from the +3.
- // Update the hash table.
- b0, b1, b2, b3 := src[s], src[s+1], src[s+2], src[s+3]
- h := uint32(b0) | uint32(b1)<<8 | uint32(b2)<<16 | uint32(b3)<<24
- p := &table[(h*0x1e35a7bd)>>shift]
- // We need to to store values in [-1, inf) in table. To save
- // some initialization time, (re)use the table's zero value
- // and shift the values against this zero: add 1 on writes,
- // subtract 1 on reads.
- t, *p = *p-1, s+1
- // If t is invalid or src[s:s+4] differs from src[t:t+4], accumulate a literal byte.
- if t < 0 || s-t >= maxOffset || b0 != src[t] || b1 != src[t+1] || b2 != src[t+2] || b3 != src[t+3] {
- s += int32(skip >> 5)
- skip++
- continue
- }
- skip = 32
- // Otherwise, we have a match. First, emit any pending literal bytes.
- if lit != s {
- d += emitLiteral(dst[d:], src[lit:s])
- }
- // Extend the match to be as long as possible.
- s0 := s
- s, t = s+4, t+4
- for int(s) < len(src) && src[s] == src[t] {
- s++
- t++
- }
- // Emit the copied bytes.
- d += emitCopy(dst[d:], s-t, s-s0)
- lit = s
- }
-
- // Emit any final pending literal bytes and return.
- if int(lit) != len(src) {
- d += emitLiteral(dst[d:], src[lit:])
- }
- return d
-}
+// The encoded output must start with at least a 1 byte literal, as there are
+// no previous bytes to copy. A minimal (1 byte) copy after that, generated
+// from an emitCopy call in encodeBlock's main loop, would require at least
+// another inputMargin bytes, for the reason above: we want any emitLiteral
+// calls inside encodeBlock's main loop to use the fast path if possible, which
+// requires being able to overrun by inputMargin bytes. Thus,
+// minNonLiteralBlockSize equals 1 + 1 + inputMargin.
+//
+// The C++ code doesn't use this exact threshold, but it could, as discussed at
+// https://groups.google.com/d/topic/snappy-compression/oGbhsdIJSJ8/discussion
+// The difference between Go (2+inputMargin) and C++ (inputMargin) is purely an
+// optimization. It should not affect the encoded form. This is tested by
+// TestSameEncodingAsCppShortCopies.
+const minNonLiteralBlockSize = 1 + 1 + inputMargin
// MaxEncodedLen returns the maximum length of a snappy block, given its
// uncompressed length.
@@ -256,7 +138,7 @@ func NewBufferedWriter(w io.Writer) *Writer {
}
}
-// Writer is an io.Writer than can write Snappy-compressed bytes.
+// Writer is an io.Writer that can write Snappy-compressed bytes.
type Writer struct {
w io.Writer
err error
diff --git a/vendor/github.com/golang/snappy/encode_amd64.go b/vendor/github.com/golang/snappy/encode_amd64.go
new file mode 100644
index 0000000000..150d91bc8b
--- /dev/null
+++ b/vendor/github.com/golang/snappy/encode_amd64.go
@@ -0,0 +1,29 @@
+// Copyright 2016 The Snappy-Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !appengine
+// +build gc
+// +build !noasm
+
+package snappy
+
+// emitLiteral has the same semantics as in encode_other.go.
+//
+//go:noescape
+func emitLiteral(dst, lit []byte) int
+
+// emitCopy has the same semantics as in encode_other.go.
+//
+//go:noescape
+func emitCopy(dst []byte, offset, length int) int
+
+// extendMatch has the same semantics as in encode_other.go.
+//
+//go:noescape
+func extendMatch(src []byte, i, j int) int
+
+// encodeBlock has the same semantics as in encode_other.go.
+//
+//go:noescape
+func encodeBlock(dst, src []byte) (d int)
diff --git a/vendor/github.com/golang/snappy/encode_amd64.s b/vendor/github.com/golang/snappy/encode_amd64.s
new file mode 100644
index 0000000000..adfd979fe2
--- /dev/null
+++ b/vendor/github.com/golang/snappy/encode_amd64.s
@@ -0,0 +1,730 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !appengine
+// +build gc
+// +build !noasm
+
+#include "textflag.h"
+
+// The XXX lines assemble on Go 1.4, 1.5 and 1.7, but not 1.6, due to a
+// Go toolchain regression. See https://github.com/golang/go/issues/15426 and
+// https://github.com/golang/snappy/issues/29
+//
+// As a workaround, the package was built with a known good assembler, and
+// those instructions were disassembled by "objdump -d" to yield the
+// 4e 0f b7 7c 5c 78 movzwq 0x78(%rsp,%r11,2),%r15
+// style comments, in AT&T asm syntax. Note that rsp here is a physical
+// register, not Go/asm's SP pseudo-register (see https://golang.org/doc/asm).
+// The instructions were then encoded as "BYTE $0x.." sequences, which assemble
+// fine on Go 1.6.
+
+// The asm code generally follows the pure Go code in encode_other.go, except
+// where marked with a "!!!".
+
+// ----------------------------------------------------------------------------
+
+// func emitLiteral(dst, lit []byte) int
+//
+// All local variables fit into registers. The register allocation:
+// - AX len(lit)
+// - BX n
+// - DX return value
+// - DI &dst[i]
+// - R10 &lit[0]
+//
+// The 24 bytes of stack space is to call runtime·memmove.
+//
+// The unusual register allocation of local variables, such as R10 for the
+// source pointer, matches the allocation used at the call site in encodeBlock,
+// which makes it easier to manually inline this function.
+TEXT ·emitLiteral(SB), NOSPLIT, $24-56
+ MOVQ dst_base+0(FP), DI
+ MOVQ lit_base+24(FP), R10
+ MOVQ lit_len+32(FP), AX
+ MOVQ AX, DX
+ MOVL AX, BX
+ SUBL $1, BX
+
+ CMPL BX, $60
+ JLT oneByte
+ CMPL BX, $256
+ JLT twoBytes
+
+threeBytes:
+ MOVB $0xf4, 0(DI)
+ MOVW BX, 1(DI)
+ ADDQ $3, DI
+ ADDQ $3, DX
+ JMP memmove
+
+twoBytes:
+ MOVB $0xf0, 0(DI)
+ MOVB BX, 1(DI)
+ ADDQ $2, DI
+ ADDQ $2, DX
+ JMP memmove
+
+oneByte:
+ SHLB $2, BX
+ MOVB BX, 0(DI)
+ ADDQ $1, DI
+ ADDQ $1, DX
+
+memmove:
+ MOVQ DX, ret+48(FP)
+
+ // copy(dst[i:], lit)
+ //
+ // This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push
+ // DI, R10 and AX as arguments.
+ MOVQ DI, 0(SP)
+ MOVQ R10, 8(SP)
+ MOVQ AX, 16(SP)
+ CALL runtime·memmove(SB)
+ RET
+
+// ----------------------------------------------------------------------------
+
+// func emitCopy(dst []byte, offset, length int) int
+//
+// All local variables fit into registers. The register allocation:
+// - AX length
+// - SI &dst[0]
+// - DI &dst[i]
+// - R11 offset
+//
+// The unusual register allocation of local variables, such as R11 for the
+// offset, matches the allocation used at the call site in encodeBlock, which
+// makes it easier to manually inline this function.
+TEXT ·emitCopy(SB), NOSPLIT, $0-48
+ MOVQ dst_base+0(FP), DI
+ MOVQ DI, SI
+ MOVQ offset+24(FP), R11
+ MOVQ length+32(FP), AX
+
+loop0:
+ // for length >= 68 { etc }
+ CMPL AX, $68
+ JLT step1
+
+ // Emit a length 64 copy, encoded as 3 bytes.
+ MOVB $0xfe, 0(DI)
+ MOVW R11, 1(DI)
+ ADDQ $3, DI
+ SUBL $64, AX
+ JMP loop0
+
+step1:
+ // if length > 64 { etc }
+ CMPL AX, $64
+ JLE step2
+
+ // Emit a length 60 copy, encoded as 3 bytes.
+ MOVB $0xee, 0(DI)
+ MOVW R11, 1(DI)
+ ADDQ $3, DI
+ SUBL $60, AX
+
+step2:
+ // if length >= 12 || offset >= 2048 { goto step3 }
+ CMPL AX, $12
+ JGE step3
+ CMPL R11, $2048
+ JGE step3
+
+ // Emit the remaining copy, encoded as 2 bytes.
+ MOVB R11, 1(DI)
+ SHRL $8, R11
+ SHLB $5, R11
+ SUBB $4, AX
+ SHLB $2, AX
+ ORB AX, R11
+ ORB $1, R11
+ MOVB R11, 0(DI)
+ ADDQ $2, DI
+
+ // Return the number of bytes written.
+ SUBQ SI, DI
+ MOVQ DI, ret+40(FP)
+ RET
+
+step3:
+ // Emit the remaining copy, encoded as 3 bytes.
+ SUBL $1, AX
+ SHLB $2, AX
+ ORB $2, AX
+ MOVB AX, 0(DI)
+ MOVW R11, 1(DI)
+ ADDQ $3, DI
+
+ // Return the number of bytes written.
+ SUBQ SI, DI
+ MOVQ DI, ret+40(FP)
+ RET
+
+// ----------------------------------------------------------------------------
+
+// func extendMatch(src []byte, i, j int) int
+//
+// All local variables fit into registers. The register allocation:
+// - DX &src[0]
+// - SI &src[j]
+// - R13 &src[len(src) - 8]
+// - R14 &src[len(src)]
+// - R15 &src[i]
+//
+// The unusual register allocation of local variables, such as R15 for a source
+// pointer, matches the allocation used at the call site in encodeBlock, which
+// makes it easier to manually inline this function.
+TEXT ·extendMatch(SB), NOSPLIT, $0-48
+ MOVQ src_base+0(FP), DX
+ MOVQ src_len+8(FP), R14
+ MOVQ i+24(FP), R15
+ MOVQ j+32(FP), SI
+ ADDQ DX, R14
+ ADDQ DX, R15
+ ADDQ DX, SI
+ MOVQ R14, R13
+ SUBQ $8, R13
+
+cmp8:
+ // As long as we are 8 or more bytes before the end of src, we can load and
+ // compare 8 bytes at a time. If those 8 bytes are equal, repeat.
+ CMPQ SI, R13
+ JA cmp1
+ MOVQ (R15), AX
+ MOVQ (SI), BX
+ CMPQ AX, BX
+ JNE bsf
+ ADDQ $8, R15
+ ADDQ $8, SI
+ JMP cmp8
+
+bsf:
+ // If those 8 bytes were not equal, XOR the two 8 byte values, and return
+ // the index of the first byte that differs. The BSF instruction finds the
+ // least significant 1 bit, the amd64 architecture is little-endian, and
+ // the shift by 3 converts a bit index to a byte index.
+ XORQ AX, BX
+ BSFQ BX, BX
+ SHRQ $3, BX
+ ADDQ BX, SI
+
+ // Convert from &src[ret] to ret.
+ SUBQ DX, SI
+ MOVQ SI, ret+40(FP)
+ RET
+
+cmp1:
+ // In src's tail, compare 1 byte at a time.
+ CMPQ SI, R14
+ JAE extendMatchEnd
+ MOVB (R15), AX
+ MOVB (SI), BX
+ CMPB AX, BX
+ JNE extendMatchEnd
+ ADDQ $1, R15
+ ADDQ $1, SI
+ JMP cmp1
+
+extendMatchEnd:
+ // Convert from &src[ret] to ret.
+ SUBQ DX, SI
+ MOVQ SI, ret+40(FP)
+ RET
+
+// ----------------------------------------------------------------------------
+
+// func encodeBlock(dst, src []byte) (d int)
+//
+// All local variables fit into registers, other than "var table". The register
+// allocation:
+// - AX . .
+// - BX . .
+// - CX 56 shift (note that amd64 shifts by non-immediates must use CX).
+// - DX 64 &src[0], tableSize
+// - SI 72 &src[s]
+// - DI 80 &dst[d]
+// - R9 88 sLimit
+// - R10 . &src[nextEmit]
+// - R11 96 prevHash, currHash, nextHash, offset
+// - R12 104 &src[base], skip
+// - R13 . &src[nextS], &src[len(src) - 8]
+// - R14 . len(src), bytesBetweenHashLookups, &src[len(src)], x
+// - R15 112 candidate
+//
+// The second column (56, 64, etc) is the stack offset to spill the registers
+// when calling other functions. We could pack this slightly tighter, but it's
+// simpler to have a dedicated spill map independent of the function called.
+//
+// "var table [maxTableSize]uint16" takes up 32768 bytes of stack space. An
+// extra 56 bytes, to call other functions, and an extra 64 bytes, to spill
+// local variables (registers) during calls gives 32768 + 56 + 64 = 32888.
+TEXT ·encodeBlock(SB), 0, $32888-56
+ MOVQ dst_base+0(FP), DI
+ MOVQ src_base+24(FP), SI
+ MOVQ src_len+32(FP), R14
+
+ // shift, tableSize := uint32(32-8), 1<<8
+ MOVQ $24, CX
+ MOVQ $256, DX
+
+calcShift:
+ // for ; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 {
+ // shift--
+ // }
+ CMPQ DX, $16384
+ JGE varTable
+ CMPQ DX, R14
+ JGE varTable
+ SUBQ $1, CX
+ SHLQ $1, DX
+ JMP calcShift
+
+varTable:
+ // var table [maxTableSize]uint16
+ //
+ // In the asm code, unlike the Go code, we can zero-initialize only the
+ // first tableSize elements. Each uint16 element is 2 bytes and each MOVOU
+ // writes 16 bytes, so we can do only tableSize/8 writes instead of the
+ // 2048 writes that would zero-initialize all of table's 32768 bytes.
+ SHRQ $3, DX
+ LEAQ table-32768(SP), BX
+ PXOR X0, X0
+
+memclr:
+ MOVOU X0, 0(BX)
+ ADDQ $16, BX
+ SUBQ $1, DX
+ JNZ memclr
+
+ // !!! DX = &src[0]
+ MOVQ SI, DX
+
+ // sLimit := len(src) - inputMargin
+ MOVQ R14, R9
+ SUBQ $15, R9
+
+ // !!! Pre-emptively spill CX, DX and R9 to the stack. Their values don't
+ // change for the rest of the function.
+ MOVQ CX, 56(SP)
+ MOVQ DX, 64(SP)
+ MOVQ R9, 88(SP)
+
+ // nextEmit := 0
+ MOVQ DX, R10
+
+ // s := 1
+ ADDQ $1, SI
+
+ // nextHash := hash(load32(src, s), shift)
+ MOVL 0(SI), R11
+ IMULL $0x1e35a7bd, R11
+ SHRL CX, R11
+
+outer:
+ // for { etc }
+
+ // skip := 32
+ MOVQ $32, R12
+
+ // nextS := s
+ MOVQ SI, R13
+
+ // candidate := 0
+ MOVQ $0, R15
+
+inner0:
+ // for { etc }
+
+ // s := nextS
+ MOVQ R13, SI
+
+ // bytesBetweenHashLookups := skip >> 5
+ MOVQ R12, R14
+ SHRQ $5, R14
+
+ // nextS = s + bytesBetweenHashLookups
+ ADDQ R14, R13
+
+ // skip += bytesBetweenHashLookups
+ ADDQ R14, R12
+
+ // if nextS > sLimit { goto emitRemainder }
+ MOVQ R13, AX
+ SUBQ DX, AX
+ CMPQ AX, R9
+ JA emitRemainder
+
+ // candidate = int(table[nextHash])
+ // XXX: MOVWQZX table-32768(SP)(R11*2), R15
+ // XXX: 4e 0f b7 7c 5c 78 movzwq 0x78(%rsp,%r11,2),%r15
+ BYTE $0x4e
+ BYTE $0x0f
+ BYTE $0xb7
+ BYTE $0x7c
+ BYTE $0x5c
+ BYTE $0x78
+
+ // table[nextHash] = uint16(s)
+ MOVQ SI, AX
+ SUBQ DX, AX
+
+ // XXX: MOVW AX, table-32768(SP)(R11*2)
+ // XXX: 66 42 89 44 5c 78 mov %ax,0x78(%rsp,%r11,2)
+ BYTE $0x66
+ BYTE $0x42
+ BYTE $0x89
+ BYTE $0x44
+ BYTE $0x5c
+ BYTE $0x78
+
+ // nextHash = hash(load32(src, nextS), shift)
+ MOVL 0(R13), R11
+ IMULL $0x1e35a7bd, R11
+ SHRL CX, R11
+
+ // if load32(src, s) != load32(src, candidate) { continue } break
+ MOVL 0(SI), AX
+ MOVL (DX)(R15*1), BX
+ CMPL AX, BX
+ JNE inner0
+
+fourByteMatch:
+ // As per the encode_other.go code:
+ //
+ // A 4-byte match has been found. We'll later see etc.
+
+ // !!! Jump to a fast path for short (<= 16 byte) literals. See the comment
+ // on inputMargin in encode.go.
+ MOVQ SI, AX
+ SUBQ R10, AX
+ CMPQ AX, $16
+ JLE emitLiteralFastPath
+
+ // ----------------------------------------
+ // Begin inline of the emitLiteral call.
+ //
+ // d += emitLiteral(dst[d:], src[nextEmit:s])
+
+ MOVL AX, BX
+ SUBL $1, BX
+
+ CMPL BX, $60
+ JLT inlineEmitLiteralOneByte
+ CMPL BX, $256
+ JLT inlineEmitLiteralTwoBytes
+
+inlineEmitLiteralThreeBytes:
+ MOVB $0xf4, 0(DI)
+ MOVW BX, 1(DI)
+ ADDQ $3, DI
+ JMP inlineEmitLiteralMemmove
+
+inlineEmitLiteralTwoBytes:
+ MOVB $0xf0, 0(DI)
+ MOVB BX, 1(DI)
+ ADDQ $2, DI
+ JMP inlineEmitLiteralMemmove
+
+inlineEmitLiteralOneByte:
+ SHLB $2, BX
+ MOVB BX, 0(DI)
+ ADDQ $1, DI
+
+inlineEmitLiteralMemmove:
+ // Spill local variables (registers) onto the stack; call; unspill.
+ //
+ // copy(dst[i:], lit)
+ //
+ // This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push
+ // DI, R10 and AX as arguments.
+ MOVQ DI, 0(SP)
+ MOVQ R10, 8(SP)
+ MOVQ AX, 16(SP)
+ ADDQ AX, DI // Finish the "d +=" part of "d += emitLiteral(etc)".
+ MOVQ SI, 72(SP)
+ MOVQ DI, 80(SP)
+ MOVQ R15, 112(SP)
+ CALL runtime·memmove(SB)
+ MOVQ 56(SP), CX
+ MOVQ 64(SP), DX
+ MOVQ 72(SP), SI
+ MOVQ 80(SP), DI
+ MOVQ 88(SP), R9
+ MOVQ 112(SP), R15
+ JMP inner1
+
+inlineEmitLiteralEnd:
+ // End inline of the emitLiteral call.
+ // ----------------------------------------
+
+emitLiteralFastPath:
+ // !!! Emit the 1-byte encoding "uint8(len(lit)-1)<<2".
+ MOVB AX, BX
+ SUBB $1, BX
+ SHLB $2, BX
+ MOVB BX, (DI)
+ ADDQ $1, DI
+
+ // !!! Implement the copy from lit to dst as a 16-byte load and store.
+ // (Encode's documentation says that dst and src must not overlap.)
+ //
+ // This always copies 16 bytes, instead of only len(lit) bytes, but that's
+ // OK. Subsequent iterations will fix up the overrun.
+ //
+ // Note that on amd64, it is legal and cheap to issue unaligned 8-byte or
+ // 16-byte loads and stores. This technique probably wouldn't be as
+ // effective on architectures that are fussier about alignment.
+ MOVOU 0(R10), X0
+ MOVOU X0, 0(DI)
+ ADDQ AX, DI
+
+inner1:
+ // for { etc }
+
+ // base := s
+ MOVQ SI, R12
+
+ // !!! offset := base - candidate
+ MOVQ R12, R11
+ SUBQ R15, R11
+ SUBQ DX, R11
+
+ // ----------------------------------------
+ // Begin inline of the extendMatch call.
+ //
+ // s = extendMatch(src, candidate+4, s+4)
+
+ // !!! R14 = &src[len(src)]
+ MOVQ src_len+32(FP), R14
+ ADDQ DX, R14
+
+ // !!! R13 = &src[len(src) - 8]
+ MOVQ R14, R13
+ SUBQ $8, R13
+
+ // !!! R15 = &src[candidate + 4]
+ ADDQ $4, R15
+ ADDQ DX, R15
+
+ // !!! s += 4
+ ADDQ $4, SI
+
+inlineExtendMatchCmp8:
+ // As long as we are 8 or more bytes before the end of src, we can load and
+ // compare 8 bytes at a time. If those 8 bytes are equal, repeat.
+ CMPQ SI, R13
+ JA inlineExtendMatchCmp1
+ MOVQ (R15), AX
+ MOVQ (SI), BX
+ CMPQ AX, BX
+ JNE inlineExtendMatchBSF
+ ADDQ $8, R15
+ ADDQ $8, SI
+ JMP inlineExtendMatchCmp8
+
+inlineExtendMatchBSF:
+ // If those 8 bytes were not equal, XOR the two 8 byte values, and return
+ // the index of the first byte that differs. The BSF instruction finds the
+ // least significant 1 bit, the amd64 architecture is little-endian, and
+ // the shift by 3 converts a bit index to a byte index.
+ XORQ AX, BX
+ BSFQ BX, BX
+ SHRQ $3, BX
+ ADDQ BX, SI
+ JMP inlineExtendMatchEnd
+
+inlineExtendMatchCmp1:
+ // In src's tail, compare 1 byte at a time.
+ CMPQ SI, R14
+ JAE inlineExtendMatchEnd
+ MOVB (R15), AX
+ MOVB (SI), BX
+ CMPB AX, BX
+ JNE inlineExtendMatchEnd
+ ADDQ $1, R15
+ ADDQ $1, SI
+ JMP inlineExtendMatchCmp1
+
+inlineExtendMatchEnd:
+ // End inline of the extendMatch call.
+ // ----------------------------------------
+
+ // ----------------------------------------
+ // Begin inline of the emitCopy call.
+ //
+ // d += emitCopy(dst[d:], base-candidate, s-base)
+
+ // !!! length := s - base
+ MOVQ SI, AX
+ SUBQ R12, AX
+
+inlineEmitCopyLoop0:
+ // for length >= 68 { etc }
+ CMPL AX, $68
+ JLT inlineEmitCopyStep1
+
+ // Emit a length 64 copy, encoded as 3 bytes.
+ MOVB $0xfe, 0(DI)
+ MOVW R11, 1(DI)
+ ADDQ $3, DI
+ SUBL $64, AX
+ JMP inlineEmitCopyLoop0
+
+inlineEmitCopyStep1:
+ // if length > 64 { etc }
+ CMPL AX, $64
+ JLE inlineEmitCopyStep2
+
+ // Emit a length 60 copy, encoded as 3 bytes.
+ MOVB $0xee, 0(DI)
+ MOVW R11, 1(DI)
+ ADDQ $3, DI
+ SUBL $60, AX
+
+inlineEmitCopyStep2:
+ // if length >= 12 || offset >= 2048 { goto inlineEmitCopyStep3 }
+ CMPL AX, $12
+ JGE inlineEmitCopyStep3
+ CMPL R11, $2048
+ JGE inlineEmitCopyStep3
+
+ // Emit the remaining copy, encoded as 2 bytes.
+ MOVB R11, 1(DI)
+ SHRL $8, R11
+ SHLB $5, R11
+ SUBB $4, AX
+ SHLB $2, AX
+ ORB AX, R11
+ ORB $1, R11
+ MOVB R11, 0(DI)
+ ADDQ $2, DI
+ JMP inlineEmitCopyEnd
+
+inlineEmitCopyStep3:
+ // Emit the remaining copy, encoded as 3 bytes.
+ SUBL $1, AX
+ SHLB $2, AX
+ ORB $2, AX
+ MOVB AX, 0(DI)
+ MOVW R11, 1(DI)
+ ADDQ $3, DI
+
+inlineEmitCopyEnd:
+ // End inline of the emitCopy call.
+ // ----------------------------------------
+
+ // nextEmit = s
+ MOVQ SI, R10
+
+ // if s >= sLimit { goto emitRemainder }
+ MOVQ SI, AX
+ SUBQ DX, AX
+ CMPQ AX, R9
+ JAE emitRemainder
+
+ // As per the encode_other.go code:
+ //
+ // We could immediately etc.
+
+ // x := load64(src, s-1)
+ MOVQ -1(SI), R14
+
+ // prevHash := hash(uint32(x>>0), shift)
+ MOVL R14, R11
+ IMULL $0x1e35a7bd, R11
+ SHRL CX, R11
+
+ // table[prevHash] = uint16(s-1)
+ MOVQ SI, AX
+ SUBQ DX, AX
+ SUBQ $1, AX
+
+ // XXX: MOVW AX, table-32768(SP)(R11*2)
+ // XXX: 66 42 89 44 5c 78 mov %ax,0x78(%rsp,%r11,2)
+ BYTE $0x66
+ BYTE $0x42
+ BYTE $0x89
+ BYTE $0x44
+ BYTE $0x5c
+ BYTE $0x78
+
+ // currHash := hash(uint32(x>>8), shift)
+ SHRQ $8, R14
+ MOVL R14, R11
+ IMULL $0x1e35a7bd, R11
+ SHRL CX, R11
+
+ // candidate = int(table[currHash])
+ // XXX: MOVWQZX table-32768(SP)(R11*2), R15
+ // XXX: 4e 0f b7 7c 5c 78 movzwq 0x78(%rsp,%r11,2),%r15
+ BYTE $0x4e
+ BYTE $0x0f
+ BYTE $0xb7
+ BYTE $0x7c
+ BYTE $0x5c
+ BYTE $0x78
+
+ // table[currHash] = uint16(s)
+ ADDQ $1, AX
+
+ // XXX: MOVW AX, table-32768(SP)(R11*2)
+ // XXX: 66 42 89 44 5c 78 mov %ax,0x78(%rsp,%r11,2)
+ BYTE $0x66
+ BYTE $0x42
+ BYTE $0x89
+ BYTE $0x44
+ BYTE $0x5c
+ BYTE $0x78
+
+ // if uint32(x>>8) == load32(src, candidate) { continue }
+ MOVL (DX)(R15*1), BX
+ CMPL R14, BX
+ JEQ inner1
+
+ // nextHash = hash(uint32(x>>16), shift)
+ SHRQ $8, R14
+ MOVL R14, R11
+ IMULL $0x1e35a7bd, R11
+ SHRL CX, R11
+
+ // s++
+ ADDQ $1, SI
+
+ // break out of the inner1 for loop, i.e. continue the outer loop.
+ JMP outer
+
+emitRemainder:
+ // if nextEmit < len(src) { etc }
+ MOVQ src_len+32(FP), AX
+ ADDQ DX, AX
+ CMPQ R10, AX
+ JEQ encodeBlockEnd
+
+ // d += emitLiteral(dst[d:], src[nextEmit:])
+ //
+ // Push args.
+ MOVQ DI, 0(SP)
+ MOVQ $0, 8(SP) // Unnecessary, as the callee ignores it, but conservative.
+ MOVQ $0, 16(SP) // Unnecessary, as the callee ignores it, but conservative.
+ MOVQ R10, 24(SP)
+ SUBQ R10, AX
+ MOVQ AX, 32(SP)
+ MOVQ AX, 40(SP) // Unnecessary, as the callee ignores it, but conservative.
+
+ // Spill local variables (registers) onto the stack; call; unspill.
+ MOVQ DI, 80(SP)
+ CALL ·emitLiteral(SB)
+ MOVQ 80(SP), DI
+
+ // Finish the "d +=" part of "d += emitLiteral(etc)".
+ ADDQ 48(SP), DI
+
+encodeBlockEnd:
+ MOVQ dst_base+0(FP), AX
+ SUBQ AX, DI
+ MOVQ DI, d+48(FP)
+ RET
diff --git a/vendor/github.com/golang/snappy/encode_other.go b/vendor/github.com/golang/snappy/encode_other.go
new file mode 100644
index 0000000000..dbcae905e6
--- /dev/null
+++ b/vendor/github.com/golang/snappy/encode_other.go
@@ -0,0 +1,238 @@
+// Copyright 2016 The Snappy-Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !amd64 appengine !gc noasm
+
+package snappy
+
+func load32(b []byte, i int) uint32 {
+ b = b[i : i+4 : len(b)] // Help the compiler eliminate bounds checks on the next line.
+ return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
+}
+
+func load64(b []byte, i int) uint64 {
+ b = b[i : i+8 : len(b)] // Help the compiler eliminate bounds checks on the next line.
+ return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
+ uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
+}
+
+// emitLiteral writes a literal chunk and returns the number of bytes written.
+//
+// It assumes that:
+// dst is long enough to hold the encoded bytes
+// 1 <= len(lit) && len(lit) <= 65536
+func emitLiteral(dst, lit []byte) int {
+ i, n := 0, uint(len(lit)-1)
+ switch {
+ case n < 60:
+ dst[0] = uint8(n)<<2 | tagLiteral
+ i = 1
+ case n < 1<<8:
+ dst[0] = 60<<2 | tagLiteral
+ dst[1] = uint8(n)
+ i = 2
+ default:
+ dst[0] = 61<<2 | tagLiteral
+ dst[1] = uint8(n)
+ dst[2] = uint8(n >> 8)
+ i = 3
+ }
+ return i + copy(dst[i:], lit)
+}
+
+// emitCopy writes a copy chunk and returns the number of bytes written.
+//
+// It assumes that:
+// dst is long enough to hold the encoded bytes
+// 1 <= offset && offset <= 65535
+// 4 <= length && length <= 65535
+func emitCopy(dst []byte, offset, length int) int {
+ i := 0
+ // The maximum length for a single tagCopy1 or tagCopy2 op is 64 bytes. The
+ // threshold for this loop is a little higher (at 68 = 64 + 4), and the
+ // length emitted down below is is a little lower (at 60 = 64 - 4), because
+ // it's shorter to encode a length 67 copy as a length 60 tagCopy2 followed
+ // by a length 7 tagCopy1 (which encodes as 3+2 bytes) than to encode it as
+ // a length 64 tagCopy2 followed by a length 3 tagCopy2 (which encodes as
+ // 3+3 bytes). The magic 4 in the 64±4 is because the minimum length for a
+ // tagCopy1 op is 4 bytes, which is why a length 3 copy has to be an
+ // encodes-as-3-bytes tagCopy2 instead of an encodes-as-2-bytes tagCopy1.
+ for length >= 68 {
+ // Emit a length 64 copy, encoded as 3 bytes.
+ dst[i+0] = 63<<2 | tagCopy2
+ dst[i+1] = uint8(offset)
+ dst[i+2] = uint8(offset >> 8)
+ i += 3
+ length -= 64
+ }
+ if length > 64 {
+ // Emit a length 60 copy, encoded as 3 bytes.
+ dst[i+0] = 59<<2 | tagCopy2
+ dst[i+1] = uint8(offset)
+ dst[i+2] = uint8(offset >> 8)
+ i += 3
+ length -= 60
+ }
+ if length >= 12 || offset >= 2048 {
+ // Emit the remaining copy, encoded as 3 bytes.
+ dst[i+0] = uint8(length-1)<<2 | tagCopy2
+ dst[i+1] = uint8(offset)
+ dst[i+2] = uint8(offset >> 8)
+ return i + 3
+ }
+ // Emit the remaining copy, encoded as 2 bytes.
+ dst[i+0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
+ dst[i+1] = uint8(offset)
+ return i + 2
+}
+
+// extendMatch returns the largest k such that k <= len(src) and that
+// src[i:i+k-j] and src[j:k] have the same contents.
+//
+// It assumes that:
+// 0 <= i && i < j && j <= len(src)
+func extendMatch(src []byte, i, j int) int {
+ for ; j < len(src) && src[i] == src[j]; i, j = i+1, j+1 {
+ }
+ return j
+}
+
+func hash(u, shift uint32) uint32 {
+ return (u * 0x1e35a7bd) >> shift
+}
+
+// encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It
+// assumes that the varint-encoded length of the decompressed bytes has already
+// been written.
+//
+// It also assumes that:
+// len(dst) >= MaxEncodedLen(len(src)) &&
+// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
+func encodeBlock(dst, src []byte) (d int) {
+ // Initialize the hash table. Its size ranges from 1<<8 to 1<<14 inclusive.
+ // The table element type is uint16, as s < sLimit and sLimit < len(src)
+ // and len(src) <= maxBlockSize and maxBlockSize == 65536.
+ const (
+ maxTableSize = 1 << 14
+ // tableMask is redundant, but helps the compiler eliminate bounds
+ // checks.
+ tableMask = maxTableSize - 1
+ )
+ shift := uint32(32 - 8)
+ for tableSize := 1 << 8; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 {
+ shift--
+ }
+ // In Go, all array elements are zero-initialized, so there is no advantage
+ // to a smaller tableSize per se. However, it matches the C++ algorithm,
+ // and in the asm versions of this code, we can get away with zeroing only
+ // the first tableSize elements.
+ var table [maxTableSize]uint16
+
+ // sLimit is when to stop looking for offset/length copies. The inputMargin
+ // lets us use a fast path for emitLiteral in the main loop, while we are
+ // looking for copies.
+ sLimit := len(src) - inputMargin
+
+ // nextEmit is where in src the next emitLiteral should start from.
+ nextEmit := 0
+
+ // The encoded form must start with a literal, as there are no previous
+ // bytes to copy, so we start looking for hash matches at s == 1.
+ s := 1
+ nextHash := hash(load32(src, s), shift)
+
+ for {
+ // Copied from the C++ snappy implementation:
+ //
+ // Heuristic match skipping: If 32 bytes are scanned with no matches
+ // found, start looking only at every other byte. If 32 more bytes are
+ // scanned (or skipped), look at every third byte, etc.. When a match
+ // is found, immediately go back to looking at every byte. This is a
+ // small loss (~5% performance, ~0.1% density) for compressible data
+ // due to more bookkeeping, but for non-compressible data (such as
+ // JPEG) it's a huge win since the compressor quickly "realizes" the
+ // data is incompressible and doesn't bother looking for matches
+ // everywhere.
+ //
+ // The "skip" variable keeps track of how many bytes there are since
+ // the last match; dividing it by 32 (ie. right-shifting by five) gives
+ // the number of bytes to move ahead for each iteration.
+ skip := 32
+
+ nextS := s
+ candidate := 0
+ for {
+ s = nextS
+ bytesBetweenHashLookups := skip >> 5
+ nextS = s + bytesBetweenHashLookups
+ skip += bytesBetweenHashLookups
+ if nextS > sLimit {
+ goto emitRemainder
+ }
+ candidate = int(table[nextHash&tableMask])
+ table[nextHash&tableMask] = uint16(s)
+ nextHash = hash(load32(src, nextS), shift)
+ if load32(src, s) == load32(src, candidate) {
+ break
+ }
+ }
+
+ // A 4-byte match has been found. We'll later see if more than 4 bytes
+ // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
+ // them as literal bytes.
+ d += emitLiteral(dst[d:], src[nextEmit:s])
+
+ // Call emitCopy, and then see if another emitCopy could be our next
+ // move. Repeat until we find no match for the input immediately after
+ // what was consumed by the last emitCopy call.
+ //
+ // If we exit this loop normally then we need to call emitLiteral next,
+ // though we don't yet know how big the literal will be. We handle that
+ // by proceeding to the next iteration of the main loop. We also can
+ // exit this loop via goto if we get close to exhausting the input.
+ for {
+ // Invariant: we have a 4-byte match at s, and no need to emit any
+ // literal bytes prior to s.
+ base := s
+
+ // Extend the 4-byte match as long as possible.
+ //
+ // This is an inlined version of:
+ // s = extendMatch(src, candidate+4, s+4)
+ s += 4
+ for i := candidate + 4; s < len(src) && src[i] == src[s]; i, s = i+1, s+1 {
+ }
+
+ d += emitCopy(dst[d:], base-candidate, s-base)
+ nextEmit = s
+ if s >= sLimit {
+ goto emitRemainder
+ }
+
+ // We could immediately start working at s now, but to improve
+ // compression we first update the hash table at s-1 and at s. If
+ // another emitCopy is not our next move, also calculate nextHash
+ // at s+1. At least on GOARCH=amd64, these three hash calculations
+ // are faster as one load64 call (with some shifts) instead of
+ // three load32 calls.
+ x := load64(src, s-1)
+ prevHash := hash(uint32(x>>0), shift)
+ table[prevHash&tableMask] = uint16(s - 1)
+ currHash := hash(uint32(x>>8), shift)
+ candidate = int(table[currHash&tableMask])
+ table[currHash&tableMask] = uint16(s)
+ if uint32(x>>8) != load32(src, candidate) {
+ nextHash = hash(uint32(x>>16), shift)
+ s++
+ break
+ }
+ }
+ }
+
+emitRemainder:
+ if nextEmit < len(src) {
+ d += emitLiteral(dst[d:], src[nextEmit:])
+ }
+ return d
+}
diff --git a/vendor/github.com/golang/snappy/snappy.go b/vendor/github.com/golang/snappy/snappy.go
index 010254265f..ece692ea46 100644
--- a/vendor/github.com/golang/snappy/snappy.go
+++ b/vendor/github.com/golang/snappy/snappy.go
@@ -2,10 +2,21 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// Package snappy implements the snappy block-based compression format.
-// It aims for very high speeds and reasonable compression.
+// Package snappy implements the Snappy compression format. It aims for very
+// high speeds and reasonable compression.
//
-// The C++ snappy implementation is at https://github.com/google/snappy
+// There are actually two Snappy formats: block and stream. They are related,
+// but different: trying to decompress block-compressed data as a Snappy stream
+// will fail, and vice versa. The block format is the Decode and Encode
+// functions and the stream format is the Reader and Writer types.
+//
+// The block format, the more common case, is used when the complete size (the
+// number of bytes) of the original data is known upfront, at the time
+// compression starts. The stream format, also known as the framing format, is
+// for when that isn't always true.
+//
+// The canonical, C++ implementation is at https://github.com/google/snappy and
+// it only implements the block format.
package snappy // import "github.com/golang/snappy"
import (
@@ -32,7 +43,10 @@ Lempel-Ziv compression algorithms. In particular:
- For l == 2, the offset ranges in [0, 1<<16) and the length in [1, 65).
The length is 1 + m. The offset is the little-endian unsigned integer
denoted by the next 2 bytes.
- - For l == 3, this tag is a legacy format that is no longer supported.
+ - For l == 3, this tag is a legacy format that is no longer issued by most
+ encoders. Nonetheless, the offset ranges in [0, 1<<32) and the length in
+ [1, 65). The length is 1 + m. The offset is the little-endian unsigned
+ integer denoted by the next 4 bytes.
*/
const (
tagLiteral = 0x00