aboutsummaryrefslogtreecommitdiffstats
path: root/vendor/github.com/glycerine/go-unsnap-stream/unsnap.go
diff options
context:
space:
mode:
authorAntoine GIRARD <sapk@users.noreply.github.com>2018-05-19 14:49:46 +0200
committerLunny Xiao <xiaolunwen@gmail.com>2018-05-19 20:49:46 +0800
commit917b9641eca3fa1b1676ba1b4fd77a4e958ee153 (patch)
tree2caf049dfebccf5ccbc44316630a6c9220062d78 /vendor/github.com/glycerine/go-unsnap-stream/unsnap.go
parent1b7cd3d0b0d3652e0660489b9c4da72619400c98 (diff)
downloadgitea-917b9641eca3fa1b1676ba1b4fd77a4e958ee153.tar.gz
gitea-917b9641eca3fa1b1676ba1b4fd77a4e958ee153.zip
Update to last common bleve (#3986)
Diffstat (limited to 'vendor/github.com/glycerine/go-unsnap-stream/unsnap.go')
-rw-r--r--vendor/github.com/glycerine/go-unsnap-stream/unsnap.go513
1 files changed, 513 insertions, 0 deletions
diff --git a/vendor/github.com/glycerine/go-unsnap-stream/unsnap.go b/vendor/github.com/glycerine/go-unsnap-stream/unsnap.go
new file mode 100644
index 0000000000..8789445c9a
--- /dev/null
+++ b/vendor/github.com/glycerine/go-unsnap-stream/unsnap.go
@@ -0,0 +1,513 @@
+package unsnap
+
+import (
+ "bytes"
+ "encoding/binary"
+ "fmt"
+ "io"
+ "io/ioutil"
+ "os"
+
+ "hash/crc32"
+
+ snappy "github.com/golang/snappy"
+ // The C library can be used, but this makes the binary dependent on
+ // lots of extraneous C libraries; it is no longer stand-alone. Yuck.
+ //
+ // Therefore we comment out the "dgryski/go-csnappy" path and use the
+ // "github.com/golang/snappy" import above instead. If you are
+ // performance limited and can deal with distributing more libraries,
+ // then this is easy to swap.
+ //
+ // If you swap, note that some of the tests won't pass
+ // because snappy-go produces slightly different (but still
+ // conformant) encodings on some data. Here are bindings
+ // to the C-snappy:
+ // snappy "github.com/dgryski/go-csnappy"
+)
+
+// SnappyFile is a drop-in replacement/wrapper for an *os.File that does the
+// unsnappification online, as more data is read from it.
+type SnappyFile struct {
+	// Fname is the path given to Open or Create. NewReader and NewWriter
+	// leave it empty.
+	Fname string
+
+	Reader io.Reader // stream that Read pulls framed snappy bytes from
+	Writer io.Writer // underlying stream used in writing mode
+
+	// SnappyEncodeDecodeOff lets clients substitute a SnappyFile for an
+	// os.File and simply switch compression off when they don't want it:
+	// if true, Read passes straight through to Reader.
+	SnappyEncodeDecodeOff bool
+
+	EncBuf FixedSizeRingBuf // leftover bytes not yet returned, still encoded
+	DecBuf FixedSizeRingBuf // leftover bytes not yet returned, already decoded
+
+	// HeaderChunkWritten is bookkeeping for writing stream-framed snappy.
+	// NOTE(review): presumably set once the stream identifier chunk has been
+	// written; the write path is not part of this file, so confirm there.
+	HeaderChunkWritten bool
+
+	// Writing is a sanity check: one SnappyFile is only ever read from or
+	// only ever written to, because EncBuf and DecBuf are used differently
+	// in each mode. Read panics when this flag is set.
+	Writing bool
+}
+
+// total is a package-level running count of the decoded bytes handed back
+// by Read, summed over every SnappyFile.
+var total int
+
+// Dump is a debugging aid: it prints the length and the raw contents of the
+// encoded and decoded buffers to standard output.
+func (f *SnappyFile) Dump() {
+	encLen := len(f.EncBuf.Bytes())
+	encText := string(f.EncBuf.Bytes())
+	fmt.Printf("EncBuf has length %d and contents:\n%s\n", encLen, encText)
+
+	decLen := len(f.DecBuf.Bytes())
+	decText := string(f.DecBuf.Bytes())
+	fmt.Printf("DecBuf has length %d and contents:\n%s\n", decLen, decText)
+}
+
+// Read fills p with decoded bytes.
+//
+// With SnappyEncodeDecodeOff set it is a plain pass-through to Reader.
+// Otherwise it first hands out whatever is left in DecBuf; only when DecBuf
+// is empty does it call UnsnapOneFrame to decode more input, and then tries
+// DecBuf once more.
+//
+// It returns io.EOF only once both DecBuf and EncBuf are empty, and (0, nil)
+// when nothing could be delivered but encoded bytes are still pending.
+// It panics when called on a SnappyFile opened for writing, or when
+// UnsnapOneFrame reports an error other than io.EOF.
+func (f *SnappyFile) Read(p []byte) (n int, err error) {
+	if f.SnappyEncodeDecodeOff {
+		return f.Reader.Read(p)
+	}
+	if f.Writing {
+		panic("Reading on a write-only SnappyFile")
+	}
+
+	// Before decompressing anything new, drain what is already decoded.
+	if n, _ = f.DecBuf.Read(p); n > 0 {
+		total += n
+		return n, nil
+	}
+
+	// DecBuf was empty: decode another increment of the input.
+	_, _, err = UnsnapOneFrame(f.Reader, &f.EncBuf, &f.DecBuf, f.Fname)
+	if err != nil && err != io.EOF {
+		panic(err)
+	}
+
+	if n, _ = f.DecBuf.Read(p); n > 0 {
+		total += n
+		return n, nil
+	}
+
+	// Only now, with EncBuf empty as well, may io.EOF be reported; any
+	// earlier and encoded bytes would be left un-decoded.
+	if f.DecBuf.Readable == 0 && f.EncBuf.Readable == 0 {
+		return 0, io.EOF
+	}
+	return 0, nil
+}
+
+// Open opens the named file for reading and wraps it in a decoding
+// SnappyFile whose Fname is set to name. An error from os.Open is returned
+// unchanged together with a nil SnappyFile.
+func Open(name string) (file *SnappyFile, err error) {
+	handle, err := os.Open(name)
+	if err != nil {
+		return nil, err
+	}
+
+	// Snappy encoding can apparently grow beyond the original size, so
+	// NewReader makes its buffers big enough: 2 * CHUNK_MAX (65536).
+	file = NewReader(handle)
+	file.Fname = name
+	return file, nil
+}
+
+// NewReader returns a SnappyFile in reading mode that decodes the framed
+// snappy stream supplied by r. Both ring buffers hold 2 * CHUNK_MAX bytes.
+func NewReader(r io.Reader) *SnappyFile {
+	sf := new(SnappyFile)
+	sf.Reader = r
+	sf.EncBuf = *NewFixedSizeRingBuf(CHUNK_MAX * 2) // snappy-encoded bytes
+	sf.DecBuf = *NewFixedSizeRingBuf(CHUNK_MAX * 2) // snappy-decoded bytes
+	sf.Writing = false
+	return sf
+}
+
+func NewWriter(w io.Writer) *SnappyFile {
+ return &SnappyFile{
+ Writer: w,
+ EncBuf: *NewFixedSizeRingBuf(65536), // on writing: temp for testing compression
+ DecBuf: *NewFixedSizeRingBuf(65536 * 2), // on writing: final buffer of snappy framed and encoded bytes
+ Writing: true,
+ }
+}
+
+// Create creates (or truncates, per os.Create) the named file and wraps it
+// in a SnappyFile in writing mode whose Fname is set to name. An error from
+// os.Create is returned unchanged together with a nil SnappyFile.
+func Create(name string) (file *SnappyFile, err error) {
+	handle, err := os.Create(name)
+	if err != nil {
+		return nil, err
+	}
+
+	file = NewWriter(handle)
+	file.Fname = name
+	return file, nil
+}
+
+func (f *SnappyFile) Close() error {
+ if f.Writing {
+ wc, ok := f.Writer.(io.WriteCloser)
+ if ok {
+ return wc.Close()
+ }
+ return nil
+ }
+ rc, ok := f.Reader.(io.ReadCloser)
+ if ok {
+ return rc.Close()
+ }
+ return nil
+}
+
+// Sync calls Sync on the Writer when it is an *os.File and returns the
+// result. For any other kind of Writer (including none at all) it does
+// nothing and returns nil.
+func (f *SnappyFile) Sync() error {
+	if osFile, ok := f.Writer.(*os.File); ok {
+		return osFile.Sync()
+	}
+	return nil
+}
+
+// UnsnapOneFrame performs one increment of streaming decompression.
+//
+// It copies up to 65536 more bytes from r into encBuf (encBuf is still
+// encoded, thus the name), then walks the chunks currently held in encBuf,
+// appending their decoded payload to outDecodedBuf and advancing encBuf past
+// every chunk it consumes. The walk ends when encBuf has been used up or
+// once at least 65536 decoded bytes have been produced.
+//
+// fname is only used to label the panic raised when a chunk fails to decode.
+//
+// Results:
+//   - nEnc: the bytes copied from r plus the bytes consumed from encBuf.
+//   - nDec: the bytes appended to outDecodedBuf.
+//   - err: io.EOF when r delivered nothing and encBuf was already empty,
+//     nil in every other returning case.
+//
+// All other failures panic: a read error other than io.EOF, a 0xff chunk
+// without the stream magic, a payload snappy cannot decode, a short or failed
+// write into outDecodedBuf, a checksum mismatch on a compressed chunk, or an
+// unknown unskippable chunk type. The checksum of uncompressed chunks is not
+// verified. Chunk layout follows the snappy framing format:
+// http://code.google.com/p/snappy/source/browse/trunk/framing_format.txt
+func UnsnapOneFrame(r io.Reader, encBuf *FixedSizeRingBuf, outDecodedBuf *FixedSizeRingBuf, fname string) (nEnc int64, nDec int64, err error) {
+	const (
+		// stream identifier chunk: 4 header bytes followed by "sNaPpY"
+		snappyStreamHeaderSz = 10
+		// chunk header: 1 type byte and 3 little-endian length bytes
+		headerSz = 4
+		// masked checksum stored in front of each data payload
+		crc32Sz = 4
+		// flip to true to print chunk size alignment messages
+		verbose = false
+	)
+
+	// Top up encBuf with up to 65536 bytes, at least one snappy frame.
+	copied, err := io.CopyN(encBuf, r, 65536)
+	nEnc += copied
+	switch {
+	case err == nil:
+		// the full amount arrived; go decode it
+	case err != io.EOF:
+		panic(err)
+	case copied == 0 && encBuf.Readable == 0:
+		// no new input and nothing buffered: the stream is finished
+		return nEnc, nDec, io.EOF
+	default:
+		// input ended, but there are still buffered bytes to decode
+		err = nil
+	}
+
+	chunk := (*encBuf).Bytes()
+
+	// The chunk length is 3 bytes on the wire; sizeLE[3] stays zero so the
+	// array can be decoded as a little-endian uint32.
+	var sizeLE [4]byte
+
+	for nDec < 65536 && len(chunk) > 0 {
+		copy(sizeLE[:], chunk[1:4])
+		chunksz := binary.LittleEndian.Uint32(sizeLE[:])
+		kind := chunk[0]
+
+		switch {
+		case kind == 0xff: // stream identifier
+			if !bytes.Equal(chunk[:snappyStreamHeaderSz], []byte{0xff, 0x06, 0x00, 0x00, 0x73, 0x4e, 0x61, 0x50, 0x70, 0x59}) {
+				panic("file had chunk starting with 0xff but then no magic snappy streaming protocol bytes, aborting.")
+			}
+			chunk = chunk[snappyStreamHeaderSz:]
+			(*encBuf).Advance(snappyStreamHeaderSz)
+			nEnc += snappyStreamHeaderSz
+
+		case kind == 0x00: // compressed data
+			if verbose {
+				fmt.Fprintf(os.Stderr, "chunksz is %d while total bytes avail are: %d\n", int(chunksz), len(chunk)-4)
+			}
+
+			wantCRC := binary.LittleEndian.Uint32(chunk[headerSz:(headerSz + crc32Sz)])
+			payload := chunk[(headerSz + crc32Sz):(headerSz + chunksz)]
+
+			dec, decErr := snappy.Decode(nil, payload)
+			if decErr != nil {
+				// most likely a snappy frame that was truncated
+				panic(fmt.Sprintf("could not decode snappy stream: '%s' and len dec=%d and ok=%v\n", fname, len(dec), decErr))
+			}
+
+			// hand the decoded bytes to the output buffer
+			written, copyErr := io.Copy(outDecodedBuf, bytes.NewBuffer(dec))
+			if copyErr != nil {
+				panic(copyErr)
+			}
+			if written != int64(len(dec)) {
+				panic("could not write all bytes to outDecodedBuf")
+			}
+			nDec += written
+
+			// verify the crc32 rotated checksum of the decoded bytes
+			if gotCRC := masked_crc32c(dec); gotCRC != wantCRC {
+				panic(fmt.Sprintf("crc32 masked failiure. expected: %v but got: %v", wantCRC, gotCRC))
+			}
+
+			// move to the next chunk header
+			step := headerSz + int(chunksz)
+			chunk = chunk[step:]
+			(*encBuf).Advance(step)
+			nEnc += int64(step)
+
+		case kind == 0x01: // uncompressed data
+			raw := chunk[(headerSz + crc32Sz):(headerSz + int(chunksz))]
+			written, copyErr := io.Copy(outDecodedBuf, bytes.NewBuffer(raw))
+			if copyErr != nil {
+				panic(copyErr)
+			}
+			if written != int64(chunksz-crc32Sz) {
+				panic("could not write all bytes to stdout")
+			}
+			nDec += written
+
+			step := headerSz + int(chunksz)
+			chunk = chunk[step:]
+			(*encBuf).Advance(step)
+			nEnc += int64(step)
+
+		case kind >= 0x80 && kind <= 0xfe: // padding (0xfe) and reserved skippable chunks
+			step := headerSz + int(chunksz)
+			chunk = chunk[step:]
+			nEnc += int64(step)
+			(*encBuf).Advance(step)
+
+		default:
+			panic(fmt.Sprintf("unrecognized/unsupported chunk type %#v", kind))
+		}
+	}
+
+	return nEnc, nDec, err
+}
+
+// Unsnappy decodes a whole stream at once.
+//
+// It reads all of r, which must hold bytes in the snappy-streaming framed
+// format defined at
+// http://code.google.com/p/snappy/source/browse/trunk/framing_format.txt,
+// runs each data chunk through the snappy decoder (or copies it verbatim if
+// it is an uncompressed chunk) and writes the results back-to-back to w.
+// Padding and reserved skippable chunks are skipped. Chunk checksums are
+// not verified here.
+//
+// The returned error is always nil: every failure (read error, bad stream
+// magic, undecodable payload, failed or short write, unknown chunk type)
+// panics instead.
+func Unsnappy(r io.Reader, w io.Writer) (err error) {
+	const (
+		// stream identifier chunk: 4 header bytes followed by "sNaPpY"
+		snappyStreamHeaderSz = 10
+		// chunk header: 1 type byte and 3 little-endian length bytes
+		headerSz = 4
+		// masked checksum stored in front of each data payload
+		crc32Sz = 4
+		// flip to true to print chunk size alignment messages
+		verbose = false
+	)
+
+	b, err := ioutil.ReadAll(r)
+	if err != nil {
+		panic(err)
+	}
+
+	// The chunk length is 3 bytes on the wire; sizeLE[3] stays zero so the
+	// array can be decoded as a little-endian uint32.
+	var sizeLE [4]byte
+
+	for chunk := b[:]; len(chunk) > 0; {
+		copy(sizeLE[:], chunk[1:4])
+		chunksz := binary.LittleEndian.Uint32(sizeLE[:])
+		kind := chunk[0]
+
+		switch {
+		case kind == 0xff: // stream identifier
+			if !bytes.Equal(chunk[:snappyStreamHeaderSz], []byte{0xff, 0x06, 0x00, 0x00, 0x73, 0x4e, 0x61, 0x50, 0x70, 0x59}) {
+				panic("file had chunk starting with 0xff but then no magic snappy streaming protocol bytes, aborting.")
+			}
+			chunk = chunk[snappyStreamHeaderSz:]
+
+		case kind == 0x00: // compressed data
+			if verbose {
+				fmt.Fprintf(os.Stderr, "chunksz is %d while total bytes avail are: %d\n", int(chunksz), len(chunk)-4)
+			}
+
+			payload := chunk[(headerSz + crc32Sz):(headerSz + chunksz)]
+			dec, decErr := snappy.Decode(nil, payload)
+			if decErr != nil {
+				panic("could not decode snappy stream")
+			}
+
+			// spit out the decoded bytes
+			written, writeErr := w.Write(dec)
+			if writeErr != nil {
+				panic(writeErr)
+			}
+			if written != len(dec) {
+				panic("could not write all bytes to stdout")
+			}
+
+			// TODO: verify the crc32 rotated checksum?
+
+			// move to the next chunk header
+			chunk = chunk[(headerSz + int(chunksz)):]
+
+		case kind == 0x01: // uncompressed data
+			payload := chunk[(headerSz + crc32Sz):(headerSz + chunksz)]
+			written, writeErr := w.Write(payload)
+			if writeErr != nil {
+				panic(writeErr)
+			}
+			if written != int(chunksz-crc32Sz) {
+				panic("could not write all bytes to stdout")
+			}
+			chunk = chunk[(headerSz + int(chunksz)):]
+
+		case kind >= 0x80 && kind <= 0xfe: // padding (0xfe) and reserved skippable chunks
+			chunk = chunk[(headerSz + int(chunksz)):]
+
+		default:
+			panic(fmt.Sprintf("unrecognized/unsupported chunk type %#v", kind))
+		}
+	}
+
+	return nil
+}
+
+// SnappyStreamHeaderMagic is the stream identifier chunk that opens a framed
+// snappy stream: 0xff 0x06 0x00 0x00 followed by the ASCII bytes "sNaPpY".
+var SnappyStreamHeaderMagic = []byte{0xff, 0x06, 0x00, 0x00, 0x73, 0x4e, 0x61, 0x50, 0x70, 0x59}
+
+// Sizes and the identifier text of the framing format.
+const (
+	CHUNK_MAX                    = 65536
+	_STREAM_TO_STREAM_BLOCK_SIZE = CHUNK_MAX
+	_STREAM_IDENTIFIER           = `sNaPpY`
+)
+
+// Chunk type bytes. The reserved ranges are [inclusive, exclusive): the
+// first name of each pair is the lower bound, the second the upper bound.
+const (
+	_COMPRESSED_CHUNK      = 0x00
+	_UNCOMPRESSED_CHUNK    = 0x01
+	_IDENTIFIER_CHUNK      = 0xff
+	_RESERVED_UNSKIPPABLE0 = 0x02
+	_RESERVED_UNSKIPPABLE1 = 0x80
+	_RESERVED_SKIPPABLE0   = 0x80
+	_RESERVED_SKIPPABLE1   = 0xff
+)
+
+// _COMPRESSION_THRESHOLD is the minimum fraction of bytes (.125, i.e. 12.5%)
+// that compression must save for it to be enabled in automatic mode.
+const _COMPRESSION_THRESHOLD = .125
+
+// crctab is the CRC-32C (Castagnoli) lookup table. This is the correct table
+// for the framing format; it matches the crc32c.c code used by python.
+var crctab = crc32.MakeTable(crc32.Castagnoli)
+
+// masked_crc32c returns the masked checksum that the snappy framing format
+// stores with each data chunk: the CRC-32C of data rotated right by 15 bits,
+// plus the constant 0xa282ead8 (with 32-bit wrap-around). See
+// http://code.google.com/p/snappy/source/browse/trunk/framing_format.txt
+func masked_crc32c(data []byte) uint32 {
+	sum := crc32.Checksum(data, crctab)
+	rotated := sum>>15 | sum<<17
+	return rotated + 0xa282ead8
+}
+
+// ReadSnappyStreamCompressedFile opens the named snappy-stream-compressed
+// file, decodes it completely and returns the decoded bytes.
+//
+// The file is closed before the function returns. An error from opening the
+// file, or from reading it, is returned together with an empty slice.
+// Decoding problems detected further down (in Read and UnsnapOneFrame)
+// still surface as panics.
+func ReadSnappyStreamCompressedFile(filename string) ([]byte, error) {
+	snappyFile, err := Open(filename)
+	if err != nil {
+		return []byte{}, err
+	}
+	// The file was opened here, so release its descriptor here as well. The
+	// Close error is deliberately ignored: the file is only read, and a
+	// failure to close cannot invalidate the bytes already decoded.
+	defer snappyFile.Close()
+
+	var bb bytes.Buffer
+	// ReadFrom consumes until io.EOF and reports that case as a nil error,
+	// so any error seen here is a genuine read failure.
+	if _, err = bb.ReadFrom(snappyFile); err != nil {
+		return []byte{}, err
+	}
+
+	return bb.Bytes(), nil
+}