author    | Antoine GIRARD <sapk@users.noreply.github.com> | 2018-05-19 14:49:46 +0200
committer | Lunny Xiao <xiaolunwen@gmail.com>              | 2018-05-19 20:49:46 +0800
commit    | 917b9641eca3fa1b1676ba1b4fd77a4e958ee153 (patch)
tree      | 2caf049dfebccf5ccbc44316630a6c9220062d78 /vendor/github.com/glycerine/go-unsnap-stream/unsnap.go
parent    | 1b7cd3d0b0d3652e0660489b9c4da72619400c98 (diff)
download  | gitea-917b9641eca3fa1b1676ba1b4fd77a4e958ee153.tar.gz  gitea-917b9641eca3fa1b1676ba1b4fd77a4e958ee153.zip
Update to last common bleve (#3986)
Diffstat (limited to 'vendor/github.com/glycerine/go-unsnap-stream/unsnap.go')
-rw-r--r-- | vendor/github.com/glycerine/go-unsnap-stream/unsnap.go | 513
1 file changed, 513 insertions, 0 deletions
diff --git a/vendor/github.com/glycerine/go-unsnap-stream/unsnap.go b/vendor/github.com/glycerine/go-unsnap-stream/unsnap.go
new file mode 100644
index 0000000000..8789445c9a
--- /dev/null
+++ b/vendor/github.com/glycerine/go-unsnap-stream/unsnap.go
@@ -0,0 +1,513 @@
+package unsnap
+
+import (
+	"bytes"
+	"encoding/binary"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"os"
+
+	"hash/crc32"
+
+	snappy "github.com/golang/snappy"
+	// The C library can be used, but this makes the binary dependent on
+	// lots of extraneous c-libraries; it is no longer stand-alone. Yuck.
+	//
+	// Therefore we comment out the "dgryski/go-csnappy" path and use the
+	// "github.com/golang/snappy/snappy" above instead. If you are
+	// performance limited and can deal with distributing more libraries,
+	// then this is easy to swap.
+	//
+	// If you swap, note that some of the tests won't pass
+	// because snappy-go produces slightly different (but still
+	// conformant) encodings on some data. Here are bindings
+	// to the C-snappy:
+	// snappy "github.com/dgryski/go-csnappy"
+)
+
+// SnappyFile: create a drop-in-replacement/wrapper for an *os.File that handles doing the unsnappification online as more is read from it
+
+type SnappyFile struct {
+	Fname string
+
+	Reader io.Reader
+	Writer io.Writer
+
+	// allow clients to substitute us for an os.File and just switch
+	// off compression if they don't want it.
+	SnappyEncodeDecodeOff bool // if true, we bypass straight to Filep
+
+	EncBuf FixedSizeRingBuf // holds any extra that isn't yet returned, encoded
+	DecBuf FixedSizeRingBuf // holds any extra that isn't yet returned, decoded
+
+	// for writing to stream-framed snappy
+	HeaderChunkWritten bool
+
+	// Sanity check: we can only read, or only write, to one SnappyFile.
+	// EncBuf and DecBuf are used differently in each mode. Verify
+	// that we are consistent with this flag.
+	Writing bool
+}
+
+var total int
+
+// for debugging, show state of buffers
+func (f *SnappyFile) Dump() {
+	fmt.Printf("EncBuf has length %d and contents:\n%s\n", len(f.EncBuf.Bytes()), string(f.EncBuf.Bytes()))
+	fmt.Printf("DecBuf has length %d and contents:\n%s\n", len(f.DecBuf.Bytes()), string(f.DecBuf.Bytes()))
+}
+
+func (f *SnappyFile) Read(p []byte) (n int, err error) {
+
+	if f.SnappyEncodeDecodeOff {
+		return f.Reader.Read(p)
+	}
+
+	if f.Writing {
+		panic("Reading on a write-only SnappyFile")
+	}
+
+	// before we unencrypt more, try to drain the DecBuf first
+	n, _ = f.DecBuf.Read(p)
+	if n > 0 {
+		total += n
+		return n, nil
+	}
+
+	//nEncRead, nDecAdded, err := UnsnapOneFrame(f.Filep, &f.EncBuf, &f.DecBuf, f.Fname)
+	_, _, err = UnsnapOneFrame(f.Reader, &f.EncBuf, &f.DecBuf, f.Fname)
+	if err != nil && err != io.EOF {
+		panic(err)
+	}
+
+	n, _ = f.DecBuf.Read(p)
+
+	if n > 0 {
+		total += n
+		return n, nil
+	}
+	if f.DecBuf.Readable == 0 {
+		if f.DecBuf.Readable == 0 && f.EncBuf.Readable == 0 {
+			// only now (when EncBuf is empty) can we give io.EOF.
+			// Any earlier, and we leave stuff un-decoded!
+			return 0, io.EOF
+		}
+	}
+	return 0, nil
+}
+
+func Open(name string) (file *SnappyFile, err error) {
+	fp, err := os.Open(name)
+	if err != nil {
+		return nil, err
+	}
+	// encoding in snappy can apparently go beyond the original size, so
+	// we make our buffers big enough, 2*max snappy chunk => 2 * CHUNK_MAX(65536)
+
+	snap := NewReader(fp)
+	snap.Fname = name
+	return snap, nil
+}
+
+func NewReader(r io.Reader) *SnappyFile {
+	return &SnappyFile{
+		Reader:  r,
+		EncBuf:  *NewFixedSizeRingBuf(CHUNK_MAX * 2), // buffer of snappy encoded bytes
+		DecBuf:  *NewFixedSizeRingBuf(CHUNK_MAX * 2), // buffer of snappy decoded bytes
+		Writing: false,
+	}
+}
+
+func NewWriter(w io.Writer) *SnappyFile {
+	return &SnappyFile{
+		Writer:  w,
+		EncBuf:  *NewFixedSizeRingBuf(65536),     // on writing: temp for testing compression
+		DecBuf:  *NewFixedSizeRingBuf(65536 * 2), // on writing: final buffer of snappy framed and encoded bytes
+		Writing: true,
+	}
+}
+
+func Create(name string) (file *SnappyFile, err error) {
+	fp, err := os.Create(name)
+	if err != nil {
+		return nil, err
+	}
+	snap := NewWriter(fp)
+	snap.Fname = name
+	return snap, nil
+}
+
+func (f *SnappyFile) Close() error {
+	if f.Writing {
+		wc, ok := f.Writer.(io.WriteCloser)
+		if ok {
+			return wc.Close()
+		}
+		return nil
+	}
+	rc, ok := f.Reader.(io.ReadCloser)
+	if ok {
+		return rc.Close()
+	}
+	return nil
+}
+
+func (f *SnappyFile) Sync() error {
+	file, ok := f.Writer.(*os.File)
+	if ok {
+		return file.Sync()
+	}
+	return nil
+}
+
+// for an increment of a frame at a time:
+// read from r into encBuf (encBuf is still encoded, thus the name), and write unsnappified frames into outDecodedBuf
+// the returned n: number of bytes read from the encrypted encBuf
+func UnsnapOneFrame(r io.Reader, encBuf *FixedSizeRingBuf, outDecodedBuf *FixedSizeRingBuf, fname string) (nEnc int64, nDec int64, err error) {
+	// b, err := ioutil.ReadAll(r)
+	// if err != nil {
+	//	panic(err)
+	// }
+
+	nEnc = 0
+	nDec = 0
+
+	// read up to 65536 bytes from r into encBuf, at least a snappy frame
+	nread, err := io.CopyN(encBuf, r, 65536) // returns nwrotebytes, err
+	nEnc += nread
+	if err != nil {
+		if err == io.EOF {
+			if nread == 0 {
+				if encBuf.Readable == 0 {
+					return nEnc, nDec, io.EOF
+				}
+				// else we have bytes in encBuf, so decode them!
+				err = nil
+			} else {
+				// continue below, processing the nread bytes
+				err = nil
+			}
+		} else {
+			panic(err)
+		}
+	}
+
+	// flag for printing chunk size alignment messages
+	verbose := false
+
+	const snappyStreamHeaderSz = 10
+	const headerSz = 4
+	const crc32Sz = 4
+	// the magic 18 bytes accounts for the snappy streaming header and the first chunks size and checksum
+	// http://code.google.com/p/snappy/source/browse/trunk/framing_format.txt
+
+	chunk := (*encBuf).Bytes()
+
+	// however we exit, advance as
+	// defer func() { (*encBuf).Next(N) }()
+
+	// 65536 is the max size of a snappy framed chunk. See
+	// http://code.google.com/p/snappy/source/browse/trunk/framing_format.txt:91
+	// buf := make([]byte, 65536)
+
+	// fmt.Printf("read from file, b is len:%d with value: %#v\n", len(b), b)
+	// fmt.Printf("read from file, bcut is len:%d with value: %#v\n", len(bcut), bcut)
+
+	//fmt.Printf("raw bytes of chunksz are: %v\n", b[11:14])
+
+	fourbytes := make([]byte, 4)
+	chunkCount := 0
+
+	for nDec < 65536 {
+		if len(chunk) == 0 {
+			break
+		}
+		chunkCount++
+		fourbytes[3] = 0
+		copy(fourbytes, chunk[1:4])
+		chunksz := binary.LittleEndian.Uint32(fourbytes)
+		chunk_type := chunk[0]
+
+		switch true {
+		case chunk_type == 0xff:
+			{ // stream identifier
+
+				streamHeader := chunk[:snappyStreamHeaderSz]
+				if 0 != bytes.Compare(streamHeader, []byte{0xff, 0x06, 0x00, 0x00, 0x73, 0x4e, 0x61, 0x50, 0x70, 0x59}) {
+					panic("file had chunk starting with 0xff but then no magic snappy streaming protocol bytes, aborting.")
+				} else {
+					//fmt.Printf("got streaming snappy magic header just fine.\n")
+				}
+				chunk = chunk[snappyStreamHeaderSz:]
+				(*encBuf).Advance(snappyStreamHeaderSz)
+				nEnc += snappyStreamHeaderSz
+				continue
+			}
+		case chunk_type == 0x00:
+			{ // compressed data
+				if verbose {
+					fmt.Fprintf(os.Stderr, "chunksz is %d while total bytes avail are: %d\n", int(chunksz), len(chunk)-4)
+				}
+
+				crc := binary.LittleEndian.Uint32(chunk[headerSz:(headerSz + crc32Sz)])
+				section := chunk[(headerSz + crc32Sz):(headerSz + chunksz)]
+
+				dec, ok := snappy.Decode(nil, section)
+				if ok != nil {
+					// we've probably truncated a snappy frame at this point
+					// ok=snappy: corrupt input
+					// len(dec) == 0
+					//
+					panic(fmt.Sprintf("could not decode snappy stream: '%s' and len dec=%d and ok=%v\n", fname, len(dec), ok))
+
+					// get back to caller with what we've got so far
+					return nEnc, nDec, nil
+				}
+				// fmt.Printf("ok, b is %#v , %#v\n", ok, dec)
+
+				// spit out decoded text
+				// n, err := w.Write(dec)
+				//fmt.Printf("len(dec) = %d, outDecodedBuf.Readable=%d\n", len(dec), outDecodedBuf.Readable)
+				bnb := bytes.NewBuffer(dec)
+				n, err := io.Copy(outDecodedBuf, bnb)
+				if err != nil {
+					//fmt.Printf("got n=%d, err= %s ; when trying to io.Copy(outDecodedBuf: N=%d, Readable=%d)\n", n, err, outDecodedBuf.N, outDecodedBuf.Readable)
+					panic(err)
+				}
+				if n != int64(len(dec)) {
+					panic("could not write all bytes to outDecodedBuf")
+				}
+				nDec += n
+
+				// verify the crc32 rotated checksum
+				m32 := masked_crc32c(dec)
+				if m32 != crc {
+					panic(fmt.Sprintf("crc32 masked failiure. expected: %v but got: %v", crc, m32))
+				} else {
+					//fmt.Printf("\nchecksums match: %v == %v\n", crc, m32)
+				}
+
+				// move to next header
+				inc := (headerSz + int(chunksz))
+				chunk = chunk[inc:]
+				(*encBuf).Advance(inc)
+				nEnc += int64(inc)
+				continue
+			}
+		case chunk_type == 0x01:
+			{ // uncompressed data
+
+				//n, err := w.Write(chunk[(headerSz+crc32Sz):(headerSz + int(chunksz))])
+				n, err := io.Copy(outDecodedBuf, bytes.NewBuffer(chunk[(headerSz+crc32Sz):(headerSz+int(chunksz))]))
+				if verbose {
+					//fmt.Printf("debug: n=%d err=%v chunksz=%d outDecodedBuf='%v'\n", n, err, chunksz, outDecodedBuf)
+				}
+				if err != nil {
+					panic(err)
+				}
+				if n != int64(chunksz-crc32Sz) {
+					panic("could not write all bytes to stdout")
+				}
+				nDec += n
+
+				inc := (headerSz + int(chunksz))
+				chunk = chunk[inc:]
+				(*encBuf).Advance(inc)
+				nEnc += int64(inc)
+				continue
+			}
+		case chunk_type == 0xfe:
+			fallthrough // padding, just skip it
+		case chunk_type >= 0x80 && chunk_type <= 0xfd:
+			{ // Reserved skippable chunks
+				//fmt.Printf("\nin reserved skippable chunks, at nEnc=%v\n", nEnc)
+				inc := (headerSz + int(chunksz))
+				chunk = chunk[inc:]
+				nEnc += int64(inc)
+				(*encBuf).Advance(inc)
+				continue
+			}
+
+		default:
+			panic(fmt.Sprintf("unrecognized/unsupported chunk type %#v", chunk_type))
+		}
+
+	} // end for{}
+
+	return nEnc, nDec, err
+	//return int64(N), nil
+}
+
+// for whole file at once:
+//
+// receive on stdin a stream of bytes in the snappy-streaming framed
+// format, defined here: http://code.google.com/p/snappy/source/browse/trunk/framing_format.txt
+// Grab each frame, run it through the snappy decoder, and spit out
+// each frame all joined back-to-back on stdout.
+//
+func Unsnappy(r io.Reader, w io.Writer) (err error) {
+	b, err := ioutil.ReadAll(r)
+	if err != nil {
+		panic(err)
+	}
+
+	// flag for printing chunk size alignment messages
+	verbose := false
+
+	const snappyStreamHeaderSz = 10
+	const headerSz = 4
+	const crc32Sz = 4
+	// the magic 18 bytes accounts for the snappy streaming header and the first chunks size and checksum
+	// http://code.google.com/p/snappy/source/browse/trunk/framing_format.txt
+
+	chunk := b[:]
+
+	// 65536 is the max size of a snappy framed chunk. See
+	// http://code.google.com/p/snappy/source/browse/trunk/framing_format.txt:91
+	//buf := make([]byte, 65536)
+
+	// fmt.Printf("read from file, b is len:%d with value: %#v\n", len(b), b)
+	// fmt.Printf("read from file, bcut is len:%d with value: %#v\n", len(bcut), bcut)
+
+	//fmt.Printf("raw bytes of chunksz are: %v\n", b[11:14])
+
+	fourbytes := make([]byte, 4)
+	chunkCount := 0
+
+	for {
+		if len(chunk) == 0 {
+			break
+		}
+		chunkCount++
+		fourbytes[3] = 0
+		copy(fourbytes, chunk[1:4])
+		chunksz := binary.LittleEndian.Uint32(fourbytes)
+		chunk_type := chunk[0]
+
+		switch true {
+		case chunk_type == 0xff:
+			{ // stream identifier
+
+				streamHeader := chunk[:snappyStreamHeaderSz]
+				if 0 != bytes.Compare(streamHeader, []byte{0xff, 0x06, 0x00, 0x00, 0x73, 0x4e, 0x61, 0x50, 0x70, 0x59}) {
+					panic("file had chunk starting with 0xff but then no magic snappy streaming protocol bytes, aborting.")
+				} else {
+					//fmt.Printf("got streaming snappy magic header just fine.\n")
+				}
+				chunk = chunk[snappyStreamHeaderSz:]
+				continue
+			}
+		case chunk_type == 0x00:
+			{ // compressed data
+				if verbose {
+					fmt.Fprintf(os.Stderr, "chunksz is %d while total bytes avail are: %d\n", int(chunksz), len(chunk)-4)
+				}
+
+				//crc := binary.LittleEndian.Uint32(chunk[headerSz:(headerSz + crc32Sz)])
+				section := chunk[(headerSz + crc32Sz):(headerSz + chunksz)]
+
+				dec, ok := snappy.Decode(nil, section)
+				if ok != nil {
+					panic("could not decode snappy stream")
+				}
+				// fmt.Printf("ok, b is %#v , %#v\n", ok, dec)
+
+				// spit out decoded text
+				n, err := w.Write(dec)
+				if err != nil {
+					panic(err)
+				}
+				if n != len(dec) {
+					panic("could not write all bytes to stdout")
+				}
+
+				// TODO: verify the crc32 rotated checksum?
+
+				// move to next header
+				chunk = chunk[(headerSz + int(chunksz)):]
+				continue
+			}
+		case chunk_type == 0x01:
+			{ // uncompressed data
+
+				//crc := binary.LittleEndian.Uint32(chunk[headerSz:(headerSz + crc32Sz)])
+				section := chunk[(headerSz + crc32Sz):(headerSz + chunksz)]
+
+				n, err := w.Write(section)
+				if err != nil {
+					panic(err)
+				}
+				if n != int(chunksz-crc32Sz) {
+					panic("could not write all bytes to stdout")
+				}
+
+				chunk = chunk[(headerSz + int(chunksz)):]
+				continue
+			}
+		case chunk_type == 0xfe:
+			fallthrough // padding, just skip it
+		case chunk_type >= 0x80 && chunk_type <= 0xfd:
+			{ // Reserved skippable chunks
+				chunk = chunk[(headerSz + int(chunksz)):]
+				continue
+			}
+
+		default:
+			panic(fmt.Sprintf("unrecognized/unsupported chunk type %#v", chunk_type))
+		}
+
+	} // end for{}
+
+	return nil
+}
+
+// 0xff 0x06 0x00 0x00 sNaPpY
+var SnappyStreamHeaderMagic = []byte{0xff, 0x06, 0x00, 0x00, 0x73, 0x4e, 0x61, 0x50, 0x70, 0x59}
+
+const CHUNK_MAX = 65536
+const _STREAM_TO_STREAM_BLOCK_SIZE = CHUNK_MAX
+const _STREAM_IDENTIFIER = `sNaPpY`
+const _COMPRESSED_CHUNK = 0x00
+const _UNCOMPRESSED_CHUNK = 0x01
+const _IDENTIFIER_CHUNK = 0xff
+const _RESERVED_UNSKIPPABLE0 = 0x02 // chunk ranges are [inclusive, exclusive)
+const _RESERVED_UNSKIPPABLE1 = 0x80
+const _RESERVED_SKIPPABLE0 = 0x80
+const _RESERVED_SKIPPABLE1 = 0xff
+
+// the minimum percent of bytes compression must save to be enabled in automatic
+// mode
+const _COMPRESSION_THRESHOLD = .125
+
+var crctab *crc32.Table
+
+func init() {
+	crctab = crc32.MakeTable(crc32.Castagnoli) // this is correct table, matches the crc32c.c code used by python
+}
+
+func masked_crc32c(data []byte) uint32 {
+
+	// see the framing format specification, http://code.google.com/p/snappy/source/browse/trunk/framing_format.txt
+	var crc uint32 = crc32.Checksum(data, crctab)
+	return (uint32((crc>>15)|(crc<<17)) + 0xa282ead8)
+}
+
+func ReadSnappyStreamCompressedFile(filename string) ([]byte, error) {
+
+	snappyFile, err := Open(filename)
+	if err != nil {
+		return []byte{}, err
+	}
+
+	var bb bytes.Buffer
+	_, err = bb.ReadFrom(snappyFile)
+	if err == io.EOF {
+		err = nil
+	}
+	if err != nil {
+		panic(err)
+	}
+
+	return bb.Bytes(), err
+}