summaryrefslogtreecommitdiffstats
path: root/vendor/github.com/Smerity
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/Smerity')
-rw-r--r--vendor/github.com/Smerity/govarint/LICENSE22
-rw-r--r--vendor/github.com/Smerity/govarint/README.md67
-rw-r--r--vendor/github.com/Smerity/govarint/govarint.go229
3 files changed, 318 insertions, 0 deletions
diff --git a/vendor/github.com/Smerity/govarint/LICENSE b/vendor/github.com/Smerity/govarint/LICENSE
new file mode 100644
index 0000000000..be09cac865
--- /dev/null
+++ b/vendor/github.com/Smerity/govarint/LICENSE
@@ -0,0 +1,22 @@
+The MIT License (MIT)
+
+Copyright (c) 2015 Stephen Merity
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
diff --git a/vendor/github.com/Smerity/govarint/README.md b/vendor/github.com/Smerity/govarint/README.md
new file mode 100644
index 0000000000..5b82dccb2f
--- /dev/null
+++ b/vendor/github.com/Smerity/govarint/README.md
@@ -0,0 +1,67 @@
+# Govarint
+
+This project aims to provide a simple API for the performant encoding and decoding of 32 and 64 bit integers using a variety of algorithms.
+
+[![](http://i.imgur.com/mpgC23U.jpg)](https://www.flickr.com/photos/tsevis/8648521649/)
+
+## Usage
+
+Each integer encoding algorithm conforms to an encoding and decoding interface.
+The interfaces also specify the size of the unsigned integer, either 32 or 64 bits, and will be referred to as XX below.
+To create an encoder:
+
+ NewU32Base128Encoder(w io.Writer)
+ NewU64Base128Encoder(w io.Writer)
+ NewU32GroupVarintEncoder(w io.Writer)
+
+For encoders, the only two commands are `PutUXX` and `Close`.
+`Close` must be called as some integer encoding algorithms write in multiples.
+
+ var buf bytes.Buffer
+ enc := NewU32Base128Encoder(&buf)
+ enc.PutU32(117)
+ enc.PutU32(343)
+ enc.Close()
+
+To create a decoder:
+
+ NewU32Base128Decoder(r io.ByteReader)
+ NewU64Base128Decoder(r io.ByteReader)
+ NewU32GroupVarintDecoder(r io.ByteReader)
+
+For decoders, the only command is `GetUXX`.
+`GetUXX` returns the value and any potential errors.
+When reading is complete, `GetUXX` will return an `EOF` (End Of File).
+
+ dec := NewU32Base128Decoder(&buf)
+ x, err := dec.GetU32()
+
+## Use Cases
+
+Using fixed width integers, such as uint32 and uint64, usually waste large amounts of space, especially when encoding small values.
+Optimally, smaller numbers should take less space to represent.
+
+Using integer encoding algorithms is especially common in specific applications, such as storing edge lists or indexes for search engines.
+In these situations, you have a sorted list of numbers that you want to keep as compactly as possible in memory.
+Additionally, by storing only the difference between the given number and the previous (delta encoding), the numbers are quite small, and thus compress well.
+
+For an explicit example, the Web Data Commons Hyperlink Graph contains 128 billion edges linking page A to page B, where each page is represented by a 32 bit integer.
+By converting all these edges to 64 bit integers (32 | 32), sorting them, and then using delta encoding, memory usage can be reduced from 64 bits per edge down to only 9 bits per edge using the Base128 integer encoding algorithm.
+This figure improves even further if compressed using conventional compression algorithms (3 bits per edge).
+
+## Encodings supported
+
+`govarint` supports:
+
++ Base128 [32, 64] - each byte uses 7 bits for encoding the integer and 1 bit for indicating if the integer requires another byte
++ Group Varint [32] - integers are encoded in blocks of four - one byte encodes the size of the following four integers, then the values of the four integers follows
+
+Group Varint consistently beats Base128 in decompression speed but Base128 may offer improved compression ratios depending on the distribution of the supplied integers.
+
+## Tests
+
+ go test -v -bench=.
+
+## License
+
+MIT License, as per `LICENSE`
diff --git a/vendor/github.com/Smerity/govarint/govarint.go b/vendor/github.com/Smerity/govarint/govarint.go
new file mode 100644
index 0000000000..61328a337b
--- /dev/null
+++ b/vendor/github.com/Smerity/govarint/govarint.go
@@ -0,0 +1,229 @@
+package govarint
+
+import "encoding/binary"
+import "io"
+
+type U32VarintEncoder interface {
+ PutU32(x uint32) int
+ Close()
+}
+
+type U32VarintDecoder interface {
+ GetU32() (uint32, error)
+}
+
+///
+
+type U64VarintEncoder interface {
+ PutU64(x uint64) int
+ Close()
+}
+
+type U64VarintDecoder interface {
+ GetU64() (uint64, error)
+}
+
+///
+
+type U32GroupVarintEncoder struct {
+ w io.Writer
+ index int
+ store [4]uint32
+ temp [17]byte
+}
+
+func NewU32GroupVarintEncoder(w io.Writer) *U32GroupVarintEncoder { return &U32GroupVarintEncoder{w: w} }
+
+func (b *U32GroupVarintEncoder) Flush() (int, error) {
+ // TODO: Is it more efficient to have a tailored version that's called only in Close()?
+ // If index is zero, there are no integers to flush
+ if b.index == 0 {
+ return 0, nil
+ }
+ // In the case we're flushing (the group isn't of size four), the non-values should be zero
+ // This ensures the unused entries are all zero in the sizeByte
+ for i := b.index; i < 4; i++ {
+ b.store[i] = 0
+ }
+ length := 1
+ // We need to reset the size byte to zero as we only bitwise OR into it, we don't overwrite it
+ b.temp[0] = 0
+ for i, x := range b.store {
+ size := byte(0)
+ shifts := []byte{24, 16, 8, 0}
+ for _, shift := range shifts {
+ // Always writes at least one byte -- the first one (shift = 0)
+ // Will write more bytes until the rest of the integer is all zeroes
+ if (x>>shift) != 0 || shift == 0 {
+ size += 1
+ b.temp[length] = byte(x >> shift)
+ length += 1
+ }
+ }
+ // We store the size in two of the eight bits in the first byte (sizeByte)
+ // 0 means there is one byte in total, hence why we subtract one from size
+ b.temp[0] |= (size - 1) << (uint8(3-i) * 2)
+ }
+ // If we're flushing without a full group of four, remove the unused bytes we computed
+ // This enables us to realize it's a partial group on decoding thanks to EOF
+ if b.index != 4 {
+ length -= 4 - b.index
+ }
+ _, err := b.w.Write(b.temp[:length])
+ return length, err
+}
+
+func (b *U32GroupVarintEncoder) PutU32(x uint32) (int, error) {
+ bytesWritten := 0
+ b.store[b.index] = x
+ b.index += 1
+ if b.index == 4 {
+ n, err := b.Flush()
+ if err != nil {
+ return n, err
+ }
+ bytesWritten += n
+ b.index = 0
+ }
+ return bytesWritten, nil
+}
+
+func (b *U32GroupVarintEncoder) Close() {
+ // On Close, we flush any remaining values that might not have been in a full group
+ b.Flush()
+}
+
+///
+
+type U32GroupVarintDecoder struct {
+ r io.ByteReader
+ group [4]uint32
+ pos int
+ finished bool
+ capacity int
+}
+
+func NewU32GroupVarintDecoder(r io.ByteReader) *U32GroupVarintDecoder {
+ return &U32GroupVarintDecoder{r: r, pos: 4, capacity: 4}
+}
+
+func (b *U32GroupVarintDecoder) getGroup() error {
+ // We should always receive a sizeByte if there are more values to read
+ sizeByte, err := b.r.ReadByte()
+ if err != nil {
+ return err
+ }
+ // Calculate the size of the four incoming 32 bit integers
+ // 0b00 means 1 byte to read, 0b01 = 2, etc
+ b.group[0] = uint32((sizeByte >> 6) & 3)
+ b.group[1] = uint32((sizeByte >> 4) & 3)
+ b.group[2] = uint32((sizeByte >> 2) & 3)
+ b.group[3] = uint32(sizeByte & 3)
+ //
+ for index, size := range b.group {
+ b.group[index] = 0
+ // Any error that occurs in earlier byte reads should be repeated at the end one
+ // Hence we only catch and report the final ReadByte's error
+ var err error
+ switch size {
+ case 0:
+ var x byte
+ x, err = b.r.ReadByte()
+ b.group[index] = uint32(x)
+ case 1:
+ var x, y byte
+ x, _ = b.r.ReadByte()
+ y, err = b.r.ReadByte()
+ b.group[index] = uint32(x)<<8 | uint32(y)
+ case 2:
+ var x, y, z byte
+ x, _ = b.r.ReadByte()
+ y, _ = b.r.ReadByte()
+ z, err = b.r.ReadByte()
+ b.group[index] = uint32(x)<<16 | uint32(y)<<8 | uint32(z)
+ case 3:
+ var x, y, z, zz byte
+ x, _ = b.r.ReadByte()
+ y, _ = b.r.ReadByte()
+ z, _ = b.r.ReadByte()
+ zz, err = b.r.ReadByte()
+ b.group[index] = uint32(x)<<24 | uint32(y)<<16 | uint32(z)<<8 | uint32(zz)
+ }
+ if err != nil {
+ if err == io.EOF {
+ // If we hit EOF here, we have found a partial group
+ // We've return any valid entries we have read and return EOF once we run out
+ b.capacity = index
+ b.finished = true
+ break
+ } else {
+ return err
+ }
+ }
+ }
+ // Reset the pos pointer to the beginning of the read values
+ b.pos = 0
+ return nil
+}
+
+func (b *U32GroupVarintDecoder) GetU32() (uint32, error) {
+ // Check if we have any more values to give out - if not, let's get them
+ if b.pos == b.capacity {
+ // If finished is set, there is nothing else to do
+ if b.finished {
+ return 0, io.EOF
+ }
+ err := b.getGroup()
+ if err != nil {
+ return 0, err
+ }
+ }
+ // Increment pointer and return the value stored at that point
+ b.pos += 1
+ return b.group[b.pos-1], nil
+}
+
+///
+
+type Base128Encoder struct {
+ w io.Writer
+ tmpBytes []byte
+}
+
+func NewU32Base128Encoder(w io.Writer) *Base128Encoder {
+ return &Base128Encoder{w: w, tmpBytes: make([]byte, binary.MaxVarintLen32)}
+}
+func NewU64Base128Encoder(w io.Writer) *Base128Encoder {
+ return &Base128Encoder{w: w, tmpBytes: make([]byte, binary.MaxVarintLen64)}
+}
+
+func (b *Base128Encoder) PutU32(x uint32) (int, error) {
+ writtenBytes := binary.PutUvarint(b.tmpBytes, uint64(x))
+ return b.w.Write(b.tmpBytes[:writtenBytes])
+}
+
+func (b *Base128Encoder) PutU64(x uint64) (int, error) {
+ writtenBytes := binary.PutUvarint(b.tmpBytes, x)
+ return b.w.Write(b.tmpBytes[:writtenBytes])
+}
+
+func (b *Base128Encoder) Close() {
+}
+
+///
+
+type Base128Decoder struct {
+ r io.ByteReader
+}
+
+func NewU32Base128Decoder(r io.ByteReader) *Base128Decoder { return &Base128Decoder{r: r} }
+func NewU64Base128Decoder(r io.ByteReader) *Base128Decoder { return &Base128Decoder{r: r} }
+
+func (b *Base128Decoder) GetU32() (uint32, error) {
+ v, err := binary.ReadUvarint(b.r)
+ return uint32(v), err
+}
+
+func (b *Base128Decoder) GetU64() (uint64, error) {
+ return binary.ReadUvarint(b.r)
+}