diff options
Diffstat (limited to 'vendor/github.com/blevesearch/zap/v14/intcoder.go')
-rw-r--r-- | vendor/github.com/blevesearch/zap/v14/intcoder.go | 206 |
1 files changed, 206 insertions, 0 deletions
diff --git a/vendor/github.com/blevesearch/zap/v14/intcoder.go b/vendor/github.com/blevesearch/zap/v14/intcoder.go new file mode 100644 index 0000000000..c3c488fb74 --- /dev/null +++ b/vendor/github.com/blevesearch/zap/v14/intcoder.go @@ -0,0 +1,206 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package zap + +import ( + "bytes" + "encoding/binary" + "io" +) + +// We can safely use 0 to represent termNotEncoded since 0 +// could never be a valid address for term location information. +// (stored field index is always non-empty and earlier in the +// file) +const termNotEncoded = 0 + +type chunkedIntCoder struct { + final []byte + chunkSize uint64 + chunkBuf bytes.Buffer + chunkLens []uint64 + currChunk uint64 + + buf []byte +} + +// newChunkedIntCoder returns a new chunk int coder which packs data into +// chunks based on the provided chunkSize and supports up to the specified +// maxDocNum +func newChunkedIntCoder(chunkSize uint64, maxDocNum uint64) *chunkedIntCoder { + total := maxDocNum/chunkSize + 1 + rv := &chunkedIntCoder{ + chunkSize: chunkSize, + chunkLens: make([]uint64, total), + final: make([]byte, 0, 64), + } + + return rv +} + +// Reset lets you reuse this chunked int coder. buffers are reset and reused +// from previous use. you cannot change the chunk size or max doc num. 
+func (c *chunkedIntCoder) Reset() { + c.final = c.final[:0] + c.chunkBuf.Reset() + c.currChunk = 0 + for i := range c.chunkLens { + c.chunkLens[i] = 0 + } +} + +// SetChunkSize changes the chunk size. It is only valid to do so +// with a new chunkedIntCoder, or immediately after calling Reset() +func (c *chunkedIntCoder) SetChunkSize(chunkSize uint64, maxDocNum uint64) { + total := int(maxDocNum/chunkSize + 1) + c.chunkSize = chunkSize + if cap(c.chunkLens) < total { + c.chunkLens = make([]uint64, total) + } else { + c.chunkLens = c.chunkLens[:total] + } +} + +// Add encodes the provided integers into the correct chunk for the provided +// doc num. You MUST call Add() with increasing docNums. +func (c *chunkedIntCoder) Add(docNum uint64, vals ...uint64) error { + chunk := docNum / c.chunkSize + if chunk != c.currChunk { + // starting a new chunk + c.Close() + c.chunkBuf.Reset() + c.currChunk = chunk + } + + if len(c.buf) < binary.MaxVarintLen64 { + c.buf = make([]byte, binary.MaxVarintLen64) + } + + for _, val := range vals { + wb := binary.PutUvarint(c.buf, val) + _, err := c.chunkBuf.Write(c.buf[:wb]) + if err != nil { + return err + } + } + + return nil +} + +func (c *chunkedIntCoder) AddBytes(docNum uint64, buf []byte) error { + chunk := docNum / c.chunkSize + if chunk != c.currChunk { + // starting a new chunk + c.Close() + c.chunkBuf.Reset() + c.currChunk = chunk + } + + _, err := c.chunkBuf.Write(buf) + return err +} + +// Close indicates you are done calling Add() this allows the final chunk +// to be encoded. +func (c *chunkedIntCoder) Close() { + encodingBytes := c.chunkBuf.Bytes() + c.chunkLens[c.currChunk] = uint64(len(encodingBytes)) + c.final = append(c.final, encodingBytes...) + c.currChunk = uint64(cap(c.chunkLens)) // sentinel to detect double close +} + +// Write commits all the encoded chunked integers to the provided writer. 
+func (c *chunkedIntCoder) Write(w io.Writer) (int, error) { + bufNeeded := binary.MaxVarintLen64 * (1 + len(c.chunkLens)) + if len(c.buf) < bufNeeded { + c.buf = make([]byte, bufNeeded) + } + buf := c.buf + + // convert the chunk lengths into chunk offsets + chunkOffsets := modifyLengthsToEndOffsets(c.chunkLens) + + // write out the number of chunks & each chunk offsets + n := binary.PutUvarint(buf, uint64(len(chunkOffsets))) + for _, chunkOffset := range chunkOffsets { + n += binary.PutUvarint(buf[n:], chunkOffset) + } + + tw, err := w.Write(buf[:n]) + if err != nil { + return tw, err + } + + // write out the data + nw, err := w.Write(c.final) + tw += nw + if err != nil { + return tw, err + } + return tw, nil +} + +// writeAt commits all the encoded chunked integers to the provided writer +// and returns the starting offset, total bytes written and an error +func (c *chunkedIntCoder) writeAt(w io.Writer) (uint64, int, error) { + startOffset := uint64(termNotEncoded) + if len(c.final) <= 0 { + return startOffset, 0, nil + } + + if chw := w.(*CountHashWriter); chw != nil { + startOffset = uint64(chw.Count()) + } + + tw, err := c.Write(w) + return startOffset, tw, err +} + +func (c *chunkedIntCoder) FinalSize() int { + return len(c.final) +} + +// modifyLengthsToEndOffsets converts the chunk length array +// to a chunk offset array. The readChunkBoundary +// will figure out the start and end of every chunk from +// these offsets. Starting offset of i'th index is stored +// in i-1'th position except for 0'th index and ending offset +// is stored at i'th index position. +// For 0'th element, starting position is always zero. 
// eg:
// Lens -> 5 5 5 5 => 5 10 15 20
// Lens -> 0 5 0 5 => 0 5 5 10
// Lens -> 0 0 0 5 => 0 0 0 5
// Lens -> 5 0 0 0 => 5 5 5 5
// Lens -> 0 5 0 0 => 0 5 5 5
// Lens -> 0 0 5 0 => 0 0 5 5
//
// The conversion is done in place: the input slice is overwritten with the
// running totals and the same slice is returned.
func modifyLengthsToEndOffsets(lengths []uint64) []uint64 {
	var runningOffset uint64
	for i, length := range lengths {
		runningOffset += length
		lengths[i] = runningOffset
	}
	return lengths
}

// readChunkBoundary returns the start and end offsets of the given chunk
// from an array of chunk end offsets. Chunk 0 starts at zero; every other
// chunk starts at the end offset of the chunk before it. chunk must be a
// valid index into offsets.
func readChunkBoundary(chunk int, offsets []uint64) (uint64, uint64) {
	var start uint64
	if chunk > 0 {
		start = offsets[chunk-1]
	}
	return start, offsets[chunk]
}