summaryrefslogtreecommitdiffstats
path: root/vendor/github.com/RoaringBitmap/roaring/serialization_littleendian.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/RoaringBitmap/roaring/serialization_littleendian.go')
-rw-r--r--vendor/github.com/RoaringBitmap/roaring/serialization_littleendian.go282
1 files changed, 282 insertions, 0 deletions
diff --git a/vendor/github.com/RoaringBitmap/roaring/serialization_littleendian.go b/vendor/github.com/RoaringBitmap/roaring/serialization_littleendian.go
index 82edeb8982..18a563e8d6 100644
--- a/vendor/github.com/RoaringBitmap/roaring/serialization_littleendian.go
+++ b/vendor/github.com/RoaringBitmap/roaring/serialization_littleendian.go
@@ -3,6 +3,7 @@
package roaring
import (
+ "encoding/binary"
"errors"
"io"
"reflect"
@@ -132,3 +133,284 @@ func byteSliceAsInterval16Slice(slice []byte) (result []interval16) {
// return result
return
}
+
+// FromBuffer creates a bitmap from its serialized version stored in buffer.
+// It uses CRoaring's frozen bitmap format.
+//
+// The format specification is available here:
+// https://github.com/RoaringBitmap/CRoaring/blob/2c867e9f9c9e2a3a7032791f94c4c7ae3013f6e0/src/roaring.c#L2756-L2783
+//
+// The provided byte array (buf) is expected to be a constant.
+// The function makes the best effort attempt not to copy data.
+// Only little endian is supported. The function will err if it detects a big
+// endian serialized file.
+// You should take care not to modify buff as it will likely result in
+// unexpected program behavior.
+// If said buffer comes from a memory map, it's advisable to give it read
+// only permissions, either at creation or by calling Mprotect from the
+// golang.org/x/sys/unix package.
+//
+// Resulting bitmaps are effectively immutable in the following sense:
+// a copy-on-write marker is used so that when you modify the resulting
+// bitmap, copies of selected data (containers) are made.
+// You should *not* change the copy-on-write status of the resulting
+// bitmaps (SetCopyOnWrite).
+//
+// If buf becomes unavailable, then a bitmap created with
+// FromBuffer would be effectively broken. Furthermore, any
+// bitmap derived from this bitmap (e.g., via Or, And) might
+// also be broken. Thus, before making buf unavailable, you should
+// call CloneCopyOnWriteContainers on all such bitmaps.
+//
+func (rb *Bitmap) FrozenView(buf []byte) error {
+ return rb.highlowcontainer.frozenView(buf)
+}
+
+/* Verbatim specification from CRoaring.
+ *
+ * FROZEN SERIALIZATION FORMAT DESCRIPTION
+ *
+ * -- (beginning must be aligned by 32 bytes) --
+ * <bitset_data> uint64_t[BITSET_CONTAINER_SIZE_IN_WORDS * num_bitset_containers]
+ * <run_data> rle16_t[total number of rle elements in all run containers]
+ * <array_data> uint16_t[total number of array elements in all array containers]
+ * <keys> uint16_t[num_containers]
+ * <counts> uint16_t[num_containers]
+ * <typecodes> uint8_t[num_containers]
+ * <header> uint32_t
+ *
+ * <header> is a 4-byte value which is a bit union of FROZEN_COOKIE (15 bits)
+ * and the number of containers (17 bits).
+ *
+ * <counts> stores number of elements for every container.
+ * Its meaning depends on container type.
+ * For array and bitset containers, this value is the container cardinality minus one.
+ * For run container, it is the number of rle_t elements (n_runs).
+ *
+ * <bitset_data>,<array_data>,<run_data> are flat arrays of elements of
+ * all containers of respective type.
+ *
+ * <*_data> and <keys> are kept close together because they are not accessed
+ * during deserilization. This may reduce IO in case of large mmaped bitmaps.
+ * All members have their native alignments during deserilization except <header>,
+ * which is not guaranteed to be aligned by 4 bytes.
+ */
+const FROZEN_COOKIE = 13766
+
+var (
+ FrozenBitmapInvalidCookie = errors.New("header does not contain the FROZEN_COOKIE")
+ FrozenBitmapBigEndian = errors.New("loading big endian frozen bitmaps is not supported")
+ FrozenBitmapIncomplete = errors.New("input buffer too small to contain a frozen bitmap")
+ FrozenBitmapOverpopulated = errors.New("too many containers")
+ FrozenBitmapUnexpectedData = errors.New("spurious data in input")
+ FrozenBitmapInvalidTypecode = errors.New("unrecognized typecode")
+ FrozenBitmapBufferTooSmall = errors.New("buffer too small")
+)
+
+func (ra *roaringArray) frozenView(buf []byte) error {
+ if len(buf) < 4 {
+ return FrozenBitmapIncomplete
+ }
+
+ headerBE := binary.BigEndian.Uint32(buf[len(buf)-4:])
+ if headerBE & 0x7fff == FROZEN_COOKIE {
+ return FrozenBitmapBigEndian
+ }
+
+ header := binary.LittleEndian.Uint32(buf[len(buf)-4:])
+ buf = buf[:len(buf)-4]
+
+ if header & 0x7fff != FROZEN_COOKIE {
+ return FrozenBitmapInvalidCookie
+ }
+
+ nCont := int(header >> 15)
+ if nCont > (1 << 16) {
+ return FrozenBitmapOverpopulated
+ }
+
+ // 1 byte per type, 2 bytes per key, 2 bytes per count.
+ if len(buf) < 5*nCont {
+ return FrozenBitmapIncomplete
+ }
+
+ types := buf[len(buf)-nCont:]
+ buf = buf[:len(buf)-nCont]
+
+ counts := byteSliceAsUint16Slice(buf[len(buf)-2*nCont:])
+ buf = buf[:len(buf)-2*nCont]
+
+ keys := byteSliceAsUint16Slice(buf[len(buf)-2*nCont:])
+ buf = buf[:len(buf)-2*nCont]
+
+ nBitmap, nArray, nRun := uint64(0), uint64(0), uint64(0)
+ nArrayEl, nRunEl := uint64(0), uint64(0)
+ for i, t := range types {
+ switch (t) {
+ case 1:
+ nBitmap++
+ case 2:
+ nArray++
+ nArrayEl += uint64(counts[i])+1
+ case 3:
+ nRun++
+ nRunEl += uint64(counts[i])
+ default:
+ return FrozenBitmapInvalidTypecode
+ }
+ }
+
+ if uint64(len(buf)) < (1 << 13)*nBitmap + 4*nRunEl + 2*nArrayEl {
+ return FrozenBitmapIncomplete
+ }
+
+ bitsetsArena := byteSliceAsUint64Slice(buf[:(1 << 13)*nBitmap])
+ buf = buf[(1 << 13)*nBitmap:]
+
+ runsArena := byteSliceAsInterval16Slice(buf[:4*nRunEl])
+ buf = buf[4*nRunEl:]
+
+ arraysArena := byteSliceAsUint16Slice(buf[:2*nArrayEl])
+ buf = buf[2*nArrayEl:]
+
+ if len(buf) != 0 {
+ return FrozenBitmapUnexpectedData
+ }
+
+ // TODO: maybe arena_alloc all this.
+ containers := make([]container, nCont)
+ bitsets := make([]bitmapContainer, nBitmap)
+ arrays := make([]arrayContainer, nArray)
+ runs := make([]runContainer16, nRun)
+ needCOW := make([]bool, nCont)
+
+ iBitset, iArray, iRun := uint64(0), uint64(0), uint64(0)
+ for i, t := range types {
+ needCOW[i] = true
+
+ switch (t) {
+ case 1:
+ containers[i] = &bitsets[iBitset]
+ bitsets[iBitset].cardinality = int(counts[i])+1
+ bitsets[iBitset].bitmap = bitsetsArena[:1024]
+ bitsetsArena = bitsetsArena[1024:]
+ iBitset++
+ case 2:
+ containers[i] = &arrays[iArray]
+ arrays[iArray].content = arraysArena[:counts[i]+1]
+ arraysArena = arraysArena[counts[i]+1:]
+ iArray++
+ case 3:
+ containers[i] = &runs[iRun]
+ runs[iRun].iv = runsArena[:counts[i]]
+ runsArena = runsArena[counts[i]:]
+ iRun++
+ }
+ }
+
+ // Not consuming the full input is a bug.
+ if iBitset != nBitmap || len(bitsetsArena) != 0 ||
+ iArray != nArray || len(arraysArena) != 0 ||
+ iRun != nRun || len(runsArena) != 0 {
+ panic("we missed something")
+ }
+
+ ra.keys = keys
+ ra.containers = containers
+ ra.needCopyOnWrite = needCOW
+ ra.copyOnWrite = true
+
+ return nil
+}
+
+func (bm *Bitmap) GetFrozenSizeInBytes() uint64 {
+ nBits, nArrayEl, nRunEl := uint64(0), uint64(0), uint64(0)
+ for _, c := range bm.highlowcontainer.containers {
+ switch v := c.(type) {
+ case *bitmapContainer:
+ nBits++
+ case *arrayContainer:
+ nArrayEl += uint64(len(v.content))
+ case *runContainer16:
+ nRunEl += uint64(len(v.iv))
+ }
+ }
+ return 4 + 5*uint64(len(bm.highlowcontainer.containers)) +
+ (nBits << 13) + 2*nArrayEl + 4*nRunEl
+}
+
+func (bm *Bitmap) Freeze() ([]byte, error) {
+ sz := bm.GetFrozenSizeInBytes()
+ buf := make([]byte, sz)
+ _, err := bm.FreezeTo(buf)
+ return buf, err
+}
+
+func (bm *Bitmap) FreezeTo(buf []byte) (int, error) {
+ containers := bm.highlowcontainer.containers
+ nCont := len(containers)
+
+ nBits, nArrayEl, nRunEl := 0, 0, 0
+ for _, c := range containers {
+ switch v := c.(type) {
+ case *bitmapContainer:
+ nBits++
+ case *arrayContainer:
+ nArrayEl += len(v.content)
+ case *runContainer16:
+ nRunEl += len(v.iv)
+ }
+ }
+
+ serialSize := 4 + 5*nCont + (1 << 13)*nBits + 4*nRunEl + 2*nArrayEl
+ if len(buf) < serialSize {
+ return 0, FrozenBitmapBufferTooSmall
+ }
+
+ bitsArena := byteSliceAsUint64Slice(buf[:(1 << 13)*nBits])
+ buf = buf[(1 << 13)*nBits:]
+
+ runsArena := byteSliceAsInterval16Slice(buf[:4*nRunEl])
+ buf = buf[4*nRunEl:]
+
+ arraysArena := byteSliceAsUint16Slice(buf[:2*nArrayEl])
+ buf = buf[2*nArrayEl:]
+
+ keys := byteSliceAsUint16Slice(buf[:2*nCont])
+ buf = buf[2*nCont:]
+
+ counts := byteSliceAsUint16Slice(buf[:2*nCont])
+ buf = buf[2*nCont:]
+
+ types := buf[:nCont]
+ buf = buf[nCont:]
+
+ header := uint32(FROZEN_COOKIE|(nCont << 15))
+ binary.LittleEndian.PutUint32(buf[:4], header)
+
+ copy(keys, bm.highlowcontainer.keys[:])
+
+ for i, c := range containers {
+ switch v := c.(type) {
+ case *bitmapContainer:
+ copy(bitsArena, v.bitmap)
+ bitsArena = bitsArena[1024:]
+ counts[i] = uint16(v.cardinality-1)
+ types[i] = 1
+ case *arrayContainer:
+ copy(arraysArena, v.content)
+ arraysArena = arraysArena[len(v.content):]
+ elems := len(v.content)
+ counts[i] = uint16(elems)-1
+ types[i] = 2
+ case *runContainer16:
+ copy(runsArena, v.iv)
+ runs := len(v.iv)
+ runsArena = runsArena[runs:]
+ counts[i] = uint16(runs)
+ types[i] = 3
+ }
+ }
+
+ return serialSize, nil
+}