You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

roaringarray.go 21KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837
  1. package roaring
  2. import (
  3. "bytes"
  4. "encoding/binary"
  5. "fmt"
  6. snappy "github.com/glycerine/go-unsnap-stream"
  7. "github.com/tinylib/msgp/msgp"
  8. "io"
  9. )
  10. //go:generate msgp -unexported
// container is the internal abstraction over one chunk of a bitmap as stored
// in roaringArray.containers. The serialization code in this file handles the
// concrete implementations *bitmapContainer, *arrayContainer and
// *runContainer16.
//
// Methods whose name starts with "i" work in place on the receiver; several
// of them still return a container, and that return value is the one callers
// should keep using.
type container interface {
	addOffset(uint16) []container

	clone() container
	and(container) container
	andCardinality(container) int
	iand(container) container // i stands for inplace
	andNot(container) container
	iandNot(container) container // i stands for inplace
	getCardinality() int
	// rank returns the number of integers that are
	// smaller than or equal to x. rank(infinity) would be getCardinality().
	rank(uint16) int

	iadd(x uint16) bool                   // inplace, returns true if x was new.
	iaddReturnMinimized(uint16) container // may change return type to minimize storage.

	//addRange(start, final int) container  // range is [firstOfRange,lastOfRange) (unused)
	iaddRange(start, endx int) container // i stands for inplace, range is [firstOfRange,endx)

	iremove(x uint16) bool                   // inplace, returns true if x was present.
	iremoveReturnMinimized(uint16) container // may change return type to minimize storage.

	not(start, final int) container        // range is [firstOfRange,lastOfRange)
	inot(firstOfRange, endx int) container // i stands for inplace, range is [firstOfRange,endx)
	xor(r container) container
	getShortIterator() shortPeekable
	getReverseIterator() shortIterable
	getManyIterator() manyIterable
	contains(i uint16) bool
	maximum() uint16
	minimum() uint16

	// equals is now logical equals; it does not require the
	// same underlying container types, but compares across
	// any of the implementations.
	equals(r container) bool

	fillLeastSignificant16bits(array []uint32, i int, mask uint32)
	or(r container) container
	orCardinality(r container) int
	isFull() bool
	ior(r container) container   // i stands for inplace
	intersects(r container) bool // whether the two containers intersect
	lazyOR(r container) container
	lazyIOR(r container) container
	getSizeInBytes() int
	//removeRange(start, final int) container  // range is [firstOfRange,lastOfRange) (unused)
	iremoveRange(start, final int) container // i stands for inplace, range is [firstOfRange,lastOfRange)
	selectInt(x uint16) int                  // selectInt returns the xth integer in the container
	// serializedSizeInBytes is summed over all containers by
	// roaringArray.serializedSizeInBytes.
	serializedSizeInBytes() int
	// writeTo is called once per container by roaringArray.writeTo, after the
	// header has been written; the int result is added to the byte count.
	writeTo(io.Writer) (int, error)

	numberOfRuns() int
	// toEfficientContainer is what roaringArray.runOptimize uses to replace
	// each container with its space-optimized form.
	toEfficientContainer() container
	String() string
	containerType() contype
}
// contype is the one-byte tag stored in containerSerz.t to record which
// concrete container implementation a serialized payload holds.
type contype uint8

// Tags for the known container implementations. The msgpack routines in this
// file read and write bitmapContype, arrayContype and run16Contype only;
// readFromMsgpack rejects any other value with an error.
const (
	bitmapContype contype = iota
	arrayContype
	run16Contype
	run32Contype
)
  68. // careful: range is [firstOfRange,lastOfRange]
  69. func rangeOfOnes(start, last int) container {
  70. if start > MaxUint16 {
  71. panic("rangeOfOnes called with start > MaxUint16")
  72. }
  73. if last > MaxUint16 {
  74. panic("rangeOfOnes called with last > MaxUint16")
  75. }
  76. if start < 0 {
  77. panic("rangeOfOnes called with start < 0")
  78. }
  79. if last < 0 {
  80. panic("rangeOfOnes called with last < 0")
  81. }
  82. return newRunContainer16Range(uint16(start), uint16(last))
  83. }
// roaringArray holds the containers of a bitmap together with their 16-bit
// keys. keys, containers and needCopyOnWrite are parallel slices: entry i of
// each describes the same container. Lookups in this file binary-search keys,
// so keys is expected to be kept in ascending order.
type roaringArray struct {
	keys []uint16
	// containers[i] is the container stored under keys[i].
	containers []container `msg:"-"` // don't try to serialize directly.
	// needCopyOnWrite[i] marks containers[i] as possibly shared: it must be
	// cloned before being modified (see getWritableContainerAtIndex).
	needCopyOnWrite []bool
	// copyOnWrite selects lazy sharing of containers in clone and in the
	// appendCopy* helpers instead of eager cloning.
	copyOnWrite bool

	// conserz is used at serialization time
	// to serialize containers. Otherwise empty.
	conserz []containerSerz
}
// containerSerz facilitates serializing container (tricky to
// serialize because it is an interface) by providing a
// light wrapper with a type identifier. writeToMsgpack fills one entry per
// container and readFromMsgpack uses t to decide which concrete type to
// unmarshal r into.
type containerSerz struct {
	t contype  `msg:"t"` // type
	r msgp.Raw `msg:"r"` // Raw msgpack of the actual container type
}
  100. func newRoaringArray() *roaringArray {
  101. return &roaringArray{}
  102. }
  103. // runOptimize compresses the element containers to minimize space consumed.
  104. // Q: how does this interact with copyOnWrite and needCopyOnWrite?
  105. // A: since we aren't changing the logical content, just the representation,
  106. // we don't bother to check the needCopyOnWrite bits. We replace
  107. // (possibly all) elements of ra.containers in-place with space
  108. // optimized versions.
  109. func (ra *roaringArray) runOptimize() {
  110. for i := range ra.containers {
  111. ra.containers[i] = ra.containers[i].toEfficientContainer()
  112. }
  113. }
  114. func (ra *roaringArray) appendContainer(key uint16, value container, mustCopyOnWrite bool) {
  115. ra.keys = append(ra.keys, key)
  116. ra.containers = append(ra.containers, value)
  117. ra.needCopyOnWrite = append(ra.needCopyOnWrite, mustCopyOnWrite)
  118. }
  119. func (ra *roaringArray) appendWithoutCopy(sa roaringArray, startingindex int) {
  120. mustCopyOnWrite := sa.needCopyOnWrite[startingindex]
  121. ra.appendContainer(sa.keys[startingindex], sa.containers[startingindex], mustCopyOnWrite)
  122. }
  123. func (ra *roaringArray) appendCopy(sa roaringArray, startingindex int) {
  124. // cow only if the two request it, or if we already have a lightweight copy
  125. copyonwrite := (ra.copyOnWrite && sa.copyOnWrite) || sa.needsCopyOnWrite(startingindex)
  126. if !copyonwrite {
  127. // since there is no copy-on-write, we need to clone the container (this is important)
  128. ra.appendContainer(sa.keys[startingindex], sa.containers[startingindex].clone(), copyonwrite)
  129. } else {
  130. ra.appendContainer(sa.keys[startingindex], sa.containers[startingindex], copyonwrite)
  131. if !sa.needsCopyOnWrite(startingindex) {
  132. sa.setNeedsCopyOnWrite(startingindex)
  133. }
  134. }
  135. }
  136. func (ra *roaringArray) appendWithoutCopyMany(sa roaringArray, startingindex, end int) {
  137. for i := startingindex; i < end; i++ {
  138. ra.appendWithoutCopy(sa, i)
  139. }
  140. }
  141. func (ra *roaringArray) appendCopyMany(sa roaringArray, startingindex, end int) {
  142. for i := startingindex; i < end; i++ {
  143. ra.appendCopy(sa, i)
  144. }
  145. }
  146. func (ra *roaringArray) appendCopiesUntil(sa roaringArray, stoppingKey uint16) {
  147. // cow only if the two request it, or if we already have a lightweight copy
  148. copyonwrite := ra.copyOnWrite && sa.copyOnWrite
  149. for i := 0; i < sa.size(); i++ {
  150. if sa.keys[i] >= stoppingKey {
  151. break
  152. }
  153. thiscopyonewrite := copyonwrite || sa.needsCopyOnWrite(i)
  154. if thiscopyonewrite {
  155. ra.appendContainer(sa.keys[i], sa.containers[i], thiscopyonewrite)
  156. if !sa.needsCopyOnWrite(i) {
  157. sa.setNeedsCopyOnWrite(i)
  158. }
  159. } else {
  160. // since there is no copy-on-write, we need to clone the container (this is important)
  161. ra.appendContainer(sa.keys[i], sa.containers[i].clone(), thiscopyonewrite)
  162. }
  163. }
  164. }
  165. func (ra *roaringArray) appendCopiesAfter(sa roaringArray, beforeStart uint16) {
  166. // cow only if the two request it, or if we already have a lightweight copy
  167. copyonwrite := ra.copyOnWrite && sa.copyOnWrite
  168. startLocation := sa.getIndex(beforeStart)
  169. if startLocation >= 0 {
  170. startLocation++
  171. } else {
  172. startLocation = -startLocation - 1
  173. }
  174. for i := startLocation; i < sa.size(); i++ {
  175. thiscopyonewrite := copyonwrite || sa.needsCopyOnWrite(i)
  176. if thiscopyonewrite {
  177. ra.appendContainer(sa.keys[i], sa.containers[i], thiscopyonewrite)
  178. if !sa.needsCopyOnWrite(i) {
  179. sa.setNeedsCopyOnWrite(i)
  180. }
  181. } else {
  182. // since there is no copy-on-write, we need to clone the container (this is important)
  183. ra.appendContainer(sa.keys[i], sa.containers[i].clone(), thiscopyonewrite)
  184. }
  185. }
  186. }
  187. func (ra *roaringArray) removeIndexRange(begin, end int) {
  188. if end <= begin {
  189. return
  190. }
  191. r := end - begin
  192. copy(ra.keys[begin:], ra.keys[end:])
  193. copy(ra.containers[begin:], ra.containers[end:])
  194. copy(ra.needCopyOnWrite[begin:], ra.needCopyOnWrite[end:])
  195. ra.resize(len(ra.keys) - r)
  196. }
  197. func (ra *roaringArray) resize(newsize int) {
  198. for k := newsize; k < len(ra.containers); k++ {
  199. ra.containers[k] = nil
  200. }
  201. ra.keys = ra.keys[:newsize]
  202. ra.containers = ra.containers[:newsize]
  203. ra.needCopyOnWrite = ra.needCopyOnWrite[:newsize]
  204. }
  205. func (ra *roaringArray) clear() {
  206. ra.resize(0)
  207. ra.copyOnWrite = false
  208. ra.conserz = nil
  209. }
  210. func (ra *roaringArray) clone() *roaringArray {
  211. sa := roaringArray{}
  212. sa.copyOnWrite = ra.copyOnWrite
  213. // this is where copyOnWrite is used.
  214. if ra.copyOnWrite {
  215. sa.keys = make([]uint16, len(ra.keys))
  216. copy(sa.keys, ra.keys)
  217. sa.containers = make([]container, len(ra.containers))
  218. copy(sa.containers, ra.containers)
  219. sa.needCopyOnWrite = make([]bool, len(ra.needCopyOnWrite))
  220. ra.markAllAsNeedingCopyOnWrite()
  221. sa.markAllAsNeedingCopyOnWrite()
  222. // sa.needCopyOnWrite is shared
  223. } else {
  224. // make a full copy
  225. sa.keys = make([]uint16, len(ra.keys))
  226. copy(sa.keys, ra.keys)
  227. sa.containers = make([]container, len(ra.containers))
  228. for i := range sa.containers {
  229. sa.containers[i] = ra.containers[i].clone()
  230. }
  231. sa.needCopyOnWrite = make([]bool, len(ra.needCopyOnWrite))
  232. }
  233. return &sa
  234. }
  235. // clone all containers which have needCopyOnWrite set to true
  236. // This can be used to make sure it is safe to munmap a []byte
  237. // that the roaring array may still have a reference to.
  238. func (ra *roaringArray) cloneCopyOnWriteContainers() {
  239. for i, needCopyOnWrite := range ra.needCopyOnWrite {
  240. if needCopyOnWrite {
  241. ra.containers[i] = ra.containers[i].clone()
  242. ra.needCopyOnWrite[i] = false
  243. }
  244. }
  245. }
  246. // unused function:
  247. //func (ra *roaringArray) containsKey(x uint16) bool {
  248. // return (ra.binarySearch(0, int64(len(ra.keys)), x) >= 0)
  249. //}
  250. func (ra *roaringArray) getContainer(x uint16) container {
  251. i := ra.binarySearch(0, int64(len(ra.keys)), x)
  252. if i < 0 {
  253. return nil
  254. }
  255. return ra.containers[i]
  256. }
  257. func (ra *roaringArray) getContainerAtIndex(i int) container {
  258. return ra.containers[i]
  259. }
  260. func (ra *roaringArray) getFastContainerAtIndex(i int, needsWriteable bool) container {
  261. c := ra.getContainerAtIndex(i)
  262. switch t := c.(type) {
  263. case *arrayContainer:
  264. c = t.toBitmapContainer()
  265. case *runContainer16:
  266. if !t.isFull() {
  267. c = t.toBitmapContainer()
  268. }
  269. case *bitmapContainer:
  270. if needsWriteable && ra.needCopyOnWrite[i] {
  271. c = ra.containers[i].clone()
  272. }
  273. }
  274. return c
  275. }
  276. func (ra *roaringArray) getWritableContainerAtIndex(i int) container {
  277. if ra.needCopyOnWrite[i] {
  278. ra.containers[i] = ra.containers[i].clone()
  279. ra.needCopyOnWrite[i] = false
  280. }
  281. return ra.containers[i]
  282. }
  283. func (ra *roaringArray) getIndex(x uint16) int {
  284. // before the binary search, we optimize for frequent cases
  285. size := len(ra.keys)
  286. if (size == 0) || (ra.keys[size-1] == x) {
  287. return size - 1
  288. }
  289. return ra.binarySearch(0, int64(size), x)
  290. }
  291. func (ra *roaringArray) getKeyAtIndex(i int) uint16 {
  292. return ra.keys[i]
  293. }
  294. func (ra *roaringArray) insertNewKeyValueAt(i int, key uint16, value container) {
  295. ra.keys = append(ra.keys, 0)
  296. ra.containers = append(ra.containers, nil)
  297. copy(ra.keys[i+1:], ra.keys[i:])
  298. copy(ra.containers[i+1:], ra.containers[i:])
  299. ra.keys[i] = key
  300. ra.containers[i] = value
  301. ra.needCopyOnWrite = append(ra.needCopyOnWrite, false)
  302. copy(ra.needCopyOnWrite[i+1:], ra.needCopyOnWrite[i:])
  303. ra.needCopyOnWrite[i] = false
  304. }
  305. func (ra *roaringArray) remove(key uint16) bool {
  306. i := ra.binarySearch(0, int64(len(ra.keys)), key)
  307. if i >= 0 { // if a new key
  308. ra.removeAtIndex(i)
  309. return true
  310. }
  311. return false
  312. }
  313. func (ra *roaringArray) removeAtIndex(i int) {
  314. copy(ra.keys[i:], ra.keys[i+1:])
  315. copy(ra.containers[i:], ra.containers[i+1:])
  316. copy(ra.needCopyOnWrite[i:], ra.needCopyOnWrite[i+1:])
  317. ra.resize(len(ra.keys) - 1)
  318. }
  319. func (ra *roaringArray) setContainerAtIndex(i int, c container) {
  320. ra.containers[i] = c
  321. }
  322. func (ra *roaringArray) replaceKeyAndContainerAtIndex(i int, key uint16, c container, mustCopyOnWrite bool) {
  323. ra.keys[i] = key
  324. ra.containers[i] = c
  325. ra.needCopyOnWrite[i] = mustCopyOnWrite
  326. }
  327. func (ra *roaringArray) size() int {
  328. return len(ra.keys)
  329. }
  330. func (ra *roaringArray) binarySearch(begin, end int64, ikey uint16) int {
  331. low := begin
  332. high := end - 1
  333. for low+16 <= high {
  334. middleIndex := low + (high-low)/2 // avoid overflow
  335. middleValue := ra.keys[middleIndex]
  336. if middleValue < ikey {
  337. low = middleIndex + 1
  338. } else if middleValue > ikey {
  339. high = middleIndex - 1
  340. } else {
  341. return int(middleIndex)
  342. }
  343. }
  344. for ; low <= high; low++ {
  345. val := ra.keys[low]
  346. if val >= ikey {
  347. if val == ikey {
  348. return int(low)
  349. }
  350. break
  351. }
  352. }
  353. return -int(low + 1)
  354. }
  355. func (ra *roaringArray) equals(o interface{}) bool {
  356. srb, ok := o.(roaringArray)
  357. if ok {
  358. if srb.size() != ra.size() {
  359. return false
  360. }
  361. for i, k := range ra.keys {
  362. if k != srb.keys[i] {
  363. return false
  364. }
  365. }
  366. for i, c := range ra.containers {
  367. if !c.equals(srb.containers[i]) {
  368. return false
  369. }
  370. }
  371. return true
  372. }
  373. return false
  374. }
  375. func (ra *roaringArray) headerSize() uint64 {
  376. size := uint64(len(ra.keys))
  377. if ra.hasRunCompression() {
  378. if size < noOffsetThreshold { // for small bitmaps, we omit the offsets
  379. return 4 + (size+7)/8 + 4*size
  380. }
  381. return 4 + (size+7)/8 + 8*size // - 4 because we pack the size with the cookie
  382. }
  383. return 4 + 4 + 8*size
  384. }
  385. // should be dirt cheap
  386. func (ra *roaringArray) serializedSizeInBytes() uint64 {
  387. answer := ra.headerSize()
  388. for _, c := range ra.containers {
  389. answer += uint64(c.serializedSizeInBytes())
  390. }
  391. return answer
  392. }
  393. //
  394. // spec: https://github.com/RoaringBitmap/RoaringFormatSpec
  395. //
  396. func (ra *roaringArray) writeTo(w io.Writer) (n int64, err error) {
  397. hasRun := ra.hasRunCompression()
  398. isRunSizeInBytes := 0
  399. cookieSize := 8
  400. if hasRun {
  401. cookieSize = 4
  402. isRunSizeInBytes = (len(ra.keys) + 7) / 8
  403. }
  404. descriptiveHeaderSize := 4 * len(ra.keys)
  405. preambleSize := cookieSize + isRunSizeInBytes + descriptiveHeaderSize
  406. buf := make([]byte, preambleSize+4*len(ra.keys))
  407. nw := 0
  408. if hasRun {
  409. binary.LittleEndian.PutUint16(buf[0:], uint16(serialCookie))
  410. nw += 2
  411. binary.LittleEndian.PutUint16(buf[2:], uint16(len(ra.keys)-1))
  412. nw += 2
  413. // compute isRun bitmap
  414. var ir []byte
  415. isRun := newBitmapContainer()
  416. for i, c := range ra.containers {
  417. switch c.(type) {
  418. case *runContainer16:
  419. isRun.iadd(uint16(i))
  420. }
  421. }
  422. // convert to little endian
  423. ir = isRun.asLittleEndianByteSlice()[:isRunSizeInBytes]
  424. nw += copy(buf[nw:], ir)
  425. } else {
  426. binary.LittleEndian.PutUint32(buf[0:], uint32(serialCookieNoRunContainer))
  427. nw += 4
  428. binary.LittleEndian.PutUint32(buf[4:], uint32(len(ra.keys)))
  429. nw += 4
  430. }
  431. // descriptive header
  432. for i, key := range ra.keys {
  433. binary.LittleEndian.PutUint16(buf[nw:], key)
  434. nw += 2
  435. c := ra.containers[i]
  436. binary.LittleEndian.PutUint16(buf[nw:], uint16(c.getCardinality()-1))
  437. nw += 2
  438. }
  439. startOffset := int64(preambleSize + 4*len(ra.keys))
  440. if !hasRun || (len(ra.keys) >= noOffsetThreshold) {
  441. // offset header
  442. for _, c := range ra.containers {
  443. binary.LittleEndian.PutUint32(buf[nw:], uint32(startOffset))
  444. nw += 4
  445. switch rc := c.(type) {
  446. case *runContainer16:
  447. startOffset += 2 + int64(len(rc.iv))*4
  448. default:
  449. startOffset += int64(getSizeInBytesFromCardinality(c.getCardinality()))
  450. }
  451. }
  452. }
  453. written, err := w.Write(buf[:nw])
  454. if err != nil {
  455. return n, err
  456. }
  457. n += int64(written)
  458. for _, c := range ra.containers {
  459. written, err := c.writeTo(w)
  460. if err != nil {
  461. return n, err
  462. }
  463. n += int64(written)
  464. }
  465. return n, nil
  466. }
  467. //
  468. // spec: https://github.com/RoaringBitmap/RoaringFormatSpec
  469. //
  470. func (ra *roaringArray) toBytes() ([]byte, error) {
  471. var buf bytes.Buffer
  472. _, err := ra.writeTo(&buf)
  473. return buf.Bytes(), err
  474. }
  475. func (ra *roaringArray) readFrom(stream byteInput) (int64, error) {
  476. cookie, err := stream.readUInt32()
  477. if err != nil {
  478. return stream.getReadBytes(), fmt.Errorf("error in roaringArray.readFrom: could not read initial cookie: %s", err)
  479. }
  480. var size uint32
  481. var isRunBitmap []byte
  482. if cookie&0x0000FFFF == serialCookie {
  483. size = uint32(uint16(cookie>>16) + 1)
  484. // create is-run-container bitmap
  485. isRunBitmapSize := (int(size) + 7) / 8
  486. isRunBitmap, err = stream.next(isRunBitmapSize)
  487. if err != nil {
  488. return stream.getReadBytes(), fmt.Errorf("malformed bitmap, failed to read is-run bitmap, got: %s", err)
  489. }
  490. } else if cookie == serialCookieNoRunContainer {
  491. size, err = stream.readUInt32()
  492. if err != nil {
  493. return stream.getReadBytes(), fmt.Errorf("malformed bitmap, failed to read a bitmap size: %s", err)
  494. }
  495. } else {
  496. return stream.getReadBytes(), fmt.Errorf("error in roaringArray.readFrom: did not find expected serialCookie in header")
  497. }
  498. if size > (1 << 16) {
  499. return stream.getReadBytes(), fmt.Errorf("it is logically impossible to have more than (1<<16) containers")
  500. }
  501. // descriptive header
  502. buf, err := stream.next(2 * 2 * int(size))
  503. if err != nil {
  504. return stream.getReadBytes(), fmt.Errorf("failed to read descriptive header: %s", err)
  505. }
  506. keycard := byteSliceAsUint16Slice(buf)
  507. if isRunBitmap == nil || size >= noOffsetThreshold {
  508. if err := stream.skipBytes(int(size) * 4); err != nil {
  509. return stream.getReadBytes(), fmt.Errorf("failed to skip bytes: %s", err)
  510. }
  511. }
  512. // Allocate slices upfront as number of containers is known
  513. if cap(ra.containers) >= int(size) {
  514. ra.containers = ra.containers[:size]
  515. } else {
  516. ra.containers = make([]container, size)
  517. }
  518. if cap(ra.keys) >= int(size) {
  519. ra.keys = ra.keys[:size]
  520. } else {
  521. ra.keys = make([]uint16, size)
  522. }
  523. if cap(ra.needCopyOnWrite) >= int(size) {
  524. ra.needCopyOnWrite = ra.needCopyOnWrite[:size]
  525. } else {
  526. ra.needCopyOnWrite = make([]bool, size)
  527. }
  528. for i := uint32(0); i < size; i++ {
  529. key := keycard[2*i]
  530. card := int(keycard[2*i+1]) + 1
  531. ra.keys[i] = key
  532. ra.needCopyOnWrite[i] = true
  533. if isRunBitmap != nil && isRunBitmap[i/8]&(1<<(i%8)) != 0 {
  534. // run container
  535. nr, err := stream.readUInt16()
  536. if err != nil {
  537. return 0, fmt.Errorf("failed to read runtime container size: %s", err)
  538. }
  539. buf, err := stream.next(int(nr) * 4)
  540. if err != nil {
  541. return stream.getReadBytes(), fmt.Errorf("failed to read runtime container content: %s", err)
  542. }
  543. nb := runContainer16{
  544. iv: byteSliceAsInterval16Slice(buf),
  545. card: int64(card),
  546. }
  547. ra.containers[i] = &nb
  548. } else if card > arrayDefaultMaxSize {
  549. // bitmap container
  550. buf, err := stream.next(arrayDefaultMaxSize * 2)
  551. if err != nil {
  552. return stream.getReadBytes(), fmt.Errorf("failed to read bitmap container: %s", err)
  553. }
  554. nb := bitmapContainer{
  555. cardinality: card,
  556. bitmap: byteSliceAsUint64Slice(buf),
  557. }
  558. ra.containers[i] = &nb
  559. } else {
  560. // array container
  561. buf, err := stream.next(card * 2)
  562. if err != nil {
  563. return stream.getReadBytes(), fmt.Errorf("failed to read array container: %s", err)
  564. }
  565. nb := arrayContainer{
  566. byteSliceAsUint16Slice(buf),
  567. }
  568. ra.containers[i] = &nb
  569. }
  570. }
  571. return stream.getReadBytes(), nil
  572. }
  573. func (ra *roaringArray) hasRunCompression() bool {
  574. for _, c := range ra.containers {
  575. switch c.(type) {
  576. case *runContainer16:
  577. return true
  578. }
  579. }
  580. return false
  581. }
  582. func (ra *roaringArray) writeToMsgpack(stream io.Writer) error {
  583. ra.conserz = make([]containerSerz, len(ra.containers))
  584. for i, v := range ra.containers {
  585. switch cn := v.(type) {
  586. case *bitmapContainer:
  587. bts, err := cn.MarshalMsg(nil)
  588. if err != nil {
  589. return err
  590. }
  591. ra.conserz[i].t = bitmapContype
  592. ra.conserz[i].r = bts
  593. case *arrayContainer:
  594. bts, err := cn.MarshalMsg(nil)
  595. if err != nil {
  596. return err
  597. }
  598. ra.conserz[i].t = arrayContype
  599. ra.conserz[i].r = bts
  600. case *runContainer16:
  601. bts, err := cn.MarshalMsg(nil)
  602. if err != nil {
  603. return err
  604. }
  605. ra.conserz[i].t = run16Contype
  606. ra.conserz[i].r = bts
  607. default:
  608. panic(fmt.Errorf("Unrecognized container implementation: %T", cn))
  609. }
  610. }
  611. w := snappy.NewWriter(stream)
  612. err := msgp.Encode(w, ra)
  613. ra.conserz = nil
  614. return err
  615. }
  616. func (ra *roaringArray) readFromMsgpack(stream io.Reader) error {
  617. r := snappy.NewReader(stream)
  618. err := msgp.Decode(r, ra)
  619. if err != nil {
  620. return err
  621. }
  622. if len(ra.containers) != len(ra.keys) {
  623. ra.containers = make([]container, len(ra.keys))
  624. }
  625. for i, v := range ra.conserz {
  626. switch v.t {
  627. case bitmapContype:
  628. c := &bitmapContainer{}
  629. _, err = c.UnmarshalMsg(v.r)
  630. if err != nil {
  631. return err
  632. }
  633. ra.containers[i] = c
  634. case arrayContype:
  635. c := &arrayContainer{}
  636. _, err = c.UnmarshalMsg(v.r)
  637. if err != nil {
  638. return err
  639. }
  640. ra.containers[i] = c
  641. case run16Contype:
  642. c := &runContainer16{}
  643. _, err = c.UnmarshalMsg(v.r)
  644. if err != nil {
  645. return err
  646. }
  647. ra.containers[i] = c
  648. default:
  649. return fmt.Errorf("unrecognized contype serialization code: '%v'", v.t)
  650. }
  651. }
  652. ra.conserz = nil
  653. return nil
  654. }
// advanceUntil searches ra.keys, strictly after position pos, for the first
// key that is >= min and returns its index. When no such key exists it
// returns len(ra.keys); if pos+1 is already past the end, pos+1 is returned
// as-is.
//
// The search assumes ra.keys is sorted in ascending order. It first doubles a
// span starting at pos+1 until the key at its end is no longer < min (or the
// end of the slice is reached), then bisects inside the last span.
func (ra *roaringArray) advanceUntil(min uint16, pos int) int {
	lower := pos + 1

	// Fast path: nothing left to scan, or the very next key already qualifies.
	if lower >= len(ra.keys) || ra.keys[lower] >= min {
		return lower
	}

	spansize := 1

	// Grow the span exponentially while the key at its far end is still < min.
	for lower+spansize < len(ra.keys) && ra.keys[lower+spansize] < min {
		spansize *= 2
	}
	// upper is the far end of the span, clamped to the last valid index.
	var upper int
	if lower+spansize < len(ra.keys) {
		upper = lower + spansize
	} else {
		upper = len(ra.keys) - 1
	}

	if ra.keys[upper] == min {
		return upper
	}

	if ra.keys[upper] < min {
		// Even the last key examined is < min: the array has no key >= min,
		// so report the position one past the end.
		return len(ra.keys)
	}

	// we know that the next-smallest span was too small
	lower += (spansize >> 1)

	// Bisect between lower (key < min) and upper (key > min) until they are
	// adjacent; upper is then the first index whose key is >= min.
	mid := 0
	for lower+1 != upper {
		mid = (lower + upper) >> 1
		if ra.keys[mid] == min {
			return mid
		} else if ra.keys[mid] < min {
			lower = mid
		} else {
			upper = mid
		}
	}
	return upper
}
  697. func (ra *roaringArray) markAllAsNeedingCopyOnWrite() {
  698. for i := range ra.needCopyOnWrite {
  699. ra.needCopyOnWrite[i] = true
  700. }
  701. }
  702. func (ra *roaringArray) needsCopyOnWrite(i int) bool {
  703. return ra.needCopyOnWrite[i]
  704. }
  705. func (ra *roaringArray) setNeedsCopyOnWrite(i int) {
  706. ra.needCopyOnWrite[i] = true
  707. }