You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

roaringarray.go 21KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834
  1. package roaring
  2. import (
  3. "bytes"
  4. "encoding/binary"
  5. "fmt"
  6. "io"
  7. snappy "github.com/glycerine/go-unsnap-stream"
  8. "github.com/tinylib/msgp/msgp"
  9. )
  10. //go:generate msgp -unexported
// container is the set of operations roaringArray relies on from each of
// its element containers. The type switches in this file handle three
// concrete implementations: *arrayContainer, *bitmapContainer and
// *runContainer16.
//
// Several operations come in pairs where the "i"-prefixed variant (iand,
// ior, iandNot, inot, ...) is the in-place form of the other.
type container interface {
	addOffset(uint16) []container
	clone() container
	and(container) container
	andCardinality(container) int
	iand(container) container // i stands for inplace
	andNot(container) container
	iandNot(container) container // i stands for inplace
	getCardinality() int
	// rank returns the number of integers that are
	// smaller or equal to x. rank(infinity) would be getCardinality().
	rank(uint16) int
	iadd(x uint16) bool                    // inplace, returns true if x was new.
	iaddReturnMinimized(uint16) container  // may change return type to minimize storage.
	//addRange(start, final int) container // range is [firstOfRange,lastOfRange) (unused)
	iaddRange(start, endx int) container      // i stands for inplace, range is [firstOfRange,endx)
	iremove(x uint16) bool                    // inplace, returns true if x was present.
	iremoveReturnMinimized(uint16) container  // may change return type to minimize storage.
	not(start, final int) container           // range is [firstOfRange,lastOfRange)
	inot(firstOfRange, endx int) container    // i stands for inplace, range is [firstOfRange,endx)
	xor(r container) container
	getShortIterator() shortPeekable
	iterate(cb func(x uint16) bool) bool
	getReverseIterator() shortIterable
	getManyIterator() manyIterable
	contains(i uint16) bool
	maximum() uint16
	minimum() uint16
	// equals is now logical equals; it does not require the
	// same underlying container types, but compares across
	// any of the implementations.
	equals(r container) bool
	fillLeastSignificant16bits(array []uint32, i int, mask uint32)
	or(r container) container
	orCardinality(r container) int
	isFull() bool
	ior(r container) container   // i stands for inplace
	intersects(r container) bool // whether the two containers intersect
	lazyOR(r container) container
	lazyIOR(r container) container
	getSizeInBytes() int
	//removeRange(start, final int) container // range is [firstOfRange,lastOfRange) (unused)
	iremoveRange(start, final int) container // i stands for inplace, range is [firstOfRange,lastOfRange)
	selectInt(x uint16) int                  // selectInt returns the xth integer in the container
	// serializedSizeInBytes is summed over all containers by
	// roaringArray.serializedSizeInBytes.
	serializedSizeInBytes() int
	// writeTo is called once per container by roaringArray.writeTo, after
	// the header has been written.
	writeTo(io.Writer) (int, error)
	numberOfRuns() int
	// toEfficientContainer is used by roaringArray.runOptimize, which
	// replaces each container with the returned value.
	toEfficientContainer() container
	String() string
	containerType() contype
}
// contype is a one-byte tag naming a container implementation. It is the
// return type of container.containerType and the type of containerSerz.t,
// where it records which implementation a serialized payload belongs to.
type contype uint8

// Container type tags. The numeric values are assigned by iota, so the
// declaration order determines the codes written by writeToMsgpack;
// reordering these constants would change them.
// NOTE(review): run32Contype is not produced or accepted by the msgpack
// code visible in this file — confirm whether it is still in use.
const (
	bitmapContype contype = iota
	arrayContype
	run16Contype
	run32Contype
)
  69. // careful: range is [firstOfRange,lastOfRange]
  70. func rangeOfOnes(start, last int) container {
  71. if start > MaxUint16 {
  72. panic("rangeOfOnes called with start > MaxUint16")
  73. }
  74. if last > MaxUint16 {
  75. panic("rangeOfOnes called with last > MaxUint16")
  76. }
  77. if start < 0 {
  78. panic("rangeOfOnes called with start < 0")
  79. }
  80. if last < 0 {
  81. panic("rangeOfOnes called with last < 0")
  82. }
  83. return newRunContainer16Range(uint16(start), uint16(last))
  84. }
// roaringArray stores containers in three parallel slices indexed by
// position: keys[i] is the 16-bit key of containers[i], and
// needCopyOnWrite[i] records whether containers[i] must be cloned before it
// is modified (see getWritableContainerAtIndex).
//
// binarySearch and advanceUntil look keys up by bisection, so they rely on
// keys being in increasing order.
type roaringArray struct {
	keys            []uint16
	containers      []container `msg:"-"` // don't try to serialize directly.
	needCopyOnWrite []bool
	// copyOnWrite selects the behavior of clone and the append*Copy*
	// helpers: when set, containers are shared and flagged in
	// needCopyOnWrite instead of being cloned eagerly.
	copyOnWrite bool

	// conserz is used at serialization time
	// to serialize containers. Otherwise empty.
	conserz []containerSerz
}
// containerSerz facilitates serializing container (tricky to
// serialize because it is an interface) by providing a
// light wrapper with a type identifier.
//
// writeToMsgpack fills one containerSerz per container; readFromMsgpack
// switches on t to pick the concrete type that unmarshals r.
type containerSerz struct {
	t contype  `msg:"t"` // type
	r msgp.Raw `msg:"r"` // Raw msgpack of the actual container type
}
  101. func newRoaringArray() *roaringArray {
  102. return &roaringArray{}
  103. }
  104. // runOptimize compresses the element containers to minimize space consumed.
  105. // Q: how does this interact with copyOnWrite and needCopyOnWrite?
  106. // A: since we aren't changing the logical content, just the representation,
  107. // we don't bother to check the needCopyOnWrite bits. We replace
  108. // (possibly all) elements of ra.containers in-place with space
  109. // optimized versions.
  110. func (ra *roaringArray) runOptimize() {
  111. for i := range ra.containers {
  112. ra.containers[i] = ra.containers[i].toEfficientContainer()
  113. }
  114. }
  115. func (ra *roaringArray) appendContainer(key uint16, value container, mustCopyOnWrite bool) {
  116. ra.keys = append(ra.keys, key)
  117. ra.containers = append(ra.containers, value)
  118. ra.needCopyOnWrite = append(ra.needCopyOnWrite, mustCopyOnWrite)
  119. }
  120. func (ra *roaringArray) appendWithoutCopy(sa roaringArray, startingindex int) {
  121. mustCopyOnWrite := sa.needCopyOnWrite[startingindex]
  122. ra.appendContainer(sa.keys[startingindex], sa.containers[startingindex], mustCopyOnWrite)
  123. }
  124. func (ra *roaringArray) appendCopy(sa roaringArray, startingindex int) {
  125. // cow only if the two request it, or if we already have a lightweight copy
  126. copyonwrite := (ra.copyOnWrite && sa.copyOnWrite) || sa.needsCopyOnWrite(startingindex)
  127. if !copyonwrite {
  128. // since there is no copy-on-write, we need to clone the container (this is important)
  129. ra.appendContainer(sa.keys[startingindex], sa.containers[startingindex].clone(), copyonwrite)
  130. } else {
  131. ra.appendContainer(sa.keys[startingindex], sa.containers[startingindex], copyonwrite)
  132. if !sa.needsCopyOnWrite(startingindex) {
  133. sa.setNeedsCopyOnWrite(startingindex)
  134. }
  135. }
  136. }
  137. func (ra *roaringArray) appendWithoutCopyMany(sa roaringArray, startingindex, end int) {
  138. for i := startingindex; i < end; i++ {
  139. ra.appendWithoutCopy(sa, i)
  140. }
  141. }
  142. func (ra *roaringArray) appendCopyMany(sa roaringArray, startingindex, end int) {
  143. for i := startingindex; i < end; i++ {
  144. ra.appendCopy(sa, i)
  145. }
  146. }
  147. func (ra *roaringArray) appendCopiesUntil(sa roaringArray, stoppingKey uint16) {
  148. // cow only if the two request it, or if we already have a lightweight copy
  149. copyonwrite := ra.copyOnWrite && sa.copyOnWrite
  150. for i := 0; i < sa.size(); i++ {
  151. if sa.keys[i] >= stoppingKey {
  152. break
  153. }
  154. thiscopyonewrite := copyonwrite || sa.needsCopyOnWrite(i)
  155. if thiscopyonewrite {
  156. ra.appendContainer(sa.keys[i], sa.containers[i], thiscopyonewrite)
  157. if !sa.needsCopyOnWrite(i) {
  158. sa.setNeedsCopyOnWrite(i)
  159. }
  160. } else {
  161. // since there is no copy-on-write, we need to clone the container (this is important)
  162. ra.appendContainer(sa.keys[i], sa.containers[i].clone(), thiscopyonewrite)
  163. }
  164. }
  165. }
  166. func (ra *roaringArray) appendCopiesAfter(sa roaringArray, beforeStart uint16) {
  167. // cow only if the two request it, or if we already have a lightweight copy
  168. copyonwrite := ra.copyOnWrite && sa.copyOnWrite
  169. startLocation := sa.getIndex(beforeStart)
  170. if startLocation >= 0 {
  171. startLocation++
  172. } else {
  173. startLocation = -startLocation - 1
  174. }
  175. for i := startLocation; i < sa.size(); i++ {
  176. thiscopyonewrite := copyonwrite || sa.needsCopyOnWrite(i)
  177. if thiscopyonewrite {
  178. ra.appendContainer(sa.keys[i], sa.containers[i], thiscopyonewrite)
  179. if !sa.needsCopyOnWrite(i) {
  180. sa.setNeedsCopyOnWrite(i)
  181. }
  182. } else {
  183. // since there is no copy-on-write, we need to clone the container (this is important)
  184. ra.appendContainer(sa.keys[i], sa.containers[i].clone(), thiscopyonewrite)
  185. }
  186. }
  187. }
  188. func (ra *roaringArray) removeIndexRange(begin, end int) {
  189. if end <= begin {
  190. return
  191. }
  192. r := end - begin
  193. copy(ra.keys[begin:], ra.keys[end:])
  194. copy(ra.containers[begin:], ra.containers[end:])
  195. copy(ra.needCopyOnWrite[begin:], ra.needCopyOnWrite[end:])
  196. ra.resize(len(ra.keys) - r)
  197. }
  198. func (ra *roaringArray) resize(newsize int) {
  199. for k := newsize; k < len(ra.containers); k++ {
  200. ra.containers[k] = nil
  201. }
  202. ra.keys = ra.keys[:newsize]
  203. ra.containers = ra.containers[:newsize]
  204. ra.needCopyOnWrite = ra.needCopyOnWrite[:newsize]
  205. }
  206. func (ra *roaringArray) clear() {
  207. ra.resize(0)
  208. ra.copyOnWrite = false
  209. ra.conserz = nil
  210. }
  211. func (ra *roaringArray) clone() *roaringArray {
  212. sa := roaringArray{}
  213. sa.copyOnWrite = ra.copyOnWrite
  214. // this is where copyOnWrite is used.
  215. if ra.copyOnWrite {
  216. sa.keys = make([]uint16, len(ra.keys))
  217. copy(sa.keys, ra.keys)
  218. sa.containers = make([]container, len(ra.containers))
  219. copy(sa.containers, ra.containers)
  220. sa.needCopyOnWrite = make([]bool, len(ra.needCopyOnWrite))
  221. ra.markAllAsNeedingCopyOnWrite()
  222. sa.markAllAsNeedingCopyOnWrite()
  223. // sa.needCopyOnWrite is shared
  224. } else {
  225. // make a full copy
  226. sa.keys = make([]uint16, len(ra.keys))
  227. copy(sa.keys, ra.keys)
  228. sa.containers = make([]container, len(ra.containers))
  229. for i := range sa.containers {
  230. sa.containers[i] = ra.containers[i].clone()
  231. }
  232. sa.needCopyOnWrite = make([]bool, len(ra.needCopyOnWrite))
  233. }
  234. return &sa
  235. }
  236. // clone all containers which have needCopyOnWrite set to true
  237. // This can be used to make sure it is safe to munmap a []byte
  238. // that the roaring array may still have a reference to.
  239. func (ra *roaringArray) cloneCopyOnWriteContainers() {
  240. for i, needCopyOnWrite := range ra.needCopyOnWrite {
  241. if needCopyOnWrite {
  242. ra.containers[i] = ra.containers[i].clone()
  243. ra.needCopyOnWrite[i] = false
  244. }
  245. }
  246. }
  247. // unused function:
  248. //func (ra *roaringArray) containsKey(x uint16) bool {
  249. // return (ra.binarySearch(0, int64(len(ra.keys)), x) >= 0)
  250. //}
  251. func (ra *roaringArray) getContainer(x uint16) container {
  252. i := ra.binarySearch(0, int64(len(ra.keys)), x)
  253. if i < 0 {
  254. return nil
  255. }
  256. return ra.containers[i]
  257. }
// getContainerAtIndex returns the container stored at position i. It does
// not consult needCopyOnWrite; use getWritableContainerAtIndex when the
// container is going to be modified. An out-of-range i panics.
func (ra *roaringArray) getContainerAtIndex(i int) container {
	return ra.containers[i]
}
  261. func (ra *roaringArray) getFastContainerAtIndex(i int, needsWriteable bool) container {
  262. c := ra.getContainerAtIndex(i)
  263. switch t := c.(type) {
  264. case *arrayContainer:
  265. c = t.toBitmapContainer()
  266. case *runContainer16:
  267. if !t.isFull() {
  268. c = t.toBitmapContainer()
  269. }
  270. case *bitmapContainer:
  271. if needsWriteable && ra.needCopyOnWrite[i] {
  272. c = ra.containers[i].clone()
  273. }
  274. }
  275. return c
  276. }
  277. func (ra *roaringArray) getWritableContainerAtIndex(i int) container {
  278. if ra.needCopyOnWrite[i] {
  279. ra.containers[i] = ra.containers[i].clone()
  280. ra.needCopyOnWrite[i] = false
  281. }
  282. return ra.containers[i]
  283. }
  284. func (ra *roaringArray) getIndex(x uint16) int {
  285. // before the binary search, we optimize for frequent cases
  286. size := len(ra.keys)
  287. if (size == 0) || (ra.keys[size-1] == x) {
  288. return size - 1
  289. }
  290. return ra.binarySearch(0, int64(size), x)
  291. }
// getKeyAtIndex returns the key stored at position i. An out-of-range i
// panics.
func (ra *roaringArray) getKeyAtIndex(i int) uint16 {
	return ra.keys[i]
}
  295. func (ra *roaringArray) insertNewKeyValueAt(i int, key uint16, value container) {
  296. ra.keys = append(ra.keys, 0)
  297. ra.containers = append(ra.containers, nil)
  298. copy(ra.keys[i+1:], ra.keys[i:])
  299. copy(ra.containers[i+1:], ra.containers[i:])
  300. ra.keys[i] = key
  301. ra.containers[i] = value
  302. ra.needCopyOnWrite = append(ra.needCopyOnWrite, false)
  303. copy(ra.needCopyOnWrite[i+1:], ra.needCopyOnWrite[i:])
  304. ra.needCopyOnWrite[i] = false
  305. }
  306. func (ra *roaringArray) remove(key uint16) bool {
  307. i := ra.binarySearch(0, int64(len(ra.keys)), key)
  308. if i >= 0 { // if a new key
  309. ra.removeAtIndex(i)
  310. return true
  311. }
  312. return false
  313. }
  314. func (ra *roaringArray) removeAtIndex(i int) {
  315. copy(ra.keys[i:], ra.keys[i+1:])
  316. copy(ra.containers[i:], ra.containers[i+1:])
  317. copy(ra.needCopyOnWrite[i:], ra.needCopyOnWrite[i+1:])
  318. ra.resize(len(ra.keys) - 1)
  319. }
// setContainerAtIndex stores c at position i. The key and the
// needCopyOnWrite flag of that entry are left as they are.
func (ra *roaringArray) setContainerAtIndex(i int, c container) {
	ra.containers[i] = c
}
  323. func (ra *roaringArray) replaceKeyAndContainerAtIndex(i int, key uint16, c container, mustCopyOnWrite bool) {
  324. ra.keys[i] = key
  325. ra.containers[i] = c
  326. ra.needCopyOnWrite[i] = mustCopyOnWrite
  327. }
// size returns the number of entries in the array, measured as the length
// of the keys slice.
func (ra *roaringArray) size() int {
	return len(ra.keys)
}
  331. func (ra *roaringArray) binarySearch(begin, end int64, ikey uint16) int {
  332. low := begin
  333. high := end - 1
  334. for low+16 <= high {
  335. middleIndex := low + (high-low)/2 // avoid overflow
  336. middleValue := ra.keys[middleIndex]
  337. if middleValue < ikey {
  338. low = middleIndex + 1
  339. } else if middleValue > ikey {
  340. high = middleIndex - 1
  341. } else {
  342. return int(middleIndex)
  343. }
  344. }
  345. for ; low <= high; low++ {
  346. val := ra.keys[low]
  347. if val >= ikey {
  348. if val == ikey {
  349. return int(low)
  350. }
  351. break
  352. }
  353. }
  354. return -int(low + 1)
  355. }
  356. func (ra *roaringArray) equals(o interface{}) bool {
  357. srb, ok := o.(roaringArray)
  358. if ok {
  359. if srb.size() != ra.size() {
  360. return false
  361. }
  362. for i, k := range ra.keys {
  363. if k != srb.keys[i] {
  364. return false
  365. }
  366. }
  367. for i, c := range ra.containers {
  368. if !c.equals(srb.containers[i]) {
  369. return false
  370. }
  371. }
  372. return true
  373. }
  374. return false
  375. }
  376. func (ra *roaringArray) headerSize() uint64 {
  377. size := uint64(len(ra.keys))
  378. if ra.hasRunCompression() {
  379. if size < noOffsetThreshold { // for small bitmaps, we omit the offsets
  380. return 4 + (size+7)/8 + 4*size
  381. }
  382. return 4 + (size+7)/8 + 8*size // - 4 because we pack the size with the cookie
  383. }
  384. return 4 + 4 + 8*size
  385. }
  386. // should be dirt cheap
  387. func (ra *roaringArray) serializedSizeInBytes() uint64 {
  388. answer := ra.headerSize()
  389. for _, c := range ra.containers {
  390. answer += uint64(c.serializedSizeInBytes())
  391. }
  392. return answer
  393. }
  394. //
  395. // spec: https://github.com/RoaringBitmap/RoaringFormatSpec
  396. //
  397. func (ra *roaringArray) writeTo(w io.Writer) (n int64, err error) {
  398. hasRun := ra.hasRunCompression()
  399. isRunSizeInBytes := 0
  400. cookieSize := 8
  401. if hasRun {
  402. cookieSize = 4
  403. isRunSizeInBytes = (len(ra.keys) + 7) / 8
  404. }
  405. descriptiveHeaderSize := 4 * len(ra.keys)
  406. preambleSize := cookieSize + isRunSizeInBytes + descriptiveHeaderSize
  407. buf := make([]byte, preambleSize+4*len(ra.keys))
  408. nw := 0
  409. if hasRun {
  410. binary.LittleEndian.PutUint16(buf[0:], uint16(serialCookie))
  411. nw += 2
  412. binary.LittleEndian.PutUint16(buf[2:], uint16(len(ra.keys)-1))
  413. nw += 2
  414. // compute isRun bitmap without temporary allocation
  415. var runbitmapslice = buf[nw:nw+isRunSizeInBytes]
  416. for i, c := range ra.containers {
  417. switch c.(type) {
  418. case *runContainer16:
  419. runbitmapslice[i / 8] |= 1<<(uint(i)%8)
  420. }
  421. }
  422. nw += isRunSizeInBytes
  423. } else {
  424. binary.LittleEndian.PutUint32(buf[0:], uint32(serialCookieNoRunContainer))
  425. nw += 4
  426. binary.LittleEndian.PutUint32(buf[4:], uint32(len(ra.keys)))
  427. nw += 4
  428. }
  429. // descriptive header
  430. for i, key := range ra.keys {
  431. binary.LittleEndian.PutUint16(buf[nw:], key)
  432. nw += 2
  433. c := ra.containers[i]
  434. binary.LittleEndian.PutUint16(buf[nw:], uint16(c.getCardinality()-1))
  435. nw += 2
  436. }
  437. startOffset := int64(preambleSize + 4*len(ra.keys))
  438. if !hasRun || (len(ra.keys) >= noOffsetThreshold) {
  439. // offset header
  440. for _, c := range ra.containers {
  441. binary.LittleEndian.PutUint32(buf[nw:], uint32(startOffset))
  442. nw += 4
  443. switch rc := c.(type) {
  444. case *runContainer16:
  445. startOffset += 2 + int64(len(rc.iv))*4
  446. default:
  447. startOffset += int64(getSizeInBytesFromCardinality(c.getCardinality()))
  448. }
  449. }
  450. }
  451. written, err := w.Write(buf[:nw])
  452. if err != nil {
  453. return n, err
  454. }
  455. n += int64(written)
  456. for _, c := range ra.containers {
  457. written, err := c.writeTo(w)
  458. if err != nil {
  459. return n, err
  460. }
  461. n += int64(written)
  462. }
  463. return n, nil
  464. }
  465. //
  466. // spec: https://github.com/RoaringBitmap/RoaringFormatSpec
  467. //
  468. func (ra *roaringArray) toBytes() ([]byte, error) {
  469. var buf bytes.Buffer
  470. _, err := ra.writeTo(&buf)
  471. return buf.Bytes(), err
  472. }
  473. func (ra *roaringArray) readFrom(stream byteInput) (int64, error) {
  474. cookie, err := stream.readUInt32()
  475. if err != nil {
  476. return stream.getReadBytes(), fmt.Errorf("error in roaringArray.readFrom: could not read initial cookie: %s", err)
  477. }
  478. var size uint32
  479. var isRunBitmap []byte
  480. if cookie&0x0000FFFF == serialCookie {
  481. size = uint32(uint16(cookie>>16) + 1)
  482. // create is-run-container bitmap
  483. isRunBitmapSize := (int(size) + 7) / 8
  484. isRunBitmap, err = stream.next(isRunBitmapSize)
  485. if err != nil {
  486. return stream.getReadBytes(), fmt.Errorf("malformed bitmap, failed to read is-run bitmap, got: %s", err)
  487. }
  488. } else if cookie == serialCookieNoRunContainer {
  489. size, err = stream.readUInt32()
  490. if err != nil {
  491. return stream.getReadBytes(), fmt.Errorf("malformed bitmap, failed to read a bitmap size: %s", err)
  492. }
  493. } else {
  494. return stream.getReadBytes(), fmt.Errorf("error in roaringArray.readFrom: did not find expected serialCookie in header")
  495. }
  496. if size > (1 << 16) {
  497. return stream.getReadBytes(), fmt.Errorf("it is logically impossible to have more than (1<<16) containers")
  498. }
  499. // descriptive header
  500. buf, err := stream.next(2 * 2 * int(size))
  501. if err != nil {
  502. return stream.getReadBytes(), fmt.Errorf("failed to read descriptive header: %s", err)
  503. }
  504. keycard := byteSliceAsUint16Slice(buf)
  505. if isRunBitmap == nil || size >= noOffsetThreshold {
  506. if err := stream.skipBytes(int(size) * 4); err != nil {
  507. return stream.getReadBytes(), fmt.Errorf("failed to skip bytes: %s", err)
  508. }
  509. }
  510. // Allocate slices upfront as number of containers is known
  511. if cap(ra.containers) >= int(size) {
  512. ra.containers = ra.containers[:size]
  513. } else {
  514. ra.containers = make([]container, size)
  515. }
  516. if cap(ra.keys) >= int(size) {
  517. ra.keys = ra.keys[:size]
  518. } else {
  519. ra.keys = make([]uint16, size)
  520. }
  521. if cap(ra.needCopyOnWrite) >= int(size) {
  522. ra.needCopyOnWrite = ra.needCopyOnWrite[:size]
  523. } else {
  524. ra.needCopyOnWrite = make([]bool, size)
  525. }
  526. for i := uint32(0); i < size; i++ {
  527. key := keycard[2*i]
  528. card := int(keycard[2*i+1]) + 1
  529. ra.keys[i] = key
  530. ra.needCopyOnWrite[i] = true
  531. if isRunBitmap != nil && isRunBitmap[i/8]&(1<<(i%8)) != 0 {
  532. // run container
  533. nr, err := stream.readUInt16()
  534. if err != nil {
  535. return 0, fmt.Errorf("failed to read runtime container size: %s", err)
  536. }
  537. buf, err := stream.next(int(nr) * 4)
  538. if err != nil {
  539. return stream.getReadBytes(), fmt.Errorf("failed to read runtime container content: %s", err)
  540. }
  541. nb := runContainer16{
  542. iv: byteSliceAsInterval16Slice(buf),
  543. card: int64(card),
  544. }
  545. ra.containers[i] = &nb
  546. } else if card > arrayDefaultMaxSize {
  547. // bitmap container
  548. buf, err := stream.next(arrayDefaultMaxSize * 2)
  549. if err != nil {
  550. return stream.getReadBytes(), fmt.Errorf("failed to read bitmap container: %s", err)
  551. }
  552. nb := bitmapContainer{
  553. cardinality: card,
  554. bitmap: byteSliceAsUint64Slice(buf),
  555. }
  556. ra.containers[i] = &nb
  557. } else {
  558. // array container
  559. buf, err := stream.next(card * 2)
  560. if err != nil {
  561. return stream.getReadBytes(), fmt.Errorf("failed to read array container: %s", err)
  562. }
  563. nb := arrayContainer{
  564. byteSliceAsUint16Slice(buf),
  565. }
  566. ra.containers[i] = &nb
  567. }
  568. }
  569. return stream.getReadBytes(), nil
  570. }
  571. func (ra *roaringArray) hasRunCompression() bool {
  572. for _, c := range ra.containers {
  573. switch c.(type) {
  574. case *runContainer16:
  575. return true
  576. }
  577. }
  578. return false
  579. }
  580. func (ra *roaringArray) writeToMsgpack(stream io.Writer) error {
  581. ra.conserz = make([]containerSerz, len(ra.containers))
  582. for i, v := range ra.containers {
  583. switch cn := v.(type) {
  584. case *bitmapContainer:
  585. bts, err := cn.MarshalMsg(nil)
  586. if err != nil {
  587. return err
  588. }
  589. ra.conserz[i].t = bitmapContype
  590. ra.conserz[i].r = bts
  591. case *arrayContainer:
  592. bts, err := cn.MarshalMsg(nil)
  593. if err != nil {
  594. return err
  595. }
  596. ra.conserz[i].t = arrayContype
  597. ra.conserz[i].r = bts
  598. case *runContainer16:
  599. bts, err := cn.MarshalMsg(nil)
  600. if err != nil {
  601. return err
  602. }
  603. ra.conserz[i].t = run16Contype
  604. ra.conserz[i].r = bts
  605. default:
  606. panic(fmt.Errorf("Unrecognized container implementation: %T", cn))
  607. }
  608. }
  609. w := snappy.NewWriter(stream)
  610. err := msgp.Encode(w, ra)
  611. ra.conserz = nil
  612. return err
  613. }
  614. func (ra *roaringArray) readFromMsgpack(stream io.Reader) error {
  615. r := snappy.NewReader(stream)
  616. err := msgp.Decode(r, ra)
  617. if err != nil {
  618. return err
  619. }
  620. if len(ra.containers) != len(ra.keys) {
  621. ra.containers = make([]container, len(ra.keys))
  622. }
  623. for i, v := range ra.conserz {
  624. switch v.t {
  625. case bitmapContype:
  626. c := &bitmapContainer{}
  627. _, err = c.UnmarshalMsg(v.r)
  628. if err != nil {
  629. return err
  630. }
  631. ra.containers[i] = c
  632. case arrayContype:
  633. c := &arrayContainer{}
  634. _, err = c.UnmarshalMsg(v.r)
  635. if err != nil {
  636. return err
  637. }
  638. ra.containers[i] = c
  639. case run16Contype:
  640. c := &runContainer16{}
  641. _, err = c.UnmarshalMsg(v.r)
  642. if err != nil {
  643. return err
  644. }
  645. ra.containers[i] = c
  646. default:
  647. return fmt.Errorf("unrecognized contype serialization code: '%v'", v.t)
  648. }
  649. }
  650. ra.conserz = nil
  651. return nil
  652. }
  653. func (ra *roaringArray) advanceUntil(min uint16, pos int) int {
  654. lower := pos + 1
  655. if lower >= len(ra.keys) || ra.keys[lower] >= min {
  656. return lower
  657. }
  658. spansize := 1
  659. for lower+spansize < len(ra.keys) && ra.keys[lower+spansize] < min {
  660. spansize *= 2
  661. }
  662. var upper int
  663. if lower+spansize < len(ra.keys) {
  664. upper = lower + spansize
  665. } else {
  666. upper = len(ra.keys) - 1
  667. }
  668. if ra.keys[upper] == min {
  669. return upper
  670. }
  671. if ra.keys[upper] < min {
  672. // means
  673. // array
  674. // has no
  675. // item
  676. // >= min
  677. // pos = array.length;
  678. return len(ra.keys)
  679. }
  680. // we know that the next-smallest span was too small
  681. lower += (spansize >> 1)
  682. mid := 0
  683. for lower+1 != upper {
  684. mid = (lower + upper) >> 1
  685. if ra.keys[mid] == min {
  686. return mid
  687. } else if ra.keys[mid] < min {
  688. lower = mid
  689. } else {
  690. upper = mid
  691. }
  692. }
  693. return upper
  694. }
  695. func (ra *roaringArray) markAllAsNeedingCopyOnWrite() {
  696. for i := range ra.needCopyOnWrite {
  697. ra.needCopyOnWrite[i] = true
  698. }
  699. }
// needsCopyOnWrite reports whether the entry at position i is flagged
// needCopyOnWrite, i.e. whether its container must be cloned before being
// modified. An out-of-range i panics.
func (ra *roaringArray) needsCopyOnWrite(i int) bool {
	return ra.needCopyOnWrite[i]
}
// setNeedsCopyOnWrite flags the entry at position i as needCopyOnWrite.
// The write goes through the slice, so it is also visible through any copy
// of the roaringArray struct that shares the same needCopyOnWrite slice.
func (ra *roaringArray) setNeedsCopyOnWrite(i int) {
	ra.needCopyOnWrite[i] = true
}