You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

roaring.go 46KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557
  1. // Package roaring is an implementation of Roaring Bitmaps in Go.
  2. // They provide fast compressed bitmap data structures (also called bitset).
  3. // They are ideally suited to represent sets of integers over
  4. // relatively small ranges.
  5. // See http://roaringbitmap.org for details.
  6. package roaring
  7. import (
  8. "bytes"
  9. "encoding/base64"
  10. "fmt"
  11. "io"
  12. "strconv"
  13. "sync"
  14. )
  15. // Bitmap represents a compressed bitmap where you can add integers.
  16. type Bitmap struct {
  17. highlowcontainer roaringArray
  18. }
  19. // ToBase64 serializes a bitmap as Base64
  20. func (rb *Bitmap) ToBase64() (string, error) {
  21. buf := new(bytes.Buffer)
  22. _, err := rb.WriteTo(buf)
  23. return base64.StdEncoding.EncodeToString(buf.Bytes()), err
  24. }
  25. // FromBase64 deserializes a bitmap from Base64
  26. func (rb *Bitmap) FromBase64(str string) (int64, error) {
  27. data, err := base64.StdEncoding.DecodeString(str)
  28. if err != nil {
  29. return 0, err
  30. }
  31. buf := bytes.NewBuffer(data)
  32. return rb.ReadFrom(buf)
  33. }
  34. // WriteTo writes a serialized version of this bitmap to stream.
  35. // The format is compatible with other RoaringBitmap
  36. // implementations (Java, C) and is documented here:
  37. // https://github.com/RoaringBitmap/RoaringFormatSpec
  38. func (rb *Bitmap) WriteTo(stream io.Writer) (int64, error) {
  39. return rb.highlowcontainer.writeTo(stream)
  40. }
  41. // ToBytes returns an array of bytes corresponding to what is written
  42. // when calling WriteTo
  43. func (rb *Bitmap) ToBytes() ([]byte, error) {
  44. return rb.highlowcontainer.toBytes()
  45. }
  46. // Deprecated: WriteToMsgpack writes a msgpack2/snappy-streaming compressed serialized
  47. // version of this bitmap to stream. The format is not
  48. // compatible with the WriteTo() format, and is
  49. // experimental: it may produce smaller on disk
  50. // footprint and/or be faster to read, depending
  51. // on your content. Currently only the Go roaring
  52. // implementation supports this format.
  53. func (rb *Bitmap) WriteToMsgpack(stream io.Writer) (int64, error) {
  54. return 0, rb.highlowcontainer.writeToMsgpack(stream)
  55. }
  56. // ReadFrom reads a serialized version of this bitmap from stream.
  57. // The format is compatible with other RoaringBitmap
  58. // implementations (Java, C) and is documented here:
  59. // https://github.com/RoaringBitmap/RoaringFormatSpec
  60. func (rb *Bitmap) ReadFrom(reader io.Reader) (p int64, err error) {
  61. stream := byteInputAdapterPool.Get().(*byteInputAdapter)
  62. stream.reset(reader)
  63. p, err = rb.highlowcontainer.readFrom(stream)
  64. byteInputAdapterPool.Put(stream)
  65. return
  66. }
  67. // FromBuffer creates a bitmap from its serialized version stored in buffer
  68. //
  69. // The format specification is available here:
  70. // https://github.com/RoaringBitmap/RoaringFormatSpec
  71. //
  72. // The provided byte array (buf) is expected to be a constant.
  73. // The function makes the best effort attempt not to copy data.
  74. // You should take care not to modify buff as it will
  75. // likely result in unexpected program behavior.
  76. //
  77. // Resulting bitmaps are effectively immutable in the following sense:
  78. // a copy-on-write marker is used so that when you modify the resulting
  79. // bitmap, copies of selected data (containers) are made.
  80. // You should *not* change the copy-on-write status of the resulting
  81. // bitmaps (SetCopyOnWrite).
  82. //
  83. // If buf becomes unavailable, then a bitmap created with
  84. // FromBuffer would be effectively broken. Furthermore, any
  85. // bitmap derived from this bitmap (e.g., via Or, And) might
  86. // also be broken. Thus, before making buf unavailable, you should
  87. // call CloneCopyOnWriteContainers on all such bitmaps.
  88. //
  89. func (rb *Bitmap) FromBuffer(buf []byte) (p int64, err error) {
  90. stream := byteBufferPool.Get().(*byteBuffer)
  91. stream.reset(buf)
  92. p, err = rb.highlowcontainer.readFrom(stream)
  93. byteBufferPool.Put(stream)
  94. return
  95. }
  96. var (
  97. byteBufferPool = sync.Pool{
  98. New: func() interface{} {
  99. return &byteBuffer{}
  100. },
  101. }
  102. byteInputAdapterPool = sync.Pool{
  103. New: func() interface{} {
  104. return &byteInputAdapter{}
  105. },
  106. }
  107. )
  108. // RunOptimize attempts to further compress the runs of consecutive values found in the bitmap
  109. func (rb *Bitmap) RunOptimize() {
  110. rb.highlowcontainer.runOptimize()
  111. }
  112. // HasRunCompression returns true if the bitmap benefits from run compression
  113. func (rb *Bitmap) HasRunCompression() bool {
  114. return rb.highlowcontainer.hasRunCompression()
  115. }
  116. // Deprecated: ReadFromMsgpack reads a msgpack2/snappy-streaming serialized
  117. // version of this bitmap from stream. The format is
  118. // expected is that written by the WriteToMsgpack()
  119. // call; see additional notes there.
  120. func (rb *Bitmap) ReadFromMsgpack(stream io.Reader) (int64, error) {
  121. return 0, rb.highlowcontainer.readFromMsgpack(stream)
  122. }
  123. // MarshalBinary implements the encoding.BinaryMarshaler interface for the bitmap
  124. // (same as ToBytes)
  125. func (rb *Bitmap) MarshalBinary() ([]byte, error) {
  126. return rb.ToBytes()
  127. }
  128. // UnmarshalBinary implements the encoding.BinaryUnmarshaler interface for the bitmap
  129. func (rb *Bitmap) UnmarshalBinary(data []byte) error {
  130. r := bytes.NewReader(data)
  131. _, err := rb.ReadFrom(r)
  132. return err
  133. }
  134. // NewBitmap creates a new empty Bitmap (see also New)
  135. func NewBitmap() *Bitmap {
  136. return &Bitmap{}
  137. }
  138. // New creates a new empty Bitmap (same as NewBitmap)
  139. func New() *Bitmap {
  140. return &Bitmap{}
  141. }
  142. // Clear resets the Bitmap to be logically empty, but may retain
  143. // some memory allocations that may speed up future operations
  144. func (rb *Bitmap) Clear() {
  145. rb.highlowcontainer.clear()
  146. }
  147. // ToArray creates a new slice containing all of the integers stored in the Bitmap in sorted order
  148. func (rb *Bitmap) ToArray() []uint32 {
  149. array := make([]uint32, rb.GetCardinality())
  150. pos := 0
  151. pos2 := 0
  152. for pos < rb.highlowcontainer.size() {
  153. hs := uint32(rb.highlowcontainer.getKeyAtIndex(pos)) << 16
  154. c := rb.highlowcontainer.getContainerAtIndex(pos)
  155. pos++
  156. c.fillLeastSignificant16bits(array, pos2, hs)
  157. pos2 += c.getCardinality()
  158. }
  159. return array
  160. }
  161. // GetSizeInBytes estimates the memory usage of the Bitmap. Note that this
  162. // might differ slightly from the amount of bytes required for persistent storage
  163. func (rb *Bitmap) GetSizeInBytes() uint64 {
  164. size := uint64(8)
  165. for _, c := range rb.highlowcontainer.containers {
  166. size += uint64(2) + uint64(c.getSizeInBytes())
  167. }
  168. return size
  169. }
  170. // GetSerializedSizeInBytes computes the serialized size in bytes
  171. // of the Bitmap. It should correspond to the
  172. // number of bytes written when invoking WriteTo. You can expect
  173. // that this function is much cheaper computationally than WriteTo.
  174. func (rb *Bitmap) GetSerializedSizeInBytes() uint64 {
  175. return rb.highlowcontainer.serializedSizeInBytes()
  176. }
  177. // BoundSerializedSizeInBytes returns an upper bound on the serialized size in bytes
  178. // assuming that one wants to store "cardinality" integers in [0, universe_size)
  179. func BoundSerializedSizeInBytes(cardinality uint64, universeSize uint64) uint64 {
  180. contnbr := (universeSize + uint64(65535)) / uint64(65536)
  181. if contnbr > cardinality {
  182. contnbr = cardinality
  183. // we can't have more containers than we have values
  184. }
  185. headermax := 8*contnbr + 4
  186. if 4 > (contnbr+7)/8 {
  187. headermax += 4
  188. } else {
  189. headermax += (contnbr + 7) / 8
  190. }
  191. valsarray := uint64(arrayContainerSizeInBytes(int(cardinality)))
  192. valsbitmap := contnbr * uint64(bitmapContainerSizeInBytes())
  193. valsbest := valsarray
  194. if valsbest > valsbitmap {
  195. valsbest = valsbitmap
  196. }
  197. return valsbest + headermax
  198. }
  199. // IntIterable allows you to iterate over the values in a Bitmap
  200. type IntIterable interface {
  201. HasNext() bool
  202. Next() uint32
  203. }
  204. // IntPeekable allows you to look at the next value without advancing and
  205. // advance as long as the next value is smaller than minval
  206. type IntPeekable interface {
  207. IntIterable
  208. // PeekNext peeks the next value without advancing the iterator
  209. PeekNext() uint32
  210. // AdvanceIfNeeded advances as long as the next value is smaller than minval
  211. AdvanceIfNeeded(minval uint32)
  212. }
  213. type intIterator struct {
  214. pos int
  215. hs uint32
  216. iter shortPeekable
  217. highlowcontainer *roaringArray
  218. }
  219. // HasNext returns true if there are more integers to iterate over
  220. func (ii *intIterator) HasNext() bool {
  221. return ii.pos < ii.highlowcontainer.size()
  222. }
  223. func (ii *intIterator) init() {
  224. if ii.highlowcontainer.size() > ii.pos {
  225. ii.iter = ii.highlowcontainer.getContainerAtIndex(ii.pos).getShortIterator()
  226. ii.hs = uint32(ii.highlowcontainer.getKeyAtIndex(ii.pos)) << 16
  227. }
  228. }
  229. // Next returns the next integer
  230. func (ii *intIterator) Next() uint32 {
  231. x := uint32(ii.iter.next()) | ii.hs
  232. if !ii.iter.hasNext() {
  233. ii.pos = ii.pos + 1
  234. ii.init()
  235. }
  236. return x
  237. }
  238. // PeekNext peeks the next value without advancing the iterator
  239. func (ii *intIterator) PeekNext() uint32 {
  240. return uint32(ii.iter.peekNext()&maxLowBit) | ii.hs
  241. }
  242. // AdvanceIfNeeded advances as long as the next value is smaller than minval
  243. func (ii *intIterator) AdvanceIfNeeded(minval uint32) {
  244. to := minval >> 16
  245. for ii.HasNext() && (ii.hs>>16) < to {
  246. ii.pos++
  247. ii.init()
  248. }
  249. if ii.HasNext() && (ii.hs>>16) == to {
  250. ii.iter.advanceIfNeeded(lowbits(minval))
  251. if !ii.iter.hasNext() {
  252. ii.pos++
  253. ii.init()
  254. }
  255. }
  256. }
  257. func newIntIterator(a *Bitmap) *intIterator {
  258. p := new(intIterator)
  259. p.pos = 0
  260. p.highlowcontainer = &a.highlowcontainer
  261. p.init()
  262. return p
  263. }
  264. type intReverseIterator struct {
  265. pos int
  266. hs uint32
  267. iter shortIterable
  268. highlowcontainer *roaringArray
  269. }
  270. // HasNext returns true if there are more integers to iterate over
  271. func (ii *intReverseIterator) HasNext() bool {
  272. return ii.pos >= 0
  273. }
  274. func (ii *intReverseIterator) init() {
  275. if ii.pos >= 0 {
  276. ii.iter = ii.highlowcontainer.getContainerAtIndex(ii.pos).getReverseIterator()
  277. ii.hs = uint32(ii.highlowcontainer.getKeyAtIndex(ii.pos)) << 16
  278. } else {
  279. ii.iter = nil
  280. }
  281. }
  282. // Next returns the next integer
  283. func (ii *intReverseIterator) Next() uint32 {
  284. x := uint32(ii.iter.next()) | ii.hs
  285. if !ii.iter.hasNext() {
  286. ii.pos = ii.pos - 1
  287. ii.init()
  288. }
  289. return x
  290. }
  291. func newIntReverseIterator(a *Bitmap) *intReverseIterator {
  292. p := new(intReverseIterator)
  293. p.highlowcontainer = &a.highlowcontainer
  294. p.pos = a.highlowcontainer.size() - 1
  295. p.init()
  296. return p
  297. }
  298. // ManyIntIterable allows you to iterate over the values in a Bitmap
  299. type ManyIntIterable interface {
  300. // pass in a buffer to fill up with values, returns how many values were returned
  301. NextMany([]uint32) int
  302. }
  303. type manyIntIterator struct {
  304. pos int
  305. hs uint32
  306. iter manyIterable
  307. highlowcontainer *roaringArray
  308. }
  309. func (ii *manyIntIterator) init() {
  310. if ii.highlowcontainer.size() > ii.pos {
  311. ii.iter = ii.highlowcontainer.getContainerAtIndex(ii.pos).getManyIterator()
  312. ii.hs = uint32(ii.highlowcontainer.getKeyAtIndex(ii.pos)) << 16
  313. } else {
  314. ii.iter = nil
  315. }
  316. }
  317. func (ii *manyIntIterator) NextMany(buf []uint32) int {
  318. n := 0
  319. for n < len(buf) {
  320. if ii.iter == nil {
  321. break
  322. }
  323. moreN := ii.iter.nextMany(ii.hs, buf[n:])
  324. n += moreN
  325. if moreN == 0 {
  326. ii.pos = ii.pos + 1
  327. ii.init()
  328. }
  329. }
  330. return n
  331. }
  332. func newManyIntIterator(a *Bitmap) *manyIntIterator {
  333. p := new(manyIntIterator)
  334. p.pos = 0
  335. p.highlowcontainer = &a.highlowcontainer
  336. p.init()
  337. return p
  338. }
  339. // String creates a string representation of the Bitmap
  340. func (rb *Bitmap) String() string {
  341. // inspired by https://github.com/fzandona/goroar/
  342. var buffer bytes.Buffer
  343. start := []byte("{")
  344. buffer.Write(start)
  345. i := rb.Iterator()
  346. counter := 0
  347. if i.HasNext() {
  348. counter = counter + 1
  349. buffer.WriteString(strconv.FormatInt(int64(i.Next()), 10))
  350. }
  351. for i.HasNext() {
  352. buffer.WriteString(",")
  353. counter = counter + 1
  354. // to avoid exhausting the memory
  355. if counter > 0x40000 {
  356. buffer.WriteString("...")
  357. break
  358. }
  359. buffer.WriteString(strconv.FormatInt(int64(i.Next()), 10))
  360. }
  361. buffer.WriteString("}")
  362. return buffer.String()
  363. }
  364. // Iterate iterates over the bitmap, calling the given callback with each value in the bitmap. If the callback returns
  365. // false, the iteration is halted.
  366. // The iteration results are undefined if the bitmap is modified (e.g., with Add or Remove).
  367. // There is no guarantee as to what order the values will be iterated
  368. func (rb *Bitmap) Iterate(cb func(x uint32) bool) {
  369. for i := 0; i < rb.highlowcontainer.size(); i++ {
  370. hs := uint32(rb.highlowcontainer.getKeyAtIndex(i)) << 16
  371. c := rb.highlowcontainer.getContainerAtIndex(i)
  372. var shouldContinue bool
  373. // This is hacky but it avoids allocations from invoking an interface method with a closure
  374. switch t := c.(type) {
  375. case *arrayContainer:
  376. shouldContinue = t.iterate(func(x uint16) bool {
  377. return cb(uint32(x) | hs)
  378. })
  379. case *runContainer16:
  380. shouldContinue = t.iterate(func(x uint16) bool {
  381. return cb(uint32(x) | hs)
  382. })
  383. case *bitmapContainer:
  384. shouldContinue = t.iterate(func(x uint16) bool {
  385. return cb(uint32(x) | hs)
  386. })
  387. }
  388. if !shouldContinue {
  389. break
  390. }
  391. }
  392. }
  393. // Iterator creates a new IntPeekable to iterate over the integers contained in the bitmap, in sorted order;
  394. // the iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove).
  395. func (rb *Bitmap) Iterator() IntPeekable {
  396. return newIntIterator(rb)
  397. }
  398. // ReverseIterator creates a new IntIterable to iterate over the integers contained in the bitmap, in sorted order;
  399. // the iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove).
  400. func (rb *Bitmap) ReverseIterator() IntIterable {
  401. return newIntReverseIterator(rb)
  402. }
  403. // ManyIterator creates a new ManyIntIterable to iterate over the integers contained in the bitmap, in sorted order;
  404. // the iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove).
  405. func (rb *Bitmap) ManyIterator() ManyIntIterable {
  406. return newManyIntIterator(rb)
  407. }
  408. // Clone creates a copy of the Bitmap
  409. func (rb *Bitmap) Clone() *Bitmap {
  410. ptr := new(Bitmap)
  411. ptr.highlowcontainer = *rb.highlowcontainer.clone()
  412. return ptr
  413. }
  414. // Minimum get the smallest value stored in this roaring bitmap, assumes that it is not empty
  415. func (rb *Bitmap) Minimum() uint32 {
  416. return uint32(rb.highlowcontainer.containers[0].minimum()) | (uint32(rb.highlowcontainer.keys[0]) << 16)
  417. }
  418. // Maximum get the largest value stored in this roaring bitmap, assumes that it is not empty
  419. func (rb *Bitmap) Maximum() uint32 {
  420. lastindex := len(rb.highlowcontainer.containers) - 1
  421. return uint32(rb.highlowcontainer.containers[lastindex].maximum()) | (uint32(rb.highlowcontainer.keys[lastindex]) << 16)
  422. }
  423. // Contains returns true if the integer is contained in the bitmap
  424. func (rb *Bitmap) Contains(x uint32) bool {
  425. hb := highbits(x)
  426. c := rb.highlowcontainer.getContainer(hb)
  427. return c != nil && c.contains(lowbits(x))
  428. }
  429. // ContainsInt returns true if the integer is contained in the bitmap (this is a convenience method, the parameter is casted to uint32 and Contains is called)
  430. func (rb *Bitmap) ContainsInt(x int) bool {
  431. return rb.Contains(uint32(x))
  432. }
  433. // Equals returns true if the two bitmaps contain the same integers
  434. func (rb *Bitmap) Equals(o interface{}) bool {
  435. srb, ok := o.(*Bitmap)
  436. if ok {
  437. return srb.highlowcontainer.equals(rb.highlowcontainer)
  438. }
  439. return false
  440. }
  441. // AddOffset adds the value 'offset' to each and every value in a bitmap, generating a new bitmap in the process
  442. func AddOffset(x *Bitmap, offset uint32) (answer *Bitmap) {
  443. return AddOffset64(x, int64(offset))
  444. }
  445. // AddOffset64 adds the value 'offset' to each and every value in a bitmap, generating a new bitmap in the process
  446. // If offset + element is outside of the range [0,2^32), that the element will be dropped
  447. func AddOffset64(x *Bitmap, offset int64) (answer *Bitmap) {
  448. // we need "offset" to be a long because we want to support values
  449. // between -0xFFFFFFFF up to +-0xFFFFFFFF
  450. var containerOffset64 int64
  451. if offset < 0 {
  452. containerOffset64 = (offset - (1 << 16) + 1) / (1 << 16)
  453. } else {
  454. containerOffset64 = offset >> 16
  455. }
  456. if containerOffset64 >= (1<<16) || containerOffset64 <= -(1<<16) {
  457. return New()
  458. }
  459. containerOffset := int32(containerOffset64)
  460. inOffset := (uint16)(offset - containerOffset64*(1<<16))
  461. if inOffset == 0 {
  462. answer = x.Clone()
  463. for pos := 0; pos < answer.highlowcontainer.size(); pos++ {
  464. key := int32(answer.highlowcontainer.getKeyAtIndex(pos))
  465. key += containerOffset
  466. if key >= 0 && key <= MaxUint16 {
  467. answer.highlowcontainer.keys[pos] = uint16(key)
  468. }
  469. }
  470. } else {
  471. answer = New()
  472. for pos := 0; pos < x.highlowcontainer.size(); pos++ {
  473. key := int32(x.highlowcontainer.getKeyAtIndex(pos))
  474. key += containerOffset
  475. c := x.highlowcontainer.getContainerAtIndex(pos)
  476. offsetted := c.addOffset(inOffset)
  477. if offsetted[0].getCardinality() > 0 && (key >= 0 && key <= MaxUint16) {
  478. curSize := answer.highlowcontainer.size()
  479. lastkey := int32(0)
  480. if curSize > 0 {
  481. lastkey = int32(answer.highlowcontainer.getKeyAtIndex(curSize - 1))
  482. }
  483. if curSize > 0 && lastkey == key {
  484. prev := answer.highlowcontainer.getContainerAtIndex(curSize - 1)
  485. orrseult := prev.ior(offsetted[0])
  486. answer.highlowcontainer.setContainerAtIndex(curSize-1, orrseult)
  487. } else {
  488. answer.highlowcontainer.appendContainer(uint16(key), offsetted[0], false)
  489. }
  490. }
  491. if offsetted[1].getCardinality() > 0 && ((key+1) >= 0 && (key+1) <= MaxUint16) {
  492. answer.highlowcontainer.appendContainer(uint16(key+1), offsetted[1], false)
  493. }
  494. }
  495. }
  496. return answer
  497. }
  498. // Add the integer x to the bitmap
  499. func (rb *Bitmap) Add(x uint32) {
  500. hb := highbits(x)
  501. ra := &rb.highlowcontainer
  502. i := ra.getIndex(hb)
  503. if i >= 0 {
  504. var c container
  505. c = ra.getWritableContainerAtIndex(i).iaddReturnMinimized(lowbits(x))
  506. rb.highlowcontainer.setContainerAtIndex(i, c)
  507. } else {
  508. newac := newArrayContainer()
  509. rb.highlowcontainer.insertNewKeyValueAt(-i-1, hb, newac.iaddReturnMinimized(lowbits(x)))
  510. }
  511. }
  512. // add the integer x to the bitmap, return the container and its index
  513. func (rb *Bitmap) addwithptr(x uint32) (int, container) {
  514. hb := highbits(x)
  515. ra := &rb.highlowcontainer
  516. i := ra.getIndex(hb)
  517. var c container
  518. if i >= 0 {
  519. c = ra.getWritableContainerAtIndex(i).iaddReturnMinimized(lowbits(x))
  520. rb.highlowcontainer.setContainerAtIndex(i, c)
  521. return i, c
  522. }
  523. newac := newArrayContainer()
  524. c = newac.iaddReturnMinimized(lowbits(x))
  525. rb.highlowcontainer.insertNewKeyValueAt(-i-1, hb, c)
  526. return -i - 1, c
  527. }
  528. // CheckedAdd adds the integer x to the bitmap and return true if it was added (false if the integer was already present)
  529. func (rb *Bitmap) CheckedAdd(x uint32) bool {
  530. // TODO: add unit tests for this method
  531. hb := highbits(x)
  532. i := rb.highlowcontainer.getIndex(hb)
  533. if i >= 0 {
  534. C := rb.highlowcontainer.getWritableContainerAtIndex(i)
  535. oldcard := C.getCardinality()
  536. C = C.iaddReturnMinimized(lowbits(x))
  537. rb.highlowcontainer.setContainerAtIndex(i, C)
  538. return C.getCardinality() > oldcard
  539. }
  540. newac := newArrayContainer()
  541. rb.highlowcontainer.insertNewKeyValueAt(-i-1, hb, newac.iaddReturnMinimized(lowbits(x)))
  542. return true
  543. }
  544. // AddInt adds the integer x to the bitmap (convenience method: the parameter is casted to uint32 and we call Add)
  545. func (rb *Bitmap) AddInt(x int) {
  546. rb.Add(uint32(x))
  547. }
  548. // Remove the integer x from the bitmap
  549. func (rb *Bitmap) Remove(x uint32) {
  550. hb := highbits(x)
  551. i := rb.highlowcontainer.getIndex(hb)
  552. if i >= 0 {
  553. c := rb.highlowcontainer.getWritableContainerAtIndex(i).iremoveReturnMinimized(lowbits(x))
  554. rb.highlowcontainer.setContainerAtIndex(i, c)
  555. if rb.highlowcontainer.getContainerAtIndex(i).getCardinality() == 0 {
  556. rb.highlowcontainer.removeAtIndex(i)
  557. }
  558. }
  559. }
  560. // CheckedRemove removes the integer x from the bitmap and return true if the integer was effectively remove (and false if the integer was not present)
  561. func (rb *Bitmap) CheckedRemove(x uint32) bool {
  562. // TODO: add unit tests for this method
  563. hb := highbits(x)
  564. i := rb.highlowcontainer.getIndex(hb)
  565. if i >= 0 {
  566. C := rb.highlowcontainer.getWritableContainerAtIndex(i)
  567. oldcard := C.getCardinality()
  568. C = C.iremoveReturnMinimized(lowbits(x))
  569. rb.highlowcontainer.setContainerAtIndex(i, C)
  570. if rb.highlowcontainer.getContainerAtIndex(i).getCardinality() == 0 {
  571. rb.highlowcontainer.removeAtIndex(i)
  572. return true
  573. }
  574. return C.getCardinality() < oldcard
  575. }
  576. return false
  577. }
  578. // IsEmpty returns true if the Bitmap is empty (it is faster than doing (GetCardinality() == 0))
  579. func (rb *Bitmap) IsEmpty() bool {
  580. return rb.highlowcontainer.size() == 0
  581. }
  582. // GetCardinality returns the number of integers contained in the bitmap
  583. func (rb *Bitmap) GetCardinality() uint64 {
  584. size := uint64(0)
  585. for _, c := range rb.highlowcontainer.containers {
  586. size += uint64(c.getCardinality())
  587. }
  588. return size
  589. }
  590. // Rank returns the number of integers that are smaller or equal to x (Rank(infinity) would be GetCardinality())
  591. func (rb *Bitmap) Rank(x uint32) uint64 {
  592. size := uint64(0)
  593. for i := 0; i < rb.highlowcontainer.size(); i++ {
  594. key := rb.highlowcontainer.getKeyAtIndex(i)
  595. if key > highbits(x) {
  596. return size
  597. }
  598. if key < highbits(x) {
  599. size += uint64(rb.highlowcontainer.getContainerAtIndex(i).getCardinality())
  600. } else {
  601. return size + uint64(rb.highlowcontainer.getContainerAtIndex(i).rank(lowbits(x)))
  602. }
  603. }
  604. return size
  605. }
  606. // Select returns the xth integer in the bitmap
  607. func (rb *Bitmap) Select(x uint32) (uint32, error) {
  608. if rb.GetCardinality() <= uint64(x) {
  609. return 0, fmt.Errorf("can't find %dth integer in a bitmap with only %d items", x, rb.GetCardinality())
  610. }
  611. remaining := x
  612. for i := 0; i < rb.highlowcontainer.size(); i++ {
  613. c := rb.highlowcontainer.getContainerAtIndex(i)
  614. if remaining >= uint32(c.getCardinality()) {
  615. remaining -= uint32(c.getCardinality())
  616. } else {
  617. key := rb.highlowcontainer.getKeyAtIndex(i)
  618. return uint32(key)<<16 + uint32(c.selectInt(uint16(remaining))), nil
  619. }
  620. }
  621. return 0, fmt.Errorf("can't find %dth integer in a bitmap with only %d items", x, rb.GetCardinality())
  622. }
  623. // And computes the intersection between two bitmaps and stores the result in the current bitmap
  624. func (rb *Bitmap) And(x2 *Bitmap) {
  625. pos1 := 0
  626. pos2 := 0
  627. intersectionsize := 0
  628. length1 := rb.highlowcontainer.size()
  629. length2 := x2.highlowcontainer.size()
  630. main:
  631. for {
  632. if pos1 < length1 && pos2 < length2 {
  633. s1 := rb.highlowcontainer.getKeyAtIndex(pos1)
  634. s2 := x2.highlowcontainer.getKeyAtIndex(pos2)
  635. for {
  636. if s1 == s2 {
  637. c1 := rb.highlowcontainer.getWritableContainerAtIndex(pos1)
  638. c2 := x2.highlowcontainer.getContainerAtIndex(pos2)
  639. diff := c1.iand(c2)
  640. if diff.getCardinality() > 0 {
  641. rb.highlowcontainer.replaceKeyAndContainerAtIndex(intersectionsize, s1, diff, false)
  642. intersectionsize++
  643. }
  644. pos1++
  645. pos2++
  646. if (pos1 == length1) || (pos2 == length2) {
  647. break main
  648. }
  649. s1 = rb.highlowcontainer.getKeyAtIndex(pos1)
  650. s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
  651. } else if s1 < s2 {
  652. pos1 = rb.highlowcontainer.advanceUntil(s2, pos1)
  653. if pos1 == length1 {
  654. break main
  655. }
  656. s1 = rb.highlowcontainer.getKeyAtIndex(pos1)
  657. } else { //s1 > s2
  658. pos2 = x2.highlowcontainer.advanceUntil(s1, pos2)
  659. if pos2 == length2 {
  660. break main
  661. }
  662. s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
  663. }
  664. }
  665. } else {
  666. break
  667. }
  668. }
  669. rb.highlowcontainer.resize(intersectionsize)
  670. }
  671. // OrCardinality returns the cardinality of the union between two bitmaps, bitmaps are not modified
  672. func (rb *Bitmap) OrCardinality(x2 *Bitmap) uint64 {
  673. pos1 := 0
  674. pos2 := 0
  675. length1 := rb.highlowcontainer.size()
  676. length2 := x2.highlowcontainer.size()
  677. answer := uint64(0)
  678. main:
  679. for {
  680. if (pos1 < length1) && (pos2 < length2) {
  681. s1 := rb.highlowcontainer.getKeyAtIndex(pos1)
  682. s2 := x2.highlowcontainer.getKeyAtIndex(pos2)
  683. for {
  684. if s1 < s2 {
  685. answer += uint64(rb.highlowcontainer.getContainerAtIndex(pos1).getCardinality())
  686. pos1++
  687. if pos1 == length1 {
  688. break main
  689. }
  690. s1 = rb.highlowcontainer.getKeyAtIndex(pos1)
  691. } else if s1 > s2 {
  692. answer += uint64(x2.highlowcontainer.getContainerAtIndex(pos2).getCardinality())
  693. pos2++
  694. if pos2 == length2 {
  695. break main
  696. }
  697. s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
  698. } else {
  699. // TODO: could be faster if we did not have to materialize the container
  700. answer += uint64(rb.highlowcontainer.getContainerAtIndex(pos1).or(x2.highlowcontainer.getContainerAtIndex(pos2)).getCardinality())
  701. pos1++
  702. pos2++
  703. if (pos1 == length1) || (pos2 == length2) {
  704. break main
  705. }
  706. s1 = rb.highlowcontainer.getKeyAtIndex(pos1)
  707. s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
  708. }
  709. }
  710. } else {
  711. break
  712. }
  713. }
  714. for ; pos1 < length1; pos1++ {
  715. answer += uint64(rb.highlowcontainer.getContainerAtIndex(pos1).getCardinality())
  716. }
  717. for ; pos2 < length2; pos2++ {
  718. answer += uint64(x2.highlowcontainer.getContainerAtIndex(pos2).getCardinality())
  719. }
  720. return answer
  721. }
  722. // AndCardinality returns the cardinality of the intersection between two bitmaps, bitmaps are not modified
  723. func (rb *Bitmap) AndCardinality(x2 *Bitmap) uint64 {
  724. pos1 := 0
  725. pos2 := 0
  726. answer := uint64(0)
  727. length1 := rb.highlowcontainer.size()
  728. length2 := x2.highlowcontainer.size()
  729. main:
  730. for {
  731. if pos1 < length1 && pos2 < length2 {
  732. s1 := rb.highlowcontainer.getKeyAtIndex(pos1)
  733. s2 := x2.highlowcontainer.getKeyAtIndex(pos2)
  734. for {
  735. if s1 == s2 {
  736. c1 := rb.highlowcontainer.getContainerAtIndex(pos1)
  737. c2 := x2.highlowcontainer.getContainerAtIndex(pos2)
  738. answer += uint64(c1.andCardinality(c2))
  739. pos1++
  740. pos2++
  741. if (pos1 == length1) || (pos2 == length2) {
  742. break main
  743. }
  744. s1 = rb.highlowcontainer.getKeyAtIndex(pos1)
  745. s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
  746. } else if s1 < s2 {
  747. pos1 = rb.highlowcontainer.advanceUntil(s2, pos1)
  748. if pos1 == length1 {
  749. break main
  750. }
  751. s1 = rb.highlowcontainer.getKeyAtIndex(pos1)
  752. } else { //s1 > s2
  753. pos2 = x2.highlowcontainer.advanceUntil(s1, pos2)
  754. if pos2 == length2 {
  755. break main
  756. }
  757. s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
  758. }
  759. }
  760. } else {
  761. break
  762. }
  763. }
  764. return answer
  765. }
  766. // Intersects checks whether two bitmap intersects, bitmaps are not modified
  767. func (rb *Bitmap) Intersects(x2 *Bitmap) bool {
  768. pos1 := 0
  769. pos2 := 0
  770. length1 := rb.highlowcontainer.size()
  771. length2 := x2.highlowcontainer.size()
  772. main:
  773. for {
  774. if pos1 < length1 && pos2 < length2 {
  775. s1 := rb.highlowcontainer.getKeyAtIndex(pos1)
  776. s2 := x2.highlowcontainer.getKeyAtIndex(pos2)
  777. for {
  778. if s1 == s2 {
  779. c1 := rb.highlowcontainer.getContainerAtIndex(pos1)
  780. c2 := x2.highlowcontainer.getContainerAtIndex(pos2)
  781. if c1.intersects(c2) {
  782. return true
  783. }
  784. pos1++
  785. pos2++
  786. if (pos1 == length1) || (pos2 == length2) {
  787. break main
  788. }
  789. s1 = rb.highlowcontainer.getKeyAtIndex(pos1)
  790. s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
  791. } else if s1 < s2 {
  792. pos1 = rb.highlowcontainer.advanceUntil(s2, pos1)
  793. if pos1 == length1 {
  794. break main
  795. }
  796. s1 = rb.highlowcontainer.getKeyAtIndex(pos1)
  797. } else { //s1 > s2
  798. pos2 = x2.highlowcontainer.advanceUntil(s1, pos2)
  799. if pos2 == length2 {
  800. break main
  801. }
  802. s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
  803. }
  804. }
  805. } else {
  806. break
  807. }
  808. }
  809. return false
  810. }
  811. // Xor computes the symmetric difference between two bitmaps and stores the result in the current bitmap
  812. func (rb *Bitmap) Xor(x2 *Bitmap) {
  813. pos1 := 0
  814. pos2 := 0
  815. length1 := rb.highlowcontainer.size()
  816. length2 := x2.highlowcontainer.size()
  817. for {
  818. if (pos1 < length1) && (pos2 < length2) {
  819. s1 := rb.highlowcontainer.getKeyAtIndex(pos1)
  820. s2 := x2.highlowcontainer.getKeyAtIndex(pos2)
  821. if s1 < s2 {
  822. pos1 = rb.highlowcontainer.advanceUntil(s2, pos1)
  823. if pos1 == length1 {
  824. break
  825. }
  826. } else if s1 > s2 {
  827. c := x2.highlowcontainer.getWritableContainerAtIndex(pos2)
  828. rb.highlowcontainer.insertNewKeyValueAt(pos1, x2.highlowcontainer.getKeyAtIndex(pos2), c)
  829. length1++
  830. pos1++
  831. pos2++
  832. } else {
  833. // TODO: couple be computed in-place for reduced memory usage
  834. c := rb.highlowcontainer.getContainerAtIndex(pos1).xor(x2.highlowcontainer.getContainerAtIndex(pos2))
  835. if c.getCardinality() > 0 {
  836. rb.highlowcontainer.setContainerAtIndex(pos1, c)
  837. pos1++
  838. } else {
  839. rb.highlowcontainer.removeAtIndex(pos1)
  840. length1--
  841. }
  842. pos2++
  843. }
  844. } else {
  845. break
  846. }
  847. }
  848. if pos1 == length1 {
  849. rb.highlowcontainer.appendCopyMany(x2.highlowcontainer, pos2, length2)
  850. }
  851. }
  852. // Or computes the union between two bitmaps and stores the result in the current bitmap
  853. func (rb *Bitmap) Or(x2 *Bitmap) {
  854. pos1 := 0
  855. pos2 := 0
  856. length1 := rb.highlowcontainer.size()
  857. length2 := x2.highlowcontainer.size()
  858. main:
  859. for (pos1 < length1) && (pos2 < length2) {
  860. s1 := rb.highlowcontainer.getKeyAtIndex(pos1)
  861. s2 := x2.highlowcontainer.getKeyAtIndex(pos2)
  862. for {
  863. if s1 < s2 {
  864. pos1++
  865. if pos1 == length1 {
  866. break main
  867. }
  868. s1 = rb.highlowcontainer.getKeyAtIndex(pos1)
  869. } else if s1 > s2 {
  870. rb.highlowcontainer.insertNewKeyValueAt(pos1, s2, x2.highlowcontainer.getContainerAtIndex(pos2).clone())
  871. pos1++
  872. length1++
  873. pos2++
  874. if pos2 == length2 {
  875. break main
  876. }
  877. s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
  878. } else {
  879. rb.highlowcontainer.replaceKeyAndContainerAtIndex(pos1, s1, rb.highlowcontainer.getWritableContainerAtIndex(pos1).ior(x2.highlowcontainer.getContainerAtIndex(pos2)), false)
  880. pos1++
  881. pos2++
  882. if (pos1 == length1) || (pos2 == length2) {
  883. break main
  884. }
  885. s1 = rb.highlowcontainer.getKeyAtIndex(pos1)
  886. s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
  887. }
  888. }
  889. }
  890. if pos1 == length1 {
  891. rb.highlowcontainer.appendCopyMany(x2.highlowcontainer, pos2, length2)
  892. }
  893. }
  894. // AndNot computes the difference between two bitmaps and stores the result in the current bitmap
  895. func (rb *Bitmap) AndNot(x2 *Bitmap) {
  896. pos1 := 0
  897. pos2 := 0
  898. intersectionsize := 0
  899. length1 := rb.highlowcontainer.size()
  900. length2 := x2.highlowcontainer.size()
  901. main:
  902. for {
  903. if pos1 < length1 && pos2 < length2 {
  904. s1 := rb.highlowcontainer.getKeyAtIndex(pos1)
  905. s2 := x2.highlowcontainer.getKeyAtIndex(pos2)
  906. for {
  907. if s1 == s2 {
  908. c1 := rb.highlowcontainer.getWritableContainerAtIndex(pos1)
  909. c2 := x2.highlowcontainer.getContainerAtIndex(pos2)
  910. diff := c1.iandNot(c2)
  911. if diff.getCardinality() > 0 {
  912. rb.highlowcontainer.replaceKeyAndContainerAtIndex(intersectionsize, s1, diff, false)
  913. intersectionsize++
  914. }
  915. pos1++
  916. pos2++
  917. if (pos1 == length1) || (pos2 == length2) {
  918. break main
  919. }
  920. s1 = rb.highlowcontainer.getKeyAtIndex(pos1)
  921. s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
  922. } else if s1 < s2 {
  923. c1 := rb.highlowcontainer.getContainerAtIndex(pos1)
  924. mustCopyOnWrite := rb.highlowcontainer.needsCopyOnWrite(pos1)
  925. rb.highlowcontainer.replaceKeyAndContainerAtIndex(intersectionsize, s1, c1, mustCopyOnWrite)
  926. intersectionsize++
  927. pos1++
  928. if pos1 == length1 {
  929. break main
  930. }
  931. s1 = rb.highlowcontainer.getKeyAtIndex(pos1)
  932. } else { //s1 > s2
  933. pos2 = x2.highlowcontainer.advanceUntil(s1, pos2)
  934. if pos2 == length2 {
  935. break main
  936. }
  937. s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
  938. }
  939. }
  940. } else {
  941. break
  942. }
  943. }
  944. // TODO:implement as a copy
  945. for pos1 < length1 {
  946. c1 := rb.highlowcontainer.getContainerAtIndex(pos1)
  947. s1 := rb.highlowcontainer.getKeyAtIndex(pos1)
  948. mustCopyOnWrite := rb.highlowcontainer.needsCopyOnWrite(pos1)
  949. rb.highlowcontainer.replaceKeyAndContainerAtIndex(intersectionsize, s1, c1, mustCopyOnWrite)
  950. intersectionsize++
  951. pos1++
  952. }
  953. rb.highlowcontainer.resize(intersectionsize)
  954. }
  955. // Or computes the union between two bitmaps and returns the result
  956. func Or(x1, x2 *Bitmap) *Bitmap {
  957. answer := NewBitmap()
  958. pos1 := 0
  959. pos2 := 0
  960. length1 := x1.highlowcontainer.size()
  961. length2 := x2.highlowcontainer.size()
  962. main:
  963. for (pos1 < length1) && (pos2 < length2) {
  964. s1 := x1.highlowcontainer.getKeyAtIndex(pos1)
  965. s2 := x2.highlowcontainer.getKeyAtIndex(pos2)
  966. for {
  967. if s1 < s2 {
  968. answer.highlowcontainer.appendCopy(x1.highlowcontainer, pos1)
  969. pos1++
  970. if pos1 == length1 {
  971. break main
  972. }
  973. s1 = x1.highlowcontainer.getKeyAtIndex(pos1)
  974. } else if s1 > s2 {
  975. answer.highlowcontainer.appendCopy(x2.highlowcontainer, pos2)
  976. pos2++
  977. if pos2 == length2 {
  978. break main
  979. }
  980. s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
  981. } else {
  982. answer.highlowcontainer.appendContainer(s1, x1.highlowcontainer.getContainerAtIndex(pos1).or(x2.highlowcontainer.getContainerAtIndex(pos2)), false)
  983. pos1++
  984. pos2++
  985. if (pos1 == length1) || (pos2 == length2) {
  986. break main
  987. }
  988. s1 = x1.highlowcontainer.getKeyAtIndex(pos1)
  989. s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
  990. }
  991. }
  992. }
  993. if pos1 == length1 {
  994. answer.highlowcontainer.appendCopyMany(x2.highlowcontainer, pos2, length2)
  995. } else if pos2 == length2 {
  996. answer.highlowcontainer.appendCopyMany(x1.highlowcontainer, pos1, length1)
  997. }
  998. return answer
  999. }
  1000. // And computes the intersection between two bitmaps and returns the result
  1001. func And(x1, x2 *Bitmap) *Bitmap {
  1002. answer := NewBitmap()
  1003. pos1 := 0
  1004. pos2 := 0
  1005. length1 := x1.highlowcontainer.size()
  1006. length2 := x2.highlowcontainer.size()
  1007. main:
  1008. for pos1 < length1 && pos2 < length2 {
  1009. s1 := x1.highlowcontainer.getKeyAtIndex(pos1)
  1010. s2 := x2.highlowcontainer.getKeyAtIndex(pos2)
  1011. for {
  1012. if s1 == s2 {
  1013. C := x1.highlowcontainer.getContainerAtIndex(pos1)
  1014. C = C.and(x2.highlowcontainer.getContainerAtIndex(pos2))
  1015. if C.getCardinality() > 0 {
  1016. answer.highlowcontainer.appendContainer(s1, C, false)
  1017. }
  1018. pos1++
  1019. pos2++
  1020. if (pos1 == length1) || (pos2 == length2) {
  1021. break main
  1022. }
  1023. s1 = x1.highlowcontainer.getKeyAtIndex(pos1)
  1024. s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
  1025. } else if s1 < s2 {
  1026. pos1 = x1.highlowcontainer.advanceUntil(s2, pos1)
  1027. if pos1 == length1 {
  1028. break main
  1029. }
  1030. s1 = x1.highlowcontainer.getKeyAtIndex(pos1)
  1031. } else { // s1 > s2
  1032. pos2 = x2.highlowcontainer.advanceUntil(s1, pos2)
  1033. if pos2 == length2 {
  1034. break main
  1035. }
  1036. s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
  1037. }
  1038. }
  1039. }
  1040. return answer
  1041. }
  1042. // Xor computes the symmetric difference between two bitmaps and returns the result
  1043. func Xor(x1, x2 *Bitmap) *Bitmap {
  1044. answer := NewBitmap()
  1045. pos1 := 0
  1046. pos2 := 0
  1047. length1 := x1.highlowcontainer.size()
  1048. length2 := x2.highlowcontainer.size()
  1049. for {
  1050. if (pos1 < length1) && (pos2 < length2) {
  1051. s1 := x1.highlowcontainer.getKeyAtIndex(pos1)
  1052. s2 := x2.highlowcontainer.getKeyAtIndex(pos2)
  1053. if s1 < s2 {
  1054. answer.highlowcontainer.appendCopy(x1.highlowcontainer, pos1)
  1055. pos1++
  1056. } else if s1 > s2 {
  1057. answer.highlowcontainer.appendCopy(x2.highlowcontainer, pos2)
  1058. pos2++
  1059. } else {
  1060. c := x1.highlowcontainer.getContainerAtIndex(pos1).xor(x2.highlowcontainer.getContainerAtIndex(pos2))
  1061. if c.getCardinality() > 0 {
  1062. answer.highlowcontainer.appendContainer(s1, c, false)
  1063. }
  1064. pos1++
  1065. pos2++
  1066. }
  1067. } else {
  1068. break
  1069. }
  1070. }
  1071. if pos1 == length1 {
  1072. answer.highlowcontainer.appendCopyMany(x2.highlowcontainer, pos2, length2)
  1073. } else if pos2 == length2 {
  1074. answer.highlowcontainer.appendCopyMany(x1.highlowcontainer, pos1, length1)
  1075. }
  1076. return answer
  1077. }
  1078. // AndNot computes the difference between two bitmaps and returns the result
  1079. func AndNot(x1, x2 *Bitmap) *Bitmap {
  1080. answer := NewBitmap()
  1081. pos1 := 0
  1082. pos2 := 0
  1083. length1 := x1.highlowcontainer.size()
  1084. length2 := x2.highlowcontainer.size()
  1085. main:
  1086. for {
  1087. if pos1 < length1 && pos2 < length2 {
  1088. s1 := x1.highlowcontainer.getKeyAtIndex(pos1)
  1089. s2 := x2.highlowcontainer.getKeyAtIndex(pos2)
  1090. for {
  1091. if s1 < s2 {
  1092. answer.highlowcontainer.appendCopy(x1.highlowcontainer, pos1)
  1093. pos1++
  1094. if pos1 == length1 {
  1095. break main
  1096. }
  1097. s1 = x1.highlowcontainer.getKeyAtIndex(pos1)
  1098. } else if s1 == s2 {
  1099. c1 := x1.highlowcontainer.getContainerAtIndex(pos1)
  1100. c2 := x2.highlowcontainer.getContainerAtIndex(pos2)
  1101. diff := c1.andNot(c2)
  1102. if diff.getCardinality() > 0 {
  1103. answer.highlowcontainer.appendContainer(s1, diff, false)
  1104. }
  1105. pos1++
  1106. pos2++
  1107. if (pos1 == length1) || (pos2 == length2) {
  1108. break main
  1109. }
  1110. s1 = x1.highlowcontainer.getKeyAtIndex(pos1)
  1111. s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
  1112. } else { //s1 > s2
  1113. pos2 = x2.highlowcontainer.advanceUntil(s1, pos2)
  1114. if pos2 == length2 {
  1115. break main
  1116. }
  1117. s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
  1118. }
  1119. }
  1120. } else {
  1121. break
  1122. }
  1123. }
  1124. if pos2 == length2 {
  1125. answer.highlowcontainer.appendCopyMany(x1.highlowcontainer, pos1, length1)
  1126. }
  1127. return answer
  1128. }
  1129. // AddMany add all of the values in dat
  1130. func (rb *Bitmap) AddMany(dat []uint32) {
  1131. if len(dat) == 0 {
  1132. return
  1133. }
  1134. prev := dat[0]
  1135. idx, c := rb.addwithptr(prev)
  1136. for _, i := range dat[1:] {
  1137. if highbits(prev) == highbits(i) {
  1138. c = c.iaddReturnMinimized(lowbits(i))
  1139. rb.highlowcontainer.setContainerAtIndex(idx, c)
  1140. } else {
  1141. idx, c = rb.addwithptr(i)
  1142. }
  1143. prev = i
  1144. }
  1145. }
  1146. // BitmapOf generates a new bitmap filled with the specified integers
  1147. func BitmapOf(dat ...uint32) *Bitmap {
  1148. ans := NewBitmap()
  1149. ans.AddMany(dat)
  1150. return ans
  1151. }
  1152. // Flip negates the bits in the given range (i.e., [rangeStart,rangeEnd)), any integer present in this range and in the bitmap is removed,
  1153. // and any integer present in the range and not in the bitmap is added.
  1154. // The function uses 64-bit parameters even though a Bitmap stores 32-bit values because it is allowed and meaningful to use [0,uint64(0x100000000)) as a range
  1155. // while uint64(0x100000000) cannot be represented as a 32-bit value.
  1156. func (rb *Bitmap) Flip(rangeStart, rangeEnd uint64) {
  1157. if rangeEnd > MaxUint32+1 {
  1158. panic("rangeEnd > MaxUint32+1")
  1159. }
  1160. if rangeStart > MaxUint32+1 {
  1161. panic("rangeStart > MaxUint32+1")
  1162. }
  1163. if rangeStart >= rangeEnd {
  1164. return
  1165. }
  1166. hbStart := uint32(highbits(uint32(rangeStart)))
  1167. lbStart := uint32(lowbits(uint32(rangeStart)))
  1168. hbLast := uint32(highbits(uint32(rangeEnd - 1)))
  1169. lbLast := uint32(lowbits(uint32(rangeEnd - 1)))
  1170. var max uint32 = maxLowBit
  1171. for hb := hbStart; hb <= hbLast; hb++ {
  1172. var containerStart uint32
  1173. if hb == hbStart {
  1174. containerStart = uint32(lbStart)
  1175. }
  1176. containerLast := max
  1177. if hb == hbLast {
  1178. containerLast = uint32(lbLast)
  1179. }
  1180. i := rb.highlowcontainer.getIndex(uint16(hb))
  1181. if i >= 0 {
  1182. c := rb.highlowcontainer.getWritableContainerAtIndex(i).inot(int(containerStart), int(containerLast)+1)
  1183. if c.getCardinality() > 0 {
  1184. rb.highlowcontainer.setContainerAtIndex(i, c)
  1185. } else {
  1186. rb.highlowcontainer.removeAtIndex(i)
  1187. }
  1188. } else { // *think* the range of ones must never be
  1189. // empty.
  1190. rb.highlowcontainer.insertNewKeyValueAt(-i-1, uint16(hb), rangeOfOnes(int(containerStart), int(containerLast)))
  1191. }
  1192. }
  1193. }
  1194. // FlipInt calls Flip after casting the parameters (convenience method)
  1195. func (rb *Bitmap) FlipInt(rangeStart, rangeEnd int) {
  1196. rb.Flip(uint64(rangeStart), uint64(rangeEnd))
  1197. }
  1198. // AddRange adds the integers in [rangeStart, rangeEnd) to the bitmap.
  1199. // The function uses 64-bit parameters even though a Bitmap stores 32-bit values because it is allowed and meaningful to use [0,uint64(0x100000000)) as a range
  1200. // while uint64(0x100000000) cannot be represented as a 32-bit value.
  1201. func (rb *Bitmap) AddRange(rangeStart, rangeEnd uint64) {
  1202. if rangeStart >= rangeEnd {
  1203. return
  1204. }
  1205. if rangeEnd-1 > MaxUint32 {
  1206. panic("rangeEnd-1 > MaxUint32")
  1207. }
  1208. hbStart := uint32(highbits(uint32(rangeStart)))
  1209. lbStart := uint32(lowbits(uint32(rangeStart)))
  1210. hbLast := uint32(highbits(uint32(rangeEnd - 1)))
  1211. lbLast := uint32(lowbits(uint32(rangeEnd - 1)))
  1212. var max uint32 = maxLowBit
  1213. for hb := hbStart; hb <= hbLast; hb++ {
  1214. containerStart := uint32(0)
  1215. if hb == hbStart {
  1216. containerStart = lbStart
  1217. }
  1218. containerLast := max
  1219. if hb == hbLast {
  1220. containerLast = lbLast
  1221. }
  1222. i := rb.highlowcontainer.getIndex(uint16(hb))
  1223. if i >= 0 {
  1224. c := rb.highlowcontainer.getWritableContainerAtIndex(i).iaddRange(int(containerStart), int(containerLast)+1)
  1225. rb.highlowcontainer.setContainerAtIndex(i, c)
  1226. } else { // *think* the range of ones must never be
  1227. // empty.
  1228. rb.highlowcontainer.insertNewKeyValueAt(-i-1, uint16(hb), rangeOfOnes(int(containerStart), int(containerLast)))
  1229. }
  1230. }
  1231. }
  1232. // RemoveRange removes the integers in [rangeStart, rangeEnd) from the bitmap.
  1233. // The function uses 64-bit parameters even though a Bitmap stores 32-bit values because it is allowed and meaningful to use [0,uint64(0x100000000)) as a range
  1234. // while uint64(0x100000000) cannot be represented as a 32-bit value.
  1235. func (rb *Bitmap) RemoveRange(rangeStart, rangeEnd uint64) {
  1236. if rangeStart >= rangeEnd {
  1237. return
  1238. }
  1239. if rangeEnd-1 > MaxUint32 {
  1240. // logically, we should assume that the user wants to
  1241. // remove all values from rangeStart to infinity
  1242. // see https://github.com/RoaringBitmap/roaring/issues/141
  1243. rangeEnd = uint64(0x100000000)
  1244. }
  1245. hbStart := uint32(highbits(uint32(rangeStart)))
  1246. lbStart := uint32(lowbits(uint32(rangeStart)))
  1247. hbLast := uint32(highbits(uint32(rangeEnd - 1)))
  1248. lbLast := uint32(lowbits(uint32(rangeEnd - 1)))
  1249. var max uint32 = maxLowBit
  1250. if hbStart == hbLast {
  1251. i := rb.highlowcontainer.getIndex(uint16(hbStart))
  1252. if i < 0 {
  1253. return
  1254. }
  1255. c := rb.highlowcontainer.getWritableContainerAtIndex(i).iremoveRange(int(lbStart), int(lbLast+1))
  1256. if c.getCardinality() > 0 {
  1257. rb.highlowcontainer.setContainerAtIndex(i, c)
  1258. } else {
  1259. rb.highlowcontainer.removeAtIndex(i)
  1260. }
  1261. return
  1262. }
  1263. ifirst := rb.highlowcontainer.getIndex(uint16(hbStart))
  1264. ilast := rb.highlowcontainer.getIndex(uint16(hbLast))
  1265. if ifirst >= 0 {
  1266. if lbStart != 0 {
  1267. c := rb.highlowcontainer.getWritableContainerAtIndex(ifirst).iremoveRange(int(lbStart), int(max+1))
  1268. if c.getCardinality() > 0 {
  1269. rb.highlowcontainer.setContainerAtIndex(ifirst, c)
  1270. ifirst++
  1271. }
  1272. }
  1273. } else {
  1274. ifirst = -ifirst - 1
  1275. }
  1276. if ilast >= 0 {
  1277. if lbLast != max {
  1278. c := rb.highlowcontainer.getWritableContainerAtIndex(ilast).iremoveRange(int(0), int(lbLast+1))
  1279. if c.getCardinality() > 0 {
  1280. rb.highlowcontainer.setContainerAtIndex(ilast, c)
  1281. } else {
  1282. ilast++
  1283. }
  1284. } else {
  1285. ilast++
  1286. }
  1287. } else {
  1288. ilast = -ilast - 1
  1289. }
  1290. rb.highlowcontainer.removeIndexRange(ifirst, ilast)
  1291. }
  1292. // Flip negates the bits in the given range (i.e., [rangeStart,rangeEnd)), any integer present in this range and in the bitmap is removed,
  1293. // and any integer present in the range and not in the bitmap is added, a new bitmap is returned leaving
  1294. // the current bitmap unchanged.
  1295. // The function uses 64-bit parameters even though a Bitmap stores 32-bit values because it is allowed and meaningful to use [0,uint64(0x100000000)) as a range
  1296. // while uint64(0x100000000) cannot be represented as a 32-bit value.
  1297. func Flip(bm *Bitmap, rangeStart, rangeEnd uint64) *Bitmap {
  1298. if rangeStart >= rangeEnd {
  1299. return bm.Clone()
  1300. }
  1301. if rangeStart > MaxUint32 {
  1302. panic("rangeStart > MaxUint32")
  1303. }
  1304. if rangeEnd-1 > MaxUint32 {
  1305. panic("rangeEnd-1 > MaxUint32")
  1306. }
  1307. answer := NewBitmap()
  1308. hbStart := uint32(highbits(uint32(rangeStart)))
  1309. lbStart := uint32(lowbits(uint32(rangeStart)))
  1310. hbLast := uint32(highbits(uint32(rangeEnd - 1)))
  1311. lbLast := uint32(lowbits(uint32(rangeEnd - 1)))
  1312. // copy the containers before the active area
  1313. answer.highlowcontainer.appendCopiesUntil(bm.highlowcontainer, uint16(hbStart))
  1314. var max uint32 = maxLowBit
  1315. for hb := hbStart; hb <= hbLast; hb++ {
  1316. var containerStart uint32
  1317. if hb == hbStart {
  1318. containerStart = uint32(lbStart)
  1319. }
  1320. containerLast := max
  1321. if hb == hbLast {
  1322. containerLast = uint32(lbLast)
  1323. }
  1324. i := bm.highlowcontainer.getIndex(uint16(hb))
  1325. j := answer.highlowcontainer.getIndex(uint16(hb))
  1326. if i >= 0 {
  1327. c := bm.highlowcontainer.getContainerAtIndex(i).not(int(containerStart), int(containerLast)+1)
  1328. if c.getCardinality() > 0 {
  1329. answer.highlowcontainer.insertNewKeyValueAt(-j-1, uint16(hb), c)
  1330. }
  1331. } else { // *think* the range of ones must never be
  1332. // empty.
  1333. answer.highlowcontainer.insertNewKeyValueAt(-j-1, uint16(hb),
  1334. rangeOfOnes(int(containerStart), int(containerLast)))
  1335. }
  1336. }
  1337. // copy the containers after the active area.
  1338. answer.highlowcontainer.appendCopiesAfter(bm.highlowcontainer, uint16(hbLast))
  1339. return answer
  1340. }
  1341. // SetCopyOnWrite sets this bitmap to use copy-on-write so that copies are fast and memory conscious
  1342. // if the parameter is true, otherwise we leave the default where hard copies are made
  1343. // (copy-on-write requires extra care in a threaded context).
  1344. // Calling SetCopyOnWrite(true) on a bitmap created with FromBuffer is unsafe.
  1345. func (rb *Bitmap) SetCopyOnWrite(val bool) {
  1346. rb.highlowcontainer.copyOnWrite = val
  1347. }
  1348. // GetCopyOnWrite gets this bitmap's copy-on-write property
  1349. func (rb *Bitmap) GetCopyOnWrite() (val bool) {
  1350. return rb.highlowcontainer.copyOnWrite
  1351. }
  1352. // CloneCopyOnWriteContainers clones all containers which have
  1353. // needCopyOnWrite set to true.
  1354. // This can be used to make sure it is safe to munmap a []byte
  1355. // that the roaring array may still have a reference to, after
  1356. // calling FromBuffer.
  1357. // More generally this function is useful if you call FromBuffer
  1358. // to construct a bitmap with a backing array buf
  1359. // and then later discard the buf array. Note that you should call
  1360. // CloneCopyOnWriteContainers on all bitmaps that were derived
  1361. // from the 'FromBuffer' bitmap since they map have dependencies
  1362. // on the buf array as well.
  1363. func (rb *Bitmap) CloneCopyOnWriteContainers() {
  1364. rb.highlowcontainer.cloneCopyOnWriteContainers()
  1365. }
  1366. // FlipInt calls Flip after casting the parameters (convenience method)
  1367. func FlipInt(bm *Bitmap, rangeStart, rangeEnd int) *Bitmap {
  1368. return Flip(bm, uint64(rangeStart), uint64(rangeEnd))
  1369. }
  1370. // Statistics provides details on the container types in use.
  1371. type Statistics struct {
  1372. Cardinality uint64
  1373. Containers uint64
  1374. ArrayContainers uint64
  1375. ArrayContainerBytes uint64
  1376. ArrayContainerValues uint64
  1377. BitmapContainers uint64
  1378. BitmapContainerBytes uint64
  1379. BitmapContainerValues uint64
  1380. RunContainers uint64
  1381. RunContainerBytes uint64
  1382. RunContainerValues uint64
  1383. }
  1384. // Stats returns details on container type usage in a Statistics struct.
  1385. func (rb *Bitmap) Stats() Statistics {
  1386. stats := Statistics{}
  1387. stats.Containers = uint64(len(rb.highlowcontainer.containers))
  1388. for _, c := range rb.highlowcontainer.containers {
  1389. stats.Cardinality += uint64(c.getCardinality())
  1390. switch c.(type) {
  1391. case *arrayContainer:
  1392. stats.ArrayContainers++
  1393. stats.ArrayContainerBytes += uint64(c.getSizeInBytes())
  1394. stats.ArrayContainerValues += uint64(c.getCardinality())
  1395. case *bitmapContainer:
  1396. stats.BitmapContainers++
  1397. stats.BitmapContainerBytes += uint64(c.getSizeInBytes())
  1398. stats.BitmapContainerValues += uint64(c.getCardinality())
  1399. case *runContainer16:
  1400. stats.RunContainers++
  1401. stats.RunContainerBytes += uint64(c.getSizeInBytes())
  1402. stats.RunContainerValues += uint64(c.getCardinality())
  1403. }
  1404. }
  1405. return stats
  1406. }