You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

fastaggregation.go 5.8KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215
  1. package roaring
  2. import (
  3. "container/heap"
  4. )
  5. // Or function that requires repairAfterLazy
  6. func lazyOR(x1, x2 *Bitmap) *Bitmap {
  7. answer := NewBitmap()
  8. pos1 := 0
  9. pos2 := 0
  10. length1 := x1.highlowcontainer.size()
  11. length2 := x2.highlowcontainer.size()
  12. main:
  13. for (pos1 < length1) && (pos2 < length2) {
  14. s1 := x1.highlowcontainer.getKeyAtIndex(pos1)
  15. s2 := x2.highlowcontainer.getKeyAtIndex(pos2)
  16. for {
  17. if s1 < s2 {
  18. answer.highlowcontainer.appendCopy(x1.highlowcontainer, pos1)
  19. pos1++
  20. if pos1 == length1 {
  21. break main
  22. }
  23. s1 = x1.highlowcontainer.getKeyAtIndex(pos1)
  24. } else if s1 > s2 {
  25. answer.highlowcontainer.appendCopy(x2.highlowcontainer, pos2)
  26. pos2++
  27. if pos2 == length2 {
  28. break main
  29. }
  30. s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
  31. } else {
  32. c1 := x1.highlowcontainer.getContainerAtIndex(pos1)
  33. switch t := c1.(type) {
  34. case *arrayContainer:
  35. c1 = t.toBitmapContainer()
  36. case *runContainer16:
  37. if !t.isFull() {
  38. c1 = t.toBitmapContainer()
  39. }
  40. }
  41. answer.highlowcontainer.appendContainer(s1, c1.lazyOR(x2.highlowcontainer.getContainerAtIndex(pos2)), false)
  42. pos1++
  43. pos2++
  44. if (pos1 == length1) || (pos2 == length2) {
  45. break main
  46. }
  47. s1 = x1.highlowcontainer.getKeyAtIndex(pos1)
  48. s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
  49. }
  50. }
  51. }
  52. if pos1 == length1 {
  53. answer.highlowcontainer.appendCopyMany(x2.highlowcontainer, pos2, length2)
  54. } else if pos2 == length2 {
  55. answer.highlowcontainer.appendCopyMany(x1.highlowcontainer, pos1, length1)
  56. }
  57. return answer
  58. }
  59. // In-place Or function that requires repairAfterLazy
  60. func (x1 *Bitmap) lazyOR(x2 *Bitmap) *Bitmap {
  61. pos1 := 0
  62. pos2 := 0
  63. length1 := x1.highlowcontainer.size()
  64. length2 := x2.highlowcontainer.size()
  65. main:
  66. for (pos1 < length1) && (pos2 < length2) {
  67. s1 := x1.highlowcontainer.getKeyAtIndex(pos1)
  68. s2 := x2.highlowcontainer.getKeyAtIndex(pos2)
  69. for {
  70. if s1 < s2 {
  71. pos1++
  72. if pos1 == length1 {
  73. break main
  74. }
  75. s1 = x1.highlowcontainer.getKeyAtIndex(pos1)
  76. } else if s1 > s2 {
  77. x1.highlowcontainer.insertNewKeyValueAt(pos1, s2, x2.highlowcontainer.getContainerAtIndex(pos2).clone())
  78. pos2++
  79. pos1++
  80. length1++
  81. if pos2 == length2 {
  82. break main
  83. }
  84. s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
  85. } else {
  86. c1 := x1.highlowcontainer.getContainerAtIndex(pos1)
  87. switch t := c1.(type) {
  88. case *arrayContainer:
  89. c1 = t.toBitmapContainer()
  90. case *runContainer16:
  91. if !t.isFull() {
  92. c1 = t.toBitmapContainer()
  93. }
  94. case *bitmapContainer:
  95. c1 = x1.highlowcontainer.getWritableContainerAtIndex(pos1)
  96. }
  97. x1.highlowcontainer.containers[pos1] = c1.lazyIOR(x2.highlowcontainer.getContainerAtIndex(pos2))
  98. x1.highlowcontainer.needCopyOnWrite[pos1] = false
  99. pos1++
  100. pos2++
  101. if (pos1 == length1) || (pos2 == length2) {
  102. break main
  103. }
  104. s1 = x1.highlowcontainer.getKeyAtIndex(pos1)
  105. s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
  106. }
  107. }
  108. }
  109. if pos1 == length1 {
  110. x1.highlowcontainer.appendCopyMany(x2.highlowcontainer, pos2, length2)
  111. }
  112. return x1
  113. }
  114. // to be called after lazy aggregates
  115. func (x1 *Bitmap) repairAfterLazy() {
  116. for pos := 0; pos < x1.highlowcontainer.size(); pos++ {
  117. c := x1.highlowcontainer.getContainerAtIndex(pos)
  118. switch c.(type) {
  119. case *bitmapContainer:
  120. if c.(*bitmapContainer).cardinality == invalidCardinality {
  121. c = x1.highlowcontainer.getWritableContainerAtIndex(pos)
  122. c.(*bitmapContainer).computeCardinality()
  123. if c.(*bitmapContainer).getCardinality() <= arrayDefaultMaxSize {
  124. x1.highlowcontainer.setContainerAtIndex(pos, c.(*bitmapContainer).toArrayContainer())
  125. } else if c.(*bitmapContainer).isFull() {
  126. x1.highlowcontainer.setContainerAtIndex(pos, newRunContainer16Range(0, MaxUint16))
  127. }
  128. }
  129. }
  130. }
  131. }
  132. // FastAnd computes the intersection between many bitmaps quickly
  133. // Compared to the And function, it can take many bitmaps as input, thus saving the trouble
  134. // of manually calling "And" many times.
  135. func FastAnd(bitmaps ...*Bitmap) *Bitmap {
  136. if len(bitmaps) == 0 {
  137. return NewBitmap()
  138. } else if len(bitmaps) == 1 {
  139. return bitmaps[0].Clone()
  140. }
  141. answer := And(bitmaps[0], bitmaps[1])
  142. for _, bm := range bitmaps[2:] {
  143. answer.And(bm)
  144. }
  145. return answer
  146. }
  147. // FastOr computes the union between many bitmaps quickly, as opposed to having to call Or repeatedly.
  148. // It might also be faster than calling Or repeatedly.
  149. func FastOr(bitmaps ...*Bitmap) *Bitmap {
  150. if len(bitmaps) == 0 {
  151. return NewBitmap()
  152. } else if len(bitmaps) == 1 {
  153. return bitmaps[0].Clone()
  154. }
  155. answer := lazyOR(bitmaps[0], bitmaps[1])
  156. for _, bm := range bitmaps[2:] {
  157. answer = answer.lazyOR(bm)
  158. }
  159. // here is where repairAfterLazy is called.
  160. answer.repairAfterLazy()
  161. return answer
  162. }
  163. // HeapOr computes the union between many bitmaps quickly using a heap.
  164. // It might be faster than calling Or repeatedly.
  165. func HeapOr(bitmaps ...*Bitmap) *Bitmap {
  166. if len(bitmaps) == 0 {
  167. return NewBitmap()
  168. }
  169. // TODO: for better speed, we could do the operation lazily, see Java implementation
  170. pq := make(priorityQueue, len(bitmaps))
  171. for i, bm := range bitmaps {
  172. pq[i] = &item{bm, i}
  173. }
  174. heap.Init(&pq)
  175. for pq.Len() > 1 {
  176. x1 := heap.Pop(&pq).(*item)
  177. x2 := heap.Pop(&pq).(*item)
  178. heap.Push(&pq, &item{Or(x1.value, x2.value), 0})
  179. }
  180. return heap.Pop(&pq).(*item).value
  181. }
  182. // HeapXor computes the symmetric difference between many bitmaps quickly (as opposed to calling Xor repeated).
  183. // Internally, this function uses a heap.
  184. // It might be faster than calling Xor repeatedly.
  185. func HeapXor(bitmaps ...*Bitmap) *Bitmap {
  186. if len(bitmaps) == 0 {
  187. return NewBitmap()
  188. }
  189. pq := make(priorityQueue, len(bitmaps))
  190. for i, bm := range bitmaps {
  191. pq[i] = &item{bm, i}
  192. }
  193. heap.Init(&pq)
  194. for pq.Len() > 1 {
  195. x1 := heap.Pop(&pq).(*item)
  196. x2 := heap.Pop(&pq).(*item)
  197. heap.Push(&pq, &item{Xor(x1.value, x2.value), 0})
  198. }
  199. return heap.Pop(&pq).(*item).value
  200. }