You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

parallel.go 13KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613
  1. package roaring
  2. import (
  3. "container/heap"
  4. "fmt"
  5. "runtime"
  6. "sync"
  7. )
  8. var defaultWorkerCount = runtime.NumCPU()
  9. type bitmapContainerKey struct {
  10. key uint16
  11. idx int
  12. bitmap *Bitmap
  13. }
  14. type multipleContainers struct {
  15. key uint16
  16. containers []container
  17. idx int
  18. }
  19. type keyedContainer struct {
  20. key uint16
  21. container container
  22. idx int
  23. }
  24. type bitmapContainerHeap []bitmapContainerKey
  25. func (h bitmapContainerHeap) Len() int { return len(h) }
  26. func (h bitmapContainerHeap) Less(i, j int) bool { return h[i].key < h[j].key }
  27. func (h bitmapContainerHeap) Swap(i, j int) { h[i], h[j] = h[j], h[i] }
  28. func (h *bitmapContainerHeap) Push(x interface{}) {
  29. // Push and Pop use pointer receivers because they modify the slice's length,
  30. // not just its contents.
  31. *h = append(*h, x.(bitmapContainerKey))
  32. }
  33. func (h *bitmapContainerHeap) Pop() interface{} {
  34. old := *h
  35. n := len(old)
  36. x := old[n-1]
  37. *h = old[0 : n-1]
  38. return x
  39. }
  40. func (h bitmapContainerHeap) Peek() bitmapContainerKey {
  41. return h[0]
  42. }
  43. func (h *bitmapContainerHeap) popIncrementing() (key uint16, container container) {
  44. k := h.Peek()
  45. key = k.key
  46. container = k.bitmap.highlowcontainer.containers[k.idx]
  47. newIdx := k.idx + 1
  48. if newIdx < k.bitmap.highlowcontainer.size() {
  49. k = bitmapContainerKey{
  50. k.bitmap.highlowcontainer.keys[newIdx],
  51. newIdx,
  52. k.bitmap,
  53. }
  54. (*h)[0] = k
  55. heap.Fix(h, 0)
  56. } else {
  57. heap.Pop(h)
  58. }
  59. return
  60. }
  61. func (h *bitmapContainerHeap) Next(containers []container) multipleContainers {
  62. if h.Len() == 0 {
  63. return multipleContainers{}
  64. }
  65. key, container := h.popIncrementing()
  66. containers = append(containers, container)
  67. for h.Len() > 0 && key == h.Peek().key {
  68. _, container = h.popIncrementing()
  69. containers = append(containers, container)
  70. }
  71. return multipleContainers{
  72. key,
  73. containers,
  74. -1,
  75. }
  76. }
  77. func newBitmapContainerHeap(bitmaps ...*Bitmap) bitmapContainerHeap {
  78. // Initialize heap
  79. var h bitmapContainerHeap = make([]bitmapContainerKey, 0, len(bitmaps))
  80. for _, bitmap := range bitmaps {
  81. if !bitmap.IsEmpty() {
  82. key := bitmapContainerKey{
  83. bitmap.highlowcontainer.keys[0],
  84. 0,
  85. bitmap,
  86. }
  87. h = append(h, key)
  88. }
  89. }
  90. heap.Init(&h)
  91. return h
  92. }
  93. func repairAfterLazy(c container) container {
  94. switch t := c.(type) {
  95. case *bitmapContainer:
  96. if t.cardinality == invalidCardinality {
  97. t.computeCardinality()
  98. }
  99. if t.getCardinality() <= arrayDefaultMaxSize {
  100. return t.toArrayContainer()
  101. } else if c.(*bitmapContainer).isFull() {
  102. return newRunContainer16Range(0, MaxUint16)
  103. }
  104. }
  105. return c
  106. }
  107. func toBitmapContainer(c container) container {
  108. switch t := c.(type) {
  109. case *arrayContainer:
  110. return t.toBitmapContainer()
  111. case *runContainer16:
  112. if !t.isFull() {
  113. return t.toBitmapContainer()
  114. }
  115. }
  116. return c
  117. }
  118. func appenderRoutine(bitmapChan chan<- *Bitmap, resultChan <-chan keyedContainer, expectedKeysChan <-chan int) {
  119. expectedKeys := -1
  120. appendedKeys := 0
  121. keys := make([]uint16, 0)
  122. containers := make([]container, 0)
  123. for appendedKeys != expectedKeys {
  124. select {
  125. case item := <-resultChan:
  126. if len(keys) <= item.idx {
  127. keys = append(keys, make([]uint16, item.idx-len(keys)+1)...)
  128. containers = append(containers, make([]container, item.idx-len(containers)+1)...)
  129. }
  130. keys[item.idx] = item.key
  131. containers[item.idx] = item.container
  132. appendedKeys++
  133. case msg := <-expectedKeysChan:
  134. expectedKeys = msg
  135. }
  136. }
  137. answer := &Bitmap{
  138. roaringArray{
  139. make([]uint16, 0, expectedKeys),
  140. make([]container, 0, expectedKeys),
  141. make([]bool, 0, expectedKeys),
  142. false,
  143. nil,
  144. },
  145. }
  146. for i := range keys {
  147. if containers[i] != nil { // in case a resulting container was empty, see ParAnd function
  148. answer.highlowcontainer.appendContainer(keys[i], containers[i], false)
  149. }
  150. }
  151. bitmapChan <- answer
  152. }
  153. // ParHeapOr computes the union (OR) of all provided bitmaps in parallel,
  154. // where the parameter "parallelism" determines how many workers are to be used
  155. // (if it is set to 0, a default number of workers is chosen)
  156. // ParHeapOr uses a heap to compute the union. For rare cases it might be faster than ParOr
  157. func ParHeapOr(parallelism int, bitmaps ...*Bitmap) *Bitmap {
  158. bitmapCount := len(bitmaps)
  159. if bitmapCount == 0 {
  160. return NewBitmap()
  161. } else if bitmapCount == 1 {
  162. return bitmaps[0].Clone()
  163. }
  164. if parallelism == 0 {
  165. parallelism = defaultWorkerCount
  166. }
  167. h := newBitmapContainerHeap(bitmaps...)
  168. bitmapChan := make(chan *Bitmap)
  169. inputChan := make(chan multipleContainers, 128)
  170. resultChan := make(chan keyedContainer, 32)
  171. expectedKeysChan := make(chan int)
  172. pool := sync.Pool{
  173. New: func() interface{} {
  174. return make([]container, 0, len(bitmaps))
  175. },
  176. }
  177. orFunc := func() {
  178. // Assumes only structs with >=2 containers are passed
  179. for input := range inputChan {
  180. c := toBitmapContainer(input.containers[0]).lazyOR(input.containers[1])
  181. for _, next := range input.containers[2:] {
  182. c = c.lazyIOR(next)
  183. }
  184. c = repairAfterLazy(c)
  185. kx := keyedContainer{
  186. input.key,
  187. c,
  188. input.idx,
  189. }
  190. resultChan <- kx
  191. pool.Put(input.containers[:0])
  192. }
  193. }
  194. go appenderRoutine(bitmapChan, resultChan, expectedKeysChan)
  195. for i := 0; i < parallelism; i++ {
  196. go orFunc()
  197. }
  198. idx := 0
  199. for h.Len() > 0 {
  200. ck := h.Next(pool.Get().([]container))
  201. if len(ck.containers) == 1 {
  202. resultChan <- keyedContainer{
  203. ck.key,
  204. ck.containers[0],
  205. idx,
  206. }
  207. pool.Put(ck.containers[:0])
  208. } else {
  209. ck.idx = idx
  210. inputChan <- ck
  211. }
  212. idx++
  213. }
  214. expectedKeysChan <- idx
  215. bitmap := <-bitmapChan
  216. close(inputChan)
  217. close(resultChan)
  218. close(expectedKeysChan)
  219. return bitmap
  220. }
  221. // ParAnd computes the intersection (AND) of all provided bitmaps in parallel,
  222. // where the parameter "parallelism" determines how many workers are to be used
  223. // (if it is set to 0, a default number of workers is chosen)
  224. func ParAnd(parallelism int, bitmaps ...*Bitmap) *Bitmap {
  225. bitmapCount := len(bitmaps)
  226. if bitmapCount == 0 {
  227. return NewBitmap()
  228. } else if bitmapCount == 1 {
  229. return bitmaps[0].Clone()
  230. }
  231. if parallelism == 0 {
  232. parallelism = defaultWorkerCount
  233. }
  234. h := newBitmapContainerHeap(bitmaps...)
  235. bitmapChan := make(chan *Bitmap)
  236. inputChan := make(chan multipleContainers, 128)
  237. resultChan := make(chan keyedContainer, 32)
  238. expectedKeysChan := make(chan int)
  239. andFunc := func() {
  240. // Assumes only structs with >=2 containers are passed
  241. for input := range inputChan {
  242. c := input.containers[0].and(input.containers[1])
  243. for _, next := range input.containers[2:] {
  244. if c.getCardinality() == 0 {
  245. break
  246. }
  247. c = c.iand(next)
  248. }
  249. // Send a nil explicitly if the result of the intersection is an empty container
  250. if c.getCardinality() == 0 {
  251. c = nil
  252. }
  253. kx := keyedContainer{
  254. input.key,
  255. c,
  256. input.idx,
  257. }
  258. resultChan <- kx
  259. }
  260. }
  261. go appenderRoutine(bitmapChan, resultChan, expectedKeysChan)
  262. for i := 0; i < parallelism; i++ {
  263. go andFunc()
  264. }
  265. idx := 0
  266. for h.Len() > 0 {
  267. ck := h.Next(make([]container, 0, 4))
  268. if len(ck.containers) == bitmapCount {
  269. ck.idx = idx
  270. inputChan <- ck
  271. idx++
  272. }
  273. }
  274. expectedKeysChan <- idx
  275. bitmap := <-bitmapChan
  276. close(inputChan)
  277. close(resultChan)
  278. close(expectedKeysChan)
  279. return bitmap
  280. }
  281. // ParOr computes the union (OR) of all provided bitmaps in parallel,
  282. // where the parameter "parallelism" determines how many workers are to be used
  283. // (if it is set to 0, a default number of workers is chosen)
  284. func ParOr(parallelism int, bitmaps ...*Bitmap) *Bitmap {
  285. var lKey uint16 = MaxUint16
  286. var hKey uint16 = 0
  287. bitmapsFiltered := bitmaps[:0]
  288. for _, b := range bitmaps {
  289. if !b.IsEmpty() {
  290. bitmapsFiltered = append(bitmapsFiltered, b)
  291. }
  292. }
  293. bitmaps = bitmapsFiltered
  294. for _, b := range bitmaps {
  295. lKey = minOfUint16(lKey, b.highlowcontainer.keys[0])
  296. hKey = maxOfUint16(hKey, b.highlowcontainer.keys[b.highlowcontainer.size()-1])
  297. }
  298. if lKey == MaxUint16 && hKey == 0 {
  299. return New()
  300. } else if len(bitmaps) == 1 {
  301. return bitmaps[0]
  302. }
  303. keyRange := hKey - lKey + 1
  304. if keyRange == 1 {
  305. // revert to FastOr. Since the key range is 0
  306. // no container-level aggregation parallelism is achievable
  307. return FastOr(bitmaps...)
  308. }
  309. if parallelism == 0 {
  310. parallelism = defaultWorkerCount
  311. }
  312. var chunkSize int
  313. var chunkCount int
  314. if parallelism*4 > int(keyRange) {
  315. chunkSize = 1
  316. chunkCount = int(keyRange)
  317. } else {
  318. chunkCount = parallelism * 4
  319. chunkSize = (int(keyRange) + chunkCount - 1) / chunkCount
  320. }
  321. if chunkCount*chunkSize < int(keyRange) {
  322. // it's fine to panic to indicate an implementation error
  323. panic(fmt.Sprintf("invariant check failed: chunkCount * chunkSize < keyRange, %d * %d < %d", chunkCount, chunkSize, keyRange))
  324. }
  325. chunks := make([]*roaringArray, chunkCount)
  326. chunkSpecChan := make(chan parChunkSpec, minOfInt(maxOfInt(64, 2*parallelism), int(chunkCount)))
  327. chunkChan := make(chan parChunk, minOfInt(32, int(chunkCount)))
  328. orFunc := func() {
  329. for spec := range chunkSpecChan {
  330. ra := lazyOrOnRange(&bitmaps[0].highlowcontainer, &bitmaps[1].highlowcontainer, spec.start, spec.end)
  331. for _, b := range bitmaps[2:] {
  332. ra = lazyIOrOnRange(ra, &b.highlowcontainer, spec.start, spec.end)
  333. }
  334. for i, c := range ra.containers {
  335. ra.containers[i] = repairAfterLazy(c)
  336. }
  337. chunkChan <- parChunk{ra, spec.idx}
  338. }
  339. }
  340. for i := 0; i < parallelism; i++ {
  341. go orFunc()
  342. }
  343. go func() {
  344. for i := 0; i < chunkCount; i++ {
  345. spec := parChunkSpec{
  346. start: uint16(int(lKey) + i*chunkSize),
  347. end: uint16(minOfInt(int(lKey)+(i+1)*chunkSize-1, int(hKey))),
  348. idx: int(i),
  349. }
  350. chunkSpecChan <- spec
  351. }
  352. }()
  353. chunksRemaining := chunkCount
  354. for chunk := range chunkChan {
  355. chunks[chunk.idx] = chunk.ra
  356. chunksRemaining--
  357. if chunksRemaining == 0 {
  358. break
  359. }
  360. }
  361. close(chunkChan)
  362. close(chunkSpecChan)
  363. containerCount := 0
  364. for _, chunk := range chunks {
  365. containerCount += chunk.size()
  366. }
  367. result := Bitmap{
  368. roaringArray{
  369. containers: make([]container, containerCount),
  370. keys: make([]uint16, containerCount),
  371. needCopyOnWrite: make([]bool, containerCount),
  372. },
  373. }
  374. resultOffset := 0
  375. for _, chunk := range chunks {
  376. copy(result.highlowcontainer.containers[resultOffset:], chunk.containers)
  377. copy(result.highlowcontainer.keys[resultOffset:], chunk.keys)
  378. copy(result.highlowcontainer.needCopyOnWrite[resultOffset:], chunk.needCopyOnWrite)
  379. resultOffset += chunk.size()
  380. }
  381. return &result
  382. }
  383. type parChunkSpec struct {
  384. start uint16
  385. end uint16
  386. idx int
  387. }
  388. type parChunk struct {
  389. ra *roaringArray
  390. idx int
  391. }
  392. func (c parChunk) size() int {
  393. return c.ra.size()
  394. }
  395. func parNaiveStartAt(ra *roaringArray, start uint16, last uint16) int {
  396. for idx, key := range ra.keys {
  397. if key >= start && key <= last {
  398. return idx
  399. } else if key > last {
  400. break
  401. }
  402. }
  403. return ra.size()
  404. }
  405. func lazyOrOnRange(ra1, ra2 *roaringArray, start, last uint16) *roaringArray {
  406. answer := newRoaringArray()
  407. length1 := ra1.size()
  408. length2 := ra2.size()
  409. idx1 := parNaiveStartAt(ra1, start, last)
  410. idx2 := parNaiveStartAt(ra2, start, last)
  411. var key1 uint16
  412. var key2 uint16
  413. if idx1 < length1 && idx2 < length2 {
  414. key1 = ra1.getKeyAtIndex(idx1)
  415. key2 = ra2.getKeyAtIndex(idx2)
  416. for key1 <= last && key2 <= last {
  417. if key1 < key2 {
  418. answer.appendCopy(*ra1, idx1)
  419. idx1++
  420. if idx1 == length1 {
  421. break
  422. }
  423. key1 = ra1.getKeyAtIndex(idx1)
  424. } else if key1 > key2 {
  425. answer.appendCopy(*ra2, idx2)
  426. idx2++
  427. if idx2 == length2 {
  428. break
  429. }
  430. key2 = ra2.getKeyAtIndex(idx2)
  431. } else {
  432. c1 := ra1.getFastContainerAtIndex(idx1, false)
  433. answer.appendContainer(key1, c1.lazyOR(ra2.getContainerAtIndex(idx2)), false)
  434. idx1++
  435. idx2++
  436. if idx1 == length1 || idx2 == length2 {
  437. break
  438. }
  439. key1 = ra1.getKeyAtIndex(idx1)
  440. key2 = ra2.getKeyAtIndex(idx2)
  441. }
  442. }
  443. }
  444. if idx2 < length2 {
  445. key2 = ra2.getKeyAtIndex(idx2)
  446. for key2 <= last {
  447. answer.appendCopy(*ra2, idx2)
  448. idx2++
  449. if idx2 == length2 {
  450. break
  451. }
  452. key2 = ra2.getKeyAtIndex(idx2)
  453. }
  454. }
  455. if idx1 < length1 {
  456. key1 = ra1.getKeyAtIndex(idx1)
  457. for key1 <= last {
  458. answer.appendCopy(*ra1, idx1)
  459. idx1++
  460. if idx1 == length1 {
  461. break
  462. }
  463. key1 = ra1.getKeyAtIndex(idx1)
  464. }
  465. }
  466. return answer
  467. }
  468. func lazyIOrOnRange(ra1, ra2 *roaringArray, start, last uint16) *roaringArray {
  469. length1 := ra1.size()
  470. length2 := ra2.size()
  471. idx1 := 0
  472. idx2 := parNaiveStartAt(ra2, start, last)
  473. var key1 uint16
  474. var key2 uint16
  475. if idx1 < length1 && idx2 < length2 {
  476. key1 = ra1.getKeyAtIndex(idx1)
  477. key2 = ra2.getKeyAtIndex(idx2)
  478. for key1 <= last && key2 <= last {
  479. if key1 < key2 {
  480. idx1++
  481. if idx1 >= length1 {
  482. break
  483. }
  484. key1 = ra1.getKeyAtIndex(idx1)
  485. } else if key1 > key2 {
  486. ra1.insertNewKeyValueAt(idx1, key2, ra2.getContainerAtIndex(idx2))
  487. ra1.needCopyOnWrite[idx1] = true
  488. idx2++
  489. idx1++
  490. length1++
  491. if idx2 >= length2 {
  492. break
  493. }
  494. key2 = ra2.getKeyAtIndex(idx2)
  495. } else {
  496. c1 := ra1.getFastContainerAtIndex(idx1, true)
  497. ra1.containers[idx1] = c1.lazyIOR(ra2.getContainerAtIndex(idx2))
  498. ra1.needCopyOnWrite[idx1] = false
  499. idx1++
  500. idx2++
  501. if idx1 >= length1 || idx2 >= length2 {
  502. break
  503. }
  504. key1 = ra1.getKeyAtIndex(idx1)
  505. key2 = ra2.getKeyAtIndex(idx2)
  506. }
  507. }
  508. }
  509. if idx2 < length2 {
  510. key2 = ra2.getKeyAtIndex(idx2)
  511. for key2 <= last {
  512. ra1.appendCopy(*ra2, idx2)
  513. idx2++
  514. if idx2 >= length2 {
  515. break
  516. }
  517. key2 = ra2.getKeyAtIndex(idx2)
  518. }
  519. }
  520. return ra1
  521. }