You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

rle.go 42KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667
  1. package roaring
  2. //
  3. // Copyright (c) 2016 by the roaring authors.
  4. // Licensed under the Apache License, Version 2.0.
  5. //
  6. // We derive a few lines of code from the sort.Search
  7. // function in the golang standard library. That function
  8. // is Copyright 2009 The Go Authors, and licensed
  9. // under the following BSD-style license.
  10. /*
  11. Copyright (c) 2009 The Go Authors. All rights reserved.
  12. Redistribution and use in source and binary forms, with or without
  13. modification, are permitted provided that the following conditions are
  14. met:
  15. * Redistributions of source code must retain the above copyright
  16. notice, this list of conditions and the following disclaimer.
  17. * Redistributions in binary form must reproduce the above
  18. copyright notice, this list of conditions and the following disclaimer
  19. in the documentation and/or other materials provided with the
  20. distribution.
  21. * Neither the name of Google Inc. nor the names of its
  22. contributors may be used to endorse or promote products derived from
  23. this software without specific prior written permission.
  24. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  25. "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  26. LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  27. A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  28. OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  29. SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  30. LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  31. DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  32. THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  33. (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  34. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  35. */
  36. import (
  37. "fmt"
  38. "sort"
  39. "unsafe"
  40. )
  41. //go:generate msgp -unexported
// runContainer32 does run-length encoding of sets of
// uint32 integers.
type runContainer32 struct {
	// iv holds the runs as closed [start, last] intervals.
	// NOTE(review): the search/union/intersect code in this file appears
	// to assume iv is sorted by start and free of overlaps — confirm that
	// every writer of iv maintains this.
	iv []interval32
	// card is filled in by some constructors in this file, left at zero by
	// others, and reset to zero by set().
	// NOTE(review): presumably a cardinality cache where zero means
	// "not computed yet" — confirm against the cardinality accessor.
	card int64

	// avoid allocation during search
	// (indexOfIntervalAtOrAfter reuses this value as its search options;
	// NOTE(review): the msg:"-" tag presumably keeps it out of the msgp
	// encoding — confirm.)
	myOpts searchOptions `msg:"-"`
}
  50. // interval32 is the internal to runContainer32
  51. // structure that maintains the individual [Start, last]
  52. // closed intervals.
  53. type interval32 struct {
  54. start uint32
  55. last uint32
  56. }
  57. // runlen returns the count of integers in the interval.
  58. func (iv interval32) runlen() int64 {
  59. return 1 + int64(iv.last) - int64(iv.start)
  60. }
  61. // String produces a human viewable string of the contents.
  62. func (iv interval32) String() string {
  63. return fmt.Sprintf("[%d, %d]", iv.start, iv.last)
  64. }
  65. func ivalString32(iv []interval32) string {
  66. var s string
  67. var j int
  68. var p interval32
  69. for j, p = range iv {
  70. s += fmt.Sprintf("%v:[%d, %d], ", j, p.start, p.last)
  71. }
  72. return s
  73. }
  74. // String produces a human viewable string of the contents.
  75. func (rc *runContainer32) String() string {
  76. if len(rc.iv) == 0 {
  77. return "runContainer32{}"
  78. }
  79. is := ivalString32(rc.iv)
  80. return `runContainer32{` + is + `}`
  81. }
  82. // uint32Slice is a sort.Sort convenience method
  83. type uint32Slice []uint32
  84. // Len returns the length of p.
  85. func (p uint32Slice) Len() int { return len(p) }
  86. // Less returns p[i] < p[j]
  87. func (p uint32Slice) Less(i, j int) bool { return p[i] < p[j] }
  88. // Swap swaps elements i and j.
  89. func (p uint32Slice) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
  90. //msgp:ignore addHelper
  91. // addHelper helps build a runContainer32.
  92. type addHelper32 struct {
  93. runstart uint32
  94. runlen uint32
  95. actuallyAdded uint32
  96. m []interval32
  97. rc *runContainer32
  98. }
  99. func (ah *addHelper32) storeIval(runstart, runlen uint32) {
  100. mi := interval32{start: runstart, last: runstart + runlen}
  101. ah.m = append(ah.m, mi)
  102. }
  103. func (ah *addHelper32) add(cur, prev uint32, i int) {
  104. if cur == prev+1 {
  105. ah.runlen++
  106. ah.actuallyAdded++
  107. } else {
  108. if cur < prev {
  109. panic(fmt.Sprintf("newRunContainer32FromVals sees "+
  110. "unsorted vals; vals[%v]=cur=%v < prev=%v. Sort your vals"+
  111. " before calling us with alreadySorted == true.", i, cur, prev))
  112. }
  113. if cur == prev {
  114. // ignore duplicates
  115. } else {
  116. ah.actuallyAdded++
  117. ah.storeIval(ah.runstart, ah.runlen)
  118. ah.runstart = cur
  119. ah.runlen = 0
  120. }
  121. }
  122. }
  123. // newRunContainerRange makes a new container made of just the specified closed interval [rangestart,rangelast]
  124. func newRunContainer32Range(rangestart uint32, rangelast uint32) *runContainer32 {
  125. rc := &runContainer32{}
  126. rc.iv = append(rc.iv, interval32{start: rangestart, last: rangelast})
  127. return rc
  128. }
  129. // newRunContainer32FromVals makes a new container from vals.
  130. //
  131. // For efficiency, vals should be sorted in ascending order.
  132. // Ideally vals should not contain duplicates, but we detect and
  133. // ignore them. If vals is already sorted in ascending order, then
  134. // pass alreadySorted = true. Otherwise, for !alreadySorted,
  135. // we will sort vals before creating a runContainer32 of them.
  136. // We sort the original vals, so this will change what the
  137. // caller sees in vals as a side effect.
  138. func newRunContainer32FromVals(alreadySorted bool, vals ...uint32) *runContainer32 {
  139. // keep this in sync with newRunContainer32FromArray below
  140. rc := &runContainer32{}
  141. ah := addHelper32{rc: rc}
  142. if !alreadySorted {
  143. sort.Sort(uint32Slice(vals))
  144. }
  145. n := len(vals)
  146. var cur, prev uint32
  147. switch {
  148. case n == 0:
  149. // nothing more
  150. case n == 1:
  151. ah.m = append(ah.m, interval32{start: vals[0], last: vals[0]})
  152. ah.actuallyAdded++
  153. default:
  154. ah.runstart = vals[0]
  155. ah.actuallyAdded++
  156. for i := 1; i < n; i++ {
  157. prev = vals[i-1]
  158. cur = vals[i]
  159. ah.add(cur, prev, i)
  160. }
  161. ah.storeIval(ah.runstart, ah.runlen)
  162. }
  163. rc.iv = ah.m
  164. rc.card = int64(ah.actuallyAdded)
  165. return rc
  166. }
  167. // newRunContainer32FromBitmapContainer makes a new run container from bc,
  168. // somewhat efficiently. For reference, see the Java
  169. // https://github.com/RoaringBitmap/RoaringBitmap/blob/master/src/main/java/org/roaringbitmap/RunContainer.java#L145-L192
  170. func newRunContainer32FromBitmapContainer(bc *bitmapContainer) *runContainer32 {
  171. rc := &runContainer32{}
  172. nbrRuns := bc.numberOfRuns()
  173. if nbrRuns == 0 {
  174. return rc
  175. }
  176. rc.iv = make([]interval32, nbrRuns)
  177. longCtr := 0 // index of current long in bitmap
  178. curWord := bc.bitmap[0] // its value
  179. runCount := 0
  180. for {
  181. // potentially multiword advance to first 1 bit
  182. for curWord == 0 && longCtr < len(bc.bitmap)-1 {
  183. longCtr++
  184. curWord = bc.bitmap[longCtr]
  185. }
  186. if curWord == 0 {
  187. // wrap up, no more runs
  188. return rc
  189. }
  190. localRunStart := countTrailingZeros(curWord)
  191. runStart := localRunStart + 64*longCtr
  192. // stuff 1s into number's LSBs
  193. curWordWith1s := curWord | (curWord - 1)
  194. // find the next 0, potentially in a later word
  195. runEnd := 0
  196. for curWordWith1s == maxWord && longCtr < len(bc.bitmap)-1 {
  197. longCtr++
  198. curWordWith1s = bc.bitmap[longCtr]
  199. }
  200. if curWordWith1s == maxWord {
  201. // a final unterminated run of 1s
  202. runEnd = wordSizeInBits + longCtr*64
  203. rc.iv[runCount].start = uint32(runStart)
  204. rc.iv[runCount].last = uint32(runEnd) - 1
  205. return rc
  206. }
  207. localRunEnd := countTrailingZeros(^curWordWith1s)
  208. runEnd = localRunEnd + longCtr*64
  209. rc.iv[runCount].start = uint32(runStart)
  210. rc.iv[runCount].last = uint32(runEnd) - 1
  211. runCount++
  212. // now, zero out everything right of runEnd.
  213. curWord = curWordWith1s & (curWordWith1s + 1)
  214. // We've lathered and rinsed, so repeat...
  215. }
  216. }
  217. //
  218. // newRunContainer32FromArray populates a new
  219. // runContainer32 from the contents of arr.
  220. //
  221. func newRunContainer32FromArray(arr *arrayContainer) *runContainer32 {
  222. // keep this in sync with newRunContainer32FromVals above
  223. rc := &runContainer32{}
  224. ah := addHelper32{rc: rc}
  225. n := arr.getCardinality()
  226. var cur, prev uint32
  227. switch {
  228. case n == 0:
  229. // nothing more
  230. case n == 1:
  231. ah.m = append(ah.m, interval32{start: uint32(arr.content[0]), last: uint32(arr.content[0])})
  232. ah.actuallyAdded++
  233. default:
  234. ah.runstart = uint32(arr.content[0])
  235. ah.actuallyAdded++
  236. for i := 1; i < n; i++ {
  237. prev = uint32(arr.content[i-1])
  238. cur = uint32(arr.content[i])
  239. ah.add(cur, prev, i)
  240. }
  241. ah.storeIval(ah.runstart, ah.runlen)
  242. }
  243. rc.iv = ah.m
  244. rc.card = int64(ah.actuallyAdded)
  245. return rc
  246. }
  247. // set adds the integers in vals to the set. Vals
  248. // must be sorted in increasing order; if not, you should set
  249. // alreadySorted to false, and we will sort them in place for you.
  250. // (Be aware of this side effect -- it will affect the callers
  251. // view of vals).
  252. //
  253. // If you have a small number of additions to an already
  254. // big runContainer32, calling Add() may be faster.
  255. func (rc *runContainer32) set(alreadySorted bool, vals ...uint32) {
  256. rc2 := newRunContainer32FromVals(alreadySorted, vals...)
  257. un := rc.union(rc2)
  258. rc.iv = un.iv
  259. rc.card = 0
  260. }
  261. // canMerge returns true if the intervals
  262. // a and b either overlap or they are
  263. // contiguous and so can be merged into
  264. // a single interval.
  265. func canMerge32(a, b interval32) bool {
  266. if int64(a.last)+1 < int64(b.start) {
  267. return false
  268. }
  269. return int64(b.last)+1 >= int64(a.start)
  270. }
  271. // haveOverlap differs from canMerge in that
  272. // it tells you if the intersection of a
  273. // and b would contain an element (otherwise
  274. // it would be the empty set, and we return
  275. // false).
  276. func haveOverlap32(a, b interval32) bool {
  277. if int64(a.last)+1 <= int64(b.start) {
  278. return false
  279. }
  280. return int64(b.last)+1 > int64(a.start)
  281. }
  282. // mergeInterval32s joins a and b into a
  283. // new interval, and panics if it cannot.
  284. func mergeInterval32s(a, b interval32) (res interval32) {
  285. if !canMerge32(a, b) {
  286. panic(fmt.Sprintf("cannot merge %#v and %#v", a, b))
  287. }
  288. if b.start < a.start {
  289. res.start = b.start
  290. } else {
  291. res.start = a.start
  292. }
  293. if b.last > a.last {
  294. res.last = b.last
  295. } else {
  296. res.last = a.last
  297. }
  298. return
  299. }
  300. // intersectInterval32s returns the intersection
  301. // of a and b. The isEmpty flag will be true if
  302. // a and b were disjoint.
  303. func intersectInterval32s(a, b interval32) (res interval32, isEmpty bool) {
  304. if !haveOverlap32(a, b) {
  305. isEmpty = true
  306. return
  307. }
  308. if b.start > a.start {
  309. res.start = b.start
  310. } else {
  311. res.start = a.start
  312. }
  313. if b.last < a.last {
  314. res.last = b.last
  315. } else {
  316. res.last = a.last
  317. }
  318. return
  319. }
  320. // union merges two runContainer32s, producing
  321. // a new runContainer32 with the union of rc and b.
  322. func (rc *runContainer32) union(b *runContainer32) *runContainer32 {
  323. // rc is also known as 'a' here, but golint insisted we
  324. // call it rc for consistency with the rest of the methods.
  325. var m []interval32
  326. alim := int64(len(rc.iv))
  327. blim := int64(len(b.iv))
  328. var na int64 // next from a
  329. var nb int64 // next from b
  330. // merged holds the current merge output, which might
  331. // get additional merges before being appended to m.
  332. var merged interval32
  333. var mergedUsed bool // is merged being used at the moment?
  334. var cura interval32 // currently considering this interval32 from a
  335. var curb interval32 // currently considering this interval32 from b
  336. pass := 0
  337. for na < alim && nb < blim {
  338. pass++
  339. cura = rc.iv[na]
  340. curb = b.iv[nb]
  341. if mergedUsed {
  342. mergedUpdated := false
  343. if canMerge32(cura, merged) {
  344. merged = mergeInterval32s(cura, merged)
  345. na = rc.indexOfIntervalAtOrAfter(int64(merged.last)+1, na+1)
  346. mergedUpdated = true
  347. }
  348. if canMerge32(curb, merged) {
  349. merged = mergeInterval32s(curb, merged)
  350. nb = b.indexOfIntervalAtOrAfter(int64(merged.last)+1, nb+1)
  351. mergedUpdated = true
  352. }
  353. if !mergedUpdated {
  354. // we know that merged is disjoint from cura and curb
  355. m = append(m, merged)
  356. mergedUsed = false
  357. }
  358. continue
  359. } else {
  360. // !mergedUsed
  361. if !canMerge32(cura, curb) {
  362. if cura.start < curb.start {
  363. m = append(m, cura)
  364. na++
  365. } else {
  366. m = append(m, curb)
  367. nb++
  368. }
  369. } else {
  370. merged = mergeInterval32s(cura, curb)
  371. mergedUsed = true
  372. na = rc.indexOfIntervalAtOrAfter(int64(merged.last)+1, na+1)
  373. nb = b.indexOfIntervalAtOrAfter(int64(merged.last)+1, nb+1)
  374. }
  375. }
  376. }
  377. var aDone, bDone bool
  378. if na >= alim {
  379. aDone = true
  380. }
  381. if nb >= blim {
  382. bDone = true
  383. }
  384. // finish by merging anything remaining into merged we can:
  385. if mergedUsed {
  386. if !aDone {
  387. aAdds:
  388. for na < alim {
  389. cura = rc.iv[na]
  390. if canMerge32(cura, merged) {
  391. merged = mergeInterval32s(cura, merged)
  392. na = rc.indexOfIntervalAtOrAfter(int64(merged.last)+1, na+1)
  393. } else {
  394. break aAdds
  395. }
  396. }
  397. }
  398. if !bDone {
  399. bAdds:
  400. for nb < blim {
  401. curb = b.iv[nb]
  402. if canMerge32(curb, merged) {
  403. merged = mergeInterval32s(curb, merged)
  404. nb = b.indexOfIntervalAtOrAfter(int64(merged.last)+1, nb+1)
  405. } else {
  406. break bAdds
  407. }
  408. }
  409. }
  410. m = append(m, merged)
  411. }
  412. if na < alim {
  413. m = append(m, rc.iv[na:]...)
  414. }
  415. if nb < blim {
  416. m = append(m, b.iv[nb:]...)
  417. }
  418. res := &runContainer32{iv: m}
  419. return res
  420. }
  421. // unionCardinality returns the cardinality of the merger of two runContainer32s, the union of rc and b.
  422. func (rc *runContainer32) unionCardinality(b *runContainer32) uint64 {
  423. // rc is also known as 'a' here, but golint insisted we
  424. // call it rc for consistency with the rest of the methods.
  425. answer := uint64(0)
  426. alim := int64(len(rc.iv))
  427. blim := int64(len(b.iv))
  428. var na int64 // next from a
  429. var nb int64 // next from b
  430. // merged holds the current merge output, which might
  431. // get additional merges before being appended to m.
  432. var merged interval32
  433. var mergedUsed bool // is merged being used at the moment?
  434. var cura interval32 // currently considering this interval32 from a
  435. var curb interval32 // currently considering this interval32 from b
  436. pass := 0
  437. for na < alim && nb < blim {
  438. pass++
  439. cura = rc.iv[na]
  440. curb = b.iv[nb]
  441. if mergedUsed {
  442. mergedUpdated := false
  443. if canMerge32(cura, merged) {
  444. merged = mergeInterval32s(cura, merged)
  445. na = rc.indexOfIntervalAtOrAfter(int64(merged.last)+1, na+1)
  446. mergedUpdated = true
  447. }
  448. if canMerge32(curb, merged) {
  449. merged = mergeInterval32s(curb, merged)
  450. nb = b.indexOfIntervalAtOrAfter(int64(merged.last)+1, nb+1)
  451. mergedUpdated = true
  452. }
  453. if !mergedUpdated {
  454. // we know that merged is disjoint from cura and curb
  455. //m = append(m, merged)
  456. answer += uint64(merged.last) - uint64(merged.start) + 1
  457. mergedUsed = false
  458. }
  459. continue
  460. } else {
  461. // !mergedUsed
  462. if !canMerge32(cura, curb) {
  463. if cura.start < curb.start {
  464. answer += uint64(cura.last) - uint64(cura.start) + 1
  465. //m = append(m, cura)
  466. na++
  467. } else {
  468. answer += uint64(curb.last) - uint64(curb.start) + 1
  469. //m = append(m, curb)
  470. nb++
  471. }
  472. } else {
  473. merged = mergeInterval32s(cura, curb)
  474. mergedUsed = true
  475. na = rc.indexOfIntervalAtOrAfter(int64(merged.last)+1, na+1)
  476. nb = b.indexOfIntervalAtOrAfter(int64(merged.last)+1, nb+1)
  477. }
  478. }
  479. }
  480. var aDone, bDone bool
  481. if na >= alim {
  482. aDone = true
  483. }
  484. if nb >= blim {
  485. bDone = true
  486. }
  487. // finish by merging anything remaining into merged we can:
  488. if mergedUsed {
  489. if !aDone {
  490. aAdds:
  491. for na < alim {
  492. cura = rc.iv[na]
  493. if canMerge32(cura, merged) {
  494. merged = mergeInterval32s(cura, merged)
  495. na = rc.indexOfIntervalAtOrAfter(int64(merged.last)+1, na+1)
  496. } else {
  497. break aAdds
  498. }
  499. }
  500. }
  501. if !bDone {
  502. bAdds:
  503. for nb < blim {
  504. curb = b.iv[nb]
  505. if canMerge32(curb, merged) {
  506. merged = mergeInterval32s(curb, merged)
  507. nb = b.indexOfIntervalAtOrAfter(int64(merged.last)+1, nb+1)
  508. } else {
  509. break bAdds
  510. }
  511. }
  512. }
  513. //m = append(m, merged)
  514. answer += uint64(merged.last) - uint64(merged.start) + 1
  515. }
  516. for _, r := range rc.iv[na:] {
  517. answer += uint64(r.last) - uint64(r.start) + 1
  518. }
  519. for _, r := range b.iv[nb:] {
  520. answer += uint64(r.last) - uint64(r.start) + 1
  521. }
  522. return answer
  523. }
  524. // indexOfIntervalAtOrAfter is a helper for union.
  525. func (rc *runContainer32) indexOfIntervalAtOrAfter(key int64, startIndex int64) int64 {
  526. rc.myOpts.startIndex = startIndex
  527. rc.myOpts.endxIndex = 0
  528. w, already, _ := rc.search(key, &rc.myOpts)
  529. if already {
  530. return w
  531. }
  532. return w + 1
  533. }
// intersect returns a new runContainer32 holding the
// intersection of rc (also known as 'a') and b.
//
// The two interval lists are walked with the cursors acuri and bcuri.
// astart and bstart hold the start of the part of the current interval
// that is still under consideration on each side, so one interval can
// contribute several output intervals. Only the iv field of the result
// is populated; card is left at zero, and iv stays nil when nothing
// intersects.
func (rc *runContainer32) intersect(b *runContainer32) *runContainer32 {

	a := rc
	numa := int64(len(a.iv))
	numb := int64(len(b.iv))
	res := &runContainer32{}
	// An empty operand makes the intersection empty.
	if numa == 0 || numb == 0 {
		return res
	}

	// Shortcut: a single interval on each side and they do not overlap.
	if numa == 1 && numb == 1 {
		if !haveOverlap32(a.iv[0], b.iv[0]) {
			return res
		}
	}

	var output []interval32

	var acuri int64 // index of the current interval in a
	var bcuri int64 // index of the current interval in b

	astart := int64(a.iv[acuri].start)
	bstart := int64(b.iv[bcuri].start)

	var intersection interval32
	var leftoverstart int64
	var isOverlap, isLeftoverA, isLeftoverB bool
	var done bool
	pass := 0 // iteration counter; only ever incremented
toploop:
	for acuri < numa && bcuri < numb {
		pass++

		// NOTE(review): intersectWithLeftover32 is defined elsewhere;
		// presumably it intersects [astart, a.last] with [bstart, b.last]
		// and reports which side extends past the intersection and where
		// that leftover begins — confirm against its definition.
		isOverlap, isLeftoverA, isLeftoverB, leftoverstart, intersection = intersectWithLeftover32(astart, int64(a.iv[acuri].last), bstart, int64(b.iv[bcuri].last))

		if !isOverlap {
			// Advance whichever side starts earlier.
			// NOTE(review): findNextIntervalThatIntersectsStartingFrom is
			// defined elsewhere; done presumably means no further
			// candidate interval exists — confirm.
			switch {
			case astart < bstart:
				acuri, done = a.findNextIntervalThatIntersectsStartingFrom(acuri+1, bstart)
				if done {
					break toploop
				}
				astart = int64(a.iv[acuri].start)

			case astart > bstart:
				bcuri, done = b.findNextIntervalThatIntersectsStartingFrom(bcuri+1, astart)
				if done {
					break toploop
				}
				bstart = int64(b.iv[bcuri].start)

				// There is deliberately no default case: the original
				// author considered astart == bstart impossible here.
				//default:
				//	panic("impossible that astart == bstart, since !isOverlap")
			}

		} else {
			// isOverlap
			output = append(output, intersection)
			switch {
			case isLeftoverA:
				// note that we change astart without advancing acuri,
				// since we need to capture any 2ndary intersections with a.iv[acuri]
				astart = leftoverstart
				bcuri++
				if bcuri >= numb {
					break toploop
				}
				bstart = int64(b.iv[bcuri].start)
			case isLeftoverB:
				// note that we change bstart without advancing bcuri,
				// since we need to capture any 2ndary intersections with b.iv[bcuri]
				bstart = leftoverstart
				acuri++
				if acuri >= numa {
					break toploop
				}
				astart = int64(a.iv[acuri].start)
			default:
				// neither had leftover, both completely consumed
				// optionally, assert for sanity:
				//if a.iv[acuri].endx != b.iv[bcuri].endx {
				//	panic("huh? should only be possible that endx agree now!")
				//}

				// advance to next a interval
				acuri++
				if acuri >= numa {
					break toploop
				}
				astart = int64(a.iv[acuri].start)

				// advance to next b interval
				bcuri++
				if bcuri >= numb {
					break toploop
				}
				bstart = int64(b.iv[bcuri].start)
			}
		}
	} // end for toploop

	// Leave res.iv nil when nothing was found.
	if len(output) == 0 {
		return res
	}

	res.iv = output
	return res
}
// intersectCardinality returns the cardinality of the
// intersection of rc (also known as 'a') and b.
//
// It performs the same walk as intersect, but instead of collecting
// each intersection interval it adds that interval's length to a
// running total, so no output slice is built.
func (rc *runContainer32) intersectCardinality(b *runContainer32) int64 {
	answer := int64(0)

	a := rc
	numa := int64(len(a.iv))
	numb := int64(len(b.iv))
	// An empty operand makes the intersection empty.
	if numa == 0 || numb == 0 {
		return 0
	}

	// Shortcut: a single interval on each side and they do not overlap.
	if numa == 1 && numb == 1 {
		if !haveOverlap32(a.iv[0], b.iv[0]) {
			return 0
		}
	}

	var acuri int64 // index of the current interval in a
	var bcuri int64 // index of the current interval in b

	astart := int64(a.iv[acuri].start)
	bstart := int64(b.iv[bcuri].start)

	var intersection interval32
	var leftoverstart int64
	var isOverlap, isLeftoverA, isLeftoverB bool
	var done bool
	pass := 0 // iteration counter; only ever incremented
toploop:
	for acuri < numa && bcuri < numb {
		pass++

		// NOTE(review): intersectWithLeftover32 is defined elsewhere;
		// presumably it intersects [astart, a.last] with [bstart, b.last]
		// and reports which side extends past the intersection and where
		// that leftover begins — confirm against its definition.
		isOverlap, isLeftoverA, isLeftoverB, leftoverstart, intersection = intersectWithLeftover32(astart, int64(a.iv[acuri].last), bstart, int64(b.iv[bcuri].last))

		if !isOverlap {
			// Advance whichever side starts earlier.
			// NOTE(review): findNextIntervalThatIntersectsStartingFrom is
			// defined elsewhere; done presumably means no further
			// candidate interval exists — confirm.
			switch {
			case astart < bstart:
				acuri, done = a.findNextIntervalThatIntersectsStartingFrom(acuri+1, bstart)
				if done {
					break toploop
				}
				astart = int64(a.iv[acuri].start)

			case astart > bstart:
				bcuri, done = b.findNextIntervalThatIntersectsStartingFrom(bcuri+1, astart)
				if done {
					break toploop
				}
				bstart = int64(b.iv[bcuri].start)

				// There is deliberately no default case: the original
				// author considered astart == bstart impossible here.
				//default:
				//	panic("impossible that astart == bstart, since !isOverlap")
			}

		} else {
			// isOverlap
			// closed interval, hence the +1
			answer += int64(intersection.last) - int64(intersection.start) + 1
			switch {
			case isLeftoverA:
				// note that we change astart without advancing acuri,
				// since we need to capture any 2ndary intersections with a.iv[acuri]
				astart = leftoverstart
				bcuri++
				if bcuri >= numb {
					break toploop
				}
				bstart = int64(b.iv[bcuri].start)
			case isLeftoverB:
				// note that we change bstart without advancing bcuri,
				// since we need to capture any 2ndary intersections with b.iv[bcuri]
				bstart = leftoverstart
				acuri++
				if acuri >= numa {
					break toploop
				}
				astart = int64(a.iv[acuri].start)
			default:
				// neither had leftover, both completely consumed
				// optionally, assert for sanity:
				//if a.iv[acuri].endx != b.iv[bcuri].endx {
				//	panic("huh? should only be possible that endx agree now!")
				//}

				// advance to next a interval
				acuri++
				if acuri >= numa {
					break toploop
				}
				astart = int64(a.iv[acuri].start)

				// advance to next b interval
				bcuri++
				if bcuri >= numb {
					break toploop
				}
				bstart = int64(b.iv[bcuri].start)
			}
		}
	} // end for toploop

	return answer
}
  719. // get returns true if key is in the container.
  720. func (rc *runContainer32) contains(key uint32) bool {
  721. _, in, _ := rc.search(int64(key), nil)
  722. return in
  723. }
  724. // numIntervals returns the count of intervals in the container.
  725. func (rc *runContainer32) numIntervals() int {
  726. return len(rc.iv)
  727. }
  728. // search returns alreadyPresent to indicate if the
  729. // key is already in one of our interval32s.
  730. //
  731. // If key is alreadyPresent, then whichInterval32 tells
  732. // you where.
  733. //
  734. // If key is not already present, then whichInterval32 is
  735. // set as follows:
  736. //
  737. // a) whichInterval32 == len(rc.iv)-1 if key is beyond our
  738. // last interval32 in rc.iv;
  739. //
  740. // b) whichInterval32 == -1 if key is before our first
  741. // interval32 in rc.iv;
  742. //
  743. // c) whichInterval32 is set to the minimum index of rc.iv
  744. // which comes strictly before the key;
  745. // so rc.iv[whichInterval32].last < key,
  746. // and if whichInterval32+1 exists, then key < rc.iv[whichInterval32+1].start
  747. // (Note that whichInterval32+1 won't exist when
  748. // whichInterval32 is the last interval.)
  749. //
  750. // runContainer32.search always returns whichInterval32 < len(rc.iv).
  751. //
  752. // If not nil, opts can be used to further restrict
  753. // the search space.
  754. //
  755. func (rc *runContainer32) search(key int64, opts *searchOptions) (whichInterval32 int64, alreadyPresent bool, numCompares int) {
  756. n := int64(len(rc.iv))
  757. if n == 0 {
  758. return -1, false, 0
  759. }
  760. startIndex := int64(0)
  761. endxIndex := n
  762. if opts != nil {
  763. startIndex = opts.startIndex
  764. // let endxIndex == 0 mean no effect
  765. if opts.endxIndex > 0 {
  766. endxIndex = opts.endxIndex
  767. }
  768. }
  769. // sort.Search returns the smallest index i
  770. // in [0, n) at which f(i) is true, assuming that on the range [0, n),
  771. // f(i) == true implies f(i+1) == true.
  772. // If there is no such index, Search returns n.
  773. // For correctness, this began as verbatim snippet from
  774. // sort.Search in the Go standard lib.
  775. // We inline our comparison function for speed, and
  776. // annotate with numCompares
  777. // to observe and test that extra bounds are utilized.
  778. i, j := startIndex, endxIndex
  779. for i < j {
  780. h := i + (j-i)/2 // avoid overflow when computing h as the bisector
  781. // i <= h < j
  782. numCompares++
  783. if !(key < int64(rc.iv[h].start)) {
  784. i = h + 1
  785. } else {
  786. j = h
  787. }
  788. }
  789. below := i
  790. // end std lib snippet.
  791. // The above is a simple in-lining and annotation of:
  792. /* below := sort.Search(n,
  793. func(i int) bool {
  794. return key < rc.iv[i].start
  795. })
  796. */
  797. whichInterval32 = below - 1
  798. if below == n {
  799. // all falses => key is >= start of all interval32s
  800. // ... so does it belong to the last interval32?
  801. if key < int64(rc.iv[n-1].last)+1 {
  802. // yes, it belongs to the last interval32
  803. alreadyPresent = true
  804. return
  805. }
  806. // no, it is beyond the last interval32.
  807. // leave alreadyPreset = false
  808. return
  809. }
  810. // INVAR: key is below rc.iv[below]
  811. if below == 0 {
  812. // key is before the first first interval32.
  813. // leave alreadyPresent = false
  814. return
  815. }
  816. // INVAR: key is >= rc.iv[below-1].start and
  817. // key is < rc.iv[below].start
  818. // is key in below-1 interval32?
  819. if key >= int64(rc.iv[below-1].start) && key < int64(rc.iv[below-1].last)+1 {
  820. // yes, it is. key is in below-1 interval32.
  821. alreadyPresent = true
  822. return
  823. }
  824. // INVAR: key >= rc.iv[below-1].endx && key < rc.iv[below].start
  825. // leave alreadyPresent = false
  826. return
  827. }
  828. // cardinality returns the count of the integers stored in the
  829. // runContainer32.
  830. func (rc *runContainer32) cardinality() int64 {
  831. if len(rc.iv) == 0 {
  832. rc.card = 0
  833. return 0
  834. }
  835. if rc.card > 0 {
  836. return rc.card // already cached
  837. }
  838. // have to compute it
  839. var n int64
  840. for _, p := range rc.iv {
  841. n += p.runlen()
  842. }
  843. rc.card = n // cache it
  844. return n
  845. }
  846. // AsSlice decompresses the contents into a []uint32 slice.
  847. func (rc *runContainer32) AsSlice() []uint32 {
  848. s := make([]uint32, rc.cardinality())
  849. j := 0
  850. for _, p := range rc.iv {
  851. for i := p.start; i <= p.last; i++ {
  852. s[j] = i
  853. j++
  854. }
  855. }
  856. return s
  857. }
  858. // newRunContainer32 creates an empty run container.
  859. func newRunContainer32() *runContainer32 {
  860. return &runContainer32{}
  861. }
  862. // newRunContainer32CopyIv creates a run container, initializing
  863. // with a copy of the supplied iv slice.
  864. //
  865. func newRunContainer32CopyIv(iv []interval32) *runContainer32 {
  866. rc := &runContainer32{
  867. iv: make([]interval32, len(iv)),
  868. }
  869. copy(rc.iv, iv)
  870. return rc
  871. }
  872. func (rc *runContainer32) Clone() *runContainer32 {
  873. rc2 := newRunContainer32CopyIv(rc.iv)
  874. return rc2
  875. }
  876. // newRunContainer32TakeOwnership returns a new runContainer32
  877. // backed by the provided iv slice, which we will
  878. // assume exclusive control over from now on.
  879. //
  880. func newRunContainer32TakeOwnership(iv []interval32) *runContainer32 {
  881. rc := &runContainer32{
  882. iv: iv,
  883. }
  884. return rc
  885. }
  886. const baseRc32Size = int(unsafe.Sizeof(runContainer32{}))
  887. const perIntervalRc32Size = int(unsafe.Sizeof(interval32{}))
  888. const baseDiskRc32Size = int(unsafe.Sizeof(uint32(0)))
  889. // see also runContainer32SerializedSizeInBytes(numRuns int) int
  890. // getSizeInBytes returns the number of bytes of memory
  891. // required by this runContainer32.
  892. func (rc *runContainer32) getSizeInBytes() int {
  893. return perIntervalRc32Size*len(rc.iv) + baseRc32Size
  894. }
  895. // runContainer32SerializedSizeInBytes returns the number of bytes of disk
  896. // required to hold numRuns in a runContainer32.
  897. func runContainer32SerializedSizeInBytes(numRuns int) int {
  898. return perIntervalRc32Size*numRuns + baseDiskRc32Size
  899. }
  900. // Add adds a single value k to the set.
  901. func (rc *runContainer32) Add(k uint32) (wasNew bool) {
  902. // TODO comment from runContainer32.java:
  903. // it might be better and simpler to do return
  904. // toBitmapOrArrayContainer(getCardinality()).add(k)
  905. // but note that some unit tests use this method to build up test
  906. // runcontainers without calling runOptimize
  907. k64 := int64(k)
  908. index, present, _ := rc.search(k64, nil)
  909. if present {
  910. return // already there
  911. }
  912. wasNew = true
  913. // increment card if it is cached already
  914. if rc.card > 0 {
  915. rc.card++
  916. }
  917. n := int64(len(rc.iv))
  918. if index == -1 {
  919. // we may need to extend the first run
  920. if n > 0 {
  921. if rc.iv[0].start == k+1 {
  922. rc.iv[0].start = k
  923. return
  924. }
  925. }
  926. // nope, k stands alone, starting the new first interval32.
  927. rc.iv = append([]interval32{{start: k, last: k}}, rc.iv...)
  928. return
  929. }
  930. // are we off the end? handle both index == n and index == n-1:
  931. if index >= n-1 {
  932. if int64(rc.iv[n-1].last)+1 == k64 {
  933. rc.iv[n-1].last++
  934. return
  935. }
  936. rc.iv = append(rc.iv, interval32{start: k, last: k})
  937. return
  938. }
  939. // INVAR: index and index+1 both exist, and k goes between them.
  940. //
  941. // Now: add k into the middle,
  942. // possibly fusing with index or index+1 interval32
  943. // and possibly resulting in fusing of two interval32s
  944. // that had a one integer gap.
  945. left := index
  946. right := index + 1
  947. // are we fusing left and right by adding k?
  948. if int64(rc.iv[left].last)+1 == k64 && int64(rc.iv[right].start) == k64+1 {
  949. // fuse into left
  950. rc.iv[left].last = rc.iv[right].last
  951. // remove redundant right
  952. rc.iv = append(rc.iv[:left+1], rc.iv[right+1:]...)
  953. return
  954. }
  955. // are we an addition to left?
  956. if int64(rc.iv[left].last)+1 == k64 {
  957. // yes
  958. rc.iv[left].last++
  959. return
  960. }
  961. // are we an addition to right?
  962. if int64(rc.iv[right].start) == k64+1 {
  963. // yes
  964. rc.iv[right].start = k
  965. return
  966. }
  967. // k makes a standalone new interval32, inserted in the middle
  968. tail := append([]interval32{{start: k, last: k}}, rc.iv[right:]...)
  969. rc.iv = append(rc.iv[:left+1], tail...)
  970. return
  971. }
  972. //msgp:ignore runIterator
  973. // runIterator32 advice: you must call Next() at least once
  974. // before calling Cur(); and you should call HasNext()
  975. // before calling Next() to insure there are contents.
  976. type runIterator32 struct {
  977. rc *runContainer32
  978. curIndex int64
  979. curPosInIndex uint32
  980. curSeq int64
  981. }
  982. // newRunIterator32 returns a new empty run container.
  983. func (rc *runContainer32) newRunIterator32() *runIterator32 {
  984. return &runIterator32{rc: rc, curIndex: -1}
  985. }
  986. // HasNext returns false if calling Next will panic. It
  987. // returns true when there is at least one more value
  988. // available in the iteration sequence.
  989. func (ri *runIterator32) hasNext() bool {
  990. if len(ri.rc.iv) == 0 {
  991. return false
  992. }
  993. if ri.curIndex == -1 {
  994. return true
  995. }
  996. return ri.curSeq+1 < ri.rc.cardinality()
  997. }
  998. // cur returns the current value pointed to by the iterator.
  999. func (ri *runIterator32) cur() uint32 {
  1000. return ri.rc.iv[ri.curIndex].start + ri.curPosInIndex
  1001. }
  1002. // Next returns the next value in the iteration sequence.
  1003. func (ri *runIterator32) next() uint32 {
  1004. if !ri.hasNext() {
  1005. panic("no Next available")
  1006. }
  1007. if ri.curIndex >= int64(len(ri.rc.iv)) {
  1008. panic("runIterator.Next() going beyond what is available")
  1009. }
  1010. if ri.curIndex == -1 {
  1011. // first time is special
  1012. ri.curIndex = 0
  1013. } else {
  1014. ri.curPosInIndex++
  1015. if int64(ri.rc.iv[ri.curIndex].start)+int64(ri.curPosInIndex) == int64(ri.rc.iv[ri.curIndex].last)+1 {
  1016. ri.curPosInIndex = 0
  1017. ri.curIndex++
  1018. }
  1019. ri.curSeq++
  1020. }
  1021. return ri.cur()
  1022. }
  1023. // remove removes the element that the iterator
  1024. // is on from the run container. You can use
  1025. // Cur if you want to double check what is about
  1026. // to be deleted.
  1027. func (ri *runIterator32) remove() uint32 {
  1028. n := ri.rc.cardinality()
  1029. if n == 0 {
  1030. panic("runIterator.Remove called on empty runContainer32")
  1031. }
  1032. cur := ri.cur()
  1033. ri.rc.deleteAt(&ri.curIndex, &ri.curPosInIndex, &ri.curSeq)
  1034. return cur
  1035. }
  1036. // remove removes key from the container.
  1037. func (rc *runContainer32) removeKey(key uint32) (wasPresent bool) {
  1038. var index int64
  1039. var curSeq int64
  1040. index, wasPresent, _ = rc.search(int64(key), nil)
  1041. if !wasPresent {
  1042. return // already removed, nothing to do.
  1043. }
  1044. pos := key - rc.iv[index].start
  1045. rc.deleteAt(&index, &pos, &curSeq)
  1046. return
  1047. }
  1048. // internal helper functions
  1049. func (rc *runContainer32) deleteAt(curIndex *int64, curPosInIndex *uint32, curSeq *int64) {
  1050. rc.card--
  1051. (*curSeq)--
  1052. ci := *curIndex
  1053. pos := *curPosInIndex
  1054. // are we first, last, or in the middle of our interval32?
  1055. switch {
  1056. case pos == 0:
  1057. if int64(rc.iv[ci].start) == int64(rc.iv[ci].last) {
  1058. // our interval disappears
  1059. rc.iv = append(rc.iv[:ci], rc.iv[ci+1:]...)
  1060. // curIndex stays the same, since the delete did
  1061. // the advance for us.
  1062. *curPosInIndex = 0
  1063. } else {
  1064. rc.iv[ci].start++ // no longer overflowable
  1065. }
  1066. case int64(pos) == rc.iv[ci].runlen()-1:
  1067. // last
  1068. rc.iv[ci].last--
  1069. // our interval32 cannot disappear, else we would have been pos == 0, case first above.
  1070. (*curPosInIndex)--
  1071. // if we leave *curIndex alone, then Next() will work properly even after the delete.
  1072. default:
  1073. //middle
  1074. // split into two, adding an interval32
  1075. new0 := interval32{
  1076. start: rc.iv[ci].start,
  1077. last: rc.iv[ci].start + *curPosInIndex - 1}
  1078. new1start := int64(rc.iv[ci].start) + int64(*curPosInIndex) + 1
  1079. if new1start > int64(MaxUint32) {
  1080. panic("overflow?!?!")
  1081. }
  1082. new1 := interval32{
  1083. start: uint32(new1start),
  1084. last: rc.iv[ci].last}
  1085. tail := append([]interval32{new0, new1}, rc.iv[ci+1:]...)
  1086. rc.iv = append(rc.iv[:ci], tail...)
  1087. // update curIndex and curPosInIndex
  1088. (*curIndex)++
  1089. *curPosInIndex = 0
  1090. }
  1091. }
  1092. func have4Overlap32(astart, alast, bstart, blast int64) bool {
  1093. if alast+1 <= bstart {
  1094. return false
  1095. }
  1096. return blast+1 > astart
  1097. }
  1098. func intersectWithLeftover32(astart, alast, bstart, blast int64) (isOverlap, isLeftoverA, isLeftoverB bool, leftoverstart int64, intersection interval32) {
  1099. if !have4Overlap32(astart, alast, bstart, blast) {
  1100. return
  1101. }
  1102. isOverlap = true
  1103. // do the intersection:
  1104. if bstart > astart {
  1105. intersection.start = uint32(bstart)
  1106. } else {
  1107. intersection.start = uint32(astart)
  1108. }
  1109. switch {
  1110. case blast < alast:
  1111. isLeftoverA = true
  1112. leftoverstart = blast + 1
  1113. intersection.last = uint32(blast)
  1114. case alast < blast:
  1115. isLeftoverB = true
  1116. leftoverstart = alast + 1
  1117. intersection.last = uint32(alast)
  1118. default:
  1119. // alast == blast
  1120. intersection.last = uint32(alast)
  1121. }
  1122. return
  1123. }
  1124. func (rc *runContainer32) findNextIntervalThatIntersectsStartingFrom(startIndex int64, key int64) (index int64, done bool) {
  1125. rc.myOpts.startIndex = startIndex
  1126. rc.myOpts.endxIndex = 0
  1127. w, _, _ := rc.search(key, &rc.myOpts)
  1128. // rc.search always returns w < len(rc.iv)
  1129. if w < startIndex {
  1130. // not found and comes before lower bound startIndex,
  1131. // so just use the lower bound.
  1132. if startIndex == int64(len(rc.iv)) {
  1133. // also this bump up means that we are done
  1134. return startIndex, true
  1135. }
  1136. return startIndex, false
  1137. }
  1138. return w, false
  1139. }
  1140. func sliceToString32(m []interval32) string {
  1141. s := ""
  1142. for i := range m {
  1143. s += fmt.Sprintf("%v: %s, ", i, m[i])
  1144. }
  1145. return s
  1146. }
  1147. // selectInt32 returns the j-th value in the container.
  1148. // We panic of j is out of bounds.
  1149. func (rc *runContainer32) selectInt32(j uint32) int {
  1150. n := rc.cardinality()
  1151. if int64(j) > n {
  1152. panic(fmt.Sprintf("Cannot select %v since Cardinality is %v", j, n))
  1153. }
  1154. var offset int64
  1155. for k := range rc.iv {
  1156. nextOffset := offset + rc.iv[k].runlen() + 1
  1157. if nextOffset > int64(j) {
  1158. return int(int64(rc.iv[k].start) + (int64(j) - offset))
  1159. }
  1160. offset = nextOffset
  1161. }
  1162. panic(fmt.Sprintf("Cannot select %v since Cardinality is %v", j, n))
  1163. }
  1164. // helper for invert
  1165. func (rc *runContainer32) invertlastInterval(origin uint32, lastIdx int) []interval32 {
  1166. cur := rc.iv[lastIdx]
  1167. if cur.last == MaxUint32 {
  1168. if cur.start == origin {
  1169. return nil // empty container
  1170. }
  1171. return []interval32{{start: origin, last: cur.start - 1}}
  1172. }
  1173. if cur.start == origin {
  1174. return []interval32{{start: cur.last + 1, last: MaxUint32}}
  1175. }
  1176. // invert splits
  1177. return []interval32{
  1178. {start: origin, last: cur.start - 1},
  1179. {start: cur.last + 1, last: MaxUint32},
  1180. }
  1181. }
  1182. // invert returns a new container (not inplace), that is
  1183. // the inversion of rc. For each bit b in rc, the
  1184. // returned value has !b
  1185. func (rc *runContainer32) invert() *runContainer32 {
  1186. ni := len(rc.iv)
  1187. var m []interval32
  1188. switch ni {
  1189. case 0:
  1190. return &runContainer32{iv: []interval32{{0, MaxUint32}}}
  1191. case 1:
  1192. return &runContainer32{iv: rc.invertlastInterval(0, 0)}
  1193. }
  1194. var invstart int64
  1195. ult := ni - 1
  1196. for i, cur := range rc.iv {
  1197. if i == ult {
  1198. // invertlastInteval will add both intervals (b) and (c) in
  1199. // diagram below.
  1200. m = append(m, rc.invertlastInterval(uint32(invstart), i)...)
  1201. break
  1202. }
  1203. // INVAR: i and cur are not the last interval, there is a next at i+1
  1204. //
  1205. // ........[cur.start, cur.last] ...... [next.start, next.last]....
  1206. // ^ ^ ^
  1207. // (a) (b) (c)
  1208. //
  1209. // Now: we add interval (a); but if (a) is empty, for cur.start==0, we skip it.
  1210. if cur.start > 0 {
  1211. m = append(m, interval32{start: uint32(invstart), last: cur.start - 1})
  1212. }
  1213. invstart = int64(cur.last + 1)
  1214. }
  1215. return &runContainer32{iv: m}
  1216. }
  1217. func (iv interval32) equal(b interval32) bool {
  1218. if iv.start == b.start {
  1219. return iv.last == b.last
  1220. }
  1221. return false
  1222. }
  1223. func (iv interval32) isSuperSetOf(b interval32) bool {
  1224. return iv.start <= b.start && b.last <= iv.last
  1225. }
  1226. func (iv interval32) subtractInterval(del interval32) (left []interval32, delcount int64) {
  1227. isect, isEmpty := intersectInterval32s(iv, del)
  1228. if isEmpty {
  1229. return nil, 0
  1230. }
  1231. if del.isSuperSetOf(iv) {
  1232. return nil, iv.runlen()
  1233. }
  1234. switch {
  1235. case isect.start > iv.start && isect.last < iv.last:
  1236. new0 := interval32{start: iv.start, last: isect.start - 1}
  1237. new1 := interval32{start: isect.last + 1, last: iv.last}
  1238. return []interval32{new0, new1}, isect.runlen()
  1239. case isect.start == iv.start:
  1240. return []interval32{{start: isect.last + 1, last: iv.last}}, isect.runlen()
  1241. default:
  1242. return []interval32{{start: iv.start, last: isect.start - 1}}, isect.runlen()
  1243. }
  1244. }
  1245. func (rc *runContainer32) isubtract(del interval32) {
  1246. origiv := make([]interval32, len(rc.iv))
  1247. copy(origiv, rc.iv)
  1248. n := int64(len(rc.iv))
  1249. if n == 0 {
  1250. return // already done.
  1251. }
  1252. _, isEmpty := intersectInterval32s(
  1253. interval32{
  1254. start: rc.iv[0].start,
  1255. last: rc.iv[n-1].last,
  1256. }, del)
  1257. if isEmpty {
  1258. return // done
  1259. }
  1260. // INVAR there is some intersection between rc and del
  1261. istart, startAlready, _ := rc.search(int64(del.start), nil)
  1262. ilast, lastAlready, _ := rc.search(int64(del.last), nil)
  1263. rc.card = -1
  1264. if istart == -1 {
  1265. if ilast == n-1 && !lastAlready {
  1266. rc.iv = nil
  1267. return
  1268. }
  1269. }
  1270. // some intervals will remain
  1271. switch {
  1272. case startAlready && lastAlready:
  1273. res0, _ := rc.iv[istart].subtractInterval(del)
  1274. // would overwrite values in iv b/c res0 can have len 2. so
  1275. // write to origiv instead.
  1276. lost := 1 + ilast - istart
  1277. changeSize := int64(len(res0)) - lost
  1278. newSize := int64(len(rc.iv)) + changeSize
  1279. // rc.iv = append(pre, caboose...)
  1280. // return
  1281. if ilast != istart {
  1282. res1, _ := rc.iv[ilast].subtractInterval(del)
  1283. res0 = append(res0, res1...)
  1284. changeSize = int64(len(res0)) - lost
  1285. newSize = int64(len(rc.iv)) + changeSize
  1286. }
  1287. switch {
  1288. case changeSize < 0:
  1289. // shrink
  1290. copy(rc.iv[istart+int64(len(res0)):], rc.iv[ilast+1:])
  1291. copy(rc.iv[istart:istart+int64(len(res0))], res0)
  1292. rc.iv = rc.iv[:newSize]
  1293. return
  1294. case changeSize == 0:
  1295. // stay the same
  1296. copy(rc.iv[istart:istart+int64(len(res0))], res0)
  1297. return
  1298. default:
  1299. // changeSize > 0 is only possible when ilast == istart.
  1300. // Hence we now know: changeSize == 1 and len(res0) == 2
  1301. rc.iv = append(rc.iv, interval32{})
  1302. // len(rc.iv) is correct now, no need to rc.iv = rc.iv[:newSize]
  1303. // copy the tail into place
  1304. copy(rc.iv[ilast+2:], rc.iv[ilast+1:])
  1305. // copy the new item(s) into place
  1306. copy(rc.iv[istart:istart+2], res0)
  1307. return
  1308. }
  1309. case !startAlready && !lastAlready:
  1310. // we get to discard whole intervals
  1311. // from the search() definition:
  1312. // if del.start is not present, then istart is
  1313. // set as follows:
  1314. //
  1315. // a) istart == n-1 if del.start is beyond our
  1316. // last interval32 in rc.iv;
  1317. //
  1318. // b) istart == -1 if del.start is before our first
  1319. // interval32 in rc.iv;
  1320. //
  1321. // c) istart is set to the minimum index of rc.iv
  1322. // which comes strictly before the del.start;
  1323. // so del.start > rc.iv[istart].last,
  1324. // and if istart+1 exists, then del.start < rc.iv[istart+1].startx
  1325. // if del.last is not present, then ilast is
  1326. // set as follows:
  1327. //
  1328. // a) ilast == n-1 if del.last is beyond our
  1329. // last interval32 in rc.iv;
  1330. //
  1331. // b) ilast == -1 if del.last is before our first
  1332. // interval32 in rc.iv;
  1333. //
  1334. // c) ilast is set to the minimum index of rc.iv
  1335. // which comes strictly before the del.last;
  1336. // so del.last > rc.iv[ilast].last,
  1337. // and if ilast+1 exists, then del.last < rc.iv[ilast+1].start
  1338. // INVAR: istart >= 0
  1339. pre := rc.iv[:istart+1]
  1340. if ilast == n-1 {
  1341. rc.iv = pre
  1342. return
  1343. }
  1344. // INVAR: ilast < n-1
  1345. lost := ilast - istart
  1346. changeSize := -lost
  1347. newSize := int64(len(rc.iv)) + changeSize
  1348. if changeSize != 0 {
  1349. copy(rc.iv[ilast+1+changeSize:], rc.iv[ilast+1:])
  1350. }
  1351. rc.iv = rc.iv[:newSize]
  1352. return
  1353. case startAlready && !lastAlready:
  1354. // we can only shrink or stay the same size
  1355. // i.e. we either eliminate the whole interval,
  1356. // or just cut off the right side.
  1357. res0, _ := rc.iv[istart].subtractInterval(del)
  1358. if len(res0) > 0 {
  1359. // len(res) must be 1
  1360. rc.iv[istart] = res0[0]
  1361. }
  1362. lost := 1 + (ilast - istart)
  1363. changeSize := int64(len(res0)) - lost
  1364. newSize := int64(len(rc.iv)) + changeSize
  1365. if changeSize != 0 {
  1366. copy(rc.iv[ilast+1+changeSize:], rc.iv[ilast+1:])
  1367. }
  1368. rc.iv = rc.iv[:newSize]
  1369. return
  1370. case !startAlready && lastAlready:
  1371. // we can only shrink or stay the same size
  1372. res1, _ := rc.iv[ilast].subtractInterval(del)
  1373. lost := ilast - istart
  1374. changeSize := int64(len(res1)) - lost
  1375. newSize := int64(len(rc.iv)) + changeSize
  1376. if changeSize != 0 {
  1377. // move the tail first to make room for res1
  1378. copy(rc.iv[ilast+1+changeSize:], rc.iv[ilast+1:])
  1379. }
  1380. copy(rc.iv[istart+1:], res1)
  1381. rc.iv = rc.iv[:newSize]
  1382. return
  1383. }
  1384. }
  1385. // compute rc minus b, and return the result as a new value (not inplace).
  1386. // port of run_container_andnot from CRoaring...
  1387. // https://github.com/RoaringBitmap/CRoaring/blob/master/src/containers/run.c#L435-L496
  1388. func (rc *runContainer32) AndNotRunContainer32(b *runContainer32) *runContainer32 {
  1389. if len(b.iv) == 0 || len(rc.iv) == 0 {
  1390. return rc
  1391. }
  1392. dst := newRunContainer32()
  1393. apos := 0
  1394. bpos := 0
  1395. a := rc
  1396. astart := a.iv[apos].start
  1397. alast := a.iv[apos].last
  1398. bstart := b.iv[bpos].start
  1399. blast := b.iv[bpos].last
  1400. alen := len(a.iv)
  1401. blen := len(b.iv)
  1402. for apos < alen && bpos < blen {
  1403. switch {
  1404. case alast < bstart:
  1405. // output the first run
  1406. dst.iv = append(dst.iv, interval32{start: astart, last: alast})
  1407. apos++
  1408. if apos < alen {
  1409. astart = a.iv[apos].start
  1410. alast = a.iv[apos].last
  1411. }
  1412. case blast < astart:
  1413. // exit the second run
  1414. bpos++
  1415. if bpos < blen {
  1416. bstart = b.iv[bpos].start
  1417. blast = b.iv[bpos].last
  1418. }
  1419. default:
  1420. // a: [ ]
  1421. // b: [ ]
  1422. // alast >= bstart
  1423. // blast >= astart
  1424. if astart < bstart {
  1425. dst.iv = append(dst.iv, interval32{start: astart, last: bstart - 1})
  1426. }
  1427. if alast > blast {
  1428. astart = blast + 1
  1429. } else {
  1430. apos++
  1431. if apos < alen {
  1432. astart = a.iv[apos].start
  1433. alast = a.iv[apos].last
  1434. }
  1435. }
  1436. }
  1437. }
  1438. if apos < alen {
  1439. dst.iv = append(dst.iv, interval32{start: astart, last: alast})
  1440. apos++
  1441. if apos < alen {
  1442. dst.iv = append(dst.iv, a.iv[apos:]...)
  1443. }
  1444. }
  1445. return dst
  1446. }
  1447. func (rc *runContainer32) numberOfRuns() (nr int) {
  1448. return len(rc.iv)
  1449. }
  1450. func (rc *runContainer32) containerType() contype {
  1451. return run32Contype
  1452. }
  1453. func (rc *runContainer32) equals32(srb *runContainer32) bool {
  1454. //p("both rc32")
  1455. // Check if the containers are the same object.
  1456. if rc == srb {
  1457. //p("same object")
  1458. return true
  1459. }
  1460. if len(srb.iv) != len(rc.iv) {
  1461. //p("iv len differ")
  1462. return false
  1463. }
  1464. for i, v := range rc.iv {
  1465. if v != srb.iv[i] {
  1466. //p("differ at iv i=%v, srb.iv[i]=%v, rc.iv[i]=%v", i, srb.iv[i], rc.iv[i])
  1467. return false
  1468. }
  1469. }
  1470. //p("all intervals same, returning true")
  1471. return true
  1472. }