You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

runcontainer.go 64KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526
  1. package roaring
  2. //
  3. // Copyright (c) 2016 by the roaring authors.
  4. // Licensed under the Apache License, Version 2.0.
  5. //
  6. // We derive a few lines of code from the sort.Search
  7. // function in the golang standard library. That function
  8. // is Copyright 2009 The Go Authors, and licensed
  9. // under the following BSD-style license.
  10. /*
  11. Copyright (c) 2009 The Go Authors. All rights reserved.
  12. Redistribution and use in source and binary forms, with or without
  13. modification, are permitted provided that the following conditions are
  14. met:
  15. * Redistributions of source code must retain the above copyright
  16. notice, this list of conditions and the following disclaimer.
  17. * Redistributions in binary form must reproduce the above
  18. copyright notice, this list of conditions and the following disclaimer
  19. in the documentation and/or other materials provided with the
  20. distribution.
  21. * Neither the name of Google Inc. nor the names of its
  22. contributors may be used to endorse or promote products derived from
  23. this software without specific prior written permission.
  24. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  25. "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  26. LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  27. A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  28. OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  29. SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  30. LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  31. DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  32. THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  33. (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  34. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  35. */
  36. import (
  37. "fmt"
  38. "sort"
  39. "unsafe"
  40. )
  41. //go:generate msgp -unexported
  42. // runContainer16 does run-length encoding of sets of
  43. // uint16 integers.
  44. type runContainer16 struct {
  45. iv []interval16
  46. card int64
  47. // avoid allocation during search
  48. myOpts searchOptions `msg:"-"`
  49. }
  // interval16 is the structure runContainer16 uses internally to hold
  // one closed interval [start, last] of uint16 values.
  type interval16 struct {
  	start  uint16 // first value in the interval
  	length uint16 // number of values in the interval, minus 1
  }

  // newInterval16Range returns the closed interval [start, last].
  // It panics when last < start.
  func newInterval16Range(start, last uint16) interval16 {
  	if last < start {
  		panic(fmt.Sprintf("last (%d) cannot be smaller than start (%d)", last, start))
  	}
  	return interval16{start: start, length: last - start}
  }

  // runlen returns the count of integers in the interval. The result is
  // an int64 because a full interval holds 65536 values, one more than
  // a uint16 can represent.
  func (iv interval16) runlen() int64 {
  	return int64(iv.length) + 1
  }

  // last returns the largest value contained in the interval.
  func (iv interval16) last() uint16 {
  	return iv.start + iv.length
  }

  // String produces a human viewable string of the contents, written as
  // the closed interval "[start, last]". This is the same notation that
  // ivalString16 uses; the stored length (count minus 1) is not printed.
  func (iv interval16) String() string {
  	return fmt.Sprintf("[%d, %d]", iv.start, iv.last())
  }
  77. func ivalString16(iv []interval16) string {
  78. var s string
  79. var j int
  80. var p interval16
  81. for j, p = range iv {
  82. s += fmt.Sprintf("%v:[%d, %d], ", j, p.start, p.last())
  83. }
  84. return s
  85. }
  86. // String produces a human viewable string of the contents.
  87. func (rc *runContainer16) String() string {
  88. if len(rc.iv) == 0 {
  89. return "runContainer16{}"
  90. }
  91. is := ivalString16(rc.iv)
  92. return `runContainer16{` + is + `}`
  93. }
  // uint16Slice is a convenience type that lets a []uint16 be handed to
  // sort.Sort; it orders the values ascending.
  type uint16Slice []uint16

  // Len returns the number of elements in p.
  func (p uint16Slice) Len() int {
  	return len(p)
  }

  // Less reports whether element i is smaller than element j.
  func (p uint16Slice) Less(i, j int) bool {
  	return p[i] < p[j]
  }

  // Swap exchanges elements i and j.
  func (p uint16Slice) Swap(i, j int) {
  	p[j], p[i] = p[i], p[j]
  }
  102. //msgp:ignore addHelper
  103. // addHelper helps build a runContainer16.
  104. type addHelper16 struct {
  105. runstart uint16
  106. runlen uint16
  107. actuallyAdded uint16
  108. m []interval16
  109. rc *runContainer16
  110. }
  111. func (ah *addHelper16) storeIval(runstart, runlen uint16) {
  112. mi := interval16{start: runstart, length: runlen}
  113. ah.m = append(ah.m, mi)
  114. }
  115. func (ah *addHelper16) add(cur, prev uint16, i int) {
  116. if cur == prev+1 {
  117. ah.runlen++
  118. ah.actuallyAdded++
  119. } else {
  120. if cur < prev {
  121. panic(fmt.Sprintf("newRunContainer16FromVals sees "+
  122. "unsorted vals; vals[%v]=cur=%v < prev=%v. Sort your vals"+
  123. " before calling us with alreadySorted == true.", i, cur, prev))
  124. }
  125. if cur == prev {
  126. // ignore duplicates
  127. } else {
  128. ah.actuallyAdded++
  129. ah.storeIval(ah.runstart, ah.runlen)
  130. ah.runstart = cur
  131. ah.runlen = 0
  132. }
  133. }
  134. }
  135. // newRunContainerRange makes a new container made of just the specified closed interval [rangestart,rangelast]
  136. func newRunContainer16Range(rangestart uint16, rangelast uint16) *runContainer16 {
  137. rc := &runContainer16{}
  138. rc.iv = append(rc.iv, newInterval16Range(rangestart, rangelast))
  139. return rc
  140. }
  141. // newRunContainer16FromVals makes a new container from vals.
  142. //
  143. // For efficiency, vals should be sorted in ascending order.
  144. // Ideally vals should not contain duplicates, but we detect and
  145. // ignore them. If vals is already sorted in ascending order, then
  146. // pass alreadySorted = true. Otherwise, for !alreadySorted,
  147. // we will sort vals before creating a runContainer16 of them.
  148. // We sort the original vals, so this will change what the
  149. // caller sees in vals as a side effect.
  150. func newRunContainer16FromVals(alreadySorted bool, vals ...uint16) *runContainer16 {
  151. // keep this in sync with newRunContainer16FromArray below
  152. rc := &runContainer16{}
  153. ah := addHelper16{rc: rc}
  154. if !alreadySorted {
  155. sort.Sort(uint16Slice(vals))
  156. }
  157. n := len(vals)
  158. var cur, prev uint16
  159. switch {
  160. case n == 0:
  161. // nothing more
  162. case n == 1:
  163. ah.m = append(ah.m, newInterval16Range(vals[0], vals[0]))
  164. ah.actuallyAdded++
  165. default:
  166. ah.runstart = vals[0]
  167. ah.actuallyAdded++
  168. for i := 1; i < n; i++ {
  169. prev = vals[i-1]
  170. cur = vals[i]
  171. ah.add(cur, prev, i)
  172. }
  173. ah.storeIval(ah.runstart, ah.runlen)
  174. }
  175. rc.iv = ah.m
  176. rc.card = int64(ah.actuallyAdded)
  177. return rc
  178. }
  179. // newRunContainer16FromBitmapContainer makes a new run container from bc,
  180. // somewhat efficiently. For reference, see the Java
  181. // https://github.com/RoaringBitmap/RoaringBitmap/blob/master/src/main/java/org/roaringbitmap/RunContainer.java#L145-L192
  182. func newRunContainer16FromBitmapContainer(bc *bitmapContainer) *runContainer16 {
  183. rc := &runContainer16{}
  184. nbrRuns := bc.numberOfRuns()
  185. if nbrRuns == 0 {
  186. return rc
  187. }
  188. rc.iv = make([]interval16, nbrRuns)
  189. longCtr := 0 // index of current long in bitmap
  190. curWord := bc.bitmap[0] // its value
  191. runCount := 0
  192. for {
  193. // potentially multiword advance to first 1 bit
  194. for curWord == 0 && longCtr < len(bc.bitmap)-1 {
  195. longCtr++
  196. curWord = bc.bitmap[longCtr]
  197. }
  198. if curWord == 0 {
  199. // wrap up, no more runs
  200. return rc
  201. }
  202. localRunStart := countTrailingZeros(curWord)
  203. runStart := localRunStart + 64*longCtr
  204. // stuff 1s into number's LSBs
  205. curWordWith1s := curWord | (curWord - 1)
  206. // find the next 0, potentially in a later word
  207. runEnd := 0
  208. for curWordWith1s == maxWord && longCtr < len(bc.bitmap)-1 {
  209. longCtr++
  210. curWordWith1s = bc.bitmap[longCtr]
  211. }
  212. if curWordWith1s == maxWord {
  213. // a final unterminated run of 1s
  214. runEnd = wordSizeInBits + longCtr*64
  215. rc.iv[runCount].start = uint16(runStart)
  216. rc.iv[runCount].length = uint16(runEnd) - uint16(runStart) - 1
  217. return rc
  218. }
  219. localRunEnd := countTrailingZeros(^curWordWith1s)
  220. runEnd = localRunEnd + longCtr*64
  221. rc.iv[runCount].start = uint16(runStart)
  222. rc.iv[runCount].length = uint16(runEnd) - 1 - uint16(runStart)
  223. runCount++
  224. // now, zero out everything right of runEnd.
  225. curWord = curWordWith1s & (curWordWith1s + 1)
  226. // We've lathered and rinsed, so repeat...
  227. }
  228. }
  229. //
  230. // newRunContainer16FromArray populates a new
  231. // runContainer16 from the contents of arr.
  232. //
  233. func newRunContainer16FromArray(arr *arrayContainer) *runContainer16 {
  234. // keep this in sync with newRunContainer16FromVals above
  235. rc := &runContainer16{}
  236. ah := addHelper16{rc: rc}
  237. n := arr.getCardinality()
  238. var cur, prev uint16
  239. switch {
  240. case n == 0:
  241. // nothing more
  242. case n == 1:
  243. ah.m = append(ah.m, newInterval16Range(arr.content[0], arr.content[0]))
  244. ah.actuallyAdded++
  245. default:
  246. ah.runstart = arr.content[0]
  247. ah.actuallyAdded++
  248. for i := 1; i < n; i++ {
  249. prev = arr.content[i-1]
  250. cur = arr.content[i]
  251. ah.add(cur, prev, i)
  252. }
  253. ah.storeIval(ah.runstart, ah.runlen)
  254. }
  255. rc.iv = ah.m
  256. rc.card = int64(ah.actuallyAdded)
  257. return rc
  258. }
  259. // set adds the integers in vals to the set. Vals
  260. // must be sorted in increasing order; if not, you should set
  261. // alreadySorted to false, and we will sort them in place for you.
  262. // (Be aware of this side effect -- it will affect the callers
  263. // view of vals).
  264. //
  265. // If you have a small number of additions to an already
  266. // big runContainer16, calling Add() may be faster.
  267. func (rc *runContainer16) set(alreadySorted bool, vals ...uint16) {
  268. rc2 := newRunContainer16FromVals(alreadySorted, vals...)
  269. un := rc.union(rc2)
  270. rc.iv = un.iv
  271. rc.card = 0
  272. }
  273. // canMerge returns true iff the intervals
  274. // a and b either overlap or they are
  275. // contiguous and so can be merged into
  276. // a single interval.
  277. func canMerge16(a, b interval16) bool {
  278. if int64(a.last())+1 < int64(b.start) {
  279. return false
  280. }
  281. return int64(b.last())+1 >= int64(a.start)
  282. }
  283. // haveOverlap differs from canMerge in that
  284. // it tells you if the intersection of a
  285. // and b would contain an element (otherwise
  286. // it would be the empty set, and we return
  287. // false).
  288. func haveOverlap16(a, b interval16) bool {
  289. if int64(a.last())+1 <= int64(b.start) {
  290. return false
  291. }
  292. return int64(b.last())+1 > int64(a.start)
  293. }
  294. // mergeInterval16s joins a and b into a
  295. // new interval, and panics if it cannot.
  296. func mergeInterval16s(a, b interval16) (res interval16) {
  297. if !canMerge16(a, b) {
  298. panic(fmt.Sprintf("cannot merge %#v and %#v", a, b))
  299. }
  300. if b.start < a.start {
  301. res.start = b.start
  302. } else {
  303. res.start = a.start
  304. }
  305. if b.last() > a.last() {
  306. res.length = b.last() - res.start
  307. } else {
  308. res.length = a.last() - res.start
  309. }
  310. return
  311. }
  312. // intersectInterval16s returns the intersection
  313. // of a and b. The isEmpty flag will be true if
  314. // a and b were disjoint.
  315. func intersectInterval16s(a, b interval16) (res interval16, isEmpty bool) {
  316. if !haveOverlap16(a, b) {
  317. isEmpty = true
  318. return
  319. }
  320. if b.start > a.start {
  321. res.start = b.start
  322. } else {
  323. res.start = a.start
  324. }
  325. bEnd := b.last()
  326. aEnd := a.last()
  327. var resEnd uint16
  328. if bEnd < aEnd {
  329. resEnd = bEnd
  330. } else {
  331. resEnd = aEnd
  332. }
  333. res.length = resEnd - res.start
  334. return
  335. }
  336. // union merges two runContainer16s, producing
  337. // a new runContainer16 with the union of rc and b.
  338. func (rc *runContainer16) union(b *runContainer16) *runContainer16 {
  339. // rc is also known as 'a' here, but golint insisted we
  340. // call it rc for consistency with the rest of the methods.
  341. var m []interval16
  342. alim := int64(len(rc.iv))
  343. blim := int64(len(b.iv))
  344. var na int64 // next from a
  345. var nb int64 // next from b
  346. // merged holds the current merge output, which might
  347. // get additional merges before being appended to m.
  348. var merged interval16
  349. var mergedUsed bool // is merged being used at the moment?
  350. var cura interval16 // currently considering this interval16 from a
  351. var curb interval16 // currently considering this interval16 from b
  352. pass := 0
  353. for na < alim && nb < blim {
  354. pass++
  355. cura = rc.iv[na]
  356. curb = b.iv[nb]
  357. if mergedUsed {
  358. mergedUpdated := false
  359. if canMerge16(cura, merged) {
  360. merged = mergeInterval16s(cura, merged)
  361. na = rc.indexOfIntervalAtOrAfter(int64(merged.last())+1, na+1)
  362. mergedUpdated = true
  363. }
  364. if canMerge16(curb, merged) {
  365. merged = mergeInterval16s(curb, merged)
  366. nb = b.indexOfIntervalAtOrAfter(int64(merged.last())+1, nb+1)
  367. mergedUpdated = true
  368. }
  369. if !mergedUpdated {
  370. // we know that merged is disjoint from cura and curb
  371. m = append(m, merged)
  372. mergedUsed = false
  373. }
  374. continue
  375. } else {
  376. // !mergedUsed
  377. if !canMerge16(cura, curb) {
  378. if cura.start < curb.start {
  379. m = append(m, cura)
  380. na++
  381. } else {
  382. m = append(m, curb)
  383. nb++
  384. }
  385. } else {
  386. merged = mergeInterval16s(cura, curb)
  387. mergedUsed = true
  388. na = rc.indexOfIntervalAtOrAfter(int64(merged.last())+1, na+1)
  389. nb = b.indexOfIntervalAtOrAfter(int64(merged.last())+1, nb+1)
  390. }
  391. }
  392. }
  393. var aDone, bDone bool
  394. if na >= alim {
  395. aDone = true
  396. }
  397. if nb >= blim {
  398. bDone = true
  399. }
  400. // finish by merging anything remaining into merged we can:
  401. if mergedUsed {
  402. if !aDone {
  403. aAdds:
  404. for na < alim {
  405. cura = rc.iv[na]
  406. if canMerge16(cura, merged) {
  407. merged = mergeInterval16s(cura, merged)
  408. na = rc.indexOfIntervalAtOrAfter(int64(merged.last())+1, na+1)
  409. } else {
  410. break aAdds
  411. }
  412. }
  413. }
  414. if !bDone {
  415. bAdds:
  416. for nb < blim {
  417. curb = b.iv[nb]
  418. if canMerge16(curb, merged) {
  419. merged = mergeInterval16s(curb, merged)
  420. nb = b.indexOfIntervalAtOrAfter(int64(merged.last())+1, nb+1)
  421. } else {
  422. break bAdds
  423. }
  424. }
  425. }
  426. m = append(m, merged)
  427. }
  428. if na < alim {
  429. m = append(m, rc.iv[na:]...)
  430. }
  431. if nb < blim {
  432. m = append(m, b.iv[nb:]...)
  433. }
  434. res := &runContainer16{iv: m}
  435. return res
  436. }
  437. // unionCardinality returns the cardinality of the merger of two runContainer16s, the union of rc and b.
  438. func (rc *runContainer16) unionCardinality(b *runContainer16) uint64 {
  439. // rc is also known as 'a' here, but golint insisted we
  440. // call it rc for consistency with the rest of the methods.
  441. answer := uint64(0)
  442. alim := int64(len(rc.iv))
  443. blim := int64(len(b.iv))
  444. var na int64 // next from a
  445. var nb int64 // next from b
  446. // merged holds the current merge output, which might
  447. // get additional merges before being appended to m.
  448. var merged interval16
  449. var mergedUsed bool // is merged being used at the moment?
  450. var cura interval16 // currently considering this interval16 from a
  451. var curb interval16 // currently considering this interval16 from b
  452. pass := 0
  453. for na < alim && nb < blim {
  454. pass++
  455. cura = rc.iv[na]
  456. curb = b.iv[nb]
  457. if mergedUsed {
  458. mergedUpdated := false
  459. if canMerge16(cura, merged) {
  460. merged = mergeInterval16s(cura, merged)
  461. na = rc.indexOfIntervalAtOrAfter(int64(merged.last())+1, na+1)
  462. mergedUpdated = true
  463. }
  464. if canMerge16(curb, merged) {
  465. merged = mergeInterval16s(curb, merged)
  466. nb = b.indexOfIntervalAtOrAfter(int64(merged.last())+1, nb+1)
  467. mergedUpdated = true
  468. }
  469. if !mergedUpdated {
  470. // we know that merged is disjoint from cura and curb
  471. //m = append(m, merged)
  472. answer += uint64(merged.last()) - uint64(merged.start) + 1
  473. mergedUsed = false
  474. }
  475. continue
  476. } else {
  477. // !mergedUsed
  478. if !canMerge16(cura, curb) {
  479. if cura.start < curb.start {
  480. answer += uint64(cura.last()) - uint64(cura.start) + 1
  481. //m = append(m, cura)
  482. na++
  483. } else {
  484. answer += uint64(curb.last()) - uint64(curb.start) + 1
  485. //m = append(m, curb)
  486. nb++
  487. }
  488. } else {
  489. merged = mergeInterval16s(cura, curb)
  490. mergedUsed = true
  491. na = rc.indexOfIntervalAtOrAfter(int64(merged.last())+1, na+1)
  492. nb = b.indexOfIntervalAtOrAfter(int64(merged.last())+1, nb+1)
  493. }
  494. }
  495. }
  496. var aDone, bDone bool
  497. if na >= alim {
  498. aDone = true
  499. }
  500. if nb >= blim {
  501. bDone = true
  502. }
  503. // finish by merging anything remaining into merged we can:
  504. if mergedUsed {
  505. if !aDone {
  506. aAdds:
  507. for na < alim {
  508. cura = rc.iv[na]
  509. if canMerge16(cura, merged) {
  510. merged = mergeInterval16s(cura, merged)
  511. na = rc.indexOfIntervalAtOrAfter(int64(merged.last())+1, na+1)
  512. } else {
  513. break aAdds
  514. }
  515. }
  516. }
  517. if !bDone {
  518. bAdds:
  519. for nb < blim {
  520. curb = b.iv[nb]
  521. if canMerge16(curb, merged) {
  522. merged = mergeInterval16s(curb, merged)
  523. nb = b.indexOfIntervalAtOrAfter(int64(merged.last())+1, nb+1)
  524. } else {
  525. break bAdds
  526. }
  527. }
  528. }
  529. //m = append(m, merged)
  530. answer += uint64(merged.last()) - uint64(merged.start) + 1
  531. }
  532. for _, r := range rc.iv[na:] {
  533. answer += uint64(r.last()) - uint64(r.start) + 1
  534. }
  535. for _, r := range b.iv[nb:] {
  536. answer += uint64(r.last()) - uint64(r.start) + 1
  537. }
  538. return answer
  539. }
  540. // indexOfIntervalAtOrAfter is a helper for union.
  541. func (rc *runContainer16) indexOfIntervalAtOrAfter(key int64, startIndex int64) int64 {
  542. rc.myOpts.startIndex = startIndex
  543. rc.myOpts.endxIndex = 0
  544. w, already, _ := rc.search(key, &rc.myOpts)
  545. if already {
  546. return w
  547. }
  548. return w + 1
  549. }
  550. // intersect returns a new runContainer16 holding the
  551. // intersection of rc (also known as 'a') and b.
  552. func (rc *runContainer16) intersect(b *runContainer16) *runContainer16 {
  553. a := rc
  554. numa := int64(len(a.iv))
  555. numb := int64(len(b.iv))
  556. res := &runContainer16{}
  557. if numa == 0 || numb == 0 {
  558. return res
  559. }
  560. if numa == 1 && numb == 1 {
  561. if !haveOverlap16(a.iv[0], b.iv[0]) {
  562. return res
  563. }
  564. }
  565. var output []interval16
  566. var acuri int64
  567. var bcuri int64
  568. astart := int64(a.iv[acuri].start)
  569. bstart := int64(b.iv[bcuri].start)
  570. var intersection interval16
  571. var leftoverstart int64
  572. var isOverlap, isLeftoverA, isLeftoverB bool
  573. var done bool
  574. toploop:
  575. for acuri < numa && bcuri < numb {
  576. isOverlap, isLeftoverA, isLeftoverB, leftoverstart, intersection =
  577. intersectWithLeftover16(astart, int64(a.iv[acuri].last()), bstart, int64(b.iv[bcuri].last()))
  578. if !isOverlap {
  579. switch {
  580. case astart < bstart:
  581. acuri, done = a.findNextIntervalThatIntersectsStartingFrom(acuri+1, bstart)
  582. if done {
  583. break toploop
  584. }
  585. astart = int64(a.iv[acuri].start)
  586. case astart > bstart:
  587. bcuri, done = b.findNextIntervalThatIntersectsStartingFrom(bcuri+1, astart)
  588. if done {
  589. break toploop
  590. }
  591. bstart = int64(b.iv[bcuri].start)
  592. //default:
  593. // panic("impossible that astart == bstart, since !isOverlap")
  594. }
  595. } else {
  596. // isOverlap
  597. output = append(output, intersection)
  598. switch {
  599. case isLeftoverA:
  600. // note that we change astart without advancing acuri,
  601. // since we need to capture any 2ndary intersections with a.iv[acuri]
  602. astart = leftoverstart
  603. bcuri++
  604. if bcuri >= numb {
  605. break toploop
  606. }
  607. bstart = int64(b.iv[bcuri].start)
  608. case isLeftoverB:
  609. // note that we change bstart without advancing bcuri,
  610. // since we need to capture any 2ndary intersections with b.iv[bcuri]
  611. bstart = leftoverstart
  612. acuri++
  613. if acuri >= numa {
  614. break toploop
  615. }
  616. astart = int64(a.iv[acuri].start)
  617. default:
  618. // neither had leftover, both completely consumed
  619. // optionally, assert for sanity:
  620. //if a.iv[acuri].endx != b.iv[bcuri].endx {
  621. // panic("huh? should only be possible that endx agree now!")
  622. //}
  623. // advance to next a interval
  624. acuri++
  625. if acuri >= numa {
  626. break toploop
  627. }
  628. astart = int64(a.iv[acuri].start)
  629. // advance to next b interval
  630. bcuri++
  631. if bcuri >= numb {
  632. break toploop
  633. }
  634. bstart = int64(b.iv[bcuri].start)
  635. }
  636. }
  637. } // end for toploop
  638. if len(output) == 0 {
  639. return res
  640. }
  641. res.iv = output
  642. return res
  643. }
  644. // intersectCardinality returns the cardinality of the
  645. // intersection of rc (also known as 'a') and b.
  646. func (rc *runContainer16) intersectCardinality(b *runContainer16) int64 {
  647. answer := int64(0)
  648. a := rc
  649. numa := int64(len(a.iv))
  650. numb := int64(len(b.iv))
  651. if numa == 0 || numb == 0 {
  652. return 0
  653. }
  654. if numa == 1 && numb == 1 {
  655. if !haveOverlap16(a.iv[0], b.iv[0]) {
  656. return 0
  657. }
  658. }
  659. var acuri int64
  660. var bcuri int64
  661. astart := int64(a.iv[acuri].start)
  662. bstart := int64(b.iv[bcuri].start)
  663. var intersection interval16
  664. var leftoverstart int64
  665. var isOverlap, isLeftoverA, isLeftoverB bool
  666. var done bool
  667. pass := 0
  668. toploop:
  669. for acuri < numa && bcuri < numb {
  670. pass++
  671. isOverlap, isLeftoverA, isLeftoverB, leftoverstart, intersection =
  672. intersectWithLeftover16(astart, int64(a.iv[acuri].last()), bstart, int64(b.iv[bcuri].last()))
  673. if !isOverlap {
  674. switch {
  675. case astart < bstart:
  676. acuri, done = a.findNextIntervalThatIntersectsStartingFrom(acuri+1, bstart)
  677. if done {
  678. break toploop
  679. }
  680. astart = int64(a.iv[acuri].start)
  681. case astart > bstart:
  682. bcuri, done = b.findNextIntervalThatIntersectsStartingFrom(bcuri+1, astart)
  683. if done {
  684. break toploop
  685. }
  686. bstart = int64(b.iv[bcuri].start)
  687. //default:
  688. // panic("impossible that astart == bstart, since !isOverlap")
  689. }
  690. } else {
  691. // isOverlap
  692. answer += int64(intersection.last()) - int64(intersection.start) + 1
  693. switch {
  694. case isLeftoverA:
  695. // note that we change astart without advancing acuri,
  696. // since we need to capture any 2ndary intersections with a.iv[acuri]
  697. astart = leftoverstart
  698. bcuri++
  699. if bcuri >= numb {
  700. break toploop
  701. }
  702. bstart = int64(b.iv[bcuri].start)
  703. case isLeftoverB:
  704. // note that we change bstart without advancing bcuri,
  705. // since we need to capture any 2ndary intersections with b.iv[bcuri]
  706. bstart = leftoverstart
  707. acuri++
  708. if acuri >= numa {
  709. break toploop
  710. }
  711. astart = int64(a.iv[acuri].start)
  712. default:
  713. // neither had leftover, both completely consumed
  714. // optionally, assert for sanity:
  715. //if a.iv[acuri].endx != b.iv[bcuri].endx {
  716. // panic("huh? should only be possible that endx agree now!")
  717. //}
  718. // advance to next a interval
  719. acuri++
  720. if acuri >= numa {
  721. break toploop
  722. }
  723. astart = int64(a.iv[acuri].start)
  724. // advance to next b interval
  725. bcuri++
  726. if bcuri >= numb {
  727. break toploop
  728. }
  729. bstart = int64(b.iv[bcuri].start)
  730. }
  731. }
  732. } // end for toploop
  733. return answer
  734. }
  735. // get returns true iff key is in the container.
  736. func (rc *runContainer16) contains(key uint16) bool {
  737. _, in, _ := rc.search(int64(key), nil)
  738. return in
  739. }
  740. // numIntervals returns the count of intervals in the container.
  741. func (rc *runContainer16) numIntervals() int {
  742. return len(rc.iv)
  743. }
// searchOptions allows us to accelerate search with
// prior knowledge of (mostly lower) bounds. This is used by Union
// and Intersect.
//
// search() reads both fields: startIndex becomes the lower end of the
// binary-search window and a positive endxIndex becomes its exclusive
// upper end.
type searchOptions struct {
	// start here instead of at 0
	startIndex int64

	// upper bound instead of len(rc.iv);
	// endxIndex == 0 means ignore the bound and use
	// endxIndex == n == len(rc.iv) which is also
	// naturally the default for search()
	// when opt = nil.
	endxIndex int64
}
  757. // search returns alreadyPresent to indicate if the
  758. // key is already in one of our interval16s.
  759. //
  760. // If key is alreadyPresent, then whichInterval16 tells
  761. // you where.
  762. //
  763. // If key is not already present, then whichInterval16 is
  764. // set as follows:
  765. //
  766. // a) whichInterval16 == len(rc.iv)-1 if key is beyond our
  767. // last interval16 in rc.iv;
  768. //
  769. // b) whichInterval16 == -1 if key is before our first
  770. // interval16 in rc.iv;
  771. //
  772. // c) whichInterval16 is set to the minimum index of rc.iv
  773. // which comes strictly before the key;
  774. // so rc.iv[whichInterval16].last < key,
  775. // and if whichInterval16+1 exists, then key < rc.iv[whichInterval16+1].start
  776. // (Note that whichInterval16+1 won't exist when
  777. // whichInterval16 is the last interval.)
  778. //
  779. // runContainer16.search always returns whichInterval16 < len(rc.iv).
  780. //
  781. // If not nil, opts can be used to further restrict
  782. // the search space.
  783. //
  784. func (rc *runContainer16) search(key int64, opts *searchOptions) (whichInterval16 int64, alreadyPresent bool, numCompares int) {
  785. n := int64(len(rc.iv))
  786. if n == 0 {
  787. return -1, false, 0
  788. }
  789. startIndex := int64(0)
  790. endxIndex := n
  791. if opts != nil {
  792. startIndex = opts.startIndex
  793. // let endxIndex == 0 mean no effect
  794. if opts.endxIndex > 0 {
  795. endxIndex = opts.endxIndex
  796. }
  797. }
  798. // sort.Search returns the smallest index i
  799. // in [0, n) at which f(i) is true, assuming that on the range [0, n),
  800. // f(i) == true implies f(i+1) == true.
  801. // If there is no such index, Search returns n.
  802. // For correctness, this began as verbatim snippet from
  803. // sort.Search in the Go standard lib.
  804. // We inline our comparison function for speed, and
  805. // annotate with numCompares
  806. // to observe and test that extra bounds are utilized.
  807. i, j := startIndex, endxIndex
  808. for i < j {
  809. h := i + (j-i)/2 // avoid overflow when computing h as the bisector
  810. // i <= h < j
  811. numCompares++
  812. if !(key < int64(rc.iv[h].start)) {
  813. i = h + 1
  814. } else {
  815. j = h
  816. }
  817. }
  818. below := i
  819. // end std lib snippet.
  820. // The above is a simple in-lining and annotation of:
  821. /* below := sort.Search(n,
  822. func(i int) bool {
  823. return key < rc.iv[i].start
  824. })
  825. */
  826. whichInterval16 = below - 1
  827. if below == n {
  828. // all falses => key is >= start of all interval16s
  829. // ... so does it belong to the last interval16?
  830. if key < int64(rc.iv[n-1].last())+1 {
  831. // yes, it belongs to the last interval16
  832. alreadyPresent = true
  833. return
  834. }
  835. // no, it is beyond the last interval16.
  836. // leave alreadyPreset = false
  837. return
  838. }
  839. // INVAR: key is below rc.iv[below]
  840. if below == 0 {
  841. // key is before the first first interval16.
  842. // leave alreadyPresent = false
  843. return
  844. }
  845. // INVAR: key is >= rc.iv[below-1].start and
  846. // key is < rc.iv[below].start
  847. // is key in below-1 interval16?
  848. if key >= int64(rc.iv[below-1].start) && key < int64(rc.iv[below-1].last())+1 {
  849. // yes, it is. key is in below-1 interval16.
  850. alreadyPresent = true
  851. return
  852. }
  853. // INVAR: key >= rc.iv[below-1].endx && key < rc.iv[below].start
  854. // leave alreadyPresent = false
  855. return
  856. }
  857. // cardinality returns the count of the integers stored in the
  858. // runContainer16.
  859. func (rc *runContainer16) cardinality() int64 {
  860. if len(rc.iv) == 0 {
  861. rc.card = 0
  862. return 0
  863. }
  864. if rc.card > 0 {
  865. return rc.card // already cached
  866. }
  867. // have to compute it
  868. var n int64
  869. for _, p := range rc.iv {
  870. n += p.runlen()
  871. }
  872. rc.card = n // cache it
  873. return n
  874. }
  875. // AsSlice decompresses the contents into a []uint16 slice.
  876. func (rc *runContainer16) AsSlice() []uint16 {
  877. s := make([]uint16, rc.cardinality())
  878. j := 0
  879. for _, p := range rc.iv {
  880. for i := p.start; i <= p.last(); i++ {
  881. s[j] = i
  882. j++
  883. }
  884. }
  885. return s
  886. }
  887. // newRunContainer16 creates an empty run container.
  888. func newRunContainer16() *runContainer16 {
  889. return &runContainer16{}
  890. }
  891. // newRunContainer16CopyIv creates a run container, initializing
  892. // with a copy of the supplied iv slice.
  893. //
  894. func newRunContainer16CopyIv(iv []interval16) *runContainer16 {
  895. rc := &runContainer16{
  896. iv: make([]interval16, len(iv)),
  897. }
  898. copy(rc.iv, iv)
  899. return rc
  900. }
  901. func (rc *runContainer16) Clone() *runContainer16 {
  902. rc2 := newRunContainer16CopyIv(rc.iv)
  903. return rc2
  904. }
  905. // newRunContainer16TakeOwnership returns a new runContainer16
  906. // backed by the provided iv slice, which we will
  907. // assume exclusive control over from now on.
  908. //
  909. func newRunContainer16TakeOwnership(iv []interval16) *runContainer16 {
  910. rc := &runContainer16{
  911. iv: iv,
  912. }
  913. return rc
  914. }
  915. const baseRc16Size = int(unsafe.Sizeof(runContainer16{}))
  916. const perIntervalRc16Size = int(unsafe.Sizeof(interval16{}))
  917. const baseDiskRc16Size = int(unsafe.Sizeof(uint16(0)))
  918. // see also runContainer16SerializedSizeInBytes(numRuns int) int
  919. // getSizeInBytes returns the number of bytes of memory
  920. // required by this runContainer16.
  921. func (rc *runContainer16) getSizeInBytes() int {
  922. return perIntervalRc16Size*len(rc.iv) + baseRc16Size
  923. }
  924. // runContainer16SerializedSizeInBytes returns the number of bytes of disk
  925. // required to hold numRuns in a runContainer16.
  926. func runContainer16SerializedSizeInBytes(numRuns int) int {
  927. return perIntervalRc16Size*numRuns + baseDiskRc16Size
  928. }
  929. // Add adds a single value k to the set.
  930. func (rc *runContainer16) Add(k uint16) (wasNew bool) {
  931. // TODO comment from runContainer16.java:
  932. // it might be better and simpler to do return
  933. // toBitmapOrArrayContainer(getCardinality()).add(k)
  934. // but note that some unit tests use this method to build up test
  935. // runcontainers without calling runOptimize
  936. k64 := int64(k)
  937. index, present, _ := rc.search(k64, nil)
  938. if present {
  939. return // already there
  940. }
  941. wasNew = true
  942. // increment card if it is cached already
  943. if rc.card > 0 {
  944. rc.card++
  945. }
  946. n := int64(len(rc.iv))
  947. if index == -1 {
  948. // we may need to extend the first run
  949. if n > 0 {
  950. if rc.iv[0].start == k+1 {
  951. rc.iv[0].start = k
  952. rc.iv[0].length++
  953. return
  954. }
  955. }
  956. // nope, k stands alone, starting the new first interval16.
  957. rc.iv = append([]interval16{newInterval16Range(k, k)}, rc.iv...)
  958. return
  959. }
  960. // are we off the end? handle both index == n and index == n-1:
  961. if index >= n-1 {
  962. if int64(rc.iv[n-1].last())+1 == k64 {
  963. rc.iv[n-1].length++
  964. return
  965. }
  966. rc.iv = append(rc.iv, newInterval16Range(k, k))
  967. return
  968. }
  969. // INVAR: index and index+1 both exist, and k goes between them.
  970. //
  971. // Now: add k into the middle,
  972. // possibly fusing with index or index+1 interval16
  973. // and possibly resulting in fusing of two interval16s
  974. // that had a one integer gap.
  975. left := index
  976. right := index + 1
  977. // are we fusing left and right by adding k?
  978. if int64(rc.iv[left].last())+1 == k64 && int64(rc.iv[right].start) == k64+1 {
  979. // fuse into left
  980. rc.iv[left].length = rc.iv[right].last() - rc.iv[left].start
  981. // remove redundant right
  982. rc.iv = append(rc.iv[:left+1], rc.iv[right+1:]...)
  983. return
  984. }
  985. // are we an addition to left?
  986. if int64(rc.iv[left].last())+1 == k64 {
  987. // yes
  988. rc.iv[left].length++
  989. return
  990. }
  991. // are we an addition to right?
  992. if int64(rc.iv[right].start) == k64+1 {
  993. // yes
  994. rc.iv[right].start = k
  995. rc.iv[right].length++
  996. return
  997. }
  998. // k makes a standalone new interval16, inserted in the middle
  999. tail := append([]interval16{newInterval16Range(k, k)}, rc.iv[right:]...)
  1000. rc.iv = append(rc.iv[:left+1], tail...)
  1001. return
  1002. }
  1003. //msgp:ignore runIterator
// runIterator16 walks the values of a runContainer16 in the order
// the intervals appear in rc.iv.
//
// runIterator16 advice: you must call hasNext()
// before calling next()/peekNext() to insure there are contents.
type runIterator16 struct {
	rc            *runContainer16 // container being iterated
	curIndex      int64           // index into rc.iv of the current interval
	curPosInIndex uint16          // offset of the next value within rc.iv[curIndex]
}
  1011. // newRunIterator16 returns a new empty run container.
  1012. func (rc *runContainer16) newRunIterator16() *runIterator16 {
  1013. return &runIterator16{rc: rc, curIndex: 0, curPosInIndex: 0}
  1014. }
  1015. func (rc *runContainer16) iterate(cb func(x uint16) bool) bool {
  1016. iterator := runIterator16{rc, 0, 0}
  1017. for iterator.hasNext() {
  1018. if !cb(iterator.next()) {
  1019. return false
  1020. }
  1021. }
  1022. return true
  1023. }
  1024. // hasNext returns false if calling next will panic. It
  1025. // returns true when there is at least one more value
  1026. // available in the iteration sequence.
  1027. func (ri *runIterator16) hasNext() bool {
  1028. return int64(len(ri.rc.iv)) > ri.curIndex+1 ||
  1029. (int64(len(ri.rc.iv)) == ri.curIndex+1 && ri.rc.iv[ri.curIndex].length >= ri.curPosInIndex)
  1030. }
  1031. // next returns the next value in the iteration sequence.
  1032. func (ri *runIterator16) next() uint16 {
  1033. next := ri.rc.iv[ri.curIndex].start + ri.curPosInIndex
  1034. if ri.curPosInIndex == ri.rc.iv[ri.curIndex].length {
  1035. ri.curPosInIndex = 0
  1036. ri.curIndex++
  1037. } else {
  1038. ri.curPosInIndex++
  1039. }
  1040. return next
  1041. }
  1042. // peekNext returns the next value in the iteration sequence without advancing the iterator
  1043. func (ri *runIterator16) peekNext() uint16 {
  1044. return ri.rc.iv[ri.curIndex].start + ri.curPosInIndex
  1045. }
  1046. // advanceIfNeeded advances as long as the next value is smaller than minval
  1047. func (ri *runIterator16) advanceIfNeeded(minval uint16) {
  1048. if !ri.hasNext() || ri.peekNext() >= minval {
  1049. return
  1050. }
  1051. opt := &searchOptions{
  1052. startIndex: ri.curIndex,
  1053. endxIndex: int64(len(ri.rc.iv)),
  1054. }
  1055. // interval cannot be -1 because of minval > peekNext
  1056. interval, isPresent, _ := ri.rc.search(int64(minval), opt)
  1057. // if the minval is present, set the curPosIndex at the right position
  1058. if isPresent {
  1059. ri.curIndex = interval
  1060. ri.curPosInIndex = minval - ri.rc.iv[ri.curIndex].start
  1061. } else {
  1062. // otherwise interval is set to to the minimum index of rc.iv
  1063. // which comes strictly before the key, that's why we set the next interval
  1064. ri.curIndex = interval + 1
  1065. ri.curPosInIndex = 0
  1066. }
  1067. }
// runReverseIterator16 walks the values of a runContainer16 from
// the end of the last interval back towards the first interval.
//
// runReverseIterator16 advice: you must call hasNext()
// before calling next() to insure there are contents.
type runReverseIterator16 struct {
	rc            *runContainer16
	curIndex      int64  // index into rc.iv
	curPosInIndex uint16 // offset in rc.iv[curIndex]
}
  1075. // newRunReverseIterator16 returns a new empty run iterator.
  1076. func (rc *runContainer16) newRunReverseIterator16() *runReverseIterator16 {
  1077. index := int64(len(rc.iv)) - 1
  1078. pos := uint16(0)
  1079. if index >= 0 {
  1080. pos = rc.iv[index].length
  1081. }
  1082. return &runReverseIterator16{
  1083. rc: rc,
  1084. curIndex: index,
  1085. curPosInIndex: pos,
  1086. }
  1087. }
  1088. // hasNext returns false if calling next will panic. It
  1089. // returns true when there is at least one more value
  1090. // available in the iteration sequence.
  1091. func (ri *runReverseIterator16) hasNext() bool {
  1092. return ri.curIndex > 0 || ri.curIndex == 0 && ri.curPosInIndex >= 0
  1093. }
  1094. // next returns the next value in the iteration sequence.
  1095. func (ri *runReverseIterator16) next() uint16 {
  1096. next := ri.rc.iv[ri.curIndex].start + ri.curPosInIndex
  1097. if ri.curPosInIndex > 0 {
  1098. ri.curPosInIndex--
  1099. } else {
  1100. ri.curIndex--
  1101. if ri.curIndex >= 0 {
  1102. ri.curPosInIndex = ri.rc.iv[ri.curIndex].length
  1103. }
  1104. }
  1105. return next
  1106. }
  1107. func (rc *runContainer16) newManyRunIterator16() *runIterator16 {
  1108. return rc.newRunIterator16()
  1109. }
  1110. // hs are the high bits to include to avoid needing to reiterate over the buffer in NextMany
  1111. func (ri *runIterator16) nextMany(hs uint32, buf []uint32) int {
  1112. n := 0
  1113. if !ri.hasNext() {
  1114. return n
  1115. }
  1116. // start and end are inclusive
  1117. for n < len(buf) {
  1118. moreVals := 0
  1119. if ri.rc.iv[ri.curIndex].length >= ri.curPosInIndex {
  1120. // add as many as you can from this seq
  1121. moreVals = minOfInt(int(ri.rc.iv[ri.curIndex].length-ri.curPosInIndex)+1, len(buf)-n)
  1122. base := uint32(ri.rc.iv[ri.curIndex].start+ri.curPosInIndex) | hs
  1123. // allows BCE
  1124. buf2 := buf[n : n+moreVals]
  1125. for i := range buf2 {
  1126. buf2[i] = base + uint32(i)
  1127. }
  1128. // update values
  1129. n += moreVals
  1130. }
  1131. if moreVals+int(ri.curPosInIndex) > int(ri.rc.iv[ri.curIndex].length) {
  1132. ri.curPosInIndex = 0
  1133. ri.curIndex++
  1134. if ri.curIndex == int64(len(ri.rc.iv)) {
  1135. break
  1136. }
  1137. } else {
  1138. ri.curPosInIndex += uint16(moreVals) //moreVals always fits in uint16
  1139. }
  1140. }
  1141. return n
  1142. }
  1143. // remove removes key from the container.
  1144. func (rc *runContainer16) removeKey(key uint16) (wasPresent bool) {
  1145. var index int64
  1146. index, wasPresent, _ = rc.search(int64(key), nil)
  1147. if !wasPresent {
  1148. return // already removed, nothing to do.
  1149. }
  1150. pos := key - rc.iv[index].start
  1151. rc.deleteAt(&index, &pos)
  1152. return
  1153. }
  1154. // internal helper functions
  1155. func (rc *runContainer16) deleteAt(curIndex *int64, curPosInIndex *uint16) {
  1156. rc.card--
  1157. ci := *curIndex
  1158. pos := *curPosInIndex
  1159. // are we first, last, or in the middle of our interval16?
  1160. switch {
  1161. case pos == 0:
  1162. if int64(rc.iv[ci].length) == 0 {
  1163. // our interval disappears
  1164. rc.iv = append(rc.iv[:ci], rc.iv[ci+1:]...)
  1165. // curIndex stays the same, since the delete did
  1166. // the advance for us.
  1167. *curPosInIndex = 0
  1168. } else {
  1169. rc.iv[ci].start++ // no longer overflowable
  1170. rc.iv[ci].length--
  1171. }
  1172. case pos == rc.iv[ci].length:
  1173. // length
  1174. rc.iv[ci].length--
  1175. // our interval16 cannot disappear, else we would have been pos == 0, case first above.
  1176. *curPosInIndex--
  1177. // if we leave *curIndex alone, then Next() will work properly even after the delete.
  1178. default:
  1179. //middle
  1180. // split into two, adding an interval16
  1181. new0 := newInterval16Range(rc.iv[ci].start, rc.iv[ci].start+*curPosInIndex-1)
  1182. new1start := int64(rc.iv[ci].start+*curPosInIndex) + 1
  1183. if new1start > int64(MaxUint16) {
  1184. panic("overflow?!?!")
  1185. }
  1186. new1 := newInterval16Range(uint16(new1start), rc.iv[ci].last())
  1187. tail := append([]interval16{new0, new1}, rc.iv[ci+1:]...)
  1188. rc.iv = append(rc.iv[:ci], tail...)
  1189. // update curIndex and curPosInIndex
  1190. *curIndex++
  1191. *curPosInIndex = 0
  1192. }
  1193. }
  1194. func have4Overlap16(astart, alast, bstart, blast int64) bool {
  1195. if alast+1 <= bstart {
  1196. return false
  1197. }
  1198. return blast+1 > astart
  1199. }
  1200. func intersectWithLeftover16(astart, alast, bstart, blast int64) (isOverlap, isLeftoverA, isLeftoverB bool, leftoverstart int64, intersection interval16) {
  1201. if !have4Overlap16(astart, alast, bstart, blast) {
  1202. return
  1203. }
  1204. isOverlap = true
  1205. // do the intersection:
  1206. if bstart > astart {
  1207. intersection.start = uint16(bstart)
  1208. } else {
  1209. intersection.start = uint16(astart)
  1210. }
  1211. switch {
  1212. case blast < alast:
  1213. isLeftoverA = true
  1214. leftoverstart = blast + 1
  1215. intersection.length = uint16(blast) - intersection.start
  1216. case alast < blast:
  1217. isLeftoverB = true
  1218. leftoverstart = alast + 1
  1219. intersection.length = uint16(alast) - intersection.start
  1220. default:
  1221. // alast == blast
  1222. intersection.length = uint16(alast) - intersection.start
  1223. }
  1224. return
  1225. }
  1226. func (rc *runContainer16) findNextIntervalThatIntersectsStartingFrom(startIndex int64, key int64) (index int64, done bool) {
  1227. rc.myOpts.startIndex = startIndex
  1228. rc.myOpts.endxIndex = 0
  1229. w, _, _ := rc.search(key, &rc.myOpts)
  1230. // rc.search always returns w < len(rc.iv)
  1231. if w < startIndex {
  1232. // not found and comes before lower bound startIndex,
  1233. // so just use the lower bound.
  1234. if startIndex == int64(len(rc.iv)) {
  1235. // also this bump up means that we are done
  1236. return startIndex, true
  1237. }
  1238. return startIndex, false
  1239. }
  1240. return w, false
  1241. }
  1242. func sliceToString16(m []interval16) string {
  1243. s := ""
  1244. for i := range m {
  1245. s += fmt.Sprintf("%v: %s, ", i, m[i])
  1246. }
  1247. return s
  1248. }
  1249. // selectInt16 returns the j-th value in the container.
  1250. // We panic of j is out of bounds.
  1251. func (rc *runContainer16) selectInt16(j uint16) int {
  1252. n := rc.cardinality()
  1253. if int64(j) > n {
  1254. panic(fmt.Sprintf("Cannot select %v since Cardinality is %v", j, n))
  1255. }
  1256. var offset int64
  1257. for k := range rc.iv {
  1258. nextOffset := offset + rc.iv[k].runlen()
  1259. if nextOffset > int64(j) {
  1260. return int(int64(rc.iv[k].start) + (int64(j) - offset))
  1261. }
  1262. offset = nextOffset
  1263. }
  1264. panic(fmt.Sprintf("Cannot select %v since Cardinality is %v", j, n))
  1265. }
  1266. // helper for invert
  1267. func (rc *runContainer16) invertlastInterval(origin uint16, lastIdx int) []interval16 {
  1268. cur := rc.iv[lastIdx]
  1269. if cur.last() == MaxUint16 {
  1270. if cur.start == origin {
  1271. return nil // empty container
  1272. }
  1273. return []interval16{newInterval16Range(origin, cur.start-1)}
  1274. }
  1275. if cur.start == origin {
  1276. return []interval16{newInterval16Range(cur.last()+1, MaxUint16)}
  1277. }
  1278. // invert splits
  1279. return []interval16{
  1280. newInterval16Range(origin, cur.start-1),
  1281. newInterval16Range(cur.last()+1, MaxUint16),
  1282. }
  1283. }
  1284. // invert returns a new container (not inplace), that is
  1285. // the inversion of rc. For each bit b in rc, the
  1286. // returned value has !b
  1287. func (rc *runContainer16) invert() *runContainer16 {
  1288. ni := len(rc.iv)
  1289. var m []interval16
  1290. switch ni {
  1291. case 0:
  1292. return &runContainer16{iv: []interval16{newInterval16Range(0, MaxUint16)}}
  1293. case 1:
  1294. return &runContainer16{iv: rc.invertlastInterval(0, 0)}
  1295. }
  1296. var invstart int64
  1297. ult := ni - 1
  1298. for i, cur := range rc.iv {
  1299. if i == ult {
  1300. // invertlastInteval will add both intervals (b) and (c) in
  1301. // diagram below.
  1302. m = append(m, rc.invertlastInterval(uint16(invstart), i)...)
  1303. break
  1304. }
  1305. // INVAR: i and cur are not the last interval, there is a next at i+1
  1306. //
  1307. // ........[cur.start, cur.last] ...... [next.start, next.last]....
  1308. // ^ ^ ^
  1309. // (a) (b) (c)
  1310. //
  1311. // Now: we add interval (a); but if (a) is empty, for cur.start==0, we skip it.
  1312. if cur.start > 0 {
  1313. m = append(m, newInterval16Range(uint16(invstart), cur.start-1))
  1314. }
  1315. invstart = int64(cur.last() + 1)
  1316. }
  1317. return &runContainer16{iv: m}
  1318. }
  1319. func (iv interval16) equal(b interval16) bool {
  1320. return iv.start == b.start && iv.length == b.length
  1321. }
  1322. func (iv interval16) isSuperSetOf(b interval16) bool {
  1323. return iv.start <= b.start && b.last() <= iv.last()
  1324. }
  1325. func (iv interval16) subtractInterval(del interval16) (left []interval16, delcount int64) {
  1326. isect, isEmpty := intersectInterval16s(iv, del)
  1327. if isEmpty {
  1328. return nil, 0
  1329. }
  1330. if del.isSuperSetOf(iv) {
  1331. return nil, iv.runlen()
  1332. }
  1333. switch {
  1334. case isect.start > iv.start && isect.last() < iv.last():
  1335. new0 := newInterval16Range(iv.start, isect.start-1)
  1336. new1 := newInterval16Range(isect.last()+1, iv.last())
  1337. return []interval16{new0, new1}, isect.runlen()
  1338. case isect.start == iv.start:
  1339. return []interval16{newInterval16Range(isect.last()+1, iv.last())}, isect.runlen()
  1340. default:
  1341. return []interval16{newInterval16Range(iv.start, isect.start-1)}, isect.runlen()
  1342. }
  1343. }
  1344. func (rc *runContainer16) isubtract(del interval16) {
  1345. origiv := make([]interval16, len(rc.iv))
  1346. copy(origiv, rc.iv)
  1347. n := int64(len(rc.iv))
  1348. if n == 0 {
  1349. return // already done.
  1350. }
  1351. _, isEmpty := intersectInterval16s(newInterval16Range(rc.iv[0].start, rc.iv[n-1].last()), del)
  1352. if isEmpty {
  1353. return // done
  1354. }
  1355. // INVAR there is some intersection between rc and del
  1356. istart, startAlready, _ := rc.search(int64(del.start), nil)
  1357. ilast, lastAlready, _ := rc.search(int64(del.last()), nil)
  1358. rc.card = -1
  1359. if istart == -1 {
  1360. if ilast == n-1 && !lastAlready {
  1361. rc.iv = nil
  1362. return
  1363. }
  1364. }
  1365. // some intervals will remain
  1366. switch {
  1367. case startAlready && lastAlready:
  1368. res0, _ := rc.iv[istart].subtractInterval(del)
  1369. // would overwrite values in iv b/c res0 can have len 2. so
  1370. // write to origiv instead.
  1371. lost := 1 + ilast - istart
  1372. changeSize := int64(len(res0)) - lost
  1373. newSize := int64(len(rc.iv)) + changeSize
  1374. // rc.iv = append(pre, caboose...)
  1375. // return
  1376. if ilast != istart {
  1377. res1, _ := rc.iv[ilast].subtractInterval(del)
  1378. res0 = append(res0, res1...)
  1379. changeSize = int64(len(res0)) - lost
  1380. newSize = int64(len(rc.iv)) + changeSize
  1381. }
  1382. switch {
  1383. case changeSize < 0:
  1384. // shrink
  1385. copy(rc.iv[istart+int64(len(res0)):], rc.iv[ilast+1:])
  1386. copy(rc.iv[istart:istart+int64(len(res0))], res0)
  1387. rc.iv = rc.iv[:newSize]
  1388. return
  1389. case changeSize == 0:
  1390. // stay the same
  1391. copy(rc.iv[istart:istart+int64(len(res0))], res0)
  1392. return
  1393. default:
  1394. // changeSize > 0 is only possible when ilast == istart.
  1395. // Hence we now know: changeSize == 1 and len(res0) == 2
  1396. rc.iv = append(rc.iv, interval16{})
  1397. // len(rc.iv) is correct now, no need to rc.iv = rc.iv[:newSize]
  1398. // copy the tail into place
  1399. copy(rc.iv[ilast+2:], rc.iv[ilast+1:])
  1400. // copy the new item(s) into place
  1401. copy(rc.iv[istart:istart+2], res0)
  1402. return
  1403. }
  1404. case !startAlready && !lastAlready:
  1405. // we get to discard whole intervals
  1406. // from the search() definition:
  1407. // if del.start is not present, then istart is
  1408. // set as follows:
  1409. //
  1410. // a) istart == n-1 if del.start is beyond our
  1411. // last interval16 in rc.iv;
  1412. //
  1413. // b) istart == -1 if del.start is before our first
  1414. // interval16 in rc.iv;
  1415. //
  1416. // c) istart is set to the minimum index of rc.iv
  1417. // which comes strictly before the del.start;
  1418. // so del.start > rc.iv[istart].last,
  1419. // and if istart+1 exists, then del.start < rc.iv[istart+1].startx
  1420. // if del.last is not present, then ilast is
  1421. // set as follows:
  1422. //
  1423. // a) ilast == n-1 if del.last is beyond our
  1424. // last interval16 in rc.iv;
  1425. //
  1426. // b) ilast == -1 if del.last is before our first
  1427. // interval16 in rc.iv;
  1428. //
  1429. // c) ilast is set to the minimum index of rc.iv
  1430. // which comes strictly before the del.last;
  1431. // so del.last > rc.iv[ilast].last,
  1432. // and if ilast+1 exists, then del.last < rc.iv[ilast+1].start
  1433. // INVAR: istart >= 0
  1434. pre := rc.iv[:istart+1]
  1435. if ilast == n-1 {
  1436. rc.iv = pre
  1437. return
  1438. }
  1439. // INVAR: ilast < n-1
  1440. lost := ilast - istart
  1441. changeSize := -lost
  1442. newSize := int64(len(rc.iv)) + changeSize
  1443. if changeSize != 0 {
  1444. copy(rc.iv[ilast+1+changeSize:], rc.iv[ilast+1:])
  1445. }
  1446. rc.iv = rc.iv[:newSize]
  1447. return
  1448. case startAlready && !lastAlready:
  1449. // we can only shrink or stay the same size
  1450. // i.e. we either eliminate the whole interval,
  1451. // or just cut off the right side.
  1452. res0, _ := rc.iv[istart].subtractInterval(del)
  1453. if len(res0) > 0 {
  1454. // len(res) must be 1
  1455. rc.iv[istart] = res0[0]
  1456. }
  1457. lost := 1 + (ilast - istart)
  1458. changeSize := int64(len(res0)) - lost
  1459. newSize := int64(len(rc.iv)) + changeSize
  1460. if changeSize != 0 {
  1461. copy(rc.iv[ilast+1+changeSize:], rc.iv[ilast+1:])
  1462. }
  1463. rc.iv = rc.iv[:newSize]
  1464. return
  1465. case !startAlready && lastAlready:
  1466. // we can only shrink or stay the same size
  1467. res1, _ := rc.iv[ilast].subtractInterval(del)
  1468. lost := ilast - istart
  1469. changeSize := int64(len(res1)) - lost
  1470. newSize := int64(len(rc.iv)) + changeSize
  1471. if changeSize != 0 {
  1472. // move the tail first to make room for res1
  1473. copy(rc.iv[ilast+1+changeSize:], rc.iv[ilast+1:])
  1474. }
  1475. copy(rc.iv[istart+1:], res1)
  1476. rc.iv = rc.iv[:newSize]
  1477. return
  1478. }
  1479. }
  1480. // compute rc minus b, and return the result as a new value (not inplace).
  1481. // port of run_container_andnot from CRoaring...
  1482. // https://github.com/RoaringBitmap/CRoaring/blob/master/src/containers/run.c#L435-L496
  1483. func (rc *runContainer16) AndNotRunContainer16(b *runContainer16) *runContainer16 {
  1484. if len(b.iv) == 0 || len(rc.iv) == 0 {
  1485. return rc
  1486. }
  1487. dst := newRunContainer16()
  1488. apos := 0
  1489. bpos := 0
  1490. a := rc
  1491. astart := a.iv[apos].start
  1492. alast := a.iv[apos].last()
  1493. bstart := b.iv[bpos].start
  1494. blast := b.iv[bpos].last()
  1495. alen := len(a.iv)
  1496. blen := len(b.iv)
  1497. for apos < alen && bpos < blen {
  1498. switch {
  1499. case alast < bstart:
  1500. // output the first run
  1501. dst.iv = append(dst.iv, newInterval16Range(astart, alast))
  1502. apos++
  1503. if apos < alen {
  1504. astart = a.iv[apos].start
  1505. alast = a.iv[apos].last()
  1506. }
  1507. case blast < astart:
  1508. // exit the second run
  1509. bpos++
  1510. if bpos < blen {
  1511. bstart = b.iv[bpos].start
  1512. blast = b.iv[bpos].last()
  1513. }
  1514. default:
  1515. // a: [ ]
  1516. // b: [ ]
  1517. // alast >= bstart
  1518. // blast >= astart
  1519. if astart < bstart {
  1520. dst.iv = append(dst.iv, newInterval16Range(astart, bstart-1))
  1521. }
  1522. if alast > blast {
  1523. astart = blast + 1
  1524. } else {
  1525. apos++
  1526. if apos < alen {
  1527. astart = a.iv[apos].start
  1528. alast = a.iv[apos].last()
  1529. }
  1530. }
  1531. }
  1532. }
  1533. if apos < alen {
  1534. dst.iv = append(dst.iv, newInterval16Range(astart, alast))
  1535. apos++
  1536. if apos < alen {
  1537. dst.iv = append(dst.iv, a.iv[apos:]...)
  1538. }
  1539. }
  1540. return dst
  1541. }
  1542. func (rc *runContainer16) numberOfRuns() (nr int) {
  1543. return len(rc.iv)
  1544. }
// containerType identifies this container as a run container by
// returning run16Contype.
func (rc *runContainer16) containerType() contype {
	return run16Contype
}
  1548. func (rc *runContainer16) equals16(srb *runContainer16) bool {
  1549. // Check if the containers are the same object.
  1550. if rc == srb {
  1551. return true
  1552. }
  1553. if len(srb.iv) != len(rc.iv) {
  1554. return false
  1555. }
  1556. for i, v := range rc.iv {
  1557. if v != srb.iv[i] {
  1558. return false
  1559. }
  1560. }
  1561. return true
  1562. }
// compile time verify we meet interface requirements:
// the build fails if *runContainer16 stops satisfying container.
var _ container = &runContainer16{}
  1565. func (rc *runContainer16) clone() container {
  1566. return newRunContainer16CopyIv(rc.iv)
  1567. }
  1568. func (rc *runContainer16) minimum() uint16 {
  1569. return rc.iv[0].start // assume not empty
  1570. }
  1571. func (rc *runContainer16) maximum() uint16 {
  1572. return rc.iv[len(rc.iv)-1].last() // assume not empty
  1573. }
  1574. func (rc *runContainer16) isFull() bool {
  1575. return (len(rc.iv) == 1) && ((rc.iv[0].start == 0) && (rc.iv[0].last() == MaxUint16))
  1576. }
  1577. func (rc *runContainer16) and(a container) container {
  1578. if rc.isFull() {
  1579. return a.clone()
  1580. }
  1581. switch c := a.(type) {
  1582. case *runContainer16:
  1583. return rc.intersect(c)
  1584. case *arrayContainer:
  1585. return rc.andArray(c)
  1586. case *bitmapContainer:
  1587. return rc.andBitmapContainer(c)
  1588. }
  1589. panic("unsupported container type")
  1590. }
  1591. func (rc *runContainer16) andCardinality(a container) int {
  1592. switch c := a.(type) {
  1593. case *runContainer16:
  1594. return int(rc.intersectCardinality(c))
  1595. case *arrayContainer:
  1596. return rc.andArrayCardinality(c)
  1597. case *bitmapContainer:
  1598. return rc.andBitmapContainerCardinality(c)
  1599. }
  1600. panic("unsupported container type")
  1601. }
  1602. // andBitmapContainer finds the intersection of rc and b.
  1603. func (rc *runContainer16) andBitmapContainer(bc *bitmapContainer) container {
  1604. bc2 := newBitmapContainerFromRun(rc)
  1605. return bc2.andBitmap(bc)
  1606. }
  1607. func (rc *runContainer16) andArrayCardinality(ac *arrayContainer) int {
  1608. pos := 0
  1609. answer := 0
  1610. maxpos := ac.getCardinality()
  1611. if maxpos == 0 {
  1612. return 0 // won't happen in actual code
  1613. }
  1614. v := ac.content[pos]
  1615. mainloop:
  1616. for _, p := range rc.iv {
  1617. for v < p.start {
  1618. pos++
  1619. if pos == maxpos {
  1620. break mainloop
  1621. }
  1622. v = ac.content[pos]
  1623. }
  1624. for v <= p.last() {
  1625. answer++
  1626. pos++
  1627. if pos == maxpos {
  1628. break mainloop
  1629. }
  1630. v = ac.content[pos]
  1631. }
  1632. }
  1633. return answer
  1634. }
  1635. func (rc *runContainer16) iand(a container) container {
  1636. if rc.isFull() {
  1637. return a.clone()
  1638. }
  1639. switch c := a.(type) {
  1640. case *runContainer16:
  1641. return rc.inplaceIntersect(c)
  1642. case *arrayContainer:
  1643. return rc.andArray(c)
  1644. case *bitmapContainer:
  1645. return rc.iandBitmapContainer(c)
  1646. }
  1647. panic("unsupported container type")
  1648. }
  1649. func (rc *runContainer16) inplaceIntersect(rc2 *runContainer16) container {
  1650. // TODO: optimize by doing less allocation, possibly?
  1651. // sect will be new
  1652. sect := rc.intersect(rc2)
  1653. *rc = *sect
  1654. return rc
  1655. }
  1656. func (rc *runContainer16) iandBitmapContainer(bc *bitmapContainer) container {
  1657. isect := rc.andBitmapContainer(bc)
  1658. *rc = *newRunContainer16FromContainer(isect)
  1659. return rc
  1660. }
  1661. func (rc *runContainer16) andArray(ac *arrayContainer) container {
  1662. if len(rc.iv) == 0 {
  1663. return newArrayContainer()
  1664. }
  1665. acCardinality := ac.getCardinality()
  1666. c := newArrayContainerCapacity(acCardinality)
  1667. for rlePos, arrayPos := 0, 0; arrayPos < acCardinality; {
  1668. iv := rc.iv[rlePos]
  1669. arrayVal := ac.content[arrayPos]
  1670. for iv.last() < arrayVal {
  1671. rlePos++
  1672. if rlePos == len(rc.iv) {
  1673. return c
  1674. }
  1675. iv = rc.iv[rlePos]
  1676. }
  1677. if iv.start > arrayVal {
  1678. arrayPos = advanceUntil(ac.content, arrayPos, len(ac.content), iv.start)
  1679. } else {
  1680. c.content = append(c.content, arrayVal)
  1681. arrayPos++
  1682. }
  1683. }
  1684. return c
  1685. }
  1686. func (rc *runContainer16) andNot(a container) container {
  1687. switch c := a.(type) {
  1688. case *arrayContainer:
  1689. return rc.andNotArray(c)
  1690. case *bitmapContainer:
  1691. return rc.andNotBitmap(c)
  1692. case *runContainer16:
  1693. return rc.andNotRunContainer16(c)
  1694. }
  1695. panic("unsupported container type")
  1696. }
  1697. func (rc *runContainer16) fillLeastSignificant16bits(x []uint32, i int, mask uint32) {
  1698. k := 0
  1699. var val int64
  1700. for _, p := range rc.iv {
  1701. n := p.runlen()
  1702. for j := int64(0); j < n; j++ {
  1703. val = int64(p.start) + j
  1704. x[k+i] = uint32(val) | mask
  1705. k++
  1706. }
  1707. }
  1708. }
  1709. func (rc *runContainer16) getShortIterator() shortPeekable {
  1710. return rc.newRunIterator16()
  1711. }
  1712. func (rc *runContainer16) getReverseIterator() shortIterable {
  1713. return rc.newRunReverseIterator16()
  1714. }
  1715. func (rc *runContainer16) getManyIterator() manyIterable {
  1716. return rc.newManyRunIterator16()
  1717. }
  1718. // add the values in the range [firstOfRange, endx). endx
  1719. // is still abe to express 2^16 because it is an int not an uint16.
  1720. func (rc *runContainer16) iaddRange(firstOfRange, endx int) container {
  1721. if firstOfRange >= endx {
  1722. panic(fmt.Sprintf("invalid %v = endx >= firstOfRange", endx))
  1723. }
  1724. addme := newRunContainer16TakeOwnership([]interval16{
  1725. {
  1726. start: uint16(firstOfRange),
  1727. length: uint16(endx - 1 - firstOfRange),
  1728. },
  1729. })
  1730. *rc = *rc.union(addme)
  1731. return rc
  1732. }
  1733. // remove the values in the range [firstOfRange,endx)
  1734. func (rc *runContainer16) iremoveRange(firstOfRange, endx int) container {
  1735. if firstOfRange >= endx {
  1736. panic(fmt.Sprintf("request to iremove empty set [%v, %v),"+
  1737. " nothing to do.", firstOfRange, endx))
  1738. //return rc
  1739. }
  1740. x := newInterval16Range(uint16(firstOfRange), uint16(endx-1))
  1741. rc.isubtract(x)
  1742. return rc
  1743. }
  1744. // not flip the values in the range [firstOfRange,endx)
  1745. func (rc *runContainer16) not(firstOfRange, endx int) container {
  1746. if firstOfRange >= endx {
  1747. panic(fmt.Sprintf("invalid %v = endx >= firstOfRange = %v", endx, firstOfRange))
  1748. }
  1749. return rc.Not(firstOfRange, endx)
  1750. }
  1751. // Not flips the values in the range [firstOfRange,endx).
  1752. // This is not inplace. Only the returned value has the flipped bits.
  1753. //
  1754. // Currently implemented as (!A intersect B) union (A minus B),
  1755. // where A is rc, and B is the supplied [firstOfRange, endx) interval.
  1756. //
  1757. // TODO(time optimization): convert this to a single pass
  1758. // algorithm by copying AndNotRunContainer16() and modifying it.
  1759. // Current routine is correct but
  1760. // makes 2 more passes through the arrays than should be
  1761. // strictly necessary. Measure both ways though--this may not matter.
  1762. //
  1763. func (rc *runContainer16) Not(firstOfRange, endx int) *runContainer16 {
  1764. if firstOfRange >= endx {
  1765. panic(fmt.Sprintf("invalid %v = endx >= firstOfRange == %v", endx, firstOfRange))
  1766. }
  1767. if firstOfRange >= endx {
  1768. return rc.Clone()
  1769. }
  1770. a := rc
  1771. // algo:
  1772. // (!A intersect B) union (A minus B)
  1773. nota := a.invert()
  1774. bs := []interval16{newInterval16Range(uint16(firstOfRange), uint16(endx-1))}
  1775. b := newRunContainer16TakeOwnership(bs)
  1776. notAintersectB := nota.intersect(b)
  1777. aMinusB := a.AndNotRunContainer16(b)
  1778. rc2 := notAintersectB.union(aMinusB)
  1779. return rc2
  1780. }
  1781. // equals is now logical equals; it does not require the
  1782. // same underlying container type.
  1783. func (rc *runContainer16) equals(o container) bool {
  1784. srb, ok := o.(*runContainer16)
  1785. if !ok {
  1786. // maybe value instead of pointer
  1787. val, valok := o.(*runContainer16)
  1788. if valok {
  1789. srb = val
  1790. ok = true
  1791. }
  1792. }
  1793. if ok {
  1794. // Check if the containers are the same object.
  1795. if rc == srb {
  1796. return true
  1797. }
  1798. if len(srb.iv) != len(rc.iv) {
  1799. return false
  1800. }
  1801. for i, v := range rc.iv {
  1802. if v != srb.iv[i] {
  1803. return false
  1804. }
  1805. }
  1806. return true
  1807. }
  1808. // use generic comparison
  1809. if o.getCardinality() != rc.getCardinality() {
  1810. return false
  1811. }
  1812. rit := rc.getShortIterator()
  1813. bit := o.getShortIterator()
  1814. //k := 0
  1815. for rit.hasNext() {
  1816. if bit.next() != rit.next() {
  1817. return false
  1818. }
  1819. //k++
  1820. }
  1821. return true
  1822. }
  1823. func (rc *runContainer16) iaddReturnMinimized(x uint16) container {
  1824. rc.Add(x)
  1825. return rc
  1826. }
  1827. func (rc *runContainer16) iadd(x uint16) (wasNew bool) {
  1828. return rc.Add(x)
  1829. }
  1830. func (rc *runContainer16) iremoveReturnMinimized(x uint16) container {
  1831. rc.removeKey(x)
  1832. return rc
  1833. }
  1834. func (rc *runContainer16) iremove(x uint16) bool {
  1835. return rc.removeKey(x)
  1836. }
  1837. func (rc *runContainer16) or(a container) container {
  1838. if rc.isFull() {
  1839. return rc.clone()
  1840. }
  1841. switch c := a.(type) {
  1842. case *runContainer16:
  1843. return rc.union(c)
  1844. case *arrayContainer:
  1845. return rc.orArray(c)
  1846. case *bitmapContainer:
  1847. return rc.orBitmapContainer(c)
  1848. }
  1849. panic("unsupported container type")
  1850. }
  1851. func (rc *runContainer16) orCardinality(a container) int {
  1852. switch c := a.(type) {
  1853. case *runContainer16:
  1854. return int(rc.unionCardinality(c))
  1855. case *arrayContainer:
  1856. return rc.orArrayCardinality(c)
  1857. case *bitmapContainer:
  1858. return rc.orBitmapContainerCardinality(c)
  1859. }
  1860. panic("unsupported container type")
  1861. }
  1862. // orBitmapContainer finds the union of rc and bc.
  1863. func (rc *runContainer16) orBitmapContainer(bc *bitmapContainer) container {
  1864. bc2 := newBitmapContainerFromRun(rc)
  1865. return bc2.iorBitmap(bc)
  1866. }
  1867. func (rc *runContainer16) andBitmapContainerCardinality(bc *bitmapContainer) int {
  1868. answer := 0
  1869. for i := range rc.iv {
  1870. answer += bc.getCardinalityInRange(uint(rc.iv[i].start), uint(rc.iv[i].last())+1)
  1871. }
  1872. //bc.computeCardinality()
  1873. return answer
  1874. }
  1875. func (rc *runContainer16) orBitmapContainerCardinality(bc *bitmapContainer) int {
  1876. return rc.getCardinality() + bc.getCardinality() - rc.andBitmapContainerCardinality(bc)
  1877. }
  1878. // orArray finds the union of rc and ac.
  1879. func (rc *runContainer16) orArray(ac *arrayContainer) container {
  1880. bc1 := newBitmapContainerFromRun(rc)
  1881. bc2 := ac.toBitmapContainer()
  1882. return bc1.orBitmap(bc2)
  1883. }
  1884. // orArray finds the union of rc and ac.
  1885. func (rc *runContainer16) orArrayCardinality(ac *arrayContainer) int {
  1886. return ac.getCardinality() + rc.getCardinality() - rc.andArrayCardinality(ac)
  1887. }
  1888. func (rc *runContainer16) ior(a container) container {
  1889. if rc.isFull() {
  1890. return rc
  1891. }
  1892. switch c := a.(type) {
  1893. case *runContainer16:
  1894. return rc.inplaceUnion(c)
  1895. case *arrayContainer:
  1896. return rc.iorArray(c)
  1897. case *bitmapContainer:
  1898. return rc.iorBitmapContainer(c)
  1899. }
  1900. panic("unsupported container type")
  1901. }
  1902. func (rc *runContainer16) inplaceUnion(rc2 *runContainer16) container {
  1903. for _, p := range rc2.iv {
  1904. last := int64(p.last())
  1905. for i := int64(p.start); i <= last; i++ {
  1906. rc.Add(uint16(i))
  1907. }
  1908. }
  1909. return rc
  1910. }
  1911. func (rc *runContainer16) iorBitmapContainer(bc *bitmapContainer) container {
  1912. it := bc.getShortIterator()
  1913. for it.hasNext() {
  1914. rc.Add(it.next())
  1915. }
  1916. return rc
  1917. }
  1918. func (rc *runContainer16) iorArray(ac *arrayContainer) container {
  1919. it := ac.getShortIterator()
  1920. for it.hasNext() {
  1921. rc.Add(it.next())
  1922. }
  1923. return rc
  1924. }
  1925. // lazyIOR is described (not yet implemented) in
  1926. // this nice note from @lemire on
  1927. // https://github.com/RoaringBitmap/roaring/pull/70#issuecomment-263613737
  1928. //
  1929. // Description of lazyOR and lazyIOR from @lemire:
  1930. //
  1931. // Lazy functions are optional and can be simply
  1932. // wrapper around non-lazy functions.
  1933. //
  1934. // The idea of "laziness" is as follows. It is
  1935. // inspired by the concept of lazy evaluation
  1936. // you might be familiar with (functional programming
  1937. // and all that). So a roaring bitmap is
  1938. // such that all its containers are, in some
  1939. // sense, chosen to use as little memory as
  1940. // possible. This is nice. Also, all bitsets
  1941. // are "cardinality aware" so that you can do
  1942. // fast rank/select queries, or query the
  1943. // cardinality of the whole bitmap... very fast,
  1944. // without latency.
  1945. //
  1946. // However, imagine that you are aggregating 100
  1947. // bitmaps together. So you OR the first two, then OR
  1948. // that with the third one and so forth. Clearly,
  1949. // intermediate bitmaps don't need to be as
  1950. // compressed as possible, right? They can be
  1951. // in a "dirty state". You only need the end
  1952. // result to be in a nice state... which you
  1953. // can achieve by calling repairAfterLazy at the end.
  1954. //
  1955. // The Java/C code does something special for
  1956. // the in-place lazy OR runs. The idea is that
  1957. // instead of taking two run containers and
  1958. // generating a new one, we actually try to
  1959. // do the computation in-place through a
  1960. // technique invented by @gssiyankai (pinging him!).
  1961. // What you do is you check whether the host
  1962. // run container has lots of extra capacity.
  1963. // If it does, you move its data at the end of
  1964. // the backing array, and then you write
  1965. // the answer at the beginning. What this
  1966. // trick does is minimize memory allocations.
  1967. //
  1968. func (rc *runContainer16) lazyIOR(a container) container {
  1969. // not lazy at the moment
  1970. return rc.ior(a)
  1971. }
  1972. // lazyOR is described above in lazyIOR.
  1973. func (rc *runContainer16) lazyOR(a container) container {
  1974. // not lazy at the moment
  1975. return rc.or(a)
  1976. }
  1977. func (rc *runContainer16) intersects(a container) bool {
  1978. // TODO: optimize by doing inplace/less allocation, possibly?
  1979. isect := rc.and(a)
  1980. return isect.getCardinality() > 0
  1981. }
  1982. func (rc *runContainer16) xor(a container) container {
  1983. switch c := a.(type) {
  1984. case *arrayContainer:
  1985. return rc.xorArray(c)
  1986. case *bitmapContainer:
  1987. return rc.xorBitmap(c)
  1988. case *runContainer16:
  1989. return rc.xorRunContainer16(c)
  1990. }
  1991. panic("unsupported container type")
  1992. }
  1993. func (rc *runContainer16) iandNot(a container) container {
  1994. switch c := a.(type) {
  1995. case *arrayContainer:
  1996. return rc.iandNotArray(c)
  1997. case *bitmapContainer:
  1998. return rc.iandNotBitmap(c)
  1999. case *runContainer16:
  2000. return rc.iandNotRunContainer16(c)
  2001. }
  2002. panic("unsupported container type")
  2003. }
  2004. // flip the values in the range [firstOfRange,endx)
  2005. func (rc *runContainer16) inot(firstOfRange, endx int) container {
  2006. if firstOfRange >= endx {
  2007. panic(fmt.Sprintf("invalid %v = endx >= firstOfRange = %v", endx, firstOfRange))
  2008. }
  2009. // TODO: minimize copies, do it all inplace; not() makes a copy.
  2010. rc = rc.Not(firstOfRange, endx)
  2011. return rc
  2012. }
  2013. func (rc *runContainer16) getCardinality() int {
  2014. return int(rc.cardinality())
  2015. }
  2016. func (rc *runContainer16) rank(x uint16) int {
  2017. n := int64(len(rc.iv))
  2018. xx := int64(x)
  2019. w, already, _ := rc.search(xx, nil)
  2020. if w < 0 {
  2021. return 0
  2022. }
  2023. if !already && w == n-1 {
  2024. return rc.getCardinality()
  2025. }
  2026. var rnk int64
  2027. if !already {
  2028. for i := int64(0); i <= w; i++ {
  2029. rnk += rc.iv[i].runlen()
  2030. }
  2031. return int(rnk)
  2032. }
  2033. for i := int64(0); i < w; i++ {
  2034. rnk += rc.iv[i].runlen()
  2035. }
  2036. rnk += int64(x-rc.iv[w].start) + 1
  2037. return int(rnk)
  2038. }
  2039. func (rc *runContainer16) selectInt(x uint16) int {
  2040. return rc.selectInt16(x)
  2041. }
  2042. func (rc *runContainer16) andNotRunContainer16(b *runContainer16) container {
  2043. return rc.AndNotRunContainer16(b)
  2044. }
  2045. func (rc *runContainer16) andNotArray(ac *arrayContainer) container {
  2046. rcb := rc.toBitmapContainer()
  2047. acb := ac.toBitmapContainer()
  2048. return rcb.andNotBitmap(acb)
  2049. }
  2050. func (rc *runContainer16) andNotBitmap(bc *bitmapContainer) container {
  2051. rcb := rc.toBitmapContainer()
  2052. return rcb.andNotBitmap(bc)
  2053. }
  2054. func (rc *runContainer16) toBitmapContainer() *bitmapContainer {
  2055. bc := newBitmapContainer()
  2056. for i := range rc.iv {
  2057. bc.iaddRange(int(rc.iv[i].start), int(rc.iv[i].last())+1)
  2058. }
  2059. bc.computeCardinality()
  2060. return bc
  2061. }
  2062. func (rc *runContainer16) iandNotRunContainer16(x2 *runContainer16) container {
  2063. rcb := rc.toBitmapContainer()
  2064. x2b := x2.toBitmapContainer()
  2065. rcb.iandNotBitmapSurely(x2b)
  2066. // TODO: check size and optimize the return value
  2067. // TODO: is inplace modification really required? If not, elide the copy.
  2068. rc2 := newRunContainer16FromBitmapContainer(rcb)
  2069. *rc = *rc2
  2070. return rc
  2071. }
  2072. func (rc *runContainer16) iandNotArray(ac *arrayContainer) container {
  2073. rcb := rc.toBitmapContainer()
  2074. acb := ac.toBitmapContainer()
  2075. rcb.iandNotBitmapSurely(acb)
  2076. // TODO: check size and optimize the return value
  2077. // TODO: is inplace modification really required? If not, elide the copy.
  2078. rc2 := newRunContainer16FromBitmapContainer(rcb)
  2079. *rc = *rc2
  2080. return rc
  2081. }
  2082. func (rc *runContainer16) iandNotBitmap(bc *bitmapContainer) container {
  2083. rcb := rc.toBitmapContainer()
  2084. rcb.iandNotBitmapSurely(bc)
  2085. // TODO: check size and optimize the return value
  2086. // TODO: is inplace modification really required? If not, elide the copy.
  2087. rc2 := newRunContainer16FromBitmapContainer(rcb)
  2088. *rc = *rc2
  2089. return rc
  2090. }
  2091. func (rc *runContainer16) xorRunContainer16(x2 *runContainer16) container {
  2092. rcb := rc.toBitmapContainer()
  2093. x2b := x2.toBitmapContainer()
  2094. return rcb.xorBitmap(x2b)
  2095. }
  2096. func (rc *runContainer16) xorArray(ac *arrayContainer) container {
  2097. rcb := rc.toBitmapContainer()
  2098. acb := ac.toBitmapContainer()
  2099. return rcb.xorBitmap(acb)
  2100. }
  2101. func (rc *runContainer16) xorBitmap(bc *bitmapContainer) container {
  2102. rcb := rc.toBitmapContainer()
  2103. return rcb.xorBitmap(bc)
  2104. }
  2105. // convert to bitmap or array *if needed*
  2106. func (rc *runContainer16) toEfficientContainer() container {
  2107. // runContainer16SerializedSizeInBytes(numRuns)
  2108. sizeAsRunContainer := rc.getSizeInBytes()
  2109. sizeAsBitmapContainer := bitmapContainerSizeInBytes()
  2110. card := int(rc.cardinality())
  2111. sizeAsArrayContainer := arrayContainerSizeInBytes(card)
  2112. if sizeAsRunContainer <= minOfInt(sizeAsBitmapContainer, sizeAsArrayContainer) {
  2113. return rc
  2114. }
  2115. if card <= arrayDefaultMaxSize {
  2116. return rc.toArrayContainer()
  2117. }
  2118. bc := newBitmapContainerFromRun(rc)
  2119. return bc
  2120. }
  2121. func (rc *runContainer16) toArrayContainer() *arrayContainer {
  2122. ac := newArrayContainer()
  2123. for i := range rc.iv {
  2124. ac.iaddRange(int(rc.iv[i].start), int(rc.iv[i].last())+1)
  2125. }
  2126. return ac
  2127. }
  2128. func newRunContainer16FromContainer(c container) *runContainer16 {
  2129. switch x := c.(type) {
  2130. case *runContainer16:
  2131. return x.Clone()
  2132. case *arrayContainer:
  2133. return newRunContainer16FromArray(x)
  2134. case *bitmapContainer:
  2135. return newRunContainer16FromBitmapContainer(x)
  2136. }
  2137. panic("unsupported container type")
  2138. }
  2139. // And finds the intersection of rc and b.
  2140. func (rc *runContainer16) And(b *Bitmap) *Bitmap {
  2141. out := NewBitmap()
  2142. for _, p := range rc.iv {
  2143. plast := p.last()
  2144. for i := p.start; i <= plast; i++ {
  2145. if b.Contains(uint32(i)) {
  2146. out.Add(uint32(i))
  2147. }
  2148. }
  2149. }
  2150. return out
  2151. }
  2152. // Xor returns the exclusive-or of rc and b.
  2153. func (rc *runContainer16) Xor(b *Bitmap) *Bitmap {
  2154. out := b.Clone()
  2155. for _, p := range rc.iv {
  2156. plast := p.last()
  2157. for v := p.start; v <= plast; v++ {
  2158. w := uint32(v)
  2159. if out.Contains(w) {
  2160. out.RemoveRange(uint64(w), uint64(w+1))
  2161. } else {
  2162. out.Add(w)
  2163. }
  2164. }
  2165. }
  2166. return out
  2167. }
  2168. // Or returns the union of rc and b.
  2169. func (rc *runContainer16) Or(b *Bitmap) *Bitmap {
  2170. out := b.Clone()
  2171. for _, p := range rc.iv {
  2172. plast := p.last()
  2173. for v := p.start; v <= plast; v++ {
  2174. out.Add(uint32(v))
  2175. }
  2176. }
  2177. return out
  2178. }
  2179. // serializedSizeInBytes returns the number of bytes of memory
  2180. // required by this runContainer16. This is for the
  2181. // Roaring format, as specified https://github.com/RoaringBitmap/RoaringFormatSpec/
  2182. func (rc *runContainer16) serializedSizeInBytes() int {
  2183. // number of runs in one uint16, then each run
  2184. // needs two more uint16
  2185. return 2 + len(rc.iv)*4
  2186. }
  2187. func (rc *runContainer16) addOffset(x uint16) []container {
  2188. low := newRunContainer16()
  2189. high := newRunContainer16()
  2190. for _, iv := range rc.iv {
  2191. val := int(iv.start) + int(x)
  2192. finalVal := int(val) + int(iv.length)
  2193. if val <= 0xffff {
  2194. if finalVal <= 0xffff {
  2195. low.iv = append(low.iv, interval16{uint16(val), iv.length})
  2196. } else {
  2197. low.iv = append(low.iv, interval16{uint16(val), uint16(0xffff - val)})
  2198. high.iv = append(high.iv, interval16{uint16(0), uint16(finalVal & 0xffff)})
  2199. }
  2200. } else {
  2201. high.iv = append(high.iv, interval16{uint16(val & 0xffff), iv.length})
  2202. }
  2203. }
  2204. return []container{low, high}
  2205. }