You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

snapshot_index.go 19KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764
  1. // Copyright (c) 2017 Couchbase, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package scorch
  15. import (
  16. "container/heap"
  17. "encoding/binary"
  18. "fmt"
  19. "reflect"
  20. "sort"
  21. "sync"
  22. "sync/atomic"
  23. "github.com/RoaringBitmap/roaring"
  24. "github.com/blevesearch/bleve/v2/document"
  25. index "github.com/blevesearch/bleve_index_api"
  26. segment "github.com/blevesearch/scorch_segment_api/v2"
  27. "github.com/blevesearch/vellum"
  28. lev "github.com/blevesearch/vellum/levenshtein"
  29. )
  // lb1 and lb2 are reusable, thread-safe Levenshtein automaton builders.
  // init constructs lb1 for edit distance 1 and lb2 for edit distance 2.
  var lb1, lb2 *lev.LevenshteinAutomatonBuilder
  // asynchSegmentResult carries the outcome of one per-segment goroutine back
  // to the collecting caller over a channel. Only the fields relevant to the
  // particular operation are populated.
  type asynchSegmentResult struct {
  	// dict is set when a term dictionary is requested for random lookup.
  	dict segment.TermDictionary
  	// dictItr is set when a dictionary iterator is requested instead.
  	dictItr segment.DictionaryIterator
  	// index is the position of the originating segment in the snapshot.
  	index int
  	// docs holds the document numbers reported by the segment.
  	docs *roaring.Bitmap
  	// NOTE(review): postings is not populated by any code in this part of
  	// the file; confirm whether it is still used elsewhere.
  	postings segment.PostingsList
  	// err is the error, if any, hit while producing this result.
  	err error
  }
  40. var reflectStaticSizeIndexSnapshot int
  41. func init() {
  42. var is interface{} = IndexSnapshot{}
  43. reflectStaticSizeIndexSnapshot = int(reflect.TypeOf(is).Size())
  44. var err error
  45. lb1, err = lev.NewLevenshteinAutomatonBuilder(1, true)
  46. if err != nil {
  47. panic(fmt.Errorf("Levenshtein automaton ed1 builder err: %v", err))
  48. }
  49. lb2, err = lev.NewLevenshteinAutomatonBuilder(2, true)
  50. if err != nil {
  51. panic(fmt.Errorf("Levenshtein automaton ed2 builder err: %v", err))
  52. }
  53. }
  // IndexSnapshot groups a set of segment snapshots together with their
  // doc-number offsets, internal key/value data and a reference count.
  type IndexSnapshot struct {
  	// parent is the owning Scorch instance; it may be nil (DecRef checks).
  	parent *Scorch
  	// segment lists the segment snapshots covered by this index snapshot.
  	segment []*SegmentSnapshot
  	// offsets[k] is subtracted from a global doc number to obtain the
  	// local doc number within segment k.
  	offsets []uint64
  	// internal holds internal key/value pairs, keyed by string(key).
  	internal map[string][]byte
  	// epoch is passed to the parent when the snapshot becomes removable.
  	epoch uint64
  	// size is the value accumulated by updateSize and reported by Size.
  	size    uint64
  	creator string

  	m    sync.Mutex // Protects the fields that follow.
  	refs int64

  	m2        sync.Mutex                                 // Protects the fields that follow.
  	fieldTFRs map[string][]*IndexSnapshotTermFieldReader // keyed by field, recycled TFR's
  }
  // Segments returns the segment snapshots that make up this index snapshot.
  // The returned slice is the snapshot's own slice, not a copy.
  func (i *IndexSnapshot) Segments() []*SegmentSnapshot {
  	return i.segment
  }
  // Internal returns the snapshot's internal key/value map. The map itself is
  // returned, not a copy.
  func (i *IndexSnapshot) Internal() map[string][]byte {
  	return i.internal
  }
  73. func (i *IndexSnapshot) AddRef() {
  74. i.m.Lock()
  75. i.refs++
  76. i.m.Unlock()
  77. }
  78. func (i *IndexSnapshot) DecRef() (err error) {
  79. i.m.Lock()
  80. i.refs--
  81. if i.refs == 0 {
  82. for _, s := range i.segment {
  83. if s != nil {
  84. err2 := s.segment.DecRef()
  85. if err == nil {
  86. err = err2
  87. }
  88. }
  89. }
  90. if i.parent != nil {
  91. go i.parent.AddEligibleForRemoval(i.epoch)
  92. }
  93. }
  94. i.m.Unlock()
  95. return err
  96. }
  // Close releases the caller's reference by delegating to DecRef.
  func (i *IndexSnapshot) Close() error {
  	return i.DecRef()
  }
  // Size returns the value recorded in i.size, converted to int.
  func (i *IndexSnapshot) Size() int {
  	return int(i.size)
  }
  103. func (i *IndexSnapshot) updateSize() {
  104. i.size += uint64(reflectStaticSizeIndexSnapshot)
  105. for _, s := range i.segment {
  106. i.size += uint64(s.Size())
  107. }
  108. }
  109. func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string,
  110. makeItr func(i segment.TermDictionary) segment.DictionaryIterator,
  111. randomLookup bool) (*IndexSnapshotFieldDict, error) {
  112. results := make(chan *asynchSegmentResult)
  113. for index, segment := range i.segment {
  114. go func(index int, segment *SegmentSnapshot) {
  115. dict, err := segment.segment.Dictionary(field)
  116. if err != nil {
  117. results <- &asynchSegmentResult{err: err}
  118. } else {
  119. if randomLookup {
  120. results <- &asynchSegmentResult{dict: dict}
  121. } else {
  122. results <- &asynchSegmentResult{dictItr: makeItr(dict)}
  123. }
  124. }
  125. }(index, segment)
  126. }
  127. var err error
  128. rv := &IndexSnapshotFieldDict{
  129. snapshot: i,
  130. cursors: make([]*segmentDictCursor, 0, len(i.segment)),
  131. }
  132. for count := 0; count < len(i.segment); count++ {
  133. asr := <-results
  134. if asr.err != nil && err == nil {
  135. err = asr.err
  136. } else {
  137. if !randomLookup {
  138. next, err2 := asr.dictItr.Next()
  139. if err2 != nil && err == nil {
  140. err = err2
  141. }
  142. if next != nil {
  143. rv.cursors = append(rv.cursors, &segmentDictCursor{
  144. itr: asr.dictItr,
  145. curr: *next,
  146. })
  147. }
  148. } else {
  149. rv.cursors = append(rv.cursors, &segmentDictCursor{
  150. dict: asr.dict,
  151. })
  152. }
  153. }
  154. }
  155. // after ensuring we've read all items on channel
  156. if err != nil {
  157. return nil, err
  158. }
  159. if !randomLookup {
  160. // prepare heap
  161. heap.Init(rv)
  162. }
  163. return rv, nil
  164. }
  165. func (i *IndexSnapshot) FieldDict(field string) (index.FieldDict, error) {
  166. return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
  167. return i.AutomatonIterator(nil, nil, nil)
  168. }, false)
  169. }
  // calculateExclusiveEndFromInclusiveEnd produces the key that immediately
  // follows inclusiveEnd under memcmp-style ordering, suitable for use as the
  // end key of a traditional [inclusive, exclusive) start/end range.
  //
  // An empty or nil input is returned unchanged. Otherwise a new slice is
  // returned and the input is left unmodified.
  func calculateExclusiveEndFromInclusiveEnd(inclusiveEnd []byte) []byte {
  	n := len(inclusiveEnd)
  	if n == 0 {
  		return inclusiveEnd
  	}

  	next := make([]byte, n)
  	copy(next, inclusiveEnd)
  	if next[n-1] == 0xff {
  		// The last byte cannot be incremented; the next key is simply
  		// one byte longer.
  		return append(next, 0x0)
  	}
  	next[n-1]++
  	return next
  }
  190. func (i *IndexSnapshot) FieldDictRange(field string, startTerm []byte,
  191. endTerm []byte) (index.FieldDict, error) {
  192. return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
  193. endTermExclusive := calculateExclusiveEndFromInclusiveEnd(endTerm)
  194. return i.AutomatonIterator(nil, startTerm, endTermExclusive)
  195. }, false)
  196. }
  // calculateExclusiveEndFromPrefix produces the first key that does not
  // have the same prefix as the input bytes, suitable for use as the end key
  // of a traditional [inclusive, exclusive) start/end range.
  //
  // The result is the input with its trailing 0xff bytes removed and the
  // last remaining byte incremented; for example {0x01, 0xff} yields {0x02}.
  // It returns nil when the input is empty or consists solely of 0xff bytes,
  // because no end key exists for such a prefix. The input is not modified.
  func calculateExclusiveEndFromPrefix(in []byte) []byte {
  	rv := make([]byte, len(in))
  	copy(rv, in)
  	for i := len(rv) - 1; i >= 0; i-- {
  		rv[i]++
  		if rv[i] != 0 {
  			// Didn't overflow, so stop. Bytes after position i wrapped
  			// around to zero and must be dropped: keeping them would
  			// yield a key that sorts after rv[:i+1], letting that
  			// shorter key slip into the range.
  			return rv[:i+1]
  		}
  	}
  	// all bytes were 0xff, so return nil
  	// as there is no end key for this prefix
  	return nil
  }
  214. func (i *IndexSnapshot) FieldDictPrefix(field string,
  215. termPrefix []byte) (index.FieldDict, error) {
  216. termPrefixEnd := calculateExclusiveEndFromPrefix(termPrefix)
  217. return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
  218. return i.AutomatonIterator(nil, termPrefix, termPrefixEnd)
  219. }, false)
  220. }
  221. func (i *IndexSnapshot) FieldDictRegexp(field string,
  222. termRegex string) (index.FieldDict, error) {
  223. // TODO: potential optimization where the literal prefix represents the,
  224. // entire regexp, allowing us to use PrefixIterator(prefixTerm)?
  225. a, prefixBeg, prefixEnd, err := parseRegexp(termRegex)
  226. if err != nil {
  227. return nil, err
  228. }
  229. return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
  230. return i.AutomatonIterator(a, prefixBeg, prefixEnd)
  231. }, false)
  232. }
  233. func (i *IndexSnapshot) getLevAutomaton(term string,
  234. fuzziness uint8) (vellum.Automaton, error) {
  235. if fuzziness == 1 {
  236. return lb1.BuildDfa(term, fuzziness)
  237. } else if fuzziness == 2 {
  238. return lb2.BuildDfa(term, fuzziness)
  239. }
  240. return nil, fmt.Errorf("fuzziness exceeds the max limit")
  241. }
  242. func (i *IndexSnapshot) FieldDictFuzzy(field string,
  243. term string, fuzziness int, prefix string) (index.FieldDict, error) {
  244. a, err := i.getLevAutomaton(term, uint8(fuzziness))
  245. if err != nil {
  246. return nil, err
  247. }
  248. var prefixBeg, prefixEnd []byte
  249. if prefix != "" {
  250. prefixBeg = []byte(prefix)
  251. prefixEnd = calculateExclusiveEndFromPrefix(prefixBeg)
  252. }
  253. return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
  254. return i.AutomatonIterator(a, prefixBeg, prefixEnd)
  255. }, false)
  256. }
  // FieldDictContains returns a random-lookup dictionary view for field. It
  // calls newIndexSnapshotFieldDict with no iterator factory and with
  // randomLookup set to true.
  func (i *IndexSnapshot) FieldDictContains(field string) (index.FieldDictContains, error) {
  	return i.newIndexSnapshotFieldDict(field, nil, true)
  }
  260. func (i *IndexSnapshot) DocIDReaderAll() (index.DocIDReader, error) {
  261. results := make(chan *asynchSegmentResult)
  262. for index, segment := range i.segment {
  263. go func(index int, segment *SegmentSnapshot) {
  264. results <- &asynchSegmentResult{
  265. index: index,
  266. docs: segment.DocNumbersLive(),
  267. }
  268. }(index, segment)
  269. }
  270. return i.newDocIDReader(results)
  271. }
  272. func (i *IndexSnapshot) DocIDReaderOnly(ids []string) (index.DocIDReader, error) {
  273. results := make(chan *asynchSegmentResult)
  274. for index, segment := range i.segment {
  275. go func(index int, segment *SegmentSnapshot) {
  276. docs, err := segment.DocNumbers(ids)
  277. if err != nil {
  278. results <- &asynchSegmentResult{err: err}
  279. } else {
  280. results <- &asynchSegmentResult{
  281. index: index,
  282. docs: docs,
  283. }
  284. }
  285. }(index, segment)
  286. }
  287. return i.newDocIDReader(results)
  288. }
  289. func (i *IndexSnapshot) newDocIDReader(results chan *asynchSegmentResult) (index.DocIDReader, error) {
  290. rv := &IndexSnapshotDocIDReader{
  291. snapshot: i,
  292. iterators: make([]roaring.IntIterable, len(i.segment)),
  293. }
  294. var err error
  295. for count := 0; count < len(i.segment); count++ {
  296. asr := <-results
  297. if asr.err != nil {
  298. if err == nil {
  299. // returns the first error encountered
  300. err = asr.err
  301. }
  302. } else if err == nil {
  303. rv.iterators[asr.index] = asr.docs.Iterator()
  304. }
  305. }
  306. if err != nil {
  307. return nil, err
  308. }
  309. return rv, nil
  310. }
  311. func (i *IndexSnapshot) Fields() ([]string, error) {
  312. // FIXME not making this concurrent for now as it's not used in hot path
  313. // of any searches at the moment (just a debug aid)
  314. fieldsMap := map[string]struct{}{}
  315. for _, segment := range i.segment {
  316. fields := segment.Fields()
  317. for _, field := range fields {
  318. fieldsMap[field] = struct{}{}
  319. }
  320. }
  321. rv := make([]string, 0, len(fieldsMap))
  322. for k := range fieldsMap {
  323. rv = append(rv, k)
  324. }
  325. return rv, nil
  326. }
  // GetInternal returns the value stored under key in the snapshot's internal
  // map; a missing key yields a nil slice. The returned error is always nil.
  func (i *IndexSnapshot) GetInternal(key []byte) ([]byte, error) {
  	return i.internal[string(key)], nil
  }
  330. func (i *IndexSnapshot) DocCount() (uint64, error) {
  331. var rv uint64
  332. for _, segment := range i.segment {
  333. rv += segment.Count()
  334. }
  335. return rv, nil
  336. }
  337. func (i *IndexSnapshot) Document(id string) (rv index.Document, err error) {
  338. // FIXME could be done more efficiently directly, but reusing for simplicity
  339. tfr, err := i.TermFieldReader([]byte(id), "_id", false, false, false)
  340. if err != nil {
  341. return nil, err
  342. }
  343. defer func() {
  344. if cerr := tfr.Close(); err == nil && cerr != nil {
  345. err = cerr
  346. }
  347. }()
  348. next, err := tfr.Next(nil)
  349. if err != nil {
  350. return nil, err
  351. }
  352. if next == nil {
  353. // no such doc exists
  354. return nil, nil
  355. }
  356. docNum, err := docInternalToNumber(next.ID)
  357. if err != nil {
  358. return nil, err
  359. }
  360. segmentIndex, localDocNum := i.segmentIndexAndLocalDocNumFromGlobal(docNum)
  361. rvd := document.NewDocument(id)
  362. err = i.segment[segmentIndex].VisitDocument(localDocNum, func(name string, typ byte, val []byte, pos []uint64) bool {
  363. if name == "_id" {
  364. return true
  365. }
  366. // copy value, array positions to preserve them beyond the scope of this callback
  367. value := append([]byte(nil), val...)
  368. arrayPos := append([]uint64(nil), pos...)
  369. switch typ {
  370. case 't':
  371. rvd.AddField(document.NewTextField(name, arrayPos, value))
  372. case 'n':
  373. rvd.AddField(document.NewNumericFieldFromBytes(name, arrayPos, value))
  374. case 'd':
  375. rvd.AddField(document.NewDateTimeFieldFromBytes(name, arrayPos, value))
  376. case 'b':
  377. rvd.AddField(document.NewBooleanFieldFromBytes(name, arrayPos, value))
  378. case 'g':
  379. rvd.AddField(document.NewGeoPointFieldFromBytes(name, arrayPos, value))
  380. }
  381. return true
  382. })
  383. if err != nil {
  384. return nil, err
  385. }
  386. return rvd, nil
  387. }
  388. func (i *IndexSnapshot) segmentIndexAndLocalDocNumFromGlobal(docNum uint64) (int, uint64) {
  389. segmentIndex := sort.Search(len(i.offsets),
  390. func(x int) bool {
  391. return i.offsets[x] > docNum
  392. }) - 1
  393. localDocNum := docNum - i.offsets[segmentIndex]
  394. return int(segmentIndex), localDocNum
  395. }
  396. func (i *IndexSnapshot) ExternalID(id index.IndexInternalID) (string, error) {
  397. docNum, err := docInternalToNumber(id)
  398. if err != nil {
  399. return "", err
  400. }
  401. segmentIndex, localDocNum := i.segmentIndexAndLocalDocNumFromGlobal(docNum)
  402. v, err := i.segment[segmentIndex].DocID(localDocNum)
  403. if err != nil {
  404. return "", err
  405. }
  406. if v == nil {
  407. return "", fmt.Errorf("document number %d not found", docNum)
  408. }
  409. return string(v), nil
  410. }
  411. func (i *IndexSnapshot) InternalID(id string) (rv index.IndexInternalID, err error) {
  412. // FIXME could be done more efficiently directly, but reusing for simplicity
  413. tfr, err := i.TermFieldReader([]byte(id), "_id", false, false, false)
  414. if err != nil {
  415. return nil, err
  416. }
  417. defer func() {
  418. if cerr := tfr.Close(); err == nil && cerr != nil {
  419. err = cerr
  420. }
  421. }()
  422. next, err := tfr.Next(nil)
  423. if err != nil || next == nil {
  424. return nil, err
  425. }
  426. return next.ID, nil
  427. }
  428. func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq,
  429. includeNorm, includeTermVectors bool) (index.TermFieldReader, error) {
  430. rv := i.allocTermFieldReaderDicts(field)
  431. rv.term = term
  432. rv.field = field
  433. rv.snapshot = i
  434. if rv.postings == nil {
  435. rv.postings = make([]segment.PostingsList, len(i.segment))
  436. }
  437. if rv.iterators == nil {
  438. rv.iterators = make([]segment.PostingsIterator, len(i.segment))
  439. }
  440. rv.segmentOffset = 0
  441. rv.includeFreq = includeFreq
  442. rv.includeNorm = includeNorm
  443. rv.includeTermVectors = includeTermVectors
  444. rv.currPosting = nil
  445. rv.currID = rv.currID[:0]
  446. if rv.dicts == nil {
  447. rv.dicts = make([]segment.TermDictionary, len(i.segment))
  448. for i, segment := range i.segment {
  449. dict, err := segment.segment.Dictionary(field)
  450. if err != nil {
  451. return nil, err
  452. }
  453. rv.dicts[i] = dict
  454. }
  455. }
  456. for i, segment := range i.segment {
  457. pl, err := rv.dicts[i].PostingsList(term, segment.deleted, rv.postings[i])
  458. if err != nil {
  459. return nil, err
  460. }
  461. rv.postings[i] = pl
  462. rv.iterators[i] = pl.Iterator(includeFreq, includeNorm, includeTermVectors, rv.iterators[i])
  463. }
  464. atomic.AddUint64(&i.parent.stats.TotTermSearchersStarted, uint64(1))
  465. return rv, nil
  466. }
  467. func (i *IndexSnapshot) allocTermFieldReaderDicts(field string) (tfr *IndexSnapshotTermFieldReader) {
  468. i.m2.Lock()
  469. if i.fieldTFRs != nil {
  470. tfrs := i.fieldTFRs[field]
  471. last := len(tfrs) - 1
  472. if last >= 0 {
  473. tfr = tfrs[last]
  474. tfrs[last] = nil
  475. i.fieldTFRs[field] = tfrs[:last]
  476. i.m2.Unlock()
  477. return
  478. }
  479. }
  480. i.m2.Unlock()
  481. return &IndexSnapshotTermFieldReader{
  482. recycle: true,
  483. }
  484. }
  485. func (i *IndexSnapshot) recycleTermFieldReader(tfr *IndexSnapshotTermFieldReader) {
  486. if !tfr.recycle {
  487. // Do not recycle an optimized unadorned term field reader (used for
  488. // ConjunctionUnadorned or DisjunctionUnadorned), during when a fresh
  489. // roaring.Bitmap is built by AND-ing or OR-ing individual bitmaps,
  490. // and we'll need to release them for GC. (See MB-40916)
  491. return
  492. }
  493. i.parent.rootLock.RLock()
  494. obsolete := i.parent.root != i
  495. i.parent.rootLock.RUnlock()
  496. if obsolete {
  497. // if we're not the current root (mutations happened), don't bother recycling
  498. return
  499. }
  500. i.m2.Lock()
  501. if i.fieldTFRs == nil {
  502. i.fieldTFRs = map[string][]*IndexSnapshotTermFieldReader{}
  503. }
  504. i.fieldTFRs[tfr.field] = append(i.fieldTFRs[tfr.field], tfr)
  505. i.m2.Unlock()
  506. }
  // docNumberToBytes encodes in as 8 big-endian bytes. buf is reused when it
  // already has length 8, or is resliced to length 8 when its capacity
  // allows; otherwise a new 8-byte slice is allocated. The slice holding the
  // encoding is returned.
  func docNumberToBytes(buf []byte, in uint64) []byte {
  	switch {
  	case len(buf) == 8:
  		// already the right length, use as is
  	case cap(buf) >= 8:
  		buf = buf[:8]
  	default:
  		buf = make([]byte, 8)
  	}
  	binary.BigEndian.PutUint64(buf, in)
  	return buf
  }
  518. func docInternalToNumber(in index.IndexInternalID) (uint64, error) {
  519. if len(in) != 8 {
  520. return 0, fmt.Errorf("wrong len for IndexInternalID: %q", in)
  521. }
  522. return binary.BigEndian.Uint64(in), nil
  523. }
  524. func (i *IndexSnapshot) documentVisitFieldTermsOnSegment(
  525. segmentIndex int, localDocNum uint64, fields []string, cFields []string,
  526. visitor index.DocValueVisitor, dvs segment.DocVisitState) (
  527. cFieldsOut []string, dvsOut segment.DocVisitState, err error) {
  528. ss := i.segment[segmentIndex]
  529. var vFields []string // fields that are visitable via the segment
  530. ssv, ssvOk := ss.segment.(segment.DocValueVisitable)
  531. if ssvOk && ssv != nil {
  532. vFields, err = ssv.VisitableDocValueFields()
  533. if err != nil {
  534. return nil, nil, err
  535. }
  536. }
  537. var errCh chan error
  538. // cFields represents the fields that we'll need from the
  539. // cachedDocs, and might be optionally be provided by the caller,
  540. // if the caller happens to know we're on the same segmentIndex
  541. // from a previous invocation
  542. if cFields == nil {
  543. cFields = subtractStrings(fields, vFields)
  544. if !ss.cachedDocs.hasFields(cFields) {
  545. errCh = make(chan error, 1)
  546. go func() {
  547. err := ss.cachedDocs.prepareFields(cFields, ss)
  548. if err != nil {
  549. errCh <- err
  550. }
  551. close(errCh)
  552. }()
  553. }
  554. }
  555. if ssvOk && ssv != nil && len(vFields) > 0 {
  556. dvs, err = ssv.VisitDocValues(localDocNum, fields, visitor, dvs)
  557. if err != nil {
  558. return nil, nil, err
  559. }
  560. }
  561. if errCh != nil {
  562. err = <-errCh
  563. if err != nil {
  564. return nil, nil, err
  565. }
  566. }
  567. if len(cFields) > 0 {
  568. ss.cachedDocs.visitDoc(localDocNum, cFields, visitor)
  569. }
  570. return cFields, dvs, nil
  571. }
  // DocValueReader returns a DocValueReader bound to this snapshot and the
  // given fields, with no current segment selected (currSegmentIndex is -1).
  // The returned error is always nil.
  func (i *IndexSnapshot) DocValueReader(fields []string) (
  	index.DocValueReader, error) {
  	return &DocValueReader{i: i, fields: fields, currSegmentIndex: -1}, nil
  }
  // DocValueReader visits doc values for a fixed set of fields on an
  // IndexSnapshot, carrying per-segment state from one call to the next.
  type DocValueReader struct {
  	// i is the snapshot being read.
  	i *IndexSnapshot
  	// fields are the field names whose values are visited.
  	fields []string
  	// dvs is the segment visit state handed back in on each call.
  	dvs segment.DocVisitState

  	// currSegmentIndex is the segment of the most recent visit, or -1
  	// before the first one.
  	currSegmentIndex int
  	// currCachedFields is reset to nil whenever the segment changes.
  	currCachedFields []string
  }
  583. func (dvr *DocValueReader) VisitDocValues(id index.IndexInternalID,
  584. visitor index.DocValueVisitor) (err error) {
  585. docNum, err := docInternalToNumber(id)
  586. if err != nil {
  587. return err
  588. }
  589. segmentIndex, localDocNum := dvr.i.segmentIndexAndLocalDocNumFromGlobal(docNum)
  590. if segmentIndex >= len(dvr.i.segment) {
  591. return nil
  592. }
  593. if dvr.currSegmentIndex != segmentIndex {
  594. dvr.currSegmentIndex = segmentIndex
  595. dvr.currCachedFields = nil
  596. }
  597. dvr.currCachedFields, dvr.dvs, err = dvr.i.documentVisitFieldTermsOnSegment(
  598. dvr.currSegmentIndex, localDocNum, dvr.fields, dvr.currCachedFields, visitor, dvr.dvs)
  599. return err
  600. }
  601. func (i *IndexSnapshot) DumpAll() chan interface{} {
  602. rv := make(chan interface{})
  603. go func() {
  604. close(rv)
  605. }()
  606. return rv
  607. }
  608. func (i *IndexSnapshot) DumpDoc(id string) chan interface{} {
  609. rv := make(chan interface{})
  610. go func() {
  611. close(rv)
  612. }()
  613. return rv
  614. }
  615. func (i *IndexSnapshot) DumpFields() chan interface{} {
  616. rv := make(chan interface{})
  617. go func() {
  618. close(rv)
  619. }()
  620. return rv
  621. }
  622. func (i *IndexSnapshot) diskSegmentsPaths() map[string]struct{} {
  623. rv := make(map[string]struct{}, len(i.segment))
  624. for _, segmentSnapshot := range i.segment {
  625. if seg, ok := segmentSnapshot.segment.(segment.PersistedSegment); ok {
  626. rv[seg.Path()] = struct{}{}
  627. }
  628. }
  629. return rv
  630. }
  631. // reClaimableDocsRatio gives a ratio about the obsoleted or
  632. // reclaimable documents present in a given index snapshot.
  633. func (i *IndexSnapshot) reClaimableDocsRatio() float64 {
  634. var totalCount, liveCount uint64
  635. for _, segmentSnapshot := range i.segment {
  636. if _, ok := segmentSnapshot.segment.(segment.PersistedSegment); ok {
  637. totalCount += uint64(segmentSnapshot.FullSize())
  638. liveCount += uint64(segmentSnapshot.Count())
  639. }
  640. }
  641. if totalCount > 0 {
  642. return float64(totalCount-liveCount) / float64(totalCount)
  643. }
  644. return 0
  645. }
  // subtractStrings returns the elements of a that do not occur in b,
  // preserving their order (and any duplicates) from a. When b is empty, a
  // itself is returned rather than a copy.
  func subtractStrings(a, b []string) []string {
  	if len(b) == 0 {
  		return a
  	}

  	rv := make([]string, 0, len(a))
  	for _, candidate := range a {
  		excluded := false
  		for _, drop := range b {
  			if candidate == drop {
  				excluded = true
  				break
  			}
  		}
  		if !excluded {
  			rv = append(rv, candidate)
  		}
  	}
  	return rv
  }