You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

index.go 6.7KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241
  1. // Copyright (c) 2014 Couchbase, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package index
  15. import (
  16. "bytes"
  17. "encoding/json"
  18. "fmt"
  19. "github.com/blevesearch/bleve/document"
  20. "github.com/blevesearch/bleve/index/store"
  21. )
// ErrorUnknownStorageType is a package-level error value whose message is
// "unknown storage type".
// NOTE(review): presumably returned when a requested storage backend is not
// recognised; no use site is visible in this file — confirm against callers.
var ErrorUnknownStorageType = fmt.Errorf("unknown storage type")
// Index is the set of operations an index implementation must provide.
// Behaviour beyond what the signatures show is defined by the implementations,
// which are not part of this file.
type Index interface {
	// Open reports failure through its error result.
	// NOTE(review): presumably must be called before any other method — confirm.
	Open() error
	// Close reports failure through its error result.
	Close() error
	// Update takes the document to apply to the index.
	// NOTE(review): presumably inserts the document when it is not yet
	// indexed and replaces it otherwise — confirm against implementations.
	Update(doc *document.Document) error
	// Delete takes the id of the document to remove.
	Delete(id string) error
	// Batch submits the operations collected in a Batch in a single call.
	Batch(batch *Batch) error
	// SetInternal takes a raw key and value.
	// NOTE(review): presumably stored apart from document data — confirm.
	SetInternal(key, val []byte) error
	// DeleteInternal takes the raw key to remove.
	DeleteInternal(key []byte) error
	// Reader returns a low-level accessor on the index data. Close it to
	// release associated resources.
	Reader() (IndexReader, error)
	// Stats returns a value that can be marshaled to JSON.
	Stats() json.Marshaler
	// StatsMap returns a generic string-keyed map.
	// NOTE(review): presumably the same statistics as Stats — confirm.
	StatsMap() map[string]interface{}
	// Analyze produces an AnalysisResult for the given document.
	Analyze(d *document.Document) *AnalysisResult
	// Advanced returns a store.KVStore.
	// NOTE(review): presumably the store backing this index — confirm.
	Advanced() (store.KVStore, error)
}
// DocumentFieldTermVisitor is a callback that receives a field name and a
// term. It is the visitor type accepted by
// IndexReader.DocumentVisitFieldTerms.
type DocumentFieldTermVisitor func(field string, term []byte)
// IndexReader is the read-side interface of an index, as returned by
// Index.Reader. Close it when finished to release associated resources.
type IndexReader interface {
	// TermFieldReader returns a reader for the given term in the given field.
	// NOTE(review): presumably the include* flags control whether Freq, Norm
	// and Vectors are populated on the returned TermFieldDoc values — confirm.
	TermFieldReader(term []byte, field string, includeFreq, includeNorm, includeTermVectors bool) (TermFieldReader, error)
	// DocIDReaderAll returns an iterator over all doc ids.
	// The caller must close the returned instance to release associated resources.
	DocIDReaderAll() (DocIDReader, error)
	// DocIDReaderOnly returns an iterator built from the given ids.
	// NOTE(review): presumably restricted to exactly those documents — confirm.
	DocIDReaderOnly(ids []string) (DocIDReader, error)
	// FieldDict returns a dictionary iterator for the given field.
	FieldDict(field string) (FieldDict, error)
	// FieldDictRange is currently defined to include the start and end terms
	FieldDictRange(field string, startTerm []byte, endTerm []byte) (FieldDict, error)
	// FieldDictPrefix returns a dictionary iterator for the given field and
	// term prefix.
	FieldDictPrefix(field string, termPrefix []byte) (FieldDict, error)
	// Document returns the document for the given external (string) id.
	Document(id string) (*document.Document, error)
	// DocumentVisitFieldTerms takes an internal document id, a list of field
	// names and a visitor callback.
	// NOTE(review): presumably the visitor is invoked once per term of each
	// listed field — confirm against implementations.
	DocumentVisitFieldTerms(id IndexInternalID, fields []string, visitor DocumentFieldTermVisitor) error
	// Fields returns a list of field names.
	Fields() ([]string, error)
	// GetInternal returns the raw value for the given raw key.
	// NOTE(review): presumably the counterpart of Index.SetInternal — confirm.
	GetInternal(key []byte) ([]byte, error)
	// DocCount returns a document count.
	DocCount() (uint64, error)
	// ExternalID maps an internal document id to its external string form.
	ExternalID(id IndexInternalID) (string, error)
	// InternalID maps an external string id to its internal form.
	InternalID(id string) (IndexInternalID, error)
	// DumpAll, DumpDoc and DumpFields each return a channel of untyped values.
	// NOTE(review): presumably debugging dumps of index contents; who closes
	// the channels is not visible here — confirm.
	DumpAll() chan interface{}
	DumpDoc(id string) chan interface{}
	DumpFields() chan interface{}
	// Close reports failure through its error result.
	Close() error
}
  62. // FieldTerms contains the terms used by a document, keyed by field
  63. type FieldTerms map[string][]string
  64. // FieldsNotYetCached returns a list of fields not yet cached out of a larger list of fields
  65. func (f FieldTerms) FieldsNotYetCached(fields []string) []string {
  66. rv := make([]string, 0, len(fields))
  67. for _, field := range fields {
  68. if _, ok := f[field]; !ok {
  69. rv = append(rv, field)
  70. }
  71. }
  72. return rv
  73. }
  74. // Merge will combine two FieldTerms
  75. // it assumes that the terms lists are complete (thus do not need to be merged)
  76. // field terms from the other list always replace the ones in the receiver
  77. func (f FieldTerms) Merge(other FieldTerms) {
  78. for field, terms := range other {
  79. f[field] = terms
  80. }
  81. }
// TermFieldVector is the element type of TermFieldDoc.Vectors.
// NOTE(review): the field meanings below are inferred from their names only;
// confirm against the code that fills them in.
type TermFieldVector struct {
	// Field is presumably the name of the field the term occurred in.
	Field string
	// ArrayPositions presumably locates the value inside (nested) array fields.
	ArrayPositions []uint64
	// Pos is presumably the token position of the occurrence.
	Pos uint64
	// Start and End presumably delimit the occurrence as offsets into the
	// field value.
	Start uint64
	End uint64
}
  89. // IndexInternalID is an opaque document identifier interal to the index impl
  90. type IndexInternalID []byte
  91. func (id IndexInternalID) Equals(other IndexInternalID) bool {
  92. return id.Compare(other) == 0
  93. }
  94. func (id IndexInternalID) Compare(other IndexInternalID) int {
  95. return bytes.Compare(id, other)
  96. }
  97. type TermFieldDoc struct {
  98. Term string
  99. ID IndexInternalID
  100. Freq uint64
  101. Norm float64
  102. Vectors []*TermFieldVector
  103. }
  104. // Reset allows an already allocated TermFieldDoc to be reused
  105. func (tfd *TermFieldDoc) Reset() *TermFieldDoc {
  106. // remember the []byte used for the ID
  107. id := tfd.ID
  108. // idiom to copy over from empty TermFieldDoc (0 allocations)
  109. *tfd = TermFieldDoc{}
  110. // reuse the []byte already allocated (and reset len to 0)
  111. tfd.ID = id[:0]
  112. return tfd
  113. }
// TermFieldReader is the interface exposing the enumeration of documents
// containing a given term in a given field. Documents are returned in byte
// lexicographic order over their identifiers.
type TermFieldReader interface {
	// Next returns the next document containing the term in this field, or nil
	// when it reaches the end of the enumeration. The preAlloced TermFieldDoc
	// is optional, and when non-nil, will be used instead of allocating memory.
	Next(preAlloced *TermFieldDoc) (*TermFieldDoc, error)
	// Advance resets the enumeration at specified document or its immediate
	// follower. preAlloced is accepted here as well.
	// NOTE(review): presumably it is reused the same way as in Next — confirm.
	Advance(ID IndexInternalID, preAlloced *TermFieldDoc) (*TermFieldDoc, error)
	// Count returns the number of documents containing the term in this field.
	Count() uint64
	// Close reports failure through its error result.
	// NOTE(review): presumably releases resources held by the reader — confirm.
	Close() error
}
// DictEntry is the value produced by FieldDict.Next.
type DictEntry struct {
	// Term is the dictionary term.
	Term string
	// Count is a counter associated with Term.
	// NOTE(review): presumably the number of documents containing the term —
	// confirm against the FieldDict implementations.
	Count uint64
}
// FieldDict iterates over DictEntry values. Instances are obtained from
// IndexReader.FieldDict, FieldDictRange and FieldDictPrefix.
type FieldDict interface {
	// Next returns the next entry.
	// NOTE(review): how the end of the iteration is signalled (nil entry or
	// an error) is not visible in this file — confirm.
	Next() (*DictEntry, error)
	// Close reports failure through its error result.
	Close() error
}
// DocIDReader is the interface exposing enumeration of documents identifiers.
// Close the reader to release associated resources.
type DocIDReader interface {
	// Next returns the next document internal identifier in the natural
	// index order, nil when the end of the sequence is reached.
	Next() (IndexInternalID, error)
	// Advance resets the iteration to the first internal identifier greater than
	// or equal to ID. If ID is smaller than the start of the range, the iteration
	// will start there instead. If ID is greater than or equal to the end of
	// the range, Next() call will return io.EOF.
	// NOTE(review): this io.EOF statement differs from the comment on Next,
	// which describes a nil identifier at the end — confirm which holds.
	Advance(ID IndexInternalID) (IndexInternalID, error)
	// Close reports failure through its error result.
	Close() error
}
  150. type Batch struct {
  151. IndexOps map[string]*document.Document
  152. InternalOps map[string][]byte
  153. }
  154. func NewBatch() *Batch {
  155. return &Batch{
  156. IndexOps: make(map[string]*document.Document),
  157. InternalOps: make(map[string][]byte),
  158. }
  159. }
  160. func (b *Batch) Update(doc *document.Document) {
  161. b.IndexOps[doc.ID] = doc
  162. }
  163. func (b *Batch) Delete(id string) {
  164. b.IndexOps[id] = nil
  165. }
  166. func (b *Batch) SetInternal(key, val []byte) {
  167. b.InternalOps[string(key)] = val
  168. }
  169. func (b *Batch) DeleteInternal(key []byte) {
  170. b.InternalOps[string(key)] = nil
  171. }
  172. func (b *Batch) String() string {
  173. rv := fmt.Sprintf("Batch (%d ops, %d internal ops)\n", len(b.IndexOps), len(b.InternalOps))
  174. for k, v := range b.IndexOps {
  175. if v != nil {
  176. rv += fmt.Sprintf("\tINDEX - '%s'\n", k)
  177. } else {
  178. rv += fmt.Sprintf("\tDELETE - '%s'\n", k)
  179. }
  180. }
  181. for k, v := range b.InternalOps {
  182. if v != nil {
  183. rv += fmt.Sprintf("\tSET INTERNAL - '%s'\n", k)
  184. } else {
  185. rv += fmt.Sprintf("\tDELETE INTERNAL - '%s'\n", k)
  186. }
  187. }
  188. return rv
  189. }
  190. func (b *Batch) Reset() {
  191. b.IndexOps = make(map[string]*document.Document)
  192. b.InternalOps = make(map[string][]byte)
  193. }