You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

reader.go 8.9KB


  1. // Copyright (c) 2014 Couchbase, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package upsidedown
  15. import (
  16. "bytes"
  17. "sort"
  18. "sync/atomic"
  19. "github.com/blevesearch/bleve/index"
  20. "github.com/blevesearch/bleve/index/store"
  21. )
  22. type UpsideDownCouchTermFieldReader struct {
  23. count uint64
  24. indexReader *IndexReader
  25. iterator store.KVIterator
  26. term []byte
  27. tfrNext *TermFrequencyRow
  28. tfrPrealloc TermFrequencyRow
  29. keyBuf []byte
  30. field uint16
  31. includeTermVectors bool
  32. }
  33. func newUpsideDownCouchTermFieldReader(indexReader *IndexReader, term []byte, field uint16, includeFreq, includeNorm, includeTermVectors bool) (*UpsideDownCouchTermFieldReader, error) {
  34. bufNeeded := termFrequencyRowKeySize(term, nil)
  35. if bufNeeded < dictionaryRowKeySize(term) {
  36. bufNeeded = dictionaryRowKeySize(term)
  37. }
  38. buf := make([]byte, bufNeeded)
  39. bufUsed := dictionaryRowKeyTo(buf, field, term)
  40. val, err := indexReader.kvreader.Get(buf[:bufUsed])
  41. if err != nil {
  42. return nil, err
  43. }
  44. if val == nil {
  45. atomic.AddUint64(&indexReader.index.stats.termSearchersStarted, uint64(1))
  46. rv := &UpsideDownCouchTermFieldReader{
  47. count: 0,
  48. term: term,
  49. field: field,
  50. includeTermVectors: includeTermVectors,
  51. }
  52. rv.tfrNext = &rv.tfrPrealloc
  53. return rv, nil
  54. }
  55. count, err := dictionaryRowParseV(val)
  56. if err != nil {
  57. return nil, err
  58. }
  59. bufUsed = termFrequencyRowKeyTo(buf, field, term, nil)
  60. it := indexReader.kvreader.PrefixIterator(buf[:bufUsed])
  61. atomic.AddUint64(&indexReader.index.stats.termSearchersStarted, uint64(1))
  62. return &UpsideDownCouchTermFieldReader{
  63. indexReader: indexReader,
  64. iterator: it,
  65. count: count,
  66. term: term,
  67. field: field,
  68. includeTermVectors: includeTermVectors,
  69. }, nil
  70. }
  71. func (r *UpsideDownCouchTermFieldReader) Count() uint64 {
  72. return r.count
  73. }
  74. func (r *UpsideDownCouchTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) {
  75. if r.iterator != nil {
  76. // We treat tfrNext also like an initialization flag, which
  77. // tells us whether we need to invoke the underlying
  78. // iterator.Next(). The first time, don't call iterator.Next().
  79. if r.tfrNext != nil {
  80. r.iterator.Next()
  81. } else {
  82. r.tfrNext = &r.tfrPrealloc
  83. }
  84. key, val, valid := r.iterator.Current()
  85. if valid {
  86. tfr := r.tfrNext
  87. err := tfr.parseKDoc(key, r.term)
  88. if err != nil {
  89. return nil, err
  90. }
  91. err = tfr.parseV(val, r.includeTermVectors)
  92. if err != nil {
  93. return nil, err
  94. }
  95. rv := preAlloced
  96. if rv == nil {
  97. rv = &index.TermFieldDoc{}
  98. }
  99. rv.ID = append(rv.ID, tfr.doc...)
  100. rv.Freq = tfr.freq
  101. rv.Norm = float64(tfr.norm)
  102. if tfr.vectors != nil {
  103. rv.Vectors = r.indexReader.index.termFieldVectorsFromTermVectors(tfr.vectors)
  104. }
  105. return rv, nil
  106. }
  107. }
  108. return nil, nil
  109. }
  110. func (r *UpsideDownCouchTermFieldReader) Advance(docID index.IndexInternalID, preAlloced *index.TermFieldDoc) (rv *index.TermFieldDoc, err error) {
  111. if r.iterator != nil {
  112. if r.tfrNext == nil {
  113. r.tfrNext = &TermFrequencyRow{}
  114. }
  115. tfr := InitTermFrequencyRow(r.tfrNext, r.term, r.field, docID, 0, 0)
  116. r.keyBuf, err = tfr.KeyAppendTo(r.keyBuf[:0])
  117. if err != nil {
  118. return nil, err
  119. }
  120. r.iterator.Seek(r.keyBuf)
  121. key, val, valid := r.iterator.Current()
  122. if valid {
  123. err := tfr.parseKDoc(key, r.term)
  124. if err != nil {
  125. return nil, err
  126. }
  127. err = tfr.parseV(val, r.includeTermVectors)
  128. if err != nil {
  129. return nil, err
  130. }
  131. rv = preAlloced
  132. if rv == nil {
  133. rv = &index.TermFieldDoc{}
  134. }
  135. rv.ID = append(rv.ID, tfr.doc...)
  136. rv.Freq = tfr.freq
  137. rv.Norm = float64(tfr.norm)
  138. if tfr.vectors != nil {
  139. rv.Vectors = r.indexReader.index.termFieldVectorsFromTermVectors(tfr.vectors)
  140. }
  141. return rv, nil
  142. }
  143. }
  144. return nil, nil
  145. }
  146. func (r *UpsideDownCouchTermFieldReader) Close() error {
  147. if r.indexReader != nil {
  148. atomic.AddUint64(&r.indexReader.index.stats.termSearchersFinished, uint64(1))
  149. }
  150. if r.iterator != nil {
  151. return r.iterator.Close()
  152. }
  153. return nil
  154. }
  155. type UpsideDownCouchDocIDReader struct {
  156. indexReader *IndexReader
  157. iterator store.KVIterator
  158. only []string
  159. onlyPos int
  160. onlyMode bool
  161. }
  162. func newUpsideDownCouchDocIDReader(indexReader *IndexReader) (*UpsideDownCouchDocIDReader, error) {
  163. startBytes := []byte{0x0}
  164. endBytes := []byte{0xff}
  165. bisr := NewBackIndexRow(startBytes, nil, nil)
  166. bier := NewBackIndexRow(endBytes, nil, nil)
  167. it := indexReader.kvreader.RangeIterator(bisr.Key(), bier.Key())
  168. return &UpsideDownCouchDocIDReader{
  169. indexReader: indexReader,
  170. iterator: it,
  171. }, nil
  172. }
  173. func newUpsideDownCouchDocIDReaderOnly(indexReader *IndexReader, ids []string) (*UpsideDownCouchDocIDReader, error) {
  174. // ensure ids are sorted
  175. sort.Strings(ids)
  176. startBytes := []byte{0x0}
  177. if len(ids) > 0 {
  178. startBytes = []byte(ids[0])
  179. }
  180. endBytes := []byte{0xff}
  181. if len(ids) > 0 {
  182. endBytes = incrementBytes([]byte(ids[len(ids)-1]))
  183. }
  184. bisr := NewBackIndexRow(startBytes, nil, nil)
  185. bier := NewBackIndexRow(endBytes, nil, nil)
  186. it := indexReader.kvreader.RangeIterator(bisr.Key(), bier.Key())
  187. return &UpsideDownCouchDocIDReader{
  188. indexReader: indexReader,
  189. iterator: it,
  190. only: ids,
  191. onlyMode: true,
  192. }, nil
  193. }
  194. func (r *UpsideDownCouchDocIDReader) Next() (index.IndexInternalID, error) {
  195. key, val, valid := r.iterator.Current()
  196. if r.onlyMode {
  197. var rv index.IndexInternalID
  198. for valid && r.onlyPos < len(r.only) {
  199. br, err := NewBackIndexRowKV(key, val)
  200. if err != nil {
  201. return nil, err
  202. }
  203. if !bytes.Equal(br.doc, []byte(r.only[r.onlyPos])) {
  204. ok := r.nextOnly()
  205. if !ok {
  206. return nil, nil
  207. }
  208. r.iterator.Seek(NewBackIndexRow([]byte(r.only[r.onlyPos]), nil, nil).Key())
  209. key, val, valid = r.iterator.Current()
  210. continue
  211. } else {
  212. rv = append([]byte(nil), br.doc...)
  213. break
  214. }
  215. }
  216. if valid && r.onlyPos < len(r.only) {
  217. ok := r.nextOnly()
  218. if ok {
  219. r.iterator.Seek(NewBackIndexRow([]byte(r.only[r.onlyPos]), nil, nil).Key())
  220. }
  221. return rv, nil
  222. }
  223. } else {
  224. if valid {
  225. br, err := NewBackIndexRowKV(key, val)
  226. if err != nil {
  227. return nil, err
  228. }
  229. rv := append([]byte(nil), br.doc...)
  230. r.iterator.Next()
  231. return rv, nil
  232. }
  233. }
  234. return nil, nil
  235. }
  236. func (r *UpsideDownCouchDocIDReader) Advance(docID index.IndexInternalID) (index.IndexInternalID, error) {
  237. if r.onlyMode {
  238. r.onlyPos = sort.SearchStrings(r.only, string(docID))
  239. if r.onlyPos >= len(r.only) {
  240. // advanced to key after our last only key
  241. return nil, nil
  242. }
  243. r.iterator.Seek(NewBackIndexRow([]byte(r.only[r.onlyPos]), nil, nil).Key())
  244. key, val, valid := r.iterator.Current()
  245. var rv index.IndexInternalID
  246. for valid && r.onlyPos < len(r.only) {
  247. br, err := NewBackIndexRowKV(key, val)
  248. if err != nil {
  249. return nil, err
  250. }
  251. if !bytes.Equal(br.doc, []byte(r.only[r.onlyPos])) {
  252. // the only key we seek'd to didn't exist
  253. // now look for the closest key that did exist in only
  254. r.onlyPos = sort.SearchStrings(r.only, string(br.doc))
  255. if r.onlyPos >= len(r.only) {
  256. // advanced to key after our last only key
  257. return nil, nil
  258. }
  259. // now seek to this new only key
  260. r.iterator.Seek(NewBackIndexRow([]byte(r.only[r.onlyPos]), nil, nil).Key())
  261. key, val, valid = r.iterator.Current()
  262. continue
  263. } else {
  264. rv = append([]byte(nil), br.doc...)
  265. break
  266. }
  267. }
  268. if valid && r.onlyPos < len(r.only) {
  269. ok := r.nextOnly()
  270. if ok {
  271. r.iterator.Seek(NewBackIndexRow([]byte(r.only[r.onlyPos]), nil, nil).Key())
  272. }
  273. return rv, nil
  274. }
  275. } else {
  276. bir := NewBackIndexRow(docID, nil, nil)
  277. r.iterator.Seek(bir.Key())
  278. key, val, valid := r.iterator.Current()
  279. if valid {
  280. br, err := NewBackIndexRowKV(key, val)
  281. if err != nil {
  282. return nil, err
  283. }
  284. rv := append([]byte(nil), br.doc...)
  285. r.iterator.Next()
  286. return rv, nil
  287. }
  288. }
  289. return nil, nil
  290. }
  291. func (r *UpsideDownCouchDocIDReader) Close() error {
  292. return r.iterator.Close()
  293. }
  294. // move the r.only pos forward one, skipping duplicates
  295. // return true if there is more data, or false if we got to the end of the list
  296. func (r *UpsideDownCouchDocIDReader) nextOnly() bool {
  297. // advance 1 position, until we see a different key
  298. // it's already sorted, so this skips duplicates
  299. start := r.onlyPos
  300. r.onlyPos++
  301. for r.onlyPos < len(r.only) && r.only[r.onlyPos] == r.only[start] {
  302. start = r.onlyPos
  303. r.onlyPos++
  304. }
  305. // inidicate if we got to the end of the list
  306. return r.onlyPos < len(r.only)
  307. }