You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

upsidedown.go 26KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083
  1. // Copyright (c) 2014 Couchbase, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. //go:generate protoc --gofast_out=. upsidedown.proto
  15. package upsidedown
  16. import (
  17. "encoding/binary"
  18. "encoding/json"
  19. "fmt"
  20. "math"
  21. "sync"
  22. "sync/atomic"
  23. "time"
  24. "github.com/blevesearch/bleve/analysis"
  25. "github.com/blevesearch/bleve/document"
  26. "github.com/blevesearch/bleve/index"
  27. "github.com/blevesearch/bleve/index/store"
  28. "github.com/blevesearch/bleve/registry"
  29. "github.com/golang/protobuf/proto"
  30. )
// Name is the registered identifier for this index type.
const Name = "upside_down"

// RowBufferSize is ideally sized to be the smallest
// size that can contain an index row key and its corresponding
// value. It is not a limit, if need be a larger buffer is
// allocated, but performance will be more optimal if *most*
// rows fit this size.
const RowBufferSize = 4 * 1024

// VersionKey is the KV key under which the index version marker is stored.
var VersionKey = []byte{'v'}

// Version is the index format version this implementation reads and writes.
const Version uint8 = 7

// IncompatibleVersion is returned when the stored version differs from Version.
var IncompatibleVersion = fmt.Errorf("incompatible version, %d is supported", Version)
// UpsideDownCouch is an index implementation that stores all index rows
// in a single KV store using the upside_down row encoding.
type UpsideDownCouch struct {
	version       uint8
	path          string
	storeName     string
	storeConfig   map[string]interface{}
	store         store.KVStore
	fieldCache    *index.FieldCache
	analysisQueue *index.AnalysisQueue
	stats         *indexStat
	m             sync.RWMutex
	// fields protected by m
	docCount uint64
	// writeMutex serializes all mutating operations on the index
	writeMutex sync.Mutex
}
// docBackIndexRow pairs a document with its existing back index row
// while a batch is being assembled.
type docBackIndexRow struct {
	docID        string
	doc          *document.Document // If deletion, doc will be nil.
	backIndexRow *BackIndexRow
}
  60. func NewUpsideDownCouch(storeName string, storeConfig map[string]interface{}, analysisQueue *index.AnalysisQueue) (index.Index, error) {
  61. rv := &UpsideDownCouch{
  62. version: Version,
  63. fieldCache: index.NewFieldCache(),
  64. storeName: storeName,
  65. storeConfig: storeConfig,
  66. analysisQueue: analysisQueue,
  67. }
  68. rv.stats = &indexStat{i: rv}
  69. return rv, nil
  70. }
  71. func (udc *UpsideDownCouch) init(kvwriter store.KVWriter) (err error) {
  72. // version marker
  73. rowsAll := [][]UpsideDownCouchRow{
  74. {NewVersionRow(udc.version)},
  75. }
  76. err = udc.batchRows(kvwriter, nil, rowsAll, nil)
  77. return
  78. }
// loadSchema reads all field rows from the store into the field cache and
// verifies the stored version marker matches this implementation's Version.
func (udc *UpsideDownCouch) loadSchema(kvreader store.KVReader) (err error) {
	// all field rows share the 'f' key prefix
	it := kvreader.PrefixIterator([]byte{'f'})
	defer func() {
		// don't mask an earlier error with the iterator close error
		if cerr := it.Close(); err == nil && cerr != nil {
			err = cerr
		}
	}()
	key, val, valid := it.Current()
	for valid {
		var fieldRow *FieldRow
		fieldRow, err = NewFieldRowKV(key, val)
		if err != nil {
			return
		}
		udc.fieldCache.AddExisting(fieldRow.name, fieldRow.index)
		it.Next()
		key, val, valid = it.Current()
	}
	// read and decode the version row ('v' key)
	val, err = kvreader.Get([]byte{'v'})
	if err != nil {
		return
	}
	var vr *VersionRow
	vr, err = NewVersionRowKV([]byte{'v'}, val)
	if err != nil {
		return
	}
	// stored version must exactly match what this code writes
	if vr.version != Version {
		err = IncompatibleVersion
		return
	}
	return
}
  112. var rowBufferPool sync.Pool
  113. func GetRowBuffer() []byte {
  114. if rb, ok := rowBufferPool.Get().([]byte); ok {
  115. return rb
  116. } else {
  117. return make([]byte, RowBufferSize)
  118. }
  119. }
  120. func PutRowBuffer(buf []byte) {
  121. rowBufferPool.Put(buf)
  122. }
// batchRows encodes all add, update, and delete rows into a single KV
// batch and executes it on the given writer. Term frequency rows also
// contribute +1/-1 merge deltas to their corresponding dictionary rows.
func (udc *UpsideDownCouch) batchRows(writer store.KVWriter, addRowsAll [][]UpsideDownCouchRow, updateRowsAll [][]UpsideDownCouchRow, deleteRowsAll [][]UpsideDownCouchRow) (err error) {
	// net count change per dictionary row, keyed by its serialized key
	dictionaryDeltas := make(map[string]int64)
	// count up bytes needed for buffering.
	addNum := 0
	addKeyBytes := 0
	addValBytes := 0
	updateNum := 0
	updateKeyBytes := 0
	updateValBytes := 0
	deleteNum := 0
	deleteKeyBytes := 0
	rowBuf := GetRowBuffer()
	for _, addRows := range addRowsAll {
		for _, row := range addRows {
			tfr, ok := row.(*TermFrequencyRow)
			if ok {
				// an added term increments its dictionary row count
				if tfr.DictionaryRowKeySize() > len(rowBuf) {
					rowBuf = make([]byte, tfr.DictionaryRowKeySize())
				}
				dictKeySize, err := tfr.DictionaryRowKeyTo(rowBuf)
				if err != nil {
					return err
				}
				dictionaryDeltas[string(rowBuf[:dictKeySize])] += 1
			}
			addKeyBytes += row.KeySize()
			addValBytes += row.ValueSize()
		}
		addNum += len(addRows)
	}
	for _, updateRows := range updateRowsAll {
		for _, row := range updateRows {
			updateKeyBytes += row.KeySize()
			updateValBytes += row.ValueSize()
		}
		updateNum += len(updateRows)
	}
	for _, deleteRows := range deleteRowsAll {
		for _, row := range deleteRows {
			tfr, ok := row.(*TermFrequencyRow)
			if ok {
				// need to decrement counter
				if tfr.DictionaryRowKeySize() > len(rowBuf) {
					rowBuf = make([]byte, tfr.DictionaryRowKeySize())
				}
				dictKeySize, err := tfr.DictionaryRowKeyTo(rowBuf)
				if err != nil {
					return err
				}
				dictionaryDeltas[string(rowBuf[:dictKeySize])] -= 1
			}
			deleteKeyBytes += row.KeySize()
		}
		deleteNum += len(deleteRows)
	}
	PutRowBuffer(rowBuf)
	mergeNum := len(dictionaryDeltas)
	mergeKeyBytes := 0
	mergeValBytes := mergeNum * DictionaryRowMaxValueSize
	for dictRowKey := range dictionaryDeltas {
		mergeKeyBytes += len(dictRowKey)
	}
	// prepare batch
	totBytes := addKeyBytes + addValBytes +
		updateKeyBytes + updateValBytes +
		deleteKeyBytes +
		2*(mergeKeyBytes+mergeValBytes)
	buf, wb, err := writer.NewBatchEx(store.KVBatchOptions{
		TotalBytes: totBytes,
		NumSets:    addNum + updateNum,
		NumDeletes: deleteNum,
		NumMerges:  mergeNum,
	})
	if err != nil {
		return err
	}
	defer func() {
		_ = wb.Close()
	}()
	// fill the batch, carving each row's key/value out of the shared buf;
	// buf is advanced past each row so slices handed to wb stay stable
	for _, addRows := range addRowsAll {
		for _, row := range addRows {
			keySize, err := row.KeyTo(buf)
			if err != nil {
				return err
			}
			valSize, err := row.ValueTo(buf[keySize:])
			if err != nil {
				return err
			}
			wb.Set(buf[:keySize], buf[keySize:keySize+valSize])
			buf = buf[keySize+valSize:]
		}
	}
	for _, updateRows := range updateRowsAll {
		for _, row := range updateRows {
			keySize, err := row.KeyTo(buf)
			if err != nil {
				return err
			}
			valSize, err := row.ValueTo(buf[keySize:])
			if err != nil {
				return err
			}
			wb.Set(buf[:keySize], buf[keySize:keySize+valSize])
			buf = buf[keySize+valSize:]
		}
	}
	for _, deleteRows := range deleteRowsAll {
		for _, row := range deleteRows {
			keySize, err := row.KeyTo(buf)
			if err != nil {
				return err
			}
			wb.Delete(buf[:keySize])
			buf = buf[keySize:]
		}
	}
	for dictRowKey, delta := range dictionaryDeltas {
		dictRowKeyLen := copy(buf, dictRowKey)
		binary.LittleEndian.PutUint64(buf[dictRowKeyLen:], uint64(delta))
		wb.Merge(buf[:dictRowKeyLen], buf[dictRowKeyLen:dictRowKeyLen+DictionaryRowMaxValueSize])
		buf = buf[dictRowKeyLen+DictionaryRowMaxValueSize:]
	}
	// write out the batch
	return writer.ExecuteBatch(wb)
}
  250. func (udc *UpsideDownCouch) Open() (err error) {
  251. // acquire the write mutex for the duration of Open()
  252. udc.writeMutex.Lock()
  253. defer udc.writeMutex.Unlock()
  254. // open the kv store
  255. storeConstructor := registry.KVStoreConstructorByName(udc.storeName)
  256. if storeConstructor == nil {
  257. err = index.ErrorUnknownStorageType
  258. return
  259. }
  260. // now open the store
  261. udc.store, err = storeConstructor(&mergeOperator, udc.storeConfig)
  262. if err != nil {
  263. return
  264. }
  265. // start a reader to look at the index
  266. var kvreader store.KVReader
  267. kvreader, err = udc.store.Reader()
  268. if err != nil {
  269. return
  270. }
  271. var value []byte
  272. value, err = kvreader.Get(VersionKey)
  273. if err != nil {
  274. _ = kvreader.Close()
  275. return
  276. }
  277. if value != nil {
  278. err = udc.loadSchema(kvreader)
  279. if err != nil {
  280. _ = kvreader.Close()
  281. return
  282. }
  283. // set doc count
  284. udc.m.Lock()
  285. udc.docCount, err = udc.countDocs(kvreader)
  286. udc.m.Unlock()
  287. err = kvreader.Close()
  288. } else {
  289. // new index, close the reader and open writer to init
  290. err = kvreader.Close()
  291. if err != nil {
  292. return
  293. }
  294. var kvwriter store.KVWriter
  295. kvwriter, err = udc.store.Writer()
  296. if err != nil {
  297. return
  298. }
  299. defer func() {
  300. if cerr := kvwriter.Close(); err == nil && cerr != nil {
  301. err = cerr
  302. }
  303. }()
  304. // init the index
  305. err = udc.init(kvwriter)
  306. }
  307. return
  308. }
  309. func (udc *UpsideDownCouch) countDocs(kvreader store.KVReader) (count uint64, err error) {
  310. it := kvreader.PrefixIterator([]byte{'b'})
  311. defer func() {
  312. if cerr := it.Close(); err == nil && cerr != nil {
  313. err = cerr
  314. }
  315. }()
  316. _, _, valid := it.Current()
  317. for valid {
  318. count++
  319. it.Next()
  320. _, _, valid = it.Current()
  321. }
  322. return
  323. }
  324. func (udc *UpsideDownCouch) rowCount() (count uint64, err error) {
  325. // start an isolated reader for use during the rowcount
  326. kvreader, err := udc.store.Reader()
  327. if err != nil {
  328. return
  329. }
  330. defer func() {
  331. if cerr := kvreader.Close(); err == nil && cerr != nil {
  332. err = cerr
  333. }
  334. }()
  335. it := kvreader.RangeIterator(nil, nil)
  336. defer func() {
  337. if cerr := it.Close(); err == nil && cerr != nil {
  338. err = cerr
  339. }
  340. }()
  341. _, _, valid := it.Current()
  342. for valid {
  343. count++
  344. it.Next()
  345. _, _, valid = it.Current()
  346. }
  347. return
  348. }
// Close closes the underlying KV store.
func (udc *UpsideDownCouch) Close() error {
	return udc.store.Close()
}
// Update analyzes the document (outside the write lock), looks up any
// existing back index row for it, and then indexes the document.
func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) {
	// do analysis before acquiring write lock
	analysisStart := time.Now()
	resultChan := make(chan *index.AnalysisResult)
	aw := index.NewAnalysisWork(udc, doc, resultChan)
	// put the work on the queue
	udc.analysisQueue.Queue(aw)
	// wait for the result
	result := <-resultChan
	close(resultChan)
	atomic.AddUint64(&udc.stats.analysisTime, uint64(time.Since(analysisStart)))
	udc.writeMutex.Lock()
	defer udc.writeMutex.Unlock()
	// open a reader for backindex lookup
	var kvreader store.KVReader
	kvreader, err = udc.store.Reader()
	if err != nil {
		return
	}
	// first we lookup the backindex row for the doc id if it exists
	// lookup the back index row
	var backIndexRow *BackIndexRow
	backIndexRow, err = backIndexRowForDoc(kvreader, index.IndexInternalID(doc.ID))
	if err != nil {
		_ = kvreader.Close()
		atomic.AddUint64(&udc.stats.errors, 1)
		return
	}
	err = kvreader.Close()
	if err != nil {
		return
	}
	return udc.UpdateWithAnalysis(doc, result, backIndexRow)
}
// UpdateWithAnalysis indexes a document whose analysis has already been
// performed, merging its new rows against the existing back index row.
// NOTE(review): Update calls this while holding writeMutex — confirm any
// external callers do the same.
func (udc *UpsideDownCouch) UpdateWithAnalysis(doc *document.Document,
	result *index.AnalysisResult, backIndexRow *BackIndexRow) (err error) {
	// start a writer for this update
	indexStart := time.Now()
	var kvwriter store.KVWriter
	kvwriter, err = udc.store.Writer()
	if err != nil {
		return
	}
	defer func() {
		// don't mask an earlier error with the writer close error
		if cerr := kvwriter.Close(); err == nil && cerr != nil {
			err = cerr
		}
	}()
	// prepare a list of rows
	var addRowsAll [][]UpsideDownCouchRow
	var updateRowsAll [][]UpsideDownCouchRow
	var deleteRowsAll [][]UpsideDownCouchRow
	addRows, updateRows, deleteRows := udc.mergeOldAndNew(backIndexRow, result.Rows)
	if len(addRows) > 0 {
		addRowsAll = append(addRowsAll, addRows)
	}
	if len(updateRows) > 0 {
		updateRowsAll = append(updateRowsAll, updateRows)
	}
	if len(deleteRows) > 0 {
		deleteRowsAll = append(deleteRowsAll, deleteRows)
	}
	err = udc.batchRows(kvwriter, addRowsAll, updateRowsAll, deleteRowsAll)
	// a nil backIndexRow means this doc did not exist before: count it
	if err == nil && backIndexRow == nil {
		udc.m.Lock()
		udc.docCount++
		udc.m.Unlock()
	}
	atomic.AddUint64(&udc.stats.indexTime, uint64(time.Since(indexStart)))
	if err == nil {
		atomic.AddUint64(&udc.stats.updates, 1)
		atomic.AddUint64(&udc.stats.numPlainTextBytesIndexed, doc.NumPlainTextBytes())
	} else {
		atomic.AddUint64(&udc.stats.errors, 1)
	}
	return
}
// mergeOldAndNew partitions a document's freshly analyzed rows into adds
// (key not previously present), updates (key present before), and deletes
// (old keys no longer produced), using the back index row as the record
// of what previously existed. A nil backIndexRow means everything is new.
func (udc *UpsideDownCouch) mergeOldAndNew(backIndexRow *BackIndexRow, rows []index.IndexRow) (addRows []UpsideDownCouchRow, updateRows []UpsideDownCouchRow, deleteRows []UpsideDownCouchRow) {
	addRows = make([]UpsideDownCouchRow, 0, len(rows))
	if backIndexRow == nil {
		// brand new doc: every row is an add
		addRows = addRows[0:len(rows)]
		for i, row := range rows {
			addRows[i] = row
		}
		return addRows, nil, nil
	}
	updateRows = make([]UpsideDownCouchRow, 0, len(rows))
	deleteRows = make([]UpsideDownCouchRow, 0, len(rows))
	// build membership sets of the previously existing term/stored keys;
	// entries are removed as they are matched, leaving only stale keys
	var existingTermKeys map[string]struct{}
	backIndexTermKeys := backIndexRow.AllTermKeys()
	if len(backIndexTermKeys) > 0 {
		existingTermKeys = make(map[string]struct{}, len(backIndexTermKeys))
		for _, key := range backIndexTermKeys {
			existingTermKeys[string(key)] = struct{}{}
		}
	}
	var existingStoredKeys map[string]struct{}
	backIndexStoredKeys := backIndexRow.AllStoredKeys()
	if len(backIndexStoredKeys) > 0 {
		existingStoredKeys = make(map[string]struct{}, len(backIndexStoredKeys))
		for _, key := range backIndexStoredKeys {
			existingStoredKeys[string(key)] = struct{}{}
		}
	}
	keyBuf := GetRowBuffer()
	for _, row := range rows {
		switch row := row.(type) {
		case *TermFrequencyRow:
			if existingTermKeys != nil {
				// grow the scratch buffer if this key won't fit
				if row.KeySize() > len(keyBuf) {
					keyBuf = make([]byte, row.KeySize())
				}
				keySize, _ := row.KeyTo(keyBuf)
				if _, ok := existingTermKeys[string(keyBuf[:keySize])]; ok {
					updateRows = append(updateRows, row)
					delete(existingTermKeys, string(keyBuf[:keySize]))
					continue
				}
			}
			addRows = append(addRows, row)
		case *StoredRow:
			if existingStoredKeys != nil {
				if row.KeySize() > len(keyBuf) {
					keyBuf = make([]byte, row.KeySize())
				}
				keySize, _ := row.KeyTo(keyBuf)
				if _, ok := existingStoredKeys[string(keyBuf[:keySize])]; ok {
					updateRows = append(updateRows, row)
					delete(existingStoredKeys, string(keyBuf[:keySize]))
					continue
				}
			}
			addRows = append(addRows, row)
		default:
			// all other row types are unconditionally treated as updates
			updateRows = append(updateRows, row)
		}
	}
	PutRowBuffer(keyBuf)
	// any of the existing rows that weren't updated need to be deleted
	for existingTermKey := range existingTermKeys {
		termFreqRow, err := NewTermFrequencyRowK([]byte(existingTermKey))
		if err == nil {
			deleteRows = append(deleteRows, termFreqRow)
		}
	}
	// any of the existing stored fields that weren't updated need to be deleted
	for existingStoredKey := range existingStoredKeys {
		storedRow, err := NewStoredRowK([]byte(existingStoredKey))
		if err == nil {
			deleteRows = append(deleteRows, storedRow)
		}
	}
	return addRows, updateRows, deleteRows
}
  506. func (udc *UpsideDownCouch) storeField(docID []byte, field document.Field, fieldIndex uint16, rows []index.IndexRow, backIndexStoredEntries []*BackIndexStoreEntry) ([]index.IndexRow, []*BackIndexStoreEntry) {
  507. fieldType := encodeFieldType(field)
  508. storedRow := NewStoredRow(docID, fieldIndex, field.ArrayPositions(), fieldType, field.Value())
  509. // record the back index entry
  510. backIndexStoredEntry := BackIndexStoreEntry{Field: proto.Uint32(uint32(fieldIndex)), ArrayPositions: field.ArrayPositions()}
  511. return append(rows, storedRow), append(backIndexStoredEntries, &backIndexStoredEntry)
  512. }
  513. func encodeFieldType(f document.Field) byte {
  514. fieldType := byte('x')
  515. switch f.(type) {
  516. case *document.TextField:
  517. fieldType = 't'
  518. case *document.NumericField:
  519. fieldType = 'n'
  520. case *document.DateTimeField:
  521. fieldType = 'd'
  522. case *document.BooleanField:
  523. fieldType = 'b'
  524. case *document.GeoPointField:
  525. fieldType = 'g'
  526. case *document.CompositeField:
  527. fieldType = 'c'
  528. }
  529. return fieldType
  530. }
// indexField builds a TermFrequencyRow (and, optionally, term vectors)
// for each token of a single field, and records the field's back index
// terms entry.
func (udc *UpsideDownCouch) indexField(docID []byte, includeTermVectors bool, fieldIndex uint16, fieldLength int, tokenFreqs analysis.TokenFrequencies, rows []index.IndexRow, backIndexTermsEntries []*BackIndexTermsEntry) ([]index.IndexRow, []*BackIndexTermsEntry) {
	fieldNorm := float32(1.0 / math.Sqrt(float64(fieldLength)))
	// one backing slice for all rows keeps allocations down
	termFreqRows := make([]TermFrequencyRow, len(tokenFreqs))
	termFreqRowsUsed := 0
	terms := make([]string, 0, len(tokenFreqs))
	for k, tf := range tokenFreqs {
		termFreqRow := &termFreqRows[termFreqRowsUsed]
		termFreqRowsUsed++
		InitTermFrequencyRow(termFreqRow, tf.Term, fieldIndex, docID,
			uint64(frequencyFromTokenFreq(tf)), fieldNorm)
		if includeTermVectors {
			termFreqRow.vectors, rows = udc.termVectorsFromTokenFreq(fieldIndex, tf, rows)
		}
		// record the back index entry
		terms = append(terms, k)
		rows = append(rows, termFreqRow)
	}
	backIndexTermsEntry := BackIndexTermsEntry{Field: proto.Uint32(uint32(fieldIndex)), Terms: terms}
	backIndexTermsEntries = append(backIndexTermsEntries, &backIndexTermsEntry)
	return rows, backIndexTermsEntries
}
// Delete removes the document with the given id, deleting all of its
// term frequency rows, stored rows, and its back index row. Deleting a
// non-existent id is a no-op that still counts as a delete in the stats.
func (udc *UpsideDownCouch) Delete(id string) (err error) {
	indexStart := time.Now()
	udc.writeMutex.Lock()
	defer udc.writeMutex.Unlock()
	// open a reader for backindex lookup
	var kvreader store.KVReader
	kvreader, err = udc.store.Reader()
	if err != nil {
		return
	}
	// first we lookup the backindex row for the doc id if it exists
	// lookup the back index row
	var backIndexRow *BackIndexRow
	backIndexRow, err = backIndexRowForDoc(kvreader, index.IndexInternalID(id))
	if err != nil {
		_ = kvreader.Close()
		atomic.AddUint64(&udc.stats.errors, 1)
		return
	}
	err = kvreader.Close()
	if err != nil {
		return
	}
	// no back index row means the doc doesn't exist: nothing to delete
	if backIndexRow == nil {
		atomic.AddUint64(&udc.stats.deletes, 1)
		return
	}
	// start a writer for this delete
	var kvwriter store.KVWriter
	kvwriter, err = udc.store.Writer()
	if err != nil {
		return
	}
	defer func() {
		// don't mask an earlier error with the writer close error
		if cerr := kvwriter.Close(); err == nil && cerr != nil {
			err = cerr
		}
	}()
	var deleteRowsAll [][]UpsideDownCouchRow
	deleteRows := udc.deleteSingle(id, backIndexRow, nil)
	if len(deleteRows) > 0 {
		deleteRowsAll = append(deleteRowsAll, deleteRows)
	}
	err = udc.batchRows(kvwriter, nil, nil, deleteRowsAll)
	if err == nil {
		udc.m.Lock()
		udc.docCount--
		udc.m.Unlock()
	}
	atomic.AddUint64(&udc.stats.indexTime, uint64(time.Since(indexStart)))
	if err == nil {
		atomic.AddUint64(&udc.stats.deletes, 1)
	} else {
		atomic.AddUint64(&udc.stats.errors, 1)
	}
	return
}
  609. func (udc *UpsideDownCouch) deleteSingle(id string, backIndexRow *BackIndexRow, deleteRows []UpsideDownCouchRow) []UpsideDownCouchRow {
  610. idBytes := []byte(id)
  611. for _, backIndexEntry := range backIndexRow.termsEntries {
  612. for i := range backIndexEntry.Terms {
  613. tfr := NewTermFrequencyRow([]byte(backIndexEntry.Terms[i]), uint16(*backIndexEntry.Field), idBytes, 0, 0)
  614. deleteRows = append(deleteRows, tfr)
  615. }
  616. }
  617. for _, se := range backIndexRow.storedEntries {
  618. sf := NewStoredRow(idBytes, uint16(*se.Field), se.ArrayPositions, 'x', nil)
  619. deleteRows = append(deleteRows, sf)
  620. }
  621. // also delete the back entry itself
  622. deleteRows = append(deleteRows, backIndexRow)
  623. return deleteRows
  624. }
  625. func decodeFieldType(typ byte, name string, pos []uint64, value []byte) document.Field {
  626. switch typ {
  627. case 't':
  628. return document.NewTextField(name, pos, value)
  629. case 'n':
  630. return document.NewNumericFieldFromBytes(name, pos, value)
  631. case 'd':
  632. return document.NewDateTimeFieldFromBytes(name, pos, value)
  633. case 'b':
  634. return document.NewBooleanFieldFromBytes(name, pos, value)
  635. case 'g':
  636. return document.NewGeoPointFieldFromBytes(name, pos, value)
  637. }
  638. return nil
  639. }
// frequencyFromTokenFreq returns the frequency of the given token.
func frequencyFromTokenFreq(tf *analysis.TokenFreq) int {
	return tf.Frequency()
}
// termVectorsFromTokenFreq builds a TermVector for every location of the
// token, registering any not-yet-known field names as new FieldRows which
// are appended to rows.
func (udc *UpsideDownCouch) termVectorsFromTokenFreq(field uint16, tf *analysis.TokenFreq, rows []index.IndexRow) ([]*TermVector, []index.IndexRow) {
	// one backing slice for all vectors keeps allocations down
	a := make([]TermVector, len(tf.Locations))
	rv := make([]*TermVector, len(tf.Locations))
	for i, l := range tf.Locations {
		var newFieldRow *FieldRow
		fieldIndex := field
		if l.Field != "" {
			// lookup correct field
			fieldIndex, newFieldRow = udc.fieldIndexOrNewRow(l.Field)
			if newFieldRow != nil {
				rows = append(rows, newFieldRow)
			}
		}
		a[i] = TermVector{
			field:          fieldIndex,
			arrayPositions: l.ArrayPositions,
			pos:            uint64(l.Position),
			start:          uint64(l.Start),
			end:            uint64(l.End),
		}
		rv[i] = &a[i]
	}
	return rv, rows
}
  667. func (udc *UpsideDownCouch) termFieldVectorsFromTermVectors(in []*TermVector) []*index.TermFieldVector {
  668. if len(in) == 0 {
  669. return nil
  670. }
  671. a := make([]index.TermFieldVector, len(in))
  672. rv := make([]*index.TermFieldVector, len(in))
  673. for i, tv := range in {
  674. fieldName := udc.fieldCache.FieldIndexed(tv.field)
  675. a[i] = index.TermFieldVector{
  676. Field: fieldName,
  677. ArrayPositions: tv.arrayPositions,
  678. Pos: tv.pos,
  679. Start: tv.start,
  680. End: tv.end,
  681. }
  682. rv[i] = &a[i]
  683. }
  684. return rv
  685. }
  686. func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) {
  687. persistedCallback := batch.PersistedCallback()
  688. if persistedCallback != nil {
  689. defer persistedCallback(err)
  690. }
  691. analysisStart := time.Now()
  692. resultChan := make(chan *index.AnalysisResult, len(batch.IndexOps))
  693. var numUpdates uint64
  694. var numPlainTextBytes uint64
  695. for _, doc := range batch.IndexOps {
  696. if doc != nil {
  697. numUpdates++
  698. numPlainTextBytes += doc.NumPlainTextBytes()
  699. }
  700. }
  701. if numUpdates > 0 {
  702. go func() {
  703. for k := range batch.IndexOps {
  704. doc := batch.IndexOps[k]
  705. if doc != nil {
  706. aw := index.NewAnalysisWork(udc, doc, resultChan)
  707. // put the work on the queue
  708. udc.analysisQueue.Queue(aw)
  709. }
  710. }
  711. }()
  712. }
  713. // retrieve back index rows concurrent with analysis
  714. docBackIndexRowErr := error(nil)
  715. docBackIndexRowCh := make(chan *docBackIndexRow, len(batch.IndexOps))
  716. udc.writeMutex.Lock()
  717. defer udc.writeMutex.Unlock()
  718. go func() {
  719. defer close(docBackIndexRowCh)
  720. // open a reader for backindex lookup
  721. var kvreader store.KVReader
  722. kvreader, err = udc.store.Reader()
  723. if err != nil {
  724. docBackIndexRowErr = err
  725. return
  726. }
  727. defer func() {
  728. if cerr := kvreader.Close(); err == nil && cerr != nil {
  729. docBackIndexRowErr = cerr
  730. }
  731. }()
  732. for docID, doc := range batch.IndexOps {
  733. backIndexRow, err := backIndexRowForDoc(kvreader, index.IndexInternalID(docID))
  734. if err != nil {
  735. docBackIndexRowErr = err
  736. return
  737. }
  738. docBackIndexRowCh <- &docBackIndexRow{docID, doc, backIndexRow}
  739. }
  740. }()
  741. // wait for analysis result
  742. newRowsMap := make(map[string][]index.IndexRow)
  743. var itemsDeQueued uint64
  744. for itemsDeQueued < numUpdates {
  745. result := <-resultChan
  746. newRowsMap[result.DocID] = result.Rows
  747. itemsDeQueued++
  748. }
  749. close(resultChan)
  750. atomic.AddUint64(&udc.stats.analysisTime, uint64(time.Since(analysisStart)))
  751. docsAdded := uint64(0)
  752. docsDeleted := uint64(0)
  753. indexStart := time.Now()
  754. // prepare a list of rows
  755. var addRowsAll [][]UpsideDownCouchRow
  756. var updateRowsAll [][]UpsideDownCouchRow
  757. var deleteRowsAll [][]UpsideDownCouchRow
  758. // add the internal ops
  759. var updateRows []UpsideDownCouchRow
  760. var deleteRows []UpsideDownCouchRow
  761. for internalKey, internalValue := range batch.InternalOps {
  762. if internalValue == nil {
  763. // delete
  764. deleteInternalRow := NewInternalRow([]byte(internalKey), nil)
  765. deleteRows = append(deleteRows, deleteInternalRow)
  766. } else {
  767. updateInternalRow := NewInternalRow([]byte(internalKey), internalValue)
  768. updateRows = append(updateRows, updateInternalRow)
  769. }
  770. }
  771. if len(updateRows) > 0 {
  772. updateRowsAll = append(updateRowsAll, updateRows)
  773. }
  774. if len(deleteRows) > 0 {
  775. deleteRowsAll = append(deleteRowsAll, deleteRows)
  776. }
  777. // process back index rows as they arrive
  778. for dbir := range docBackIndexRowCh {
  779. if dbir.doc == nil && dbir.backIndexRow != nil {
  780. // delete
  781. deleteRows := udc.deleteSingle(dbir.docID, dbir.backIndexRow, nil)
  782. if len(deleteRows) > 0 {
  783. deleteRowsAll = append(deleteRowsAll, deleteRows)
  784. }
  785. docsDeleted++
  786. } else if dbir.doc != nil {
  787. addRows, updateRows, deleteRows := udc.mergeOldAndNew(dbir.backIndexRow, newRowsMap[dbir.docID])
  788. if len(addRows) > 0 {
  789. addRowsAll = append(addRowsAll, addRows)
  790. }
  791. if len(updateRows) > 0 {
  792. updateRowsAll = append(updateRowsAll, updateRows)
  793. }
  794. if len(deleteRows) > 0 {
  795. deleteRowsAll = append(deleteRowsAll, deleteRows)
  796. }
  797. if dbir.backIndexRow == nil {
  798. docsAdded++
  799. }
  800. }
  801. }
  802. if docBackIndexRowErr != nil {
  803. return docBackIndexRowErr
  804. }
  805. // start a writer for this batch
  806. var kvwriter store.KVWriter
  807. kvwriter, err = udc.store.Writer()
  808. if err != nil {
  809. return
  810. }
  811. err = udc.batchRows(kvwriter, addRowsAll, updateRowsAll, deleteRowsAll)
  812. if err != nil {
  813. _ = kvwriter.Close()
  814. atomic.AddUint64(&udc.stats.errors, 1)
  815. return
  816. }
  817. err = kvwriter.Close()
  818. atomic.AddUint64(&udc.stats.indexTime, uint64(time.Since(indexStart)))
  819. if err == nil {
  820. udc.m.Lock()
  821. udc.docCount += docsAdded
  822. udc.docCount -= docsDeleted
  823. udc.m.Unlock()
  824. atomic.AddUint64(&udc.stats.updates, numUpdates)
  825. atomic.AddUint64(&udc.stats.deletes, docsDeleted)
  826. atomic.AddUint64(&udc.stats.batches, 1)
  827. atomic.AddUint64(&udc.stats.numPlainTextBytesIndexed, numPlainTextBytes)
  828. } else {
  829. atomic.AddUint64(&udc.stats.errors, 1)
  830. }
  831. return
  832. }
  833. func (udc *UpsideDownCouch) SetInternal(key, val []byte) (err error) {
  834. internalRow := NewInternalRow(key, val)
  835. udc.writeMutex.Lock()
  836. defer udc.writeMutex.Unlock()
  837. var writer store.KVWriter
  838. writer, err = udc.store.Writer()
  839. if err != nil {
  840. return
  841. }
  842. defer func() {
  843. if cerr := writer.Close(); err == nil && cerr != nil {
  844. err = cerr
  845. }
  846. }()
  847. batch := writer.NewBatch()
  848. batch.Set(internalRow.Key(), internalRow.Value())
  849. return writer.ExecuteBatch(batch)
  850. }
  851. func (udc *UpsideDownCouch) DeleteInternal(key []byte) (err error) {
  852. internalRow := NewInternalRow(key, nil)
  853. udc.writeMutex.Lock()
  854. defer udc.writeMutex.Unlock()
  855. var writer store.KVWriter
  856. writer, err = udc.store.Writer()
  857. if err != nil {
  858. return
  859. }
  860. defer func() {
  861. if cerr := writer.Close(); err == nil && cerr != nil {
  862. err = cerr
  863. }
  864. }()
  865. batch := writer.NewBatch()
  866. batch.Delete(internalRow.Key())
  867. return writer.ExecuteBatch(batch)
  868. }
  869. func (udc *UpsideDownCouch) Reader() (index.IndexReader, error) {
  870. kvr, err := udc.store.Reader()
  871. if err != nil {
  872. return nil, fmt.Errorf("error opening store reader: %v", err)
  873. }
  874. udc.m.RLock()
  875. defer udc.m.RUnlock()
  876. return &IndexReader{
  877. index: udc,
  878. kvreader: kvr,
  879. docCount: udc.docCount,
  880. }, nil
  881. }
// Stats returns the index stats as a JSON-marshalable value.
func (udc *UpsideDownCouch) Stats() json.Marshaler {
	return udc.stats
}
// StatsMap returns the index stats as a plain map.
func (udc *UpsideDownCouch) StatsMap() map[string]interface{} {
	return udc.stats.statsMap()
}
// Advanced exposes the underlying KV store.
func (udc *UpsideDownCouch) Advanced() (store.KVStore, error) {
	return udc.store, nil
}
  891. func (udc *UpsideDownCouch) fieldIndexOrNewRow(name string) (uint16, *FieldRow) {
  892. index, existed := udc.fieldCache.FieldNamed(name, true)
  893. if !existed {
  894. return index, NewFieldRow(index, name)
  895. }
  896. return index, nil
  897. }
// register this index type with the bleve registry at package load time
func init() {
	registry.RegisterIndexType(Name, NewUpsideDownCouch)
}
// backIndexRowForDoc fetches the back index row for the given document
// ID, returning nil (and no error) when the document is not present.
func backIndexRowForDoc(kvreader store.KVReader, docID index.IndexInternalID) (*BackIndexRow, error) {
	// use a temporary row structure to build key
	tempRow := BackIndexRow{
		doc: docID,
	}
	keyBuf := GetRowBuffer()
	if tempRow.KeySize() > len(keyBuf) {
		// oversize key: swap in a larger buffer; the deferred Put below
		// returns this larger buffer to the pool
		keyBuf = make([]byte, 2*tempRow.KeySize())
	}
	defer PutRowBuffer(keyBuf)
	keySize, err := tempRow.KeyTo(keyBuf)
	if err != nil {
		return nil, err
	}
	value, err := kvreader.Get(keyBuf[:keySize])
	if err != nil {
		return nil, err
	}
	// nil value means the doc has no back index row
	if value == nil {
		return nil, nil
	}
	backIndexRow, err := NewBackIndexRowKV(keyBuf[:keySize], value)
	if err != nil {
		return nil, err
	}
	return backIndexRow, nil
}