You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

index_impl.go 17KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758
  1. // Copyright (c) 2014 Couchbase, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package bleve
  15. import (
  16. "encoding/json"
  17. "fmt"
  18. "os"
  19. "sync"
  20. "sync/atomic"
  21. "time"
  22. "golang.org/x/net/context"
  23. "github.com/blevesearch/bleve/document"
  24. "github.com/blevesearch/bleve/index"
  25. "github.com/blevesearch/bleve/index/store"
  26. "github.com/blevesearch/bleve/index/upsidedown"
  27. "github.com/blevesearch/bleve/mapping"
  28. "github.com/blevesearch/bleve/registry"
  29. "github.com/blevesearch/bleve/search"
  30. "github.com/blevesearch/bleve/search/collector"
  31. "github.com/blevesearch/bleve/search/facet"
  32. "github.com/blevesearch/bleve/search/highlight"
  33. )
  34. type indexImpl struct {
  35. path string
  36. name string
  37. meta *indexMeta
  38. i index.Index
  39. m mapping.IndexMapping
  40. mutex sync.RWMutex
  41. open bool
  42. stats *IndexStat
  43. }
  // storePath is the name of the entry, below an index's path, that holds
  // the underlying store.
  const storePath = "store"

  // mappingInternalKey is the internal key under which the JSON-encoded
  // index mapping is stored.
  var mappingInternalKey = []byte("_mapping")

  // indexStorePath returns path followed by the OS path separator and
  // storePath. The pieces are concatenated as-is; no path cleaning is done.
  func indexStorePath(path string) string {
  	separator := string(os.PathSeparator)
  	return path + separator + storePath
  }
  49. func newIndexUsing(path string, mapping mapping.IndexMapping, indexType string, kvstore string, kvconfig map[string]interface{}) (*indexImpl, error) {
  50. // first validate the mapping
  51. err := mapping.Validate()
  52. if err != nil {
  53. return nil, err
  54. }
  55. if kvconfig == nil {
  56. kvconfig = map[string]interface{}{}
  57. }
  58. if kvstore == "" {
  59. return nil, fmt.Errorf("bleve not configured for file based indexing")
  60. }
  61. rv := indexImpl{
  62. path: path,
  63. name: path,
  64. m: mapping,
  65. meta: newIndexMeta(indexType, kvstore, kvconfig),
  66. }
  67. rv.stats = &IndexStat{i: &rv}
  68. // at this point there is hope that we can be successful, so save index meta
  69. if path != "" {
  70. err = rv.meta.Save(path)
  71. if err != nil {
  72. return nil, err
  73. }
  74. kvconfig["create_if_missing"] = true
  75. kvconfig["error_if_exists"] = true
  76. kvconfig["path"] = indexStorePath(path)
  77. } else {
  78. kvconfig["path"] = ""
  79. }
  80. // open the index
  81. indexTypeConstructor := registry.IndexTypeConstructorByName(rv.meta.IndexType)
  82. if indexTypeConstructor == nil {
  83. return nil, ErrorUnknownIndexType
  84. }
  85. rv.i, err = indexTypeConstructor(rv.meta.Storage, kvconfig, Config.analysisQueue)
  86. if err != nil {
  87. return nil, err
  88. }
  89. err = rv.i.Open()
  90. if err != nil {
  91. if err == index.ErrorUnknownStorageType {
  92. return nil, ErrorUnknownStorageType
  93. }
  94. return nil, err
  95. }
  96. // now persist the mapping
  97. mappingBytes, err := json.Marshal(mapping)
  98. if err != nil {
  99. return nil, err
  100. }
  101. err = rv.i.SetInternal(mappingInternalKey, mappingBytes)
  102. if err != nil {
  103. return nil, err
  104. }
  105. // mark the index as open
  106. rv.mutex.Lock()
  107. defer rv.mutex.Unlock()
  108. rv.open = true
  109. indexStats.Register(&rv)
  110. return &rv, nil
  111. }
  112. func openIndexUsing(path string, runtimeConfig map[string]interface{}) (rv *indexImpl, err error) {
  113. rv = &indexImpl{
  114. path: path,
  115. name: path,
  116. }
  117. rv.stats = &IndexStat{i: rv}
  118. rv.meta, err = openIndexMeta(path)
  119. if err != nil {
  120. return nil, err
  121. }
  122. // backwards compatibility if index type is missing
  123. if rv.meta.IndexType == "" {
  124. rv.meta.IndexType = upsidedown.Name
  125. }
  126. storeConfig := rv.meta.Config
  127. if storeConfig == nil {
  128. storeConfig = map[string]interface{}{}
  129. }
  130. storeConfig["path"] = indexStorePath(path)
  131. storeConfig["create_if_missing"] = false
  132. storeConfig["error_if_exists"] = false
  133. for rck, rcv := range runtimeConfig {
  134. storeConfig[rck] = rcv
  135. }
  136. // open the index
  137. indexTypeConstructor := registry.IndexTypeConstructorByName(rv.meta.IndexType)
  138. if indexTypeConstructor == nil {
  139. return nil, ErrorUnknownIndexType
  140. }
  141. rv.i, err = indexTypeConstructor(rv.meta.Storage, storeConfig, Config.analysisQueue)
  142. if err != nil {
  143. return nil, err
  144. }
  145. err = rv.i.Open()
  146. if err != nil {
  147. if err == index.ErrorUnknownStorageType {
  148. return nil, ErrorUnknownStorageType
  149. }
  150. return nil, err
  151. }
  152. // now load the mapping
  153. indexReader, err := rv.i.Reader()
  154. if err != nil {
  155. return nil, err
  156. }
  157. defer func() {
  158. if cerr := indexReader.Close(); cerr != nil && err == nil {
  159. err = cerr
  160. }
  161. }()
  162. mappingBytes, err := indexReader.GetInternal(mappingInternalKey)
  163. if err != nil {
  164. return nil, err
  165. }
  166. var im *mapping.IndexMappingImpl
  167. err = json.Unmarshal(mappingBytes, &im)
  168. if err != nil {
  169. return nil, fmt.Errorf("error parsing mapping JSON: %v\nmapping contents:\n%s", err, string(mappingBytes))
  170. }
  171. // mark the index as open
  172. rv.mutex.Lock()
  173. defer rv.mutex.Unlock()
  174. rv.open = true
  175. // validate the mapping
  176. err = im.Validate()
  177. if err != nil {
  178. // note even if the mapping is invalid
  179. // we still return an open usable index
  180. return rv, err
  181. }
  182. rv.m = im
  183. indexStats.Register(rv)
  184. return rv, err
  185. }
  186. // Advanced returns implementation internals
  187. // necessary ONLY for advanced usage.
  188. func (i *indexImpl) Advanced() (index.Index, store.KVStore, error) {
  189. s, err := i.i.Advanced()
  190. if err != nil {
  191. return nil, nil, err
  192. }
  193. return i.i, s, nil
  194. }
  195. // Mapping returns the IndexMapping in use by this
  196. // Index.
  197. func (i *indexImpl) Mapping() mapping.IndexMapping {
  198. return i.m
  199. }
  200. // Index the object with the specified identifier.
  201. // The IndexMapping for this index will determine
  202. // how the object is indexed.
  203. func (i *indexImpl) Index(id string, data interface{}) (err error) {
  204. if id == "" {
  205. return ErrorEmptyID
  206. }
  207. i.mutex.RLock()
  208. defer i.mutex.RUnlock()
  209. if !i.open {
  210. return ErrorIndexClosed
  211. }
  212. doc := document.NewDocument(id)
  213. err = i.m.MapDocument(doc, data)
  214. if err != nil {
  215. return
  216. }
  217. err = i.i.Update(doc)
  218. return
  219. }
  220. // IndexAdvanced takes a document.Document object
  221. // skips the mapping and indexes it.
  222. func (i *indexImpl) IndexAdvanced(doc *document.Document) (err error) {
  223. if doc.ID == "" {
  224. return ErrorEmptyID
  225. }
  226. i.mutex.RLock()
  227. defer i.mutex.RUnlock()
  228. if !i.open {
  229. return ErrorIndexClosed
  230. }
  231. err = i.i.Update(doc)
  232. return
  233. }
  234. // Delete entries for the specified identifier from
  235. // the index.
  236. func (i *indexImpl) Delete(id string) (err error) {
  237. if id == "" {
  238. return ErrorEmptyID
  239. }
  240. i.mutex.RLock()
  241. defer i.mutex.RUnlock()
  242. if !i.open {
  243. return ErrorIndexClosed
  244. }
  245. err = i.i.Delete(id)
  246. return
  247. }
  248. // Batch executes multiple Index and Delete
  249. // operations at the same time. There are often
  250. // significant performance benefits when performing
  251. // operations in a batch.
  252. func (i *indexImpl) Batch(b *Batch) error {
  253. i.mutex.RLock()
  254. defer i.mutex.RUnlock()
  255. if !i.open {
  256. return ErrorIndexClosed
  257. }
  258. return i.i.Batch(b.internal)
  259. }
  260. // Document is used to find the values of all the
  261. // stored fields for a document in the index. These
  262. // stored fields are put back into a Document object
  263. // and returned.
  264. func (i *indexImpl) Document(id string) (doc *document.Document, err error) {
  265. i.mutex.RLock()
  266. defer i.mutex.RUnlock()
  267. if !i.open {
  268. return nil, ErrorIndexClosed
  269. }
  270. indexReader, err := i.i.Reader()
  271. if err != nil {
  272. return nil, err
  273. }
  274. defer func() {
  275. if cerr := indexReader.Close(); err == nil && cerr != nil {
  276. err = cerr
  277. }
  278. }()
  279. doc, err = indexReader.Document(id)
  280. if err != nil {
  281. return nil, err
  282. }
  283. return doc, nil
  284. }
  285. // DocCount returns the number of documents in the
  286. // index.
  287. func (i *indexImpl) DocCount() (count uint64, err error) {
  288. i.mutex.RLock()
  289. defer i.mutex.RUnlock()
  290. if !i.open {
  291. return 0, ErrorIndexClosed
  292. }
  293. // open a reader for this search
  294. indexReader, err := i.i.Reader()
  295. if err != nil {
  296. return 0, fmt.Errorf("error opening index reader %v", err)
  297. }
  298. defer func() {
  299. if cerr := indexReader.Close(); err == nil && cerr != nil {
  300. err = cerr
  301. }
  302. }()
  303. count, err = indexReader.DocCount()
  304. return
  305. }
  306. // Search executes a search request operation.
  307. // Returns a SearchResult object or an error.
  308. func (i *indexImpl) Search(req *SearchRequest) (sr *SearchResult, err error) {
  309. return i.SearchInContext(context.Background(), req)
  310. }
  311. // SearchInContext executes a search request operation within the provided
  312. // Context. Returns a SearchResult object or an error.
  313. func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr *SearchResult, err error) {
  314. i.mutex.RLock()
  315. defer i.mutex.RUnlock()
  316. searchStart := time.Now()
  317. if !i.open {
  318. return nil, ErrorIndexClosed
  319. }
  320. collector := collector.NewTopNCollector(req.Size, req.From, req.Sort)
  321. // open a reader for this search
  322. indexReader, err := i.i.Reader()
  323. if err != nil {
  324. return nil, fmt.Errorf("error opening index reader %v", err)
  325. }
  326. defer func() {
  327. if cerr := indexReader.Close(); err == nil && cerr != nil {
  328. err = cerr
  329. }
  330. }()
  331. searcher, err := req.Query.Searcher(indexReader, i.m, search.SearcherOptions{
  332. Explain: req.Explain,
  333. IncludeTermVectors: req.IncludeLocations || req.Highlight != nil,
  334. })
  335. if err != nil {
  336. return nil, err
  337. }
  338. defer func() {
  339. if serr := searcher.Close(); err == nil && serr != nil {
  340. err = serr
  341. }
  342. }()
  343. if req.Facets != nil {
  344. facetsBuilder := search.NewFacetsBuilder(indexReader)
  345. for facetName, facetRequest := range req.Facets {
  346. if facetRequest.NumericRanges != nil {
  347. // build numeric range facet
  348. facetBuilder := facet.NewNumericFacetBuilder(facetRequest.Field, facetRequest.Size)
  349. for _, nr := range facetRequest.NumericRanges {
  350. facetBuilder.AddRange(nr.Name, nr.Min, nr.Max)
  351. }
  352. facetsBuilder.Add(facetName, facetBuilder)
  353. } else if facetRequest.DateTimeRanges != nil {
  354. // build date range facet
  355. facetBuilder := facet.NewDateTimeFacetBuilder(facetRequest.Field, facetRequest.Size)
  356. dateTimeParser := i.m.DateTimeParserNamed("")
  357. for _, dr := range facetRequest.DateTimeRanges {
  358. start, end := dr.ParseDates(dateTimeParser)
  359. facetBuilder.AddRange(dr.Name, start, end)
  360. }
  361. facetsBuilder.Add(facetName, facetBuilder)
  362. } else {
  363. // build terms facet
  364. facetBuilder := facet.NewTermsFacetBuilder(facetRequest.Field, facetRequest.Size)
  365. facetsBuilder.Add(facetName, facetBuilder)
  366. }
  367. }
  368. collector.SetFacetsBuilder(facetsBuilder)
  369. }
  370. err = collector.Collect(ctx, searcher, indexReader)
  371. if err != nil {
  372. return nil, err
  373. }
  374. hits := collector.Results()
  375. var highlighter highlight.Highlighter
  376. if req.Highlight != nil {
  377. // get the right highlighter
  378. highlighter, err = Config.Cache.HighlighterNamed(Config.DefaultHighlighter)
  379. if err != nil {
  380. return nil, err
  381. }
  382. if req.Highlight.Style != nil {
  383. highlighter, err = Config.Cache.HighlighterNamed(*req.Highlight.Style)
  384. if err != nil {
  385. return nil, err
  386. }
  387. }
  388. if highlighter == nil {
  389. return nil, fmt.Errorf("no highlighter named `%s` registered", *req.Highlight.Style)
  390. }
  391. }
  392. for _, hit := range hits {
  393. if len(req.Fields) > 0 || highlighter != nil {
  394. doc, err := indexReader.Document(hit.ID)
  395. if err == nil && doc != nil {
  396. if len(req.Fields) > 0 {
  397. for _, f := range req.Fields {
  398. for _, docF := range doc.Fields {
  399. if f == "*" || docF.Name() == f {
  400. var value interface{}
  401. switch docF := docF.(type) {
  402. case *document.TextField:
  403. value = string(docF.Value())
  404. case *document.NumericField:
  405. num, err := docF.Number()
  406. if err == nil {
  407. value = num
  408. }
  409. case *document.DateTimeField:
  410. datetime, err := docF.DateTime()
  411. if err == nil {
  412. value = datetime.Format(time.RFC3339)
  413. }
  414. case *document.BooleanField:
  415. boolean, err := docF.Boolean()
  416. if err == nil {
  417. value = boolean
  418. }
  419. case *document.GeoPointField:
  420. lon, err := docF.Lon()
  421. if err == nil {
  422. lat, err := docF.Lat()
  423. if err == nil {
  424. value = []float64{lon, lat}
  425. }
  426. }
  427. }
  428. if value != nil {
  429. hit.AddFieldValue(docF.Name(), value)
  430. }
  431. }
  432. }
  433. }
  434. }
  435. if highlighter != nil {
  436. highlightFields := req.Highlight.Fields
  437. if highlightFields == nil {
  438. // add all fields with matches
  439. highlightFields = make([]string, 0, len(hit.Locations))
  440. for k := range hit.Locations {
  441. highlightFields = append(highlightFields, k)
  442. }
  443. }
  444. for _, hf := range highlightFields {
  445. highlighter.BestFragmentsInField(hit, doc, hf, 1)
  446. }
  447. }
  448. } else if doc == nil {
  449. // unexpected case, a doc ID that was found as a search hit
  450. // was unable to be found during document lookup
  451. return nil, ErrorIndexReadInconsistency
  452. }
  453. }
  454. if i.name != "" {
  455. hit.Index = i.name
  456. }
  457. }
  458. atomic.AddUint64(&i.stats.searches, 1)
  459. searchDuration := time.Since(searchStart)
  460. atomic.AddUint64(&i.stats.searchTime, uint64(searchDuration))
  461. if Config.SlowSearchLogThreshold > 0 &&
  462. searchDuration > Config.SlowSearchLogThreshold {
  463. logger.Printf("slow search took %s - %v", searchDuration, req)
  464. }
  465. return &SearchResult{
  466. Status: &SearchStatus{
  467. Total: 1,
  468. Failed: 0,
  469. Successful: 1,
  470. Errors: make(map[string]error),
  471. },
  472. Request: req,
  473. Hits: hits,
  474. Total: collector.Total(),
  475. MaxScore: collector.MaxScore(),
  476. Took: searchDuration,
  477. Facets: collector.FacetResults(),
  478. }, nil
  479. }
  480. // Fields returns the name of all the fields this
  481. // Index has operated on.
  482. func (i *indexImpl) Fields() (fields []string, err error) {
  483. i.mutex.RLock()
  484. defer i.mutex.RUnlock()
  485. if !i.open {
  486. return nil, ErrorIndexClosed
  487. }
  488. indexReader, err := i.i.Reader()
  489. if err != nil {
  490. return nil, err
  491. }
  492. defer func() {
  493. if cerr := indexReader.Close(); err == nil && cerr != nil {
  494. err = cerr
  495. }
  496. }()
  497. fields, err = indexReader.Fields()
  498. if err != nil {
  499. return nil, err
  500. }
  501. return fields, nil
  502. }
  503. func (i *indexImpl) FieldDict(field string) (index.FieldDict, error) {
  504. i.mutex.RLock()
  505. if !i.open {
  506. i.mutex.RUnlock()
  507. return nil, ErrorIndexClosed
  508. }
  509. indexReader, err := i.i.Reader()
  510. if err != nil {
  511. i.mutex.RUnlock()
  512. return nil, err
  513. }
  514. fieldDict, err := indexReader.FieldDict(field)
  515. if err != nil {
  516. i.mutex.RUnlock()
  517. return nil, err
  518. }
  519. return &indexImplFieldDict{
  520. index: i,
  521. indexReader: indexReader,
  522. fieldDict: fieldDict,
  523. }, nil
  524. }
  525. func (i *indexImpl) FieldDictRange(field string, startTerm []byte, endTerm []byte) (index.FieldDict, error) {
  526. i.mutex.RLock()
  527. if !i.open {
  528. i.mutex.RUnlock()
  529. return nil, ErrorIndexClosed
  530. }
  531. indexReader, err := i.i.Reader()
  532. if err != nil {
  533. i.mutex.RUnlock()
  534. return nil, err
  535. }
  536. fieldDict, err := indexReader.FieldDictRange(field, startTerm, endTerm)
  537. if err != nil {
  538. i.mutex.RUnlock()
  539. return nil, err
  540. }
  541. return &indexImplFieldDict{
  542. index: i,
  543. indexReader: indexReader,
  544. fieldDict: fieldDict,
  545. }, nil
  546. }
  547. func (i *indexImpl) FieldDictPrefix(field string, termPrefix []byte) (index.FieldDict, error) {
  548. i.mutex.RLock()
  549. if !i.open {
  550. i.mutex.RUnlock()
  551. return nil, ErrorIndexClosed
  552. }
  553. indexReader, err := i.i.Reader()
  554. if err != nil {
  555. i.mutex.RUnlock()
  556. return nil, err
  557. }
  558. fieldDict, err := indexReader.FieldDictPrefix(field, termPrefix)
  559. if err != nil {
  560. i.mutex.RUnlock()
  561. return nil, err
  562. }
  563. return &indexImplFieldDict{
  564. index: i,
  565. indexReader: indexReader,
  566. fieldDict: fieldDict,
  567. }, nil
  568. }
  569. func (i *indexImpl) Close() error {
  570. i.mutex.Lock()
  571. defer i.mutex.Unlock()
  572. indexStats.UnRegister(i)
  573. i.open = false
  574. return i.i.Close()
  575. }
  576. func (i *indexImpl) Stats() *IndexStat {
  577. return i.stats
  578. }
  579. func (i *indexImpl) StatsMap() map[string]interface{} {
  580. return i.stats.statsMap()
  581. }
  582. func (i *indexImpl) GetInternal(key []byte) (val []byte, err error) {
  583. i.mutex.RLock()
  584. defer i.mutex.RUnlock()
  585. if !i.open {
  586. return nil, ErrorIndexClosed
  587. }
  588. reader, err := i.i.Reader()
  589. if err != nil {
  590. return nil, err
  591. }
  592. defer func() {
  593. if cerr := reader.Close(); err == nil && cerr != nil {
  594. err = cerr
  595. }
  596. }()
  597. val, err = reader.GetInternal(key)
  598. if err != nil {
  599. return nil, err
  600. }
  601. return val, nil
  602. }
  603. func (i *indexImpl) SetInternal(key, val []byte) error {
  604. i.mutex.RLock()
  605. defer i.mutex.RUnlock()
  606. if !i.open {
  607. return ErrorIndexClosed
  608. }
  609. return i.i.SetInternal(key, val)
  610. }
  611. func (i *indexImpl) DeleteInternal(key []byte) error {
  612. i.mutex.RLock()
  613. defer i.mutex.RUnlock()
  614. if !i.open {
  615. return ErrorIndexClosed
  616. }
  617. return i.i.DeleteInternal(key)
  618. }
  619. // NewBatch creates a new empty batch.
  620. func (i *indexImpl) NewBatch() *Batch {
  621. return &Batch{
  622. index: i,
  623. internal: index.NewBatch(),
  624. }
  625. }
  626. func (i *indexImpl) Name() string {
  627. return i.name
  628. }
  629. func (i *indexImpl) SetName(name string) {
  630. indexStats.UnRegister(i)
  631. i.name = name
  632. indexStats.Register(i)
  633. }
  634. type indexImplFieldDict struct {
  635. index *indexImpl
  636. indexReader index.IndexReader
  637. fieldDict index.FieldDict
  638. }
  639. func (f *indexImplFieldDict) Next() (*index.DictEntry, error) {
  640. return f.fieldDict.Next()
  641. }
  642. func (f *indexImplFieldDict) Close() error {
  643. defer f.index.mutex.RUnlock()
  644. err := f.fieldDict.Close()
  645. if err != nil {
  646. return err
  647. }
  648. return f.indexReader.Close()
  649. }