You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

bleve.go 7.1KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258
  1. // Copyright 2018 The Gitea Authors. All rights reserved.
  2. // Use of this source code is governed by a MIT-style
  3. // license that can be found in the LICENSE file.
  4. package issues
  5. import (
  6. "fmt"
  7. "os"
  8. "strconv"
  9. "github.com/blevesearch/bleve"
  10. "github.com/blevesearch/bleve/analysis/analyzer/custom"
  11. "github.com/blevesearch/bleve/analysis/token/lowercase"
  12. "github.com/blevesearch/bleve/analysis/token/unicodenorm"
  13. "github.com/blevesearch/bleve/analysis/tokenizer/unicode"
  14. "github.com/blevesearch/bleve/index/upsidedown"
  15. "github.com/blevesearch/bleve/mapping"
  16. "github.com/blevesearch/bleve/search/query"
  17. "github.com/ethantkoenig/rupture"
  18. )
  19. const (
  20. issueIndexerAnalyzer = "issueIndexer"
  21. issueIndexerDocType = "issueIndexerDocType"
  22. issueIndexerLatestVersion = 1
  23. )
  24. // indexerID a bleve-compatible unique identifier for an integer id
  25. func indexerID(id int64) string {
  26. return strconv.FormatInt(id, 36)
  27. }
  28. // idOfIndexerID the integer id associated with an indexer id
  29. func idOfIndexerID(indexerID string) (int64, error) {
  30. id, err := strconv.ParseInt(indexerID, 36, 64)
  31. if err != nil {
  32. return 0, fmt.Errorf("Unexpected indexer ID %s: %v", indexerID, err)
  33. }
  34. return id, nil
  35. }
  36. // numericEqualityQuery a numeric equality query for the given value and field
  37. func numericEqualityQuery(value int64, field string) *query.NumericRangeQuery {
  38. f := float64(value)
  39. tru := true
  40. q := bleve.NewNumericRangeInclusiveQuery(&f, &f, &tru, &tru)
  41. q.SetField(field)
  42. return q
  43. }
  44. func newMatchPhraseQuery(matchPhrase, field, analyzer string) *query.MatchPhraseQuery {
  45. q := bleve.NewMatchPhraseQuery(matchPhrase)
  46. q.FieldVal = field
  47. q.Analyzer = analyzer
  48. return q
  49. }
  50. const unicodeNormalizeName = "unicodeNormalize"
  51. func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error {
  52. return m.AddCustomTokenFilter(unicodeNormalizeName, map[string]interface{}{
  53. "type": unicodenorm.Name,
  54. "form": unicodenorm.NFC,
  55. })
  56. }
  57. const maxBatchSize = 16
  58. // openIndexer open the index at the specified path, checking for metadata
  59. // updates and bleve version updates. If index needs to be created (or
  60. // re-created), returns (nil, nil)
  61. func openIndexer(path string, latestVersion int) (bleve.Index, error) {
  62. _, err := os.Stat(path)
  63. if err != nil && os.IsNotExist(err) {
  64. return nil, nil
  65. } else if err != nil {
  66. return nil, err
  67. }
  68. metadata, err := rupture.ReadIndexMetadata(path)
  69. if err != nil {
  70. return nil, err
  71. }
  72. if metadata.Version < latestVersion {
  73. // the indexer is using a previous version, so we should delete it and
  74. // re-populate
  75. return nil, os.RemoveAll(path)
  76. }
  77. index, err := bleve.Open(path)
  78. if err != nil && err == upsidedown.IncompatibleVersion {
  79. // the indexer was built with a previous version of bleve, so we should
  80. // delete it and re-populate
  81. return nil, os.RemoveAll(path)
  82. } else if err != nil {
  83. return nil, err
  84. }
  85. return index, nil
  86. }
  87. // BleveIndexerData an update to the issue indexer
  88. type BleveIndexerData IndexerData
  89. // Type returns the document type, for bleve's mapping.Classifier interface.
  90. func (i *BleveIndexerData) Type() string {
  91. return issueIndexerDocType
  92. }
  93. // createIssueIndexer create an issue indexer if one does not already exist
  94. func createIssueIndexer(path string, latestVersion int) (bleve.Index, error) {
  95. mapping := bleve.NewIndexMapping()
  96. docMapping := bleve.NewDocumentMapping()
  97. numericFieldMapping := bleve.NewNumericFieldMapping()
  98. numericFieldMapping.IncludeInAll = false
  99. docMapping.AddFieldMappingsAt("RepoID", numericFieldMapping)
  100. textFieldMapping := bleve.NewTextFieldMapping()
  101. textFieldMapping.Store = false
  102. textFieldMapping.IncludeInAll = false
  103. docMapping.AddFieldMappingsAt("Title", textFieldMapping)
  104. docMapping.AddFieldMappingsAt("Content", textFieldMapping)
  105. docMapping.AddFieldMappingsAt("Comments", textFieldMapping)
  106. if err := addUnicodeNormalizeTokenFilter(mapping); err != nil {
  107. return nil, err
  108. } else if err = mapping.AddCustomAnalyzer(issueIndexerAnalyzer, map[string]interface{}{
  109. "type": custom.Name,
  110. "char_filters": []string{},
  111. "tokenizer": unicode.Name,
  112. "token_filters": []string{unicodeNormalizeName, lowercase.Name},
  113. }); err != nil {
  114. return nil, err
  115. }
  116. mapping.DefaultAnalyzer = issueIndexerAnalyzer
  117. mapping.AddDocumentMapping(issueIndexerDocType, docMapping)
  118. mapping.AddDocumentMapping("_all", bleve.NewDocumentDisabledMapping())
  119. index, err := bleve.New(path, mapping)
  120. if err != nil {
  121. return nil, err
  122. }
  123. if err = rupture.WriteIndexMetadata(path, &rupture.IndexMetadata{
  124. Version: latestVersion,
  125. }); err != nil {
  126. return nil, err
  127. }
  128. return index, nil
  129. }
  130. var (
  131. _ Indexer = &BleveIndexer{}
  132. )
  133. // BleveIndexer implements Indexer interface
  134. type BleveIndexer struct {
  135. indexDir string
  136. indexer bleve.Index
  137. }
  138. // NewBleveIndexer creates a new bleve local indexer
  139. func NewBleveIndexer(indexDir string) *BleveIndexer {
  140. return &BleveIndexer{
  141. indexDir: indexDir,
  142. }
  143. }
  144. // Init will initial the indexer
  145. func (b *BleveIndexer) Init() (bool, error) {
  146. var err error
  147. b.indexer, err = openIndexer(b.indexDir, issueIndexerLatestVersion)
  148. if err != nil {
  149. return false, err
  150. }
  151. if b.indexer != nil {
  152. return true, nil
  153. }
  154. b.indexer, err = createIssueIndexer(b.indexDir, issueIndexerLatestVersion)
  155. return false, err
  156. }
  157. // Index will save the index data
  158. func (b *BleveIndexer) Index(issues []*IndexerData) error {
  159. batch := rupture.NewFlushingBatch(b.indexer, maxBatchSize)
  160. for _, issue := range issues {
  161. if err := batch.Index(indexerID(issue.ID), struct {
  162. RepoID int64
  163. Title string
  164. Content string
  165. Comments []string
  166. }{
  167. RepoID: issue.RepoID,
  168. Title: issue.Title,
  169. Content: issue.Content,
  170. Comments: issue.Comments,
  171. }); err != nil {
  172. return err
  173. }
  174. }
  175. return batch.Flush()
  176. }
  177. // Delete deletes indexes by ids
  178. func (b *BleveIndexer) Delete(ids ...int64) error {
  179. batch := rupture.NewFlushingBatch(b.indexer, maxBatchSize)
  180. for _, id := range ids {
  181. if err := batch.Delete(indexerID(id)); err != nil {
  182. return err
  183. }
  184. }
  185. return batch.Flush()
  186. }
  187. // Search searches for issues by given conditions.
  188. // Returns the matching issue IDs
  189. func (b *BleveIndexer) Search(keyword string, repoIDs []int64, limit, start int) (*SearchResult, error) {
  190. var repoQueriesP []*query.NumericRangeQuery
  191. for _, repoID := range repoIDs {
  192. repoQueriesP = append(repoQueriesP, numericEqualityQuery(repoID, "RepoID"))
  193. }
  194. repoQueries := make([]query.Query, len(repoQueriesP))
  195. for i, v := range repoQueriesP {
  196. repoQueries[i] = query.Query(v)
  197. }
  198. indexerQuery := bleve.NewConjunctionQuery(
  199. bleve.NewDisjunctionQuery(repoQueries...),
  200. bleve.NewDisjunctionQuery(
  201. newMatchPhraseQuery(keyword, "Title", issueIndexerAnalyzer),
  202. newMatchPhraseQuery(keyword, "Content", issueIndexerAnalyzer),
  203. newMatchPhraseQuery(keyword, "Comments", issueIndexerAnalyzer),
  204. ))
  205. search := bleve.NewSearchRequestOptions(indexerQuery, limit, start, false)
  206. result, err := b.indexer.Search(search)
  207. if err != nil {
  208. return nil, err
  209. }
  210. var ret = SearchResult{
  211. Hits: make([]Match, 0, len(result.Hits)),
  212. }
  213. for _, hit := range result.Hits {
  214. id, err := idOfIndexerID(hit.ID)
  215. if err != nil {
  216. return nil, err
  217. }
  218. ret.Hits = append(ret.Hits, Match{
  219. ID: id,
  220. })
  221. }
  222. return &ret, nil
  223. }