* Fix data race in bleve indexer Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>tags/v1.15.0-rc2
@@ -0,0 +1,59 @@ | |||
// Copyright 2021 The Gitea Authors. All rights reserved. | |||
// Use of this source code is governed by a MIT-style | |||
// license that can be found in the LICENSE file. | |||
package bleve | |||
import ( | |||
"github.com/blevesearch/bleve/v2" | |||
) | |||
// FlushingBatch is a batch of operations that automatically flushes to the | |||
// underlying index once it reaches a certain size. | |||
type FlushingBatch struct { | |||
maxBatchSize int | |||
batch *bleve.Batch | |||
index bleve.Index | |||
} | |||
// NewFlushingBatch creates a new flushing batch for the specified index. Once | |||
// the number of operations in the batch reaches the specified limit, the batch | |||
// automatically flushes its operations to the index. | |||
func NewFlushingBatch(index bleve.Index, maxBatchSize int) *FlushingBatch { | |||
return &FlushingBatch{ | |||
maxBatchSize: maxBatchSize, | |||
batch: index.NewBatch(), | |||
index: index, | |||
} | |||
} | |||
// Index add a new index to batch | |||
func (b *FlushingBatch) Index(id string, data interface{}) error { | |||
if err := b.batch.Index(id, data); err != nil { | |||
return err | |||
} | |||
return b.flushIfFull() | |||
} | |||
// Delete add a delete index to batch | |||
func (b *FlushingBatch) Delete(id string) error { | |||
b.batch.Delete(id) | |||
return b.flushIfFull() | |||
} | |||
func (b *FlushingBatch) flushIfFull() error { | |||
if b.batch.Size() < b.maxBatchSize { | |||
return nil | |||
} | |||
return b.Flush() | |||
} | |||
// Flush submit the batch and create a new one | |||
func (b *FlushingBatch) Flush() error { | |||
err := b.index.Batch(b.batch) | |||
if err != nil { | |||
return err | |||
} | |||
b.batch = b.index.NewBatch() | |||
return nil | |||
} |
@@ -18,6 +18,7 @@ import ( | |||
"code.gitea.io/gitea/modules/analyze" | |||
"code.gitea.io/gitea/modules/charset" | |||
"code.gitea.io/gitea/modules/git" | |||
gitea_bleve "code.gitea.io/gitea/modules/indexer/bleve" | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/modules/setting" | |||
"code.gitea.io/gitea/modules/timeutil" | |||
@@ -176,7 +177,8 @@ func NewBleveIndexer(indexDir string) (*BleveIndexer, bool, error) { | |||
return indexer, created, err | |||
} | |||
func (b *BleveIndexer) addUpdate(batchWriter git.WriteCloserError, batchReader *bufio.Reader, commitSha string, update fileUpdate, repo *models.Repository, batch rupture.FlushingBatch) error { | |||
func (b *BleveIndexer) addUpdate(batchWriter git.WriteCloserError, batchReader *bufio.Reader, commitSha string, | |||
update fileUpdate, repo *models.Repository, batch *gitea_bleve.FlushingBatch) error { | |||
// Ignore vendored files in code search | |||
if setting.Indexer.ExcludeVendored && analyze.IsVendor(update.Filename) { | |||
return nil | |||
@@ -229,7 +231,7 @@ func (b *BleveIndexer) addUpdate(batchWriter git.WriteCloserError, batchReader * | |||
}) | |||
} | |||
func (b *BleveIndexer) addDelete(filename string, repo *models.Repository, batch rupture.FlushingBatch) error { | |||
func (b *BleveIndexer) addDelete(filename string, repo *models.Repository, batch *gitea_bleve.FlushingBatch) error { | |||
id := filenameIndexerID(repo.ID, filename) | |||
return batch.Delete(id) | |||
} | |||
@@ -267,7 +269,7 @@ func (b *BleveIndexer) Close() { | |||
// Index indexes the data | |||
func (b *BleveIndexer) Index(repo *models.Repository, sha string, changes *repoChanges) error { | |||
batch := rupture.NewFlushingBatch(b.indexer, maxBatchSize) | |||
batch := gitea_bleve.NewFlushingBatch(b.indexer, maxBatchSize) | |||
if len(changes.Updates) > 0 { | |||
batchWriter, batchReader, cancel := git.CatFileBatch(repo.RepoPath()) | |||
@@ -296,7 +298,7 @@ func (b *BleveIndexer) Delete(repoID int64) error { | |||
if err != nil { | |||
return err | |||
} | |||
batch := rupture.NewFlushingBatch(b.indexer, maxBatchSize) | |||
batch := gitea_bleve.NewFlushingBatch(b.indexer, maxBatchSize) | |||
for _, hit := range result.Hits { | |||
if err = batch.Delete(hit.ID); err != nil { | |||
return err |
@@ -9,8 +9,10 @@ import ( | |||
"os" | |||
"strconv" | |||
gitea_bleve "code.gitea.io/gitea/modules/indexer/bleve" | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/modules/util" | |||
"github.com/blevesearch/bleve/v2" | |||
"github.com/blevesearch/bleve/v2/analysis/analyzer/custom" | |||
"github.com/blevesearch/bleve/v2/analysis/token/lowercase" | |||
@@ -197,7 +199,7 @@ func (b *BleveIndexer) Close() { | |||
// Index will save the index data | |||
func (b *BleveIndexer) Index(issues []*IndexerData) error { | |||
batch := rupture.NewFlushingBatch(b.indexer, maxBatchSize) | |||
batch := gitea_bleve.NewFlushingBatch(b.indexer, maxBatchSize) | |||
for _, issue := range issues { | |||
if err := batch.Index(indexerID(issue.ID), struct { | |||
RepoID int64 | |||
@@ -218,7 +220,7 @@ func (b *BleveIndexer) Index(issues []*IndexerData) error { | |||
// Delete deletes indexes by ids | |||
func (b *BleveIndexer) Delete(ids ...int64) error { | |||
batch := rupture.NewFlushingBatch(b.indexer, maxBatchSize) | |||
batch := gitea_bleve.NewFlushingBatch(b.indexer, maxBatchSize) | |||
for _, id := range ids { | |||
if err := batch.Delete(indexerID(id)); err != nil { | |||
return err |