diff options
author | zeripath <art27@cantab.net> | 2021-03-04 02:57:01 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-03-03 21:57:01 -0500 |
commit | 523efa433b61e00e7a14bd31cac315e43842d729 (patch) | |
tree | 684139f1eb909e169031fb11eb292fae6307bfce /modules/indexer | |
parent | 0044e804912099a6478b747230cdc37b2c8a3047 (diff) | |
download | gitea-523efa433b61e00e7a14bd31cac315e43842d729.tar.gz gitea-523efa433b61e00e7a14bd31cac315e43842d729.zip |
Move Bleve and Elastic code indexers to use a common cat-file --batch (#14781)
* Extract out the common cat-file batch calls
Signed-off-by: Andrew Thornton <art27@cantab.net>
* Move bleve and elastic indexers to use a common cat-file --batch when indexing
Signed-off-by: Andrew Thornton <art27@cantab.net>
* move catfilebatch to batch_reader and rename to batch_reader.go
Signed-off-by: Andrew Thornton <art27@cantab.net>
Co-authored-by: 6543 <6543@obermui.de>
Co-authored-by: Lauris BH <lauris@nix.lv>
Diffstat (limited to 'modules/indexer')
-rw-r--r-- | modules/indexer/code/bleve.go | 30 | ||||
-rw-r--r-- | modules/indexer/code/elastic_search.go | 38 |
2 files changed, 52 insertions, 16 deletions
diff --git a/modules/indexer/code/bleve.go b/modules/indexer/code/bleve.go index 1ebc74c43a..573ea8b88c 100644 --- a/modules/indexer/code/bleve.go +++ b/modules/indexer/code/bleve.go @@ -5,7 +5,10 @@ package code import ( + "bufio" "fmt" + "io" + "io/ioutil" "os" "strconv" "strings" @@ -173,7 +176,7 @@ func NewBleveIndexer(indexDir string) (*BleveIndexer, bool, error) { return indexer, created, err } -func (b *BleveIndexer) addUpdate(commitSha string, update fileUpdate, repo *models.Repository, batch rupture.FlushingBatch) error { +func (b *BleveIndexer) addUpdate(batchWriter *io.PipeWriter, batchReader *bufio.Reader, commitSha string, update fileUpdate, repo *models.Repository, batch rupture.FlushingBatch) error { // Ignore vendored files in code search if setting.Indexer.ExcludeVendored && enry.IsVendor(update.Filename) { return nil @@ -196,8 +199,16 @@ func (b *BleveIndexer) addUpdate(commitSha string, update fileUpdate, repo *mode return b.addDelete(update.Filename, repo, batch) } - fileContents, err := git.NewCommand("cat-file", "blob", update.BlobSha). - RunInDirBytes(repo.RepoPath()) + if _, err := batchWriter.Write([]byte(update.BlobSha + "\n")); err != nil { + return err + } + + _, _, size, err := git.ReadBatchLine(batchReader) + if err != nil { + return err + } + + fileContents, err := ioutil.ReadAll(io.LimitReader(batchReader, size)) if err != nil { return err } else if !base.IsTextFile(fileContents) { @@ -254,10 +265,17 @@ func (b *BleveIndexer) Close() { // Index indexes the data func (b *BleveIndexer) Index(repo *models.Repository, sha string, changes *repoChanges) error { batch := rupture.NewFlushingBatch(b.indexer, maxBatchSize) - for _, update := range changes.Updates { - if err := b.addUpdate(sha, update, repo, batch); err != nil { - return err + if len(changes.Updates) > 0 { + + batchWriter, batchReader, cancel := git.CatFileBatch(repo.RepoPath()) + defer cancel() + + for _, update := range changes.Updates { + if err := b.addUpdate(batchWriter, batchReader, sha, update, repo, batch); err != nil { + return err + } } + cancel() } for _, filename := range changes.RemovedFilenames { if err := b.addDelete(filename, repo, batch); err != nil { diff --git a/modules/indexer/code/elastic_search.go b/modules/indexer/code/elastic_search.go index 130cd1430a..5327eb1e51 100644 --- a/modules/indexer/code/elastic_search.go +++ b/modules/indexer/code/elastic_search.go @@ -5,8 +5,11 @@ package code import ( + "bufio" "context" "fmt" + "io" + "io/ioutil" "strconv" "strings" "time" @@ -172,7 +175,7 @@ func (b *ElasticSearchIndexer) init() (bool, error) { return exists, nil } -func (b *ElasticSearchIndexer) addUpdate(sha string, update fileUpdate, repo *models.Repository) ([]elastic.BulkableRequest, error) { +func (b *ElasticSearchIndexer) addUpdate(batchWriter *io.PipeWriter, batchReader *bufio.Reader, sha string, update fileUpdate, repo *models.Repository) ([]elastic.BulkableRequest, error) { // Ignore vendored files in code search if setting.Indexer.ExcludeVendored && enry.IsVendor(update.Filename) { return nil, nil @@ -195,8 +198,16 @@ func (b *ElasticSearchIndexer) addUpdate(sha string, update fileUpdate, repo *mo return []elastic.BulkableRequest{b.addDelete(update.Filename, repo)}, nil } - fileContents, err := git.NewCommand("cat-file", "blob", update.BlobSha). - RunInDirBytes(repo.RepoPath()) + if _, err := batchWriter.Write([]byte(update.BlobSha + "\n")); err != nil { + return nil, err + } + + _, _, size, err := git.ReadBatchLine(batchReader) + if err != nil { + return nil, err + } + + fileContents, err := ioutil.ReadAll(io.LimitReader(batchReader, size)) if err != nil { return nil, err } else if !base.IsTextFile(fileContents) { @@ -230,14 +241,21 @@ func (b *ElasticSearchIndexer) addDelete(filename string, repo *models.Repositor // Index will save the index data func (b *ElasticSearchIndexer) Index(repo *models.Repository, sha string, changes *repoChanges) error { reqs := make([]elastic.BulkableRequest, 0) - for _, update := range changes.Updates { - updateReqs, err := b.addUpdate(sha, update, repo) - if err != nil { - return err - } - if len(updateReqs) > 0 { - reqs = append(reqs, updateReqs...) + if len(changes.Updates) > 0 { + + batchWriter, batchReader, cancel := git.CatFileBatch(repo.RepoPath()) + defer cancel() + + for _, update := range changes.Updates { + updateReqs, err := b.addUpdate(batchWriter, batchReader, sha, update, repo) + if err != nil { + return err + } + if len(updateReqs) > 0 { + reqs = append(reqs, updateReqs...) + } } + cancel() } for _, filename := range changes.RemovedFilenames { |