summary | refs | log | tree | commit | diff | stats
path: root/modules/indexer
diff options
context:
space:
mode:
author: zeripath <art27@cantab.net> 2021-02-17 21:32:25 +0000
committer: GitHub <noreply@github.com> 2021-02-17 22:32:25 +0100
commit: ae7e6cd474747dce1f65c0b1c6e1d6b09ab0bccb (patch)
tree: 669743542988c694b242f92d512bb219eb300635 /modules/indexer
parent: 7ba158183a34d71b3989512c059a01d35c4c4673 (diff)
download: gitea-ae7e6cd474747dce1f65c0b1c6e1d6b09ab0bccb.tar.gz
download: gitea-ae7e6cd474747dce1f65c0b1c6e1d6b09ab0bccb.zip
Reduce calls to git cat-file -s (#14682)
* Reduce calls to git cat-file -s

  There are multiple places where there are repeated calls to git cat-file -s due to the blobs not being created with their size. Through judicious use of git ls-tree -l and slight adjustments to the indexer code we can avoid a lot of these calls.

* Simplify by always expecting the long format

* Also always set the sized field and tell the indexer the update is sized
Diffstat (limited to 'modules/indexer')
-rw-r--r-- modules/indexer/code/bleve.go 20
-rw-r--r-- modules/indexer/code/elastic_search.go 20
-rw-r--r-- modules/indexer/code/git.go 8
3 files changed, 32 insertions, 16 deletions
diff --git a/modules/indexer/code/bleve.go b/modules/indexer/code/bleve.go
index 826efde4c1..1ebc74c43a 100644
--- a/modules/indexer/code/bleve.go
+++ b/modules/indexer/code/bleve.go
@@ -179,14 +179,20 @@ func (b *BleveIndexer) addUpdate(commitSha string, update fileUpdate, repo *mode
return nil
}
- stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha).
- RunInDir(repo.RepoPath())
- if err != nil {
- return err
+ size := update.Size
+
+ if !update.Sized {
+ stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha).
+ RunInDir(repo.RepoPath())
+ if err != nil {
+ return err
+ }
+ if size, err = strconv.ParseInt(strings.TrimSpace(stdout), 10, 64); err != nil {
+ return fmt.Errorf("Misformatted git cat-file output: %v", err)
+ }
}
- if size, err := strconv.Atoi(strings.TrimSpace(stdout)); err != nil {
- return fmt.Errorf("Misformatted git cat-file output: %v", err)
- } else if int64(size) > setting.Indexer.MaxIndexerFileSize {
+
+ if size > setting.Indexer.MaxIndexerFileSize {
return b.addDelete(update.Filename, repo, batch)
}
diff --git a/modules/indexer/code/elastic_search.go b/modules/indexer/code/elastic_search.go
index f81dbb34d4..c9d604b694 100644
--- a/modules/indexer/code/elastic_search.go
+++ b/modules/indexer/code/elastic_search.go
@@ -178,14 +178,20 @@ func (b *ElasticSearchIndexer) addUpdate(sha string, update fileUpdate, repo *mo
return nil, nil
}
- stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha).
- RunInDir(repo.RepoPath())
- if err != nil {
- return nil, err
+ size := update.Size
+
+ if !update.Sized {
+ stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha).
+ RunInDir(repo.RepoPath())
+ if err != nil {
+ return nil, err
+ }
+ if size, err = strconv.ParseInt(strings.TrimSpace(stdout), 10, 64); err != nil {
+ return nil, fmt.Errorf("Misformatted git cat-file output: %v", err)
+ }
}
- if size, err := strconv.Atoi(strings.TrimSpace(stdout)); err != nil {
- return nil, fmt.Errorf("Misformatted git cat-file output: %v", err)
- } else if int64(size) > setting.Indexer.MaxIndexerFileSize {
+
+ if size > setting.Indexer.MaxIndexerFileSize {
return []elastic.BulkableRequest{b.addDelete(update.Filename, repo)}, nil
}
diff --git a/modules/indexer/code/git.go b/modules/indexer/code/git.go
index 37ab5ac3d3..919d785406 100644
--- a/modules/indexer/code/git.go
+++ b/modules/indexer/code/git.go
@@ -17,6 +17,8 @@ import (
type fileUpdate struct {
Filename string
BlobSha string
+ Size int64
+ Sized bool
}
// repoChanges changes (file additions/updates/removals) to a repo
@@ -77,6 +79,8 @@ func parseGitLsTreeOutput(stdout []byte) ([]fileUpdate, error) {
updates[idxCount] = fileUpdate{
Filename: entry.Name(),
BlobSha: entry.ID.String(),
+ Size: entry.Size(),
+ Sized: true,
}
idxCount++
}
@@ -87,7 +91,7 @@ func parseGitLsTreeOutput(stdout []byte) ([]fileUpdate, error) {
// genesisChanges get changes to add repo to the indexer for the first time
func genesisChanges(repo *models.Repository, revision string) (*repoChanges, error) {
var changes repoChanges
- stdout, err := git.NewCommand("ls-tree", "--full-tree", "-r", revision).
+ stdout, err := git.NewCommand("ls-tree", "--full-tree", "-l", "-r", revision).
RunInDirBytes(repo.RepoPath())
if err != nil {
return nil, err
@@ -162,7 +166,7 @@ func nonGenesisChanges(repo *models.Repository, revision string) (*repoChanges,
}
}
- cmd := git.NewCommand("ls-tree", "--full-tree", revision, "--")
+ cmd := git.NewCommand("ls-tree", "--full-tree", "-l", revision, "--")
cmd.AddArguments(updatedFilenames...)
lsTreeStdout, err := cmd.RunInDirBytes(repo.RepoPath())
if err != nil {