aboutsummaryrefslogtreecommitdiffstats
path: root/modules/indexer
diff options
context:
space:
mode:
authorLunny Xiao <xiaolunwen@gmail.com>2024-08-21 01:04:57 +0800
committerGitHub <noreply@github.com>2024-08-20 17:04:57 +0000
commitc03baab678ba5b2e9d974aea147e660417f5d3f7 (patch)
tree2b890d549a150cc1c9f6101218789601f40b960f /modules/indexer
parent8b92eba21f5c5cca277b8101ada0ea7a1fb32ae0 (diff)
downloadgitea-c03baab678ba5b2e9d974aea147e660417f5d3f7.tar.gz
gitea-c03baab678ba5b2e9d974aea147e660417f5d3f7.zip
Refactor the usage of batch catfile (#31754)
When a repository is opened, `ensureValidRepository` and `CatFileBatch` are both called. But sometimes their results are never used before the repository is closed, so invoking 3 git commands for every opened repository is a waste of CPU. This PR removes all of these calls from `OpenRepository` and keeps only the check that the folder exists. When a batch is needed, the necessary functions are invoked at that point.
Diffstat (limited to 'modules/indexer')
-rw-r--r--modules/indexer/code/bleve/bleve.go20
-rw-r--r--modules/indexer/code/elasticsearch/elasticsearch.go20
2 files changed, 22 insertions, 18 deletions
diff --git a/modules/indexer/code/bleve/bleve.go b/modules/indexer/code/bleve/bleve.go
index 8056b58ec2..542bdfb501 100644
--- a/modules/indexer/code/bleve/bleve.go
+++ b/modules/indexer/code/bleve/bleve.go
@@ -16,10 +16,10 @@ import (
"code.gitea.io/gitea/modules/analyze"
"code.gitea.io/gitea/modules/charset"
"code.gitea.io/gitea/modules/git"
+ "code.gitea.io/gitea/modules/gitrepo"
"code.gitea.io/gitea/modules/indexer/code/internal"
indexer_internal "code.gitea.io/gitea/modules/indexer/internal"
inner_bleve "code.gitea.io/gitea/modules/indexer/internal/bleve"
- "code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/timeutil"
"code.gitea.io/gitea/modules/typesniffer"
@@ -189,21 +189,23 @@ func (b *Indexer) addDelete(filename string, repo *repo_model.Repository, batch
func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *internal.RepoChanges) error {
batch := inner_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize)
if len(changes.Updates) > 0 {
- // Now because of some insanity with git cat-file not immediately failing if not run in a valid git directory we need to run git rev-parse first!
- if err := git.EnsureValidGitRepository(ctx, repo.RepoPath()); err != nil {
- log.Error("Unable to open git repo: %s for %-v: %v", repo.RepoPath(), repo, err)
+ r, err := gitrepo.OpenRepository(ctx, repo)
+ if err != nil {
return err
}
-
- batchWriter, batchReader, cancel := git.CatFileBatch(ctx, repo.RepoPath())
- defer cancel()
+ defer r.Close()
+ gitBatch, err := r.NewBatch(ctx)
+ if err != nil {
+ return err
+ }
+ defer gitBatch.Close()
for _, update := range changes.Updates {
- if err := b.addUpdate(ctx, batchWriter, batchReader, sha, update, repo, batch); err != nil {
+ if err := b.addUpdate(ctx, gitBatch.Writer, gitBatch.Reader, sha, update, repo, batch); err != nil {
return err
}
}
- cancel()
+ gitBatch.Close()
}
for _, filename := range changes.RemovedFilenames {
if err := b.addDelete(filename, repo, batch); err != nil {
diff --git a/modules/indexer/code/elasticsearch/elasticsearch.go b/modules/indexer/code/elasticsearch/elasticsearch.go
index e4622fd66e..0bda180fac 100644
--- a/modules/indexer/code/elasticsearch/elasticsearch.go
+++ b/modules/indexer/code/elasticsearch/elasticsearch.go
@@ -15,11 +15,11 @@ import (
"code.gitea.io/gitea/modules/analyze"
"code.gitea.io/gitea/modules/charset"
"code.gitea.io/gitea/modules/git"
+ "code.gitea.io/gitea/modules/gitrepo"
"code.gitea.io/gitea/modules/indexer/code/internal"
indexer_internal "code.gitea.io/gitea/modules/indexer/internal"
inner_elasticsearch "code.gitea.io/gitea/modules/indexer/internal/elasticsearch"
"code.gitea.io/gitea/modules/json"
- "code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/timeutil"
"code.gitea.io/gitea/modules/typesniffer"
@@ -154,17 +154,19 @@ func (b *Indexer) addDelete(filename string, repo *repo_model.Repository) elasti
func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *internal.RepoChanges) error {
reqs := make([]elastic.BulkableRequest, 0)
if len(changes.Updates) > 0 {
- // Now because of some insanity with git cat-file not immediately failing if not run in a valid git directory we need to run git rev-parse first!
- if err := git.EnsureValidGitRepository(ctx, repo.RepoPath()); err != nil {
- log.Error("Unable to open git repo: %s for %-v: %v", repo.RepoPath(), repo, err)
+ r, err := gitrepo.OpenRepository(ctx, repo)
+ if err != nil {
return err
}
-
- batchWriter, batchReader, cancel := git.CatFileBatch(ctx, repo.RepoPath())
- defer cancel()
+ defer r.Close()
+ batch, err := r.NewBatch(ctx)
+ if err != nil {
+ return err
+ }
+ defer batch.Close()
for _, update := range changes.Updates {
- updateReqs, err := b.addUpdate(ctx, batchWriter, batchReader, sha, update, repo)
+ updateReqs, err := b.addUpdate(ctx, batch.Writer, batch.Reader, sha, update, repo)
if err != nil {
return err
}
@@ -172,7 +174,7 @@ func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha st
reqs = append(reqs, updateReqs...)
}
}
- cancel()
+ batch.Close()
}
for _, filename := range changes.RemovedFilenames {