diff options
author | Lunny Xiao <xiaolunwen@gmail.com> | 2025-05-22 13:54:42 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-05-22 13:54:42 -0700 |
commit | b595f81b7908fa317879fc1223f6fc41e997ae6d (patch) | |
tree | 934ba84d07cd7c109cadd8f224f46dd6a3046db8 | |
parent | 06ccda06c49ae86187c1168a4610d814ea33362b (diff) | |
download | gitea-b595f81b7908fa317879fc1223f6fc41e997ae6d.tar.gz gitea-b595f81b7908fa317879fc1223f6fc41e997ae6d.zip |
Performance optimization for tags synchronization (#34355)
Tag synchronization is very slow for a non-mirror repository with
many tags, especially when forking. This PR makes tag synchronization
for all repositories use the same function and removes the
low-performance synchronization function. The commit count of a tag is
no longer stored in the database when syncing. Since the commit count is
always read from cache or git data, `NumCommits` in the release table is
updated on the first read from git data.
-rw-r--r-- | cmd/hook.go | 2 | ||||
-rw-r--r-- | models/repo/release.go | 61 | ||||
-rw-r--r-- | modules/repository/repo.go | 133 | ||||
-rw-r--r-- | services/context/repo.go | 9 | ||||
-rw-r--r-- | services/repository/migrate.go | 4 | ||||
-rw-r--r-- | services/repository/push.go | 10 | ||||
-rw-r--r-- | templates/repo/release/list.tmpl | 2 |
7 files changed, 31 insertions, 190 deletions
diff --git a/cmd/hook.go b/cmd/hook.go index 41e3c3ce34..6f0aa5a203 100644 --- a/cmd/hook.go +++ b/cmd/hook.go @@ -24,7 +24,7 @@ import ( ) const ( - hookBatchSize = 30 + hookBatchSize = 500 ) var ( diff --git a/models/repo/release.go b/models/repo/release.go index 663d310bc0..06cfa37342 100644 --- a/models/repo/release.go +++ b/models/repo/release.go @@ -161,6 +161,11 @@ func UpdateRelease(ctx context.Context, rel *Release) error { return err } +func UpdateReleaseNumCommits(ctx context.Context, rel *Release) error { + _, err := db.GetEngine(ctx).ID(rel.ID).Cols("num_commits").Update(rel) + return err +} + // AddReleaseAttachments adds a release attachments func AddReleaseAttachments(ctx context.Context, releaseID int64, attachmentUUIDs []string) (err error) { // Check attachments @@ -418,8 +423,8 @@ func UpdateReleasesMigrationsByType(ctx context.Context, gitServiceType structs. return err } -// PushUpdateDeleteTagsContext updates a number of delete tags with context -func PushUpdateDeleteTagsContext(ctx context.Context, repo *Repository, tags []string) error { +// PushUpdateDeleteTags updates a number of delete tags with context +func PushUpdateDeleteTags(ctx context.Context, repo *Repository, tags []string) error { if len(tags) == 0 { return nil } @@ -448,58 +453,6 @@ func PushUpdateDeleteTagsContext(ctx context.Context, repo *Repository, tags []s return nil } -// PushUpdateDeleteTag must be called for any push actions to delete tag -func PushUpdateDeleteTag(ctx context.Context, repo *Repository, tagName string) error { - rel, err := GetRelease(ctx, repo.ID, tagName) - if err != nil { - if IsErrReleaseNotExist(err) { - return nil - } - return fmt.Errorf("GetRelease: %w", err) - } - if rel.IsTag { - if _, err = db.DeleteByID[Release](ctx, rel.ID); err != nil { - return fmt.Errorf("Delete: %w", err) - } - } else { - rel.IsDraft = true - rel.NumCommits = 0 - rel.Sha1 = "" - if _, err = db.GetEngine(ctx).ID(rel.ID).AllCols().Update(rel); err != nil { - return 
fmt.Errorf("Update: %w", err) - } - } - - return nil -} - -// SaveOrUpdateTag must be called for any push actions to add tag -func SaveOrUpdateTag(ctx context.Context, repo *Repository, newRel *Release) error { - rel, err := GetRelease(ctx, repo.ID, newRel.TagName) - if err != nil && !IsErrReleaseNotExist(err) { - return fmt.Errorf("GetRelease: %w", err) - } - - if rel == nil { - rel = newRel - if _, err = db.GetEngine(ctx).Insert(rel); err != nil { - return fmt.Errorf("InsertOne: %w", err) - } - } else { - rel.Sha1 = newRel.Sha1 - rel.CreatedUnix = newRel.CreatedUnix - rel.NumCommits = newRel.NumCommits - rel.IsDraft = false - if rel.IsTag && newRel.PublisherID > 0 { - rel.PublisherID = newRel.PublisherID - } - if _, err = db.GetEngine(ctx).ID(rel.ID).AllCols().Update(rel); err != nil { - return fmt.Errorf("Update: %w", err) - } - } - return nil -} - // RemapExternalUser ExternalUserRemappable interface func (r *Release) RemapExternalUser(externalName string, externalID, userID int64) error { r.OriginalAuthor = externalName diff --git a/modules/repository/repo.go b/modules/repository/repo.go index bc147a4dd5..ad4a53b858 100644 --- a/modules/repository/repo.go +++ b/modules/repository/repo.go @@ -9,13 +9,10 @@ import ( "fmt" "io" "strings" - "time" "code.gitea.io/gitea/models/db" git_model "code.gitea.io/gitea/models/git" repo_model "code.gitea.io/gitea/models/repo" - user_model "code.gitea.io/gitea/models/user" - "code.gitea.io/gitea/modules/container" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/gitrepo" "code.gitea.io/gitea/modules/lfs" @@ -59,118 +56,6 @@ func SyncRepoTags(ctx context.Context, repoID int64) error { return SyncReleasesWithTags(ctx, repo, gitRepo) } -// SyncReleasesWithTags synchronizes release table with repository tags -func SyncReleasesWithTags(ctx context.Context, repo *repo_model.Repository, gitRepo *git.Repository) error { - log.Debug("SyncReleasesWithTags: in Repo[%d:%s/%s]", repo.ID, repo.OwnerName, repo.Name) - - // 
optimized procedure for pull-mirrors which saves a lot of time (in - // particular for repos with many tags). - if repo.IsMirror { - return pullMirrorReleaseSync(ctx, repo, gitRepo) - } - - existingRelTags := make(container.Set[string]) - opts := repo_model.FindReleasesOptions{ - IncludeDrafts: true, - IncludeTags: true, - ListOptions: db.ListOptions{PageSize: 50}, - RepoID: repo.ID, - } - for page := 1; ; page++ { - opts.Page = page - rels, err := db.Find[repo_model.Release](gitRepo.Ctx, opts) - if err != nil { - return fmt.Errorf("unable to GetReleasesByRepoID in Repo[%d:%s/%s]: %w", repo.ID, repo.OwnerName, repo.Name, err) - } - if len(rels) == 0 { - break - } - for _, rel := range rels { - if rel.IsDraft { - continue - } - commitID, err := gitRepo.GetTagCommitID(rel.TagName) - if err != nil && !git.IsErrNotExist(err) { - return fmt.Errorf("unable to GetTagCommitID for %q in Repo[%d:%s/%s]: %w", rel.TagName, repo.ID, repo.OwnerName, repo.Name, err) - } - if git.IsErrNotExist(err) || commitID != rel.Sha1 { - if err := repo_model.PushUpdateDeleteTag(ctx, repo, rel.TagName); err != nil { - return fmt.Errorf("unable to PushUpdateDeleteTag: %q in Repo[%d:%s/%s]: %w", rel.TagName, repo.ID, repo.OwnerName, repo.Name, err) - } - } else { - existingRelTags.Add(strings.ToLower(rel.TagName)) - } - } - } - - _, err := gitRepo.WalkReferences(git.ObjectTag, 0, 0, func(sha1, refname string) error { - tagName := strings.TrimPrefix(refname, git.TagPrefix) - if existingRelTags.Contains(strings.ToLower(tagName)) { - return nil - } - - if err := PushUpdateAddTag(ctx, repo, gitRepo, tagName, sha1, refname); err != nil { - // sometimes, some tags will be sync failed. i.e. 
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tag/?h=v2.6.11 - // this is a tree object, not a tag object which created before git - log.Error("unable to PushUpdateAddTag: %q to Repo[%d:%s/%s]: %v", tagName, repo.ID, repo.OwnerName, repo.Name, err) - } - - return nil - }) - return err -} - -// PushUpdateAddTag must be called for any push actions to add tag -func PushUpdateAddTag(ctx context.Context, repo *repo_model.Repository, gitRepo *git.Repository, tagName, sha1, refname string) error { - tag, err := gitRepo.GetTagWithID(sha1, tagName) - if err != nil { - return fmt.Errorf("unable to GetTag: %w", err) - } - commit, err := gitRepo.GetTagCommit(tag.Name) - if err != nil { - return fmt.Errorf("unable to get tag Commit: %w", err) - } - - sig := tag.Tagger - if sig == nil { - sig = commit.Author - } - if sig == nil { - sig = commit.Committer - } - - var author *user_model.User - createdAt := time.Unix(1, 0) - - if sig != nil { - author, err = user_model.GetUserByEmail(ctx, sig.Email) - if err != nil && !user_model.IsErrUserNotExist(err) { - return fmt.Errorf("unable to GetUserByEmail for %q: %w", sig.Email, err) - } - createdAt = sig.When - } - - commitsCount, err := commit.CommitsCount() - if err != nil { - return fmt.Errorf("unable to get CommitsCount: %w", err) - } - - rel := repo_model.Release{ - RepoID: repo.ID, - TagName: tagName, - LowerTagName: strings.ToLower(tagName), - Sha1: commit.ID.String(), - NumCommits: commitsCount, - CreatedUnix: timeutil.TimeStamp(createdAt.Unix()), - IsTag: true, - } - if author != nil { - rel.PublisherID = author.ID - } - - return repo_model.SaveOrUpdateTag(ctx, repo, &rel) -} - // StoreMissingLfsObjectsInRepository downloads missing LFS objects func StoreMissingLfsObjectsInRepository(ctx context.Context, repo *repo_model.Repository, gitRepo *git.Repository, lfsClient lfs.Client) error { contentStore := lfs.NewContentStore() @@ -286,18 +171,19 @@ func (shortRelease) TableName() string { return "release" } 
-// pullMirrorReleaseSync is a pull-mirror specific tag<->release table +// SyncReleasesWithTags is a tag<->release table // synchronization which overwrites all Releases from the repository tags. This // can be relied on since a pull-mirror is always identical to its -// upstream. Hence, after each sync we want the pull-mirror release set to be +// upstream. Hence, after each sync we want the release set to be // identical to the upstream tag set. This is much more efficient for // repositories like https://github.com/vim/vim (with over 13000 tags). -func pullMirrorReleaseSync(ctx context.Context, repo *repo_model.Repository, gitRepo *git.Repository) error { - log.Trace("pullMirrorReleaseSync: rebuilding releases for pull-mirror Repo[%d:%s/%s]", repo.ID, repo.OwnerName, repo.Name) - tags, numTags, err := gitRepo.GetTagInfos(0, 0) +func SyncReleasesWithTags(ctx context.Context, repo *repo_model.Repository, gitRepo *git.Repository) error { + log.Debug("SyncReleasesWithTags: in Repo[%d:%s/%s]", repo.ID, repo.OwnerName, repo.Name) + tags, _, err := gitRepo.GetTagInfos(0, 0) if err != nil { return fmt.Errorf("unable to GetTagInfos in pull-mirror Repo[%d:%s/%s]: %w", repo.ID, repo.OwnerName, repo.Name, err) } + var added, deleted, updated int err = db.WithTx(ctx, func(ctx context.Context) error { dbReleases, err := db.Find[shortRelease](ctx, repo_model.FindReleasesOptions{ RepoID: repo.ID, @@ -318,9 +204,7 @@ func pullMirrorReleaseSync(ctx context.Context, repo *repo_model.Repository, git TagName: tag.Name, LowerTagName: strings.ToLower(tag.Name), Sha1: tag.Object.String(), - // NOTE: ignored, since NumCommits are unused - // for pull-mirrors (only relevant when - // displaying releases, IsTag: false) + // NOTE: ignored, The NumCommits value is calculated and cached on demand when the UI requires it. 
NumCommits: -1, CreatedUnix: timeutil.TimeStamp(tag.Tagger.When.Unix()), IsTag: true, @@ -349,13 +233,14 @@ func pullMirrorReleaseSync(ctx context.Context, repo *repo_model.Repository, git return fmt.Errorf("unable to update tag %s for pull-mirror Repo[%d:%s/%s]: %w", tag.Name, repo.ID, repo.OwnerName, repo.Name, err) } } + added, deleted, updated = len(deletes), len(updates), len(inserts) return nil }) if err != nil { return fmt.Errorf("unable to rebuild release table for pull-mirror Repo[%d:%s/%s]: %w", repo.ID, repo.OwnerName, repo.Name, err) } - log.Trace("pullMirrorReleaseSync: done rebuilding %d releases", numTags) + log.Trace("SyncReleasesWithTags: %d tags added, %d tags deleted, %d tags updated", added, deleted, updated) return nil } diff --git a/services/context/repo.go b/services/context/repo.go index ea772c508d..61841aa90b 100644 --- a/services/context/repo.go +++ b/services/context/repo.go @@ -936,6 +936,15 @@ func RepoRefByType(detectRefType git.RefType) func(*Context) { ctx.ServerError("GetCommitsCount", err) return } + if ctx.Repo.RefFullName.IsTag() { + rel, err := repo_model.GetRelease(ctx, ctx.Repo.Repository.ID, ctx.Repo.RefFullName.TagName()) + if err == nil && rel.NumCommits <= 0 { + rel.NumCommits = ctx.Repo.CommitsCount + if err := repo_model.UpdateReleaseNumCommits(ctx, rel); err != nil { + log.Error("UpdateReleaseNumCommits", err) + } + } + } ctx.Data["CommitsCount"] = ctx.Repo.CommitsCount ctx.Repo.GitRepo.LastCommitCache = git.NewLastCommitCache(ctx.Repo.CommitsCount, ctx.Repo.Repository.FullName(), ctx.Repo.GitRepo, cache.GetCache()) } diff --git a/services/repository/migrate.go b/services/repository/migrate.go index 003be1a9ab..0859158b89 100644 --- a/services/repository/migrate.go +++ b/services/repository/migrate.go @@ -149,9 +149,9 @@ func MigrateRepositoryGitData(ctx context.Context, u *user_model.User, return repo, fmt.Errorf("SyncRepoBranchesWithRepo: %v", err) } + // if releases migration are not requested, we will sync all tags 
here + // otherwise, the releases sync will be done out of this function if !opts.Releases { - // note: this will greatly improve release (tag) sync - // for pull-mirrors with many tags repo.IsMirror = opts.Mirror if err = repo_module.SyncReleasesWithTags(ctx, repo, gitRepo); err != nil { log.Error("Failed to synchronize tags to releases for repository: %v", err) diff --git a/services/repository/push.go b/services/repository/push.go index 31794034ba..3735c5f3a4 100644 --- a/services/repository/push.go +++ b/services/repository/push.go @@ -344,7 +344,7 @@ func pushDeleteBranch(ctx context.Context, repo *repo_model.Repository, pusher * // PushUpdateAddDeleteTags updates a number of added and delete tags func PushUpdateAddDeleteTags(ctx context.Context, repo *repo_model.Repository, gitRepo *git.Repository, addTags, delTags []string) error { return db.WithTx(ctx, func(ctx context.Context) error { - if err := repo_model.PushUpdateDeleteTagsContext(ctx, repo, delTags); err != nil { + if err := repo_model.PushUpdateDeleteTags(ctx, repo, delTags); err != nil { return err } return pushUpdateAddTags(ctx, repo, gitRepo, addTags) @@ -415,11 +415,6 @@ func pushUpdateAddTags(ctx context.Context, repo *repo_model.Repository, gitRepo createdAt = sig.When } - commitsCount, err := commit.CommitsCount() - if err != nil { - return fmt.Errorf("CommitsCount: %w", err) - } - rel, has := relMap[lowerTag] parts := strings.SplitN(tag.Message, "\n", 2) @@ -435,7 +430,7 @@ func pushUpdateAddTags(ctx context.Context, repo *repo_model.Repository, gitRepo LowerTagName: lowerTag, Target: "", Sha1: commit.ID.String(), - NumCommits: commitsCount, + NumCommits: -1, // the commits count will be updated when the UI needs it Note: note, IsDraft: false, IsPrerelease: false, @@ -450,7 +445,6 @@ func pushUpdateAddTags(ctx context.Context, repo *repo_model.Repository, gitRepo } else { rel.Sha1 = commit.ID.String() rel.CreatedUnix = timeutil.TimeStamp(createdAt.Unix()) - rel.NumCommits = commitsCount if 
rel.IsTag { rel.Title = parts[0] rel.Note = note diff --git a/templates/repo/release/list.tmpl b/templates/repo/release/list.tmpl index 01ec0ff188..1a7d911acb 100644 --- a/templates/repo/release/list.tmpl +++ b/templates/repo/release/list.tmpl @@ -61,7 +61,7 @@ {{if $release.CreatedUnix}} <span class="time">{{DateUtils.TimeSince $release.CreatedUnix}}</span> {{end}} - {{if and (not $release.IsDraft) ($.Permission.CanRead ctx.Consts.RepoUnitTypeCode)}} + {{if and (gt $release.NumCommits 0) (not $release.IsDraft) ($.Permission.CanRead ctx.Consts.RepoUnitTypeCode)}} | <span class="ahead"><a href="{{$.RepoLink}}/compare/{{$release.TagName | PathEscapeSegments}}...{{$release.TargetBehind | PathEscapeSegments}}">{{ctx.Locale.Tr "repo.release.ahead.commits" $release.NumCommitsBehind}}</a> {{ctx.Locale.Tr "repo.release.ahead.target" $release.TargetBehind}}</span> {{end}} </p> |