aboutsummaryrefslogtreecommitdiffstats
path: root/services
diff options
context:
space:
mode:
author zeripath <art27@cantab.net> 2022-12-15 20:44:16 +0000
committer GitHub <noreply@github.com> 2022-12-15 20:44:16 +0000
commit 651fe4bb7dda16dee48b31ee964493a05e979c78 (patch)
tree fb15044c4b904ad0cef29152209a4e9662dc187c /services
parent 3243dbe1a9e3ff7031f243c72232bcc31bbdec75 (diff)
download gitea-651fe4bb7dda16dee48b31ee964493a05e979c78.tar.gz
gitea-651fe4bb7dda16dee48b31ee964493a05e979c78.zip
Add doctor command for full GC of LFS (#21978)
The recent PR adding orphaned checks to the LFS storage is not sufficient to completely GC LFS, as it is possible for LFSMetaObjects to remain associated with repos but still need to be garbage collected. Imagine a situation where a branch is uploaded containing LFS files but that branch is later completely deleted. The LFSMetaObjects will remain associated with the Repository but the Repository will no longer contain any pointers to the object. This PR adds a second doctor command to perform a full GC. Signed-off-by: Andrew Thornton <art27@cantab.net>
Diffstat (limited to 'services')
-rw-r--r-- services/cron/tasks_basic.go 2
-rw-r--r-- services/repository/check.go 86
-rw-r--r-- services/repository/lfs.go 105
3 files changed, 154 insertions, 39 deletions
diff --git a/services/cron/tasks_basic.go b/services/cron/tasks_basic.go
index acf3896b71..05aef6623d 100644
--- a/services/cron/tasks_basic.go
+++ b/services/cron/tasks_basic.go
@@ -63,7 +63,7 @@ func registerRepoHealthCheck() {
for _, arg := range rhcConfig.Args {
args = append(args, git.CmdArg(arg))
}
- return repo_service.GitFsck(ctx, rhcConfig.Timeout, args)
+ return repo_service.GitFsckRepos(ctx, rhcConfig.Timeout, args)
})
}
diff --git a/services/repository/check.go b/services/repository/check.go
index 6e29dc93d1..293cb04d38 100644
--- a/services/repository/check.go
+++ b/services/repository/check.go
@@ -22,8 +22,8 @@ import (
"xorm.io/builder"
)
-// GitFsck calls 'git fsck' to check repository health.
-func GitFsck(ctx context.Context, timeout time.Duration, args []git.CmdArg) error {
+// GitFsckRepos calls 'git fsck' to check repository health.
+func GitFsckRepos(ctx context.Context, timeout time.Duration, args []git.CmdArg) error {
log.Trace("Doing: GitFsck")
if err := db.Iterate(
@@ -35,15 +35,7 @@ func GitFsck(ctx context.Context, timeout time.Duration, args []git.CmdArg) erro
return db.ErrCancelledf("before fsck of %s", repo.FullName())
default:
}
- log.Trace("Running health check on repository %v", repo)
- repoPath := repo.RepoPath()
- if err := git.Fsck(ctx, repoPath, timeout, args...); err != nil {
- log.Warn("Failed to health check repository (%v): %v", repo, err)
- if err = system_model.CreateRepositoryNotice("Failed to health check repository (%s): %v", repo.FullName(), err); err != nil {
- log.Error("CreateRepositoryNotice: %v", err)
- }
- }
- return nil
+ return GitFsckRepo(ctx, repo, timeout, args)
},
); err != nil {
log.Trace("Error: GitFsck: %v", err)
@@ -54,6 +46,19 @@ func GitFsck(ctx context.Context, timeout time.Duration, args []git.CmdArg) erro
return nil
}
+// GitFsckRepo runs 'git fsck' against a single repository.
+//
+// A failing fsck is treated as best-effort: it is logged as a warning and recorded
+// as a repository notice, but it is not propagated, so this function always
+// returns nil.
+func GitFsckRepo(ctx context.Context, repo *repo_model.Repository, timeout time.Duration, args []git.CmdArg) error {
+	log.Trace("Running health check on repository %-v", repo)
+
+	fsckErr := git.Fsck(ctx, repo.RepoPath(), timeout, args...)
+	if fsckErr == nil {
+		return nil
+	}
+
+	log.Warn("Failed to health check repository (%-v): %v", repo, fsckErr)
+	noticeErr := system_model.CreateRepositoryNotice("Failed to health check repository (%s): %v", repo.FullName(), fsckErr)
+	if noticeErr != nil {
+		log.Error("CreateRepositoryNotice: %v", noticeErr)
+	}
+	return nil
+}
+
// GitGcRepos calls 'git gc' to remove unnecessary files and optimize the local repository
func GitGcRepos(ctx context.Context, timeout time.Duration, args ...git.CmdArg) error {
log.Trace("Doing: GitGcRepos")
@@ -68,33 +73,7 @@ func GitGcRepos(ctx context.Context, timeout time.Duration, args ...git.CmdArg)
return db.ErrCancelledf("before GC of %s", repo.FullName())
default:
}
- log.Trace("Running git gc on %v", repo)
- command := git.NewCommand(ctx, args...).
- SetDescription(fmt.Sprintf("Repository Garbage Collection: %s", repo.FullName()))
- var stdout string
- var err error
- stdout, _, err = command.RunStdString(&git.RunOpts{Timeout: timeout, Dir: repo.RepoPath()})
-
- if err != nil {
- log.Error("Repository garbage collection failed for %v. Stdout: %s\nError: %v", repo, stdout, err)
- desc := fmt.Sprintf("Repository garbage collection failed for %s. Stdout: %s\nError: %v", repo.RepoPath(), stdout, err)
- if err = system_model.CreateRepositoryNotice(desc); err != nil {
- log.Error("CreateRepositoryNotice: %v", err)
- }
- return fmt.Errorf("Repository garbage collection failed in repo: %s: Error: %w", repo.FullName(), err)
- }
-
- // Now update the size of the repository
- if err := repo_module.UpdateRepoSize(ctx, repo); err != nil {
- log.Error("Updating size as part of garbage collection failed for %v. Stdout: %s\nError: %v", repo, stdout, err)
- desc := fmt.Sprintf("Updating size as part of garbage collection failed for %s. Stdout: %s\nError: %v", repo.RepoPath(), stdout, err)
- if err = system_model.CreateRepositoryNotice(desc); err != nil {
- log.Error("CreateRepositoryNotice: %v", err)
- }
- return fmt.Errorf("Updating size as part of garbage collection failed in repo: %s: Error: %w", repo.FullName(), err)
- }
-
- return nil
+ return GitGcRepo(ctx, repo, timeout, args)
},
); err != nil {
return err
@@ -104,6 +83,37 @@ func GitGcRepos(ctx context.Context, timeout time.Duration, args ...git.CmdArg)
return nil
}
+// GitGcRepo calls 'git gc' on a single repository to remove unnecessary files and
+// optimize it, then updates the stored size of the repository.
+//
+// args is handed to git.NewCommand unchanged and timeout is used as the run timeout
+// of the git command. If either the git command or the size update fails, the
+// failure is logged, a repository notice is created, and an error wrapping the
+// original failure is returned.
+func GitGcRepo(ctx context.Context, repo *repo_model.Repository, timeout time.Duration, args []git.CmdArg) error {
+	log.Trace("Running git gc on %-v", repo)
+	command := git.NewCommand(ctx, args...).
+		SetDescription(fmt.Sprintf("Repository Garbage Collection: %s", repo.FullName()))
+	var stdout string
+	var err error
+	stdout, _, err = command.RunStdString(&git.RunOpts{Timeout: timeout, Dir: repo.RepoPath()})
+
+	if err != nil {
+		log.Error("Repository garbage collection failed for %-v. Stdout: %s\nError: %v", repo, stdout, err)
+		desc := fmt.Sprintf("Repository garbage collection failed for %s. Stdout: %s\nError: %v", repo.RepoPath(), stdout, err)
+		// Use a separate variable for the notice result so the gc error wrapped
+		// below is not overwritten (with nil on success, or with the notice error).
+		if noticeErr := system_model.CreateRepositoryNotice(desc); noticeErr != nil {
+			log.Error("CreateRepositoryNotice: %v", noticeErr)
+		}
+		return fmt.Errorf("Repository garbage collection failed in repo: %s: Error: %w", repo.FullName(), err)
+	}
+
+	// Now update the size of the repository
+	if err := repo_module.UpdateRepoSize(ctx, repo); err != nil {
+		log.Error("Updating size as part of garbage collection failed for %-v. Stdout: %s\nError: %v", repo, stdout, err)
+		desc := fmt.Sprintf("Updating size as part of garbage collection failed for %s. Stdout: %s\nError: %v", repo.RepoPath(), stdout, err)
+		// Same as above: keep the size-update error intact for wrapping.
+		if noticeErr := system_model.CreateRepositoryNotice(desc); noticeErr != nil {
+			log.Error("CreateRepositoryNotice: %v", noticeErr)
+		}
+		return fmt.Errorf("Updating size as part of garbage collection failed in repo: %s: Error: %w", repo.FullName(), err)
+	}
+
+	return nil
+}
+
func gatherMissingRepoRecords(ctx context.Context) ([]*repo_model.Repository, error) {
repos := make([]*repo_model.Repository, 0, 10)
if err := db.Iterate(
diff --git a/services/repository/lfs.go b/services/repository/lfs.go
new file mode 100644
index 0000000000..0e88d359a8
--- /dev/null
+++ b/services/repository/lfs.go
@@ -0,0 +1,105 @@
+// Copyright 2022 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package repository
+
+import (
+ "context"
+ "fmt"
+ "time"
+
+ "code.gitea.io/gitea/models/db"
+ git_model "code.gitea.io/gitea/models/git"
+ repo_model "code.gitea.io/gitea/models/repo"
+ "code.gitea.io/gitea/modules/git"
+ "code.gitea.io/gitea/modules/lfs"
+ "code.gitea.io/gitea/modules/log"
+
+ "xorm.io/builder"
+)
+
+// GarbageCollectLFSMetaObjects runs LFS meta object garbage collection over the
+// repositories selected by the condition id > 0.
+//
+// Each repository is passed to GarbageCollectLFSMetaObjectsForRepo together with
+// logger and autofix, which are forwarded unchanged. Before each repository the
+// context is checked and a cancellation error is returned if it is already done.
+// Any error returned by db.Iterate is returned to the caller.
+func GarbageCollectLFSMetaObjects(ctx context.Context, logger log.Logger, autofix bool) error {
+	log.Trace("Doing: GarbageCollectLFSMetaObjects")
+
+	if err := db.Iterate(
+		ctx,
+		builder.And(builder.Gt{"id": 0}),
+		func(ctx context.Context, repo *repo_model.Repository) error {
+			// Stop promptly between repositories once the context is cancelled.
+			select {
+			case <-ctx.Done():
+				return db.ErrCancelledf("before LFS garbage collection of %s", repo.FullName())
+			default:
+			}
+			return GarbageCollectLFSMetaObjectsForRepo(ctx, repo, logger, autofix)
+		},
+	); err != nil {
+		return err
+	}
+
+	log.Trace("Finished: GarbageCollectLFSMetaObjects")
+	return nil
+}
+
+// GarbageCollectLFSMetaObjectsForRepo checks the LFS meta objects of one repository
+// and, when autofix is true, removes the orphaned ones.
+//
+// A meta object counts as orphaned when the blob hash computed from its pointer
+// content does not exist as an object in the git repository. Only meta objects
+// older than one week are visited (see the OlderThan option below). With autofix
+// set, each orphaned meta object is removed through RemoveLFSMetaObjectByOidFn, and
+// its content is deleted from the LFS content store when the count handed to the
+// removal callback is zero (presumably the number of remaining references to the
+// object; confirm against git_model).
+//
+// If logger is non-nil, a "Checking" line is written at the start and a summary of
+// the total/orphaned/collected/deleted counters is written when the function returns.
+// It returns an error if the git repository cannot be opened or a meta object
+// cannot be removed.
+func GarbageCollectLFSMetaObjectsForRepo(ctx context.Context, repo *repo_model.Repository, logger log.Logger, autofix bool) error {
+	if logger != nil {
+		logger.Info("Checking %-v", repo)
+	}
+	total, orphaned, collected, deleted := 0, 0, 0, 0
+	if logger != nil {
+		// Deferred so the summary is emitted on every return path, including errors.
+		defer func() {
+			switch {
+			case orphaned == 0:
+				logger.Info("Found %d total LFSMetaObjects in %-v", total, repo)
+			case !autofix:
+				logger.Info("Found %d/%d orphaned LFSMetaObjects in %-v", orphaned, total, repo)
+			default:
+				logger.Info("Collected %d/%d orphaned/%d total LFSMetaObjects in %-v. %d removed from storage.", collected, orphaned, total, repo, deleted)
+			}
+		}()
+	}
+
+	gitRepo, err := git.OpenRepository(ctx, repo.RepoPath())
+	if err != nil {
+		log.Error("Unable to open git repository %-v: %v", repo, err)
+		return err
+	}
+	defer gitRepo.Close()
+
+	store := lfs.NewContentStore()
+
+	return git_model.IterateLFSMetaObjectsForRepo(ctx, repo.ID, func(ctx context.Context, metaObject *git_model.LFSMetaObject, count int64) error {
+		total++
+		pointerSha := git.ComputeBlobHash([]byte(metaObject.Pointer.StringContent()))
+
+		if gitRepo.IsObjectExist(pointerSha.String()) {
+			return nil
+		}
+		orphaned++
+
+		if !autofix {
+			return nil
+		}
+		// Non-existent pointer file
+		_, err := git_model.RemoveLFSMetaObjectByOidFn(repo.ID, metaObject.Oid, func(count int64) error {
+			if count > 0 {
+				return nil
+			}
+
+			// Deleting from the store is best-effort: a failure is logged and does
+			// not abort the removal, but it must not be counted as a deletion.
+			if delErr := store.Delete(metaObject.RelativePath()); delErr != nil {
+				log.Error("Unable to remove lfs metaobject %s from store: %v", metaObject.Oid, delErr)
+				return nil
+			}
+			deleted++
+			return nil
+		})
+		if err != nil {
+			return fmt.Errorf("unable to remove meta-object %s in %s: %w", metaObject.Oid, repo.FullName(), err)
+		}
+		collected++
+
+		return nil
+	}, &git_model.IterateLFSMetaObjectsForRepoOptions{
+		// Only attempt to garbage collect lfs meta objects older than a week as the order of git lfs upload
+		// and git object upload is not necessarily guaranteed. It's possible to imagine a situation whereby
+		// an LFS object is uploaded but the git branch is not uploaded immediately, or there are some rapid
+		// changes in new branches that might lead to lfs objects becoming temporarily unassociated with git
+		// objects.
+		//
+		// It is likely that a week is potentially excessive but it should definitely be enough that any
+		// unassociated LFS object is genuinely unassociated.
+		OlderThan: time.Now().Add(-24 * 7 * time.Hour),
+	})
+}