summary | refs | log | tree | commit | diff | stats
path: root/services/repository/lfs.go
diff options
context:
space:
mode:
author: zeripath <art27@cantab.net> 2023-01-16 19:50:53 +0000
committer: GitHub <noreply@github.com> 2023-01-16 13:50:53 -0600
commit: 2cc3a6381cab34113fd2dc2c24ef0efc22c4336d (patch)
tree: 6c2e2db756ae2818471de99bc5dca4d2aeddadc7 /services/repository/lfs.go
parent: 04c97aa36473bc0070a2fe46e86dc645dc75ee85 (diff)
download: gitea-2cc3a6381cab34113fd2dc2c24ef0efc22c4336d.tar.gz
gitea-2cc3a6381cab34113fd2dc2c24ef0efc22c4336d.zip
Add cron method to gc LFS MetaObjects (#22385)
This PR adds a task to the cron service that garbage collects LFS meta objects. Because a repository may have a large number of LFSMetaObjects, an `updated` column is added to this table and is used to perform a generational GC, in an attempt to reduce the amount of work done on each run. (There may need to be a bit more work here, but this is probably enough for the moment.) Fix #7045 Signed-off-by: Andrew Thornton <art27@cantab.net>
Diffstat (limited to 'services/repository/lfs.go')
-rw-r--r-- services/repository/lfs.go 89
1 files changed, 62 insertions, 27 deletions
diff --git a/services/repository/lfs.go b/services/repository/lfs.go
index 7806e20a9f..aeb808a72f 100644
--- a/services/repository/lfs.go
+++ b/services/repository/lfs.go
@@ -5,49 +5,67 @@ package repository
import (
"context"
+ "errors"
"fmt"
"time"
- "code.gitea.io/gitea/models/db"
git_model "code.gitea.io/gitea/models/git"
repo_model "code.gitea.io/gitea/models/repo"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/lfs"
"code.gitea.io/gitea/modules/log"
-
- "xorm.io/builder"
+ "code.gitea.io/gitea/modules/setting"
)
-func GarbageCollectLFSMetaObjects(ctx context.Context, logger log.Logger, autofix bool) error {
+// GarbageCollectLFSMetaObjectsOptions provides options for GarbageCollectLFSMetaObjects function
+type GarbageCollectLFSMetaObjectsOptions struct {
+ Logger log.Logger
+ AutoFix bool
+ OlderThan time.Time
+ UpdatedLessRecentlyThan time.Time
+ NumberToCheckPerRepo int64
+ ProportionToCheckPerRepo float64
+}
+
+// GarbageCollectLFSMetaObjects garbage collects LFS objects for all repositories
+func GarbageCollectLFSMetaObjects(ctx context.Context, opts GarbageCollectLFSMetaObjectsOptions) error {
log.Trace("Doing: GarbageCollectLFSMetaObjects")
+ defer log.Trace("Finished: GarbageCollectLFSMetaObjects")
- if err := db.Iterate(
- ctx,
- builder.And(builder.Gt{"id": 0}),
- func(ctx context.Context, repo *repo_model.Repository) error {
- return GarbageCollectLFSMetaObjectsForRepo(ctx, repo, logger, autofix)
- },
- ); err != nil {
- return err
+ if !setting.LFS.StartServer {
+ if opts.Logger != nil {
+ opts.Logger.Info("LFS support is disabled")
+ }
+ return nil
}
- log.Trace("Finished: GarbageCollectLFSMetaObjects")
- return nil
+ return git_model.IterateRepositoryIDsWithLFSMetaObjects(ctx, func(ctx context.Context, repoID, count int64) error {
+ repo, err := repo_model.GetRepositoryByID(ctx, repoID)
+ if err != nil {
+ return err
+ }
+
+ if newMinimum := int64(float64(count) * opts.ProportionToCheckPerRepo); newMinimum > opts.NumberToCheckPerRepo && opts.NumberToCheckPerRepo != 0 {
+ opts.NumberToCheckPerRepo = newMinimum
+ }
+ return GarbageCollectLFSMetaObjectsForRepo(ctx, repo, opts)
+ })
}
-func GarbageCollectLFSMetaObjectsForRepo(ctx context.Context, repo *repo_model.Repository, logger log.Logger, autofix bool) error {
- if logger != nil {
- logger.Info("Checking %-v", repo)
+// GarbageCollectLFSMetaObjectsForRepo garbage collects LFS objects for a specific repository
+func GarbageCollectLFSMetaObjectsForRepo(ctx context.Context, repo *repo_model.Repository, opts GarbageCollectLFSMetaObjectsOptions) error {
+ if opts.Logger != nil {
+ opts.Logger.Info("Checking %-v", repo)
}
- total, orphaned, collected, deleted := 0, 0, 0, 0
- if logger != nil {
+ total, orphaned, collected, deleted := int64(0), 0, 0, 0
+ if opts.Logger != nil {
defer func() {
if orphaned == 0 {
- logger.Info("Found %d total LFSMetaObjects in %-v", total, repo)
- } else if !autofix {
- logger.Info("Found %d/%d orphaned LFSMetaObjects in %-v", orphaned, total, repo)
+ opts.Logger.Info("Found %d total LFSMetaObjects in %-v", total, repo)
+ } else if !opts.AutoFix {
+ opts.Logger.Info("Found %d/%d orphaned LFSMetaObjects in %-v", orphaned, total, repo)
} else {
- logger.Info("Collected %d/%d orphaned/%d total LFSMetaObjects in %-v. %d removed from storage.", collected, orphaned, total, repo, deleted)
+ opts.Logger.Info("Collected %d/%d orphaned/%d total LFSMetaObjects in %-v. %d removed from storage.", collected, orphaned, total, repo, deleted)
}
}()
}
@@ -60,17 +78,21 @@ func GarbageCollectLFSMetaObjectsForRepo(ctx context.Context, repo *repo_model.R
defer gitRepo.Close()
store := lfs.NewContentStore()
+ errStop := errors.New("STOPERR")
- return git_model.IterateLFSMetaObjectsForRepo(ctx, repo.ID, func(ctx context.Context, metaObject *git_model.LFSMetaObject, count int64) error {
+ err = git_model.IterateLFSMetaObjectsForRepo(ctx, repo.ID, func(ctx context.Context, metaObject *git_model.LFSMetaObject, count int64) error {
+ if opts.NumberToCheckPerRepo > 0 && total > opts.NumberToCheckPerRepo {
+ return errStop
+ }
total++
pointerSha := git.ComputeBlobHash([]byte(metaObject.Pointer.StringContent()))
if gitRepo.IsObjectExist(pointerSha.String()) {
- return nil
+ return git_model.MarkLFSMetaObject(ctx, metaObject.ID)
}
orphaned++
- if !autofix {
+ if !opts.AutoFix {
return nil
}
// Non-existent pointer file
@@ -100,6 +122,19 @@ func GarbageCollectLFSMetaObjectsForRepo(ctx context.Context, repo *repo_model.R
//
// It is likely that a week is potentially excessive but it should definitely be enough that any
// unassociated LFS object is genuinely unassociated.
- OlderThan: time.Now().Add(-24 * 7 * time.Hour),
+ OlderThan: opts.OlderThan,
+ UpdatedLessRecentlyThan: opts.UpdatedLessRecentlyThan,
+ OrderByUpdated: true,
+ LoopFunctionAlwaysUpdates: true,
})
+
+ if err == errStop {
+ if opts.Logger != nil {
+ opts.Logger.Info("Processing stopped at %d total LFSMetaObjects in %-v", total, repo)
+ }
+ return nil
+ } else if err != nil {
+ return err
+ }
+ return nil
}