diff options
author | zeripath <art27@cantab.net> | 2023-01-16 19:50:53 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-01-16 13:50:53 -0600 |
commit | 2cc3a6381cab34113fd2dc2c24ef0efc22c4336d (patch) | |
tree | 6c2e2db756ae2818471de99bc5dca4d2aeddadc7 /services/repository/lfs.go | |
parent | 04c97aa36473bc0070a2fe46e86dc645dc75ee85 (diff) | |
download | gitea-2cc3a6381cab34113fd2dc2c24ef0efc22c4336d.tar.gz gitea-2cc3a6381cab34113fd2dc2c24ef0efc22c4336d.zip |
Add cron method to gc LFS MetaObjects (#22385)
This PR adds a task to the cron service to allow garbage collection of
LFS meta objects. As repositories may have a large number of
LFSMetaObjects, an `updated` column is added to this table and used
to perform a generational GC in an attempt to reduce the amount of work.
(There may need to be a bit more work here but this is probably enough
for the moment.)
Fix #7045
Signed-off-by: Andrew Thornton <art27@cantab.net>
Diffstat (limited to 'services/repository/lfs.go')
-rw-r--r-- | services/repository/lfs.go | 89 |
1 file changed, 62 insertions, 27 deletions
diff --git a/services/repository/lfs.go b/services/repository/lfs.go index 7806e20a9f..aeb808a72f 100644 --- a/services/repository/lfs.go +++ b/services/repository/lfs.go @@ -5,49 +5,67 @@ package repository import ( "context" + "errors" "fmt" "time" - "code.gitea.io/gitea/models/db" git_model "code.gitea.io/gitea/models/git" repo_model "code.gitea.io/gitea/models/repo" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/lfs" "code.gitea.io/gitea/modules/log" - - "xorm.io/builder" + "code.gitea.io/gitea/modules/setting" ) -func GarbageCollectLFSMetaObjects(ctx context.Context, logger log.Logger, autofix bool) error { +// GarbageCollectLFSMetaObjectsOptions provides options for GarbageCollectLFSMetaObjects function +type GarbageCollectLFSMetaObjectsOptions struct { + Logger log.Logger + AutoFix bool + OlderThan time.Time + UpdatedLessRecentlyThan time.Time + NumberToCheckPerRepo int64 + ProportionToCheckPerRepo float64 +} + +// GarbageCollectLFSMetaObjects garbage collects LFS objects for all repositories +func GarbageCollectLFSMetaObjects(ctx context.Context, opts GarbageCollectLFSMetaObjectsOptions) error { log.Trace("Doing: GarbageCollectLFSMetaObjects") + defer log.Trace("Finished: GarbageCollectLFSMetaObjects") - if err := db.Iterate( - ctx, - builder.And(builder.Gt{"id": 0}), - func(ctx context.Context, repo *repo_model.Repository) error { - return GarbageCollectLFSMetaObjectsForRepo(ctx, repo, logger, autofix) - }, - ); err != nil { - return err + if !setting.LFS.StartServer { + if opts.Logger != nil { + opts.Logger.Info("LFS support is disabled") + } + return nil } - log.Trace("Finished: GarbageCollectLFSMetaObjects") - return nil + return git_model.IterateRepositoryIDsWithLFSMetaObjects(ctx, func(ctx context.Context, repoID, count int64) error { + repo, err := repo_model.GetRepositoryByID(ctx, repoID) + if err != nil { + return err + } + + if newMinimum := int64(float64(count) * opts.ProportionToCheckPerRepo); newMinimum > 
opts.NumberToCheckPerRepo && opts.NumberToCheckPerRepo != 0 { + opts.NumberToCheckPerRepo = newMinimum + } + return GarbageCollectLFSMetaObjectsForRepo(ctx, repo, opts) + }) } -func GarbageCollectLFSMetaObjectsForRepo(ctx context.Context, repo *repo_model.Repository, logger log.Logger, autofix bool) error { - if logger != nil { - logger.Info("Checking %-v", repo) +// GarbageCollectLFSMetaObjectsForRepo garbage collects LFS objects for a specific repository +func GarbageCollectLFSMetaObjectsForRepo(ctx context.Context, repo *repo_model.Repository, opts GarbageCollectLFSMetaObjectsOptions) error { + if opts.Logger != nil { + opts.Logger.Info("Checking %-v", repo) } - total, orphaned, collected, deleted := 0, 0, 0, 0 - if logger != nil { + total, orphaned, collected, deleted := int64(0), 0, 0, 0 + if opts.Logger != nil { defer func() { if orphaned == 0 { - logger.Info("Found %d total LFSMetaObjects in %-v", total, repo) - } else if !autofix { - logger.Info("Found %d/%d orphaned LFSMetaObjects in %-v", orphaned, total, repo) + opts.Logger.Info("Found %d total LFSMetaObjects in %-v", total, repo) + } else if !opts.AutoFix { + opts.Logger.Info("Found %d/%d orphaned LFSMetaObjects in %-v", orphaned, total, repo) } else { - logger.Info("Collected %d/%d orphaned/%d total LFSMetaObjects in %-v. %d removed from storage.", collected, orphaned, total, repo, deleted) + opts.Logger.Info("Collected %d/%d orphaned/%d total LFSMetaObjects in %-v. 
%d removed from storage.", collected, orphaned, total, repo, deleted) } }() } @@ -60,17 +78,21 @@ func GarbageCollectLFSMetaObjectsForRepo(ctx context.Context, repo *repo_model.R defer gitRepo.Close() store := lfs.NewContentStore() + errStop := errors.New("STOPERR") - return git_model.IterateLFSMetaObjectsForRepo(ctx, repo.ID, func(ctx context.Context, metaObject *git_model.LFSMetaObject, count int64) error { + err = git_model.IterateLFSMetaObjectsForRepo(ctx, repo.ID, func(ctx context.Context, metaObject *git_model.LFSMetaObject, count int64) error { + if opts.NumberToCheckPerRepo > 0 && total > opts.NumberToCheckPerRepo { + return errStop + } total++ pointerSha := git.ComputeBlobHash([]byte(metaObject.Pointer.StringContent())) if gitRepo.IsObjectExist(pointerSha.String()) { - return nil + return git_model.MarkLFSMetaObject(ctx, metaObject.ID) } orphaned++ - if !autofix { + if !opts.AutoFix { return nil } // Non-existent pointer file @@ -100,6 +122,19 @@ func GarbageCollectLFSMetaObjectsForRepo(ctx context.Context, repo *repo_model.R // // It is likely that a week is potentially excessive but it should definitely be enough that any // unassociated LFS object is genuinely unassociated. - OlderThan: time.Now().Add(-24 * 7 * time.Hour), + OlderThan: opts.OlderThan, + UpdatedLessRecentlyThan: opts.UpdatedLessRecentlyThan, + OrderByUpdated: true, + LoopFunctionAlwaysUpdates: true, }) + + if err == errStop { + if opts.Logger != nil { + opts.Logger.Info("Processing stopped at %d total LFSMetaObjects in %-v", total, repo) + } + return nil + } else if err != nil { + return err + } + return nil } |