diff options
author | zeripath <art27@cantab.net> | 2023-01-16 19:50:53 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-01-16 13:50:53 -0600 |
commit | 2cc3a6381cab34113fd2dc2c24ef0efc22c4336d (patch) | |
tree | 6c2e2db756ae2818471de99bc5dca4d2aeddadc7 /services/cron | |
parent | 04c97aa36473bc0070a2fe46e86dc645dc75ee85 (diff) | |
download | gitea-2cc3a6381cab34113fd2dc2c24ef0efc22c4336d.tar.gz gitea-2cc3a6381cab34113fd2dc2c24ef0efc22c4336d.zip |
Add cron method to gc LFS MetaObjects (#22385)
This PR adds a task to the cron service to allow garbage collection of
LFS meta objects. As repositories may have a large number of
LFSMetaObjects, an `updated` column is added to the LFSMetaObject table
and is used to perform a generational GC, in an attempt to reduce the
amount of work. (There may need to be a bit more work here, but this is
probably enough for the moment.)
Fix #7045
Signed-off-by: Andrew Thornton <art27@cantab.net>
Diffstat (limited to 'services/cron')
-rw-r--r-- | services/cron/tasks_extended.go | 43 |
1 files changed, 43 insertions, 0 deletions
diff --git a/services/cron/tasks_extended.go b/services/cron/tasks_extended.go index 4486be0c2f..520d940edf 100644 --- a/services/cron/tasks_extended.go +++ b/services/cron/tasks_extended.go @@ -175,6 +175,48 @@ func registerDeleteOldSystemNotices() { }) } +func registerGCLFS() { + if !setting.LFS.StartServer { + return + } + type GCLFSConfig struct { + OlderThanConfig + LastUpdatedMoreThanAgo time.Duration + NumberToCheckPerRepo int64 + ProportionToCheckPerRepo float64 + } + + RegisterTaskFatal("gc_lfs", &GCLFSConfig{ + OlderThanConfig: OlderThanConfig{ + BaseConfig: BaseConfig{ + Enabled: false, + RunAtStart: false, + Schedule: "@every 24h", + }, + // Only attempt to garbage collect lfs meta objects older than a week as the order of git lfs upload + // and git object upload is not necessarily guaranteed. It's possible to imagine a situation whereby + // an LFS object is uploaded but the git branch is not uploaded immediately, or there are some rapid + // changes in new branches that might lead to lfs objects becoming temporarily unassociated with git + // objects. + // + // It is likely that a week is potentially excessive but it should definitely be enough that any + // unassociated LFS object is genuinely unassociated. 
+ OlderThan: 24 * time.Hour * 7, + }, + // Only GC things that haven't been looked at in the past 3 days + LastUpdatedMoreThanAgo: 24 * time.Hour * 3, + NumberToCheckPerRepo: 100, + ProportionToCheckPerRepo: 0.6, + }, func(ctx context.Context, _ *user_model.User, config Config) error { + gcLFSConfig := config.(*GCLFSConfig) + return repo_service.GarbageCollectLFSMetaObjects(ctx, repo_service.GarbageCollectLFSMetaObjectsOptions{ + AutoFix: true, + OlderThan: time.Now().Add(-gcLFSConfig.OlderThan), + UpdatedLessRecentlyThan: time.Now().Add(-gcLFSConfig.LastUpdatedMoreThanAgo), + }) + }) +} + func initExtendedTasks() { registerDeleteInactiveUsers() registerDeleteRepositoryArchives() @@ -188,4 +230,5 @@ func initExtendedTasks() { registerDeleteOldActions() registerUpdateGiteaChecker() registerDeleteOldSystemNotices() + registerGCLFS() } |