diff options
author | zeripath <art27@cantab.net> | 2023-01-16 19:50:53 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-01-16 13:50:53 -0600 |
commit | 2cc3a6381cab34113fd2dc2c24ef0efc22c4336d (patch) | |
tree | 6c2e2db756ae2818471de99bc5dca4d2aeddadc7 /models | |
parent | 04c97aa36473bc0070a2fe46e86dc645dc75ee85 (diff) | |
download | gitea-2cc3a6381cab34113fd2dc2c24ef0efc22c4336d.tar.gz gitea-2cc3a6381cab34113fd2dc2c24ef0efc22c4336d.zip |
Add cron method to gc LFS MetaObjects (#22385)
This PR adds a task to the cron service to allow garbage collection of
LFS meta objects. As repositories may have a large number of
LFSMetaObjects, an `updated_unix` column is added to this table and it is used
to perform a generational GC to attempt to reduce the amount of work.
(There may need to be a bit more work here but this is probably enough
for the moment.)
Fix #7045
Signed-off-by: Andrew Thornton <art27@cantab.net>
Diffstat (limited to 'models')
-rw-r--r-- | models/git/lfs.go | 69 | ||||
-rw-r--r-- | models/migrations/migrations.go | 5 | ||||
-rw-r--r-- | models/migrations/v1_19/v238.go | 27 |
3 files changed, 98 insertions, 3 deletions
diff --git a/models/git/lfs.go b/models/git/lfs.go index 3494264688..0ba8e919d0 100644 --- a/models/git/lfs.go +++ b/models/git/lfs.go @@ -115,6 +115,7 @@ type LFSMetaObject struct { RepositoryID int64 `xorm:"UNIQUE(s) INDEX NOT NULL"` Existing bool `xorm:"-"` CreatedUnix timeutil.TimeStamp `xorm:"created"` + UpdatedUnix timeutil.TimeStamp `xorm:"INDEX updated"` } func init() { @@ -334,8 +335,45 @@ func GetRepoLFSSize(ctx context.Context, repoID int64) (int64, error) { return lfsSize, nil } +// IterateRepositoryIDsWithLFSMetaObjects iterates across the repositories that have LFSMetaObjects +func IterateRepositoryIDsWithLFSMetaObjects(ctx context.Context, f func(ctx context.Context, repoID, count int64) error) error { + batchSize := setting.Database.IterateBufferSize + sess := db.GetEngine(ctx) + id := int64(0) + type RepositoryCount struct { + RepositoryID int64 + Count int64 + } + for { + counts := make([]*RepositoryCount, 0, batchSize) + sess.Select("repository_id, COUNT(id) AS count"). + Table("lfs_meta_object"). + Where("repository_id > ?", id). + GroupBy("repository_id"). 
+ OrderBy("repository_id ASC") + + if err := sess.Limit(batchSize, 0).Find(&counts); err != nil { + return err + } + if len(counts) == 0 { + return nil + } + + for _, count := range counts { + if err := f(ctx, count.RepositoryID, count.Count); err != nil { + return err + } + } + id = counts[len(counts)-1].RepositoryID + } +} + +// IterateLFSMetaObjectsForRepoOptions provides options for IterateLFSMetaObjectsForRepo type IterateLFSMetaObjectsForRepoOptions struct { - OlderThan time.Time + OlderThan time.Time + UpdatedLessRecentlyThan time.Time + OrderByUpdated bool + LoopFunctionAlwaysUpdates bool } // IterateLFSMetaObjectsForRepo provides a iterator for LFSMetaObjects per Repo @@ -348,28 +386,53 @@ func IterateLFSMetaObjectsForRepo(ctx context.Context, repoID int64, f func(cont LFSMetaObject } + id := int64(0) + for { beans := make([]*CountLFSMetaObject, 0, batchSize) - // SELECT `lfs_meta_object`.*, COUNT(`l1`.id) as `count` FROM lfs_meta_object INNER JOIN lfs_meta_object AS l1 ON l1.oid = lfs_meta_object.oid WHERE lfs_meta_object.repository_id = ? GROUP BY lfs_meta_object.id sess := engine.Select("`lfs_meta_object`.*, COUNT(`l1`.oid) AS `count`"). Join("INNER", "`lfs_meta_object` AS l1", "`lfs_meta_object`.oid = `l1`.oid"). 
Where("`lfs_meta_object`.repository_id = ?", repoID) if !opts.OlderThan.IsZero() { sess.And("`lfs_meta_object`.created_unix < ?", opts.OlderThan) } + if !opts.UpdatedLessRecentlyThan.IsZero() { + sess.And("`lfs_meta_object`.updated_unix < ?", opts.UpdatedLessRecentlyThan) + } sess.GroupBy("`lfs_meta_object`.id") + if opts.OrderByUpdated { + sess.OrderBy("`lfs_meta_object`.updated_unix ASC") + } else { + sess.And("`lfs_meta_object`.id > ?", id) + sess.OrderBy("`lfs_meta_object`.id ASC") + } if err := sess.Limit(batchSize, start).Find(&beans); err != nil { return err } if len(beans) == 0 { return nil } - start += len(beans) + if !opts.LoopFunctionAlwaysUpdates { + start += len(beans) + } for _, bean := range beans { if err := f(ctx, &bean.LFSMetaObject, bean.Count); err != nil { return err } } + id = beans[len(beans)-1].ID + } +} + +// MarkLFSMetaObject updates the updated time for the provided LFSMetaObject +func MarkLFSMetaObject(ctx context.Context, id int64) error { + obj := &LFSMetaObject{ + UpdatedUnix: timeutil.TimeStampNow(), + } + count, err := db.GetEngine(ctx).ID(id).Update(obj) + if count != 1 { + log.Error("Unexpectedly updated %d LFSMetaObjects with ID: %d", count, id) } + return err } diff --git a/models/migrations/migrations.go b/models/migrations/migrations.go index 9d9c8f5165..4e211617c0 100644 --- a/models/migrations/migrations.go +++ b/models/migrations/migrations.go @@ -432,6 +432,9 @@ var migrations = []Migration{ NewMigration("Update counts of all open milestones", v1_18.UpdateOpenMilestoneCounts), // v230 -> v231 NewMigration("Add ConfidentialClient column (default true) to OAuth2Application table", v1_18.AddConfidentialClientColumnToOAuth2ApplicationTable), + + // Gitea 1.18.0 ends at v231 + // v231 -> v232 NewMigration("Add index for hook_task", v1_19.AddIndexForHookTask), // v232 -> v233 @@ -446,6 +449,8 @@ var migrations = []Migration{ NewMigration("Create secrets table", v1_19.CreateSecretsTable), // v237 -> v238 NewMigration("Drop 
ForeignReference table", v1_19.DropForeignReferenceTable), + // v238 -> v239 + NewMigration("Add updated unix to LFSMetaObject", v1_19.AddUpdatedUnixToLFSMetaObject), } // GetCurrentDBVersion returns the current db version diff --git a/models/migrations/v1_19/v238.go b/models/migrations/v1_19/v238.go new file mode 100644 index 0000000000..266e6cea58 --- /dev/null +++ b/models/migrations/v1_19/v238.go @@ -0,0 +1,27 @@ +// Copyright 2022 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package v1_19 //nolint + +import ( + "code.gitea.io/gitea/modules/timeutil" + + "xorm.io/xorm" +) + +// AddUpdatedUnixToLFSMetaObject adds an updated column to the LFSMetaObject to allow for garbage collection +func AddUpdatedUnixToLFSMetaObject(x *xorm.Engine) error { + // Sync the lfs_meta_object table against the struct below; this adds the + // indexed updated_unix column used by the LFS meta object garbage collection. + // LFSMetaObject stores metadata for LFS tracked files. + type LFSMetaObject struct { + ID int64 `xorm:"pk autoincr"` + Oid string `json:"oid" xorm:"UNIQUE(s) INDEX NOT NULL"` + Size int64 `json:"size" xorm:"NOT NULL"` + RepositoryID int64 `xorm:"UNIQUE(s) INDEX NOT NULL"` + CreatedUnix timeutil.TimeStamp `xorm:"created"` + UpdatedUnix timeutil.TimeStamp `xorm:"INDEX updated"` + } + + return x.Sync(new(LFSMetaObject)) +} |