summaryrefslogtreecommitdiffstats
path: root/models
diff options
context:
space:
mode:
authorzeripath <art27@cantab.net>2023-01-16 19:50:53 +0000
committerGitHub <noreply@github.com>2023-01-16 13:50:53 -0600
commit2cc3a6381cab34113fd2dc2c24ef0efc22c4336d (patch)
tree6c2e2db756ae2818471de99bc5dca4d2aeddadc7 /models
parent04c97aa36473bc0070a2fe46e86dc645dc75ee85 (diff)
downloadgitea-2cc3a6381cab34113fd2dc2c24ef0efc22c4336d.tar.gz
gitea-2cc3a6381cab34113fd2dc2c24ef0efc22c4336d.zip
Add cron method to gc LFS MetaObjects (#22385)
This PR adds a task to the cron service to allow garbage collection of LFS meta objects. As repositories may have a large number of LFSMetaObjects, an updated column is added to this table and it is used to perform a generational GC to attempt to reduce the amount of work. (There may need to be a bit more work here but this is probably enough for the moment.) Fix #7045 Signed-off-by: Andrew Thornton <art27@cantab.net>
Diffstat (limited to 'models')
-rw-r--r--models/git/lfs.go69
-rw-r--r--models/migrations/migrations.go5
-rw-r--r--models/migrations/v1_19/v238.go27
3 files changed, 98 insertions, 3 deletions
diff --git a/models/git/lfs.go b/models/git/lfs.go
index 3494264688..0ba8e919d0 100644
--- a/models/git/lfs.go
+++ b/models/git/lfs.go
@@ -115,6 +115,7 @@ type LFSMetaObject struct {
RepositoryID int64 `xorm:"UNIQUE(s) INDEX NOT NULL"`
Existing bool `xorm:"-"`
CreatedUnix timeutil.TimeStamp `xorm:"created"`
+ UpdatedUnix timeutil.TimeStamp `xorm:"INDEX updated"`
}
func init() {
@@ -334,8 +335,45 @@ func GetRepoLFSSize(ctx context.Context, repoID int64) (int64, error) {
return lfsSize, nil
}
+// IterateRepositoryIDsWithLFSMetaObjects iterates across the repositories that have LFSMetaObjects.
+//
+// For every repository_id present in the lfs_meta_object table, f is called
+// with that repository ID and the number of LFS meta objects it owns.
+// Repositories are visited in ascending repository_id order and are fetched in
+// batches of setting.Database.IterateBufferSize rows. Each batch resumes after
+// the last repository_id already seen rather than using a row offset.
+//
+// Iteration stops at the first error returned by the query or by f, and that
+// error is returned. nil is returned once a batch comes back empty.
+func IterateRepositoryIDsWithLFSMetaObjects(ctx context.Context, f func(ctx context.Context, repoID, count int64) error) error {
+	batchSize := setting.Database.IterateBufferSize
+	engine := db.GetEngine(ctx)
+	id := int64(0)
+	type RepositoryCount struct {
+		RepositoryID int64
+		Count        int64
+	}
+	for {
+		counts := make([]*RepositoryCount, 0, batchSize)
+		// Build and run the query as ONE chain. When the value returned by
+		// db.GetEngine is an engine (not an already-open session), every
+		// builder call starts a fresh session, so a Select/Table/Where chain
+		// whose result is discarded never reaches the later Find.
+		if err := engine.Select("repository_id, COUNT(id) AS count").
+			Table("lfs_meta_object").
+			Where("repository_id > ?", id).
+			GroupBy("repository_id").
+			OrderBy("repository_id ASC").
+			Limit(batchSize, 0).
+			Find(&counts); err != nil {
+			return err
+		}
+		if len(counts) == 0 {
+			return nil
+		}
+
+		for _, count := range counts {
+			if err := f(ctx, count.RepositoryID, count.Count); err != nil {
+				return err
+			}
+		}
+		// Resume the next batch after the last repository handled.
+		id = counts[len(counts)-1].RepositoryID
+	}
+}
+
+// IterateLFSMetaObjectsForRepoOptions provides options for IterateLFSMetaObjectsForRepo
type IterateLFSMetaObjectsForRepoOptions struct {
- OlderThan time.Time
+ // OlderThan, when non-zero, restricts iteration to objects whose created_unix is before this time.
+ OlderThan time.Time
+ // UpdatedLessRecentlyThan, when non-zero, restricts iteration to objects whose updated_unix is before this time.
+ UpdatedLessRecentlyThan time.Time
+ // OrderByUpdated orders results by updated_unix ascending; when false, results are ordered by id and each batch is additionally filtered to ids greater than the last one seen.
+ OrderByUpdated bool
+ // LoopFunctionAlwaysUpdates, when true, stops the row offset from advancing between batches.
+ // NOTE(review): presumably for callbacks that always bump updated_unix so handled rows fall out of the filter - confirm against callers.
+ LoopFunctionAlwaysUpdates bool
}
// IterateLFSMetaObjectsForRepo provides an iterator for LFSMetaObjects per Repo
@@ -348,28 +386,53 @@ func IterateLFSMetaObjectsForRepo(ctx context.Context, repoID int64, f func(cont
LFSMetaObject
}
+ id := int64(0)
+
for {
beans := make([]*CountLFSMetaObject, 0, batchSize)
- // SELECT `lfs_meta_object`.*, COUNT(`l1`.id) as `count` FROM lfs_meta_object INNER JOIN lfs_meta_object AS l1 ON l1.oid = lfs_meta_object.oid WHERE lfs_meta_object.repository_id = ? GROUP BY lfs_meta_object.id
sess := engine.Select("`lfs_meta_object`.*, COUNT(`l1`.oid) AS `count`").
Join("INNER", "`lfs_meta_object` AS l1", "`lfs_meta_object`.oid = `l1`.oid").
Where("`lfs_meta_object`.repository_id = ?", repoID)
if !opts.OlderThan.IsZero() {
sess.And("`lfs_meta_object`.created_unix < ?", opts.OlderThan)
}
+ if !opts.UpdatedLessRecentlyThan.IsZero() {
+ sess.And("`lfs_meta_object`.updated_unix < ?", opts.UpdatedLessRecentlyThan)
+ }
sess.GroupBy("`lfs_meta_object`.id")
+ if opts.OrderByUpdated {
+ sess.OrderBy("`lfs_meta_object`.updated_unix ASC")
+ } else {
+ sess.And("`lfs_meta_object`.id > ?", id)
+ sess.OrderBy("`lfs_meta_object`.id ASC")
+ }
if err := sess.Limit(batchSize, start).Find(&beans); err != nil {
return err
}
if len(beans) == 0 {
return nil
}
- start += len(beans)
+ if !opts.LoopFunctionAlwaysUpdates {
+ start += len(beans)
+ }
for _, bean := range beans {
if err := f(ctx, &bean.LFSMetaObject, bean.Count); err != nil {
return err
}
}
+ id = beans[len(beans)-1].ID
+ }
+}
+
+// MarkLFSMetaObject updates the updated time for the provided LFSMetaObject.
+//
+// It sets UpdatedUnix to the current time on the row identified by id. Any
+// error from the update is returned to the caller. If the update succeeds but
+// does not affect exactly one row, the anomaly is logged and nil is returned.
+func MarkLFSMetaObject(ctx context.Context, id int64) error {
+	obj := &LFSMetaObject{
+		UpdatedUnix: timeutil.TimeStampNow(),
+	}
+	count, err := db.GetEngine(ctx).ID(id).Update(obj)
+	if err != nil {
+		// The affected-row count is not meaningful for a failed update, so
+		// return the error instead of also logging a misleading count.
+		return err
+	}
+	if count != 1 {
+		log.Error("Unexpectedly updated %d LFSMetaObjects with ID: %d", count, id)
 	}
+	return nil
+}
diff --git a/models/migrations/migrations.go b/models/migrations/migrations.go
index 9d9c8f5165..4e211617c0 100644
--- a/models/migrations/migrations.go
+++ b/models/migrations/migrations.go
@@ -432,6 +432,9 @@ var migrations = []Migration{
NewMigration("Update counts of all open milestones", v1_18.UpdateOpenMilestoneCounts),
// v230 -> v231
NewMigration("Add ConfidentialClient column (default true) to OAuth2Application table", v1_18.AddConfidentialClientColumnToOAuth2ApplicationTable),
+
+ // Gitea 1.18.0 ends at v231
+
// v231 -> v232
NewMigration("Add index for hook_task", v1_19.AddIndexForHookTask),
// v232 -> v233
@@ -446,6 +449,8 @@ var migrations = []Migration{
NewMigration("Create secrets table", v1_19.CreateSecretsTable),
// v237 -> v238
NewMigration("Drop ForeignReference table", v1_19.DropForeignReferenceTable),
+ // v238 -> v239
+ NewMigration("Add updated unix to LFSMetaObject", v1_19.AddUpdatedUnixToLFSMetaObject),
}
// GetCurrentDBVersion returns the current db version
diff --git a/models/migrations/v1_19/v238.go b/models/migrations/v1_19/v238.go
new file mode 100644
index 0000000000..266e6cea58
--- /dev/null
+++ b/models/migrations/v1_19/v238.go
@@ -0,0 +1,27 @@
+// Copyright 2022 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package v1_19 //nolint
+
+import (
+ "code.gitea.io/gitea/modules/timeutil"
+
+ "xorm.io/xorm"
+)
+
+// AddUpdatedUnixToLFSMetaObject adds an updated column to the LFSMetaObject to allow for garbage collection
+func AddUpdatedUnixToLFSMetaObject(x *xorm.Engine) error {
+ // LFSMetaObject stores metadata for LFS tracked files.
+ // It is redeclared locally, including the new indexed UpdatedUnix field,
+ // and passed to Sync below so the migration does not depend on the live model.
+ type LFSMetaObject struct {
+ ID int64 `xorm:"pk autoincr"`
+ Oid string `json:"oid" xorm:"UNIQUE(s) INDEX NOT NULL"`
+ Size int64 `json:"size" xorm:"NOT NULL"`
+ RepositoryID int64 `xorm:"UNIQUE(s) INDEX NOT NULL"`
+ CreatedUnix timeutil.TimeStamp `xorm:"created"`
+ UpdatedUnix timeutil.TimeStamp `xorm:"INDEX updated"`
+ }
+
+ return x.Sync(new(LFSMetaObject))
+}