summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--models/git/lfs.go54
-rw-r--r--modules/doctor/lfs.go37
-rw-r--r--services/cron/tasks_basic.go2
-rw-r--r--services/repository/check.go86
-rw-r--r--services/repository/lfs.go105
5 files changed, 245 insertions, 39 deletions
diff --git a/models/git/lfs.go b/models/git/lfs.go
index a86e84c050..8d418b928d 100644
--- a/models/git/lfs.go
+++ b/models/git/lfs.go
@@ -6,6 +6,7 @@ package git
import (
"context"
"fmt"
+ "time"
"code.gitea.io/gitea/models/db"
"code.gitea.io/gitea/models/perm"
@@ -14,6 +15,7 @@ import (
user_model "code.gitea.io/gitea/models/user"
"code.gitea.io/gitea/modules/lfs"
"code.gitea.io/gitea/modules/log"
+ "code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/timeutil"
"code.gitea.io/gitea/modules/util"
@@ -180,6 +182,12 @@ func GetLFSMetaObjectByOid(repoID int64, oid string) (*LFSMetaObject, error) {
// RemoveLFSMetaObjectByOid removes a LFSMetaObject entry from database by its OID.
// It may return ErrLFSObjectNotExist or a database error.
func RemoveLFSMetaObjectByOid(repoID int64, oid string) (int64, error) {
+ // Delegate to the callback-aware variant; passing a nil fn means no callback runs before the commit.
+ return RemoveLFSMetaObjectByOidFn(repoID, oid, nil)
+}
+
+// RemoveLFSMetaObjectByOidFn removes a LFSMetaObject entry from database by its OID.
+// It may return ErrLFSObjectNotExist or a database error. If fn is non-nil, it is run with the current count within the transaction, before the commit.
+func RemoveLFSMetaObjectByOidFn(repoID int64, oid string, fn func(count int64) error) (int64, error) {
if len(oid) == 0 {
return 0, ErrLFSObjectNotExist
}
@@ -200,6 +208,12 @@ func RemoveLFSMetaObjectByOid(repoID int64, oid string) (int64, error) {
return count, err
}
+ if fn != nil {
+ if err := fn(count); err != nil {
+ return count, err
+ }
+ }
+
return count, committer.Commit()
}
@@ -319,3 +333,43 @@ func GetRepoLFSSize(ctx context.Context, repoID int64) (int64, error) {
}
return lfsSize, nil
}
+
+// IterateLFSMetaObjectsForRepoOptions holds the optional filters for IterateLFSMetaObjectsForRepo.
+type IterateLFSMetaObjectsForRepoOptions struct {
+ // OlderThan, when non-zero, restricts iteration to meta objects whose created_unix is before this time.
+ OlderThan time.Time
+}
+
+// IterateLFSMetaObjectsForRepo provides an iterator for the LFSMetaObjects of one repository.
+//
+// Rows are loaded in batches of setting.Database.IterateBufferSize and f is called once per row.
+// The int64 passed to f is the number of lfs_meta_object rows (across all repositories) that share
+// the meta object's OID. Iteration stops at the first error returned by the query or by f, and that
+// error is returned. opts may be nil; when opts.OlderThan is non-zero only meta objects created
+// before that time are visited.
+func IterateLFSMetaObjectsForRepo(ctx context.Context, repoID int64, f func(context.Context, *LFSMetaObject, int64) error, opts *IterateLFSMetaObjectsForRepoOptions) error {
+	var start int
+	batchSize := setting.Database.IterateBufferSize
+	engine := db.GetEngine(ctx)
+	// CountLFSMetaObject receives one joined row. The embedded struct needs the "extends" tag so
+	// that xorm maps its fields to the selected lfs_meta_object columns.
+	type CountLFSMetaObject struct {
+		Count         int64
+		LFSMetaObject `xorm:"extends"`
+	}
+
+	for {
+		beans := make([]*CountLFSMetaObject, 0, batchSize)
+		// SELECT `lfs_meta_object`.*, COUNT(`l1`.oid) AS `count` FROM lfs_meta_object INNER JOIN lfs_meta_object AS l1 ON l1.oid = lfs_meta_object.oid WHERE lfs_meta_object.repository_id = ? GROUP BY lfs_meta_object.id
+		//
+		// The table is named explicitly: otherwise xorm would derive it from the local
+		// CountLFSMetaObject type rather than use lfs_meta_object.
+		sess := engine.Table("lfs_meta_object").
+			Select("`lfs_meta_object`.*, COUNT(`l1`.oid) AS `count`").
+			Join("INNER", "`lfs_meta_object` AS l1", "`lfs_meta_object`.oid = `l1`.oid").
+			Where("`lfs_meta_object`.repository_id = ?", repoID)
+		// opts is optional; a nil pointer must not be dereferenced.
+		if opts != nil && !opts.OlderThan.IsZero() {
+			// NOTE(review): created_unix is presumably stored as Unix seconds, so the bound is
+			// passed as an integer rather than as a time.Time - confirm against the model.
+			sess.And("`lfs_meta_object`.created_unix < ?", opts.OlderThan.Unix())
+		}
+		sess.GroupBy("`lfs_meta_object`.id")
+		// NOTE(review): paging is by offset. If f deletes rows matched by this query, later
+		// batches can skip rows; paging by id would avoid that.
+		if err := sess.Limit(batchSize, start).Find(&beans); err != nil {
+			return err
+		}
+		if len(beans) == 0 {
+			return nil
+		}
+		start += len(beans)
+
+		for _, bean := range beans {
+			if err := f(ctx, &bean.LFSMetaObject, bean.Count); err != nil {
+				return err
+			}
+		}
+	}
+}
diff --git a/modules/doctor/lfs.go b/modules/doctor/lfs.go
new file mode 100644
index 0000000000..410ed5a9a5
--- /dev/null
+++ b/modules/doctor/lfs.go
@@ -0,0 +1,37 @@
+// Copyright 2022 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package doctor
+
+import (
+ "context"
+ "fmt"
+
+ "code.gitea.io/gitea/modules/log"
+ "code.gitea.io/gitea/modules/setting"
+ "code.gitea.io/gitea/services/repository"
+)
+
+// init registers the "gc-lfs" doctor check, which runs garbageCollectLFSCheck.
+// IsDefault is false for this check.
+func init() {
+ Register(&Check{
+ Title: "Garbage collect LFS",
+ Name: "gc-lfs",
+ IsDefault: false,
+ Run: garbageCollectLFSCheck,
+ AbortIfFailed: false,
+ SkipDatabaseInitialization: false,
+ Priority: 1,
+ })
+}
+
+// garbageCollectLFSCheck is the Run function of the "gc-lfs" doctor check.
+// It returns an error when setting.LFS.StartServer is false. Otherwise it runs
+// repository.GarbageCollectLFSMetaObjects and then the LFS storage check, passing
+// logger and autofix through to both.
+func garbageCollectLFSCheck(ctx context.Context, logger log.Logger, autofix bool) error {
+ if !setting.LFS.StartServer {
+ return fmt.Errorf("LFS support is disabled")
+ }
+
+ if err := repository.GarbageCollectLFSMetaObjects(ctx, logger, autofix); err != nil {
+ return err
+ }
+
+ // The storage check only runs once the meta-object pass has finished without error.
+ return checkStorage(&checkStorageOptions{LFS: true})(ctx, logger, autofix)
+}
diff --git a/services/cron/tasks_basic.go b/services/cron/tasks_basic.go
index acf3896b71..05aef6623d 100644
--- a/services/cron/tasks_basic.go
+++ b/services/cron/tasks_basic.go
@@ -63,7 +63,7 @@ func registerRepoHealthCheck() {
for _, arg := range rhcConfig.Args {
args = append(args, git.CmdArg(arg))
}
- return repo_service.GitFsck(ctx, rhcConfig.Timeout, args)
+ return repo_service.GitFsckRepos(ctx, rhcConfig.Timeout, args)
})
}
diff --git a/services/repository/check.go b/services/repository/check.go
index 6e29dc93d1..293cb04d38 100644
--- a/services/repository/check.go
+++ b/services/repository/check.go
@@ -22,8 +22,8 @@ import (
"xorm.io/builder"
)
-// GitFsck calls 'git fsck' to check repository health.
-func GitFsck(ctx context.Context, timeout time.Duration, args []git.CmdArg) error {
+// GitFsckRepos calls 'git fsck' to check repository health.
+func GitFsckRepos(ctx context.Context, timeout time.Duration, args []git.CmdArg) error {
log.Trace("Doing: GitFsck")
if err := db.Iterate(
@@ -35,15 +35,7 @@ func GitFsck(ctx context.Context, timeout time.Duration, args []git.CmdArg) erro
return db.ErrCancelledf("before fsck of %s", repo.FullName())
default:
}
- log.Trace("Running health check on repository %v", repo)
- repoPath := repo.RepoPath()
- if err := git.Fsck(ctx, repoPath, timeout, args...); err != nil {
- log.Warn("Failed to health check repository (%v): %v", repo, err)
- if err = system_model.CreateRepositoryNotice("Failed to health check repository (%s): %v", repo.FullName(), err); err != nil {
- log.Error("CreateRepositoryNotice: %v", err)
- }
- }
- return nil
+ return GitFsckRepo(ctx, repo, timeout, args)
},
); err != nil {
log.Trace("Error: GitFsck: %v", err)
@@ -54,6 +46,19 @@ func GitFsck(ctx context.Context, timeout time.Duration, args []git.CmdArg) erro
return nil
}
+// GitFsckRepo calls 'git fsck' to check an individual repository's health.
+// A failed fsck is logged as a warning and recorded as a repository notice; it is not
+// returned to the caller, so this function always returns nil.
+func GitFsckRepo(ctx context.Context, repo *repo_model.Repository, timeout time.Duration, args []git.CmdArg) error {
+ log.Trace("Running health check on repository %-v", repo)
+ repoPath := repo.RepoPath()
+ if err := git.Fsck(ctx, repoPath, timeout, args...); err != nil {
+ log.Warn("Failed to health check repository (%-v): %v", repo, err)
+ if err = system_model.CreateRepositoryNotice("Failed to health check repository (%s): %v", repo.FullName(), err); err != nil {
+ log.Error("CreateRepositoryNotice: %v", err)
+ }
+ }
+ return nil
+}
+
// GitGcRepos calls 'git gc' to remove unnecessary files and optimize the local repository
func GitGcRepos(ctx context.Context, timeout time.Duration, args ...git.CmdArg) error {
log.Trace("Doing: GitGcRepos")
@@ -68,33 +73,7 @@ func GitGcRepos(ctx context.Context, timeout time.Duration, args ...git.CmdArg)
return db.ErrCancelledf("before GC of %s", repo.FullName())
default:
}
- log.Trace("Running git gc on %v", repo)
- command := git.NewCommand(ctx, args...).
- SetDescription(fmt.Sprintf("Repository Garbage Collection: %s", repo.FullName()))
- var stdout string
- var err error
- stdout, _, err = command.RunStdString(&git.RunOpts{Timeout: timeout, Dir: repo.RepoPath()})
-
- if err != nil {
- log.Error("Repository garbage collection failed for %v. Stdout: %s\nError: %v", repo, stdout, err)
- desc := fmt.Sprintf("Repository garbage collection failed for %s. Stdout: %s\nError: %v", repo.RepoPath(), stdout, err)
- if err = system_model.CreateRepositoryNotice(desc); err != nil {
- log.Error("CreateRepositoryNotice: %v", err)
- }
- return fmt.Errorf("Repository garbage collection failed in repo: %s: Error: %w", repo.FullName(), err)
- }
-
- // Now update the size of the repository
- if err := repo_module.UpdateRepoSize(ctx, repo); err != nil {
- log.Error("Updating size as part of garbage collection failed for %v. Stdout: %s\nError: %v", repo, stdout, err)
- desc := fmt.Sprintf("Updating size as part of garbage collection failed for %s. Stdout: %s\nError: %v", repo.RepoPath(), stdout, err)
- if err = system_model.CreateRepositoryNotice(desc); err != nil {
- log.Error("CreateRepositoryNotice: %v", err)
- }
- return fmt.Errorf("Updating size as part of garbage collection failed in repo: %s: Error: %w", repo.FullName(), err)
- }
-
- return nil
+ return GitGcRepo(ctx, repo, timeout, args)
},
); err != nil {
return err
@@ -104,6 +83,37 @@ func GitGcRepos(ctx context.Context, timeout time.Duration, args ...git.CmdArg)
return nil
}
+// GitGcRepo calls 'git gc' to remove unnecessary files and optimize a single local repository,
+// then updates the repository size via repo_module.UpdateRepoSize.
+//
+// args are the git command arguments to run and timeout bounds the git invocation. If either
+// step fails, the failure is logged, recorded as a repository notice, and returned wrapped in
+// an error that names the repository.
+func GitGcRepo(ctx context.Context, repo *repo_model.Repository, timeout time.Duration, args []git.CmdArg) error {
+	log.Trace("Running git gc on %-v", repo)
+	command := git.NewCommand(ctx, args...).
+		SetDescription(fmt.Sprintf("Repository Garbage Collection: %s", repo.FullName()))
+	var stdout string
+	var err error
+	stdout, _, err = command.RunStdString(&git.RunOpts{Timeout: timeout, Dir: repo.RepoPath()})
+
+	if err != nil {
+		log.Error("Repository garbage collection failed for %v. Stdout: %s\nError: %v", repo, stdout, err)
+		desc := fmt.Sprintf("Repository garbage collection failed for %s. Stdout: %s\nError: %v", repo.RepoPath(), stdout, err)
+		// Keep the notice error in its own variable so the gc error wrapped below is not overwritten.
+		if noticeErr := system_model.CreateRepositoryNotice(desc); noticeErr != nil {
+			log.Error("CreateRepositoryNotice: %v", noticeErr)
+		}
+		return fmt.Errorf("Repository garbage collection failed in repo: %s: Error: %w", repo.FullName(), err)
+	}
+
+	// Now update the size of the repository
+	if err := repo_module.UpdateRepoSize(ctx, repo); err != nil {
+		log.Error("Updating size as part of garbage collection failed for %-v. Stdout: %s\nError: %v", repo, stdout, err)
+		desc := fmt.Sprintf("Updating size as part of garbage collection failed for %s. Stdout: %s\nError: %v", repo.RepoPath(), stdout, err)
+		// As above: do not let the notice result replace the size-update error.
+		if noticeErr := system_model.CreateRepositoryNotice(desc); noticeErr != nil {
+			log.Error("CreateRepositoryNotice: %v", noticeErr)
+		}
+		return fmt.Errorf("Updating size as part of garbage collection failed in repo: %s: Error: %w", repo.FullName(), err)
+	}
+
+	return nil
+}
+
func gatherMissingRepoRecords(ctx context.Context) ([]*repo_model.Repository, error) {
repos := make([]*repo_model.Repository, 0, 10)
if err := db.Iterate(
diff --git a/services/repository/lfs.go b/services/repository/lfs.go
new file mode 100644
index 0000000000..0e88d359a8
--- /dev/null
+++ b/services/repository/lfs.go
@@ -0,0 +1,105 @@
+// Copyright 2022 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package repository
+
+import (
+ "context"
+ "fmt"
+ "time"
+
+ "code.gitea.io/gitea/models/db"
+ git_model "code.gitea.io/gitea/models/git"
+ repo_model "code.gitea.io/gitea/models/repo"
+ "code.gitea.io/gitea/modules/git"
+ "code.gitea.io/gitea/modules/lfs"
+ "code.gitea.io/gitea/modules/log"
+
+ "xorm.io/builder"
+)
+
+// GarbageCollectLFSMetaObjects runs GarbageCollectLFSMetaObjectsForRepo on every repository
+// matched by the "id > 0" condition, passing logger and autofix through unchanged.
+// Any error returned by the iteration is returned to the caller.
+func GarbageCollectLFSMetaObjects(ctx context.Context, logger log.Logger, autofix bool) error {
+ log.Trace("Doing: GarbageCollectLFSMetaObjects")
+
+ if err := db.Iterate(
+ ctx,
+ builder.And(builder.Gt{"id": 0}),
+ func(ctx context.Context, repo *repo_model.Repository) error {
+ return GarbageCollectLFSMetaObjectsForRepo(ctx, repo, logger, autofix)
+ },
+ ); err != nil {
+ return err
+ }
+
+ log.Trace("Finished: GarbageCollectLFSMetaObjects")
+ return nil
+}
+
+// GarbageCollectLFSMetaObjectsForRepo checks the LFSMetaObjects of repo that are older than a week
+// and treats as orphaned those whose pointer blob no longer exists in the git repository.
+//
+// When autofix is false orphans are only counted. When autofix is true each orphaned meta object
+// is removed from the database, and its content is deleted from the LFS content store when the
+// count reported by the removal is zero. logger may be nil; when set, it receives a progress
+// line and a summary line. The first error from opening the repository, from iterating, or from
+// removing a meta object is returned.
+func GarbageCollectLFSMetaObjectsForRepo(ctx context.Context, repo *repo_model.Repository, logger log.Logger, autofix bool) error {
+	if logger != nil {
+		logger.Info("Checking %-v", repo)
+	}
+	total, orphaned, collected, deleted := 0, 0, 0, 0
+	if logger != nil {
+		// Emit the summary on every exit path, including early error returns.
+		defer func() {
+			if orphaned == 0 {
+				logger.Info("Found %d total LFSMetaObjects in %-v", total, repo)
+			} else if !autofix {
+				logger.Info("Found %d/%d orphaned LFSMetaObjects in %-v", orphaned, total, repo)
+			} else {
+				logger.Info("Collected %d/%d orphaned/%d total LFSMetaObjects in %-v. %d removed from storage.", collected, orphaned, total, repo, deleted)
+			}
+		}()
+	}
+
+	gitRepo, err := git.OpenRepository(ctx, repo.RepoPath())
+	if err != nil {
+		log.Error("Unable to open git repository %-v: %v", repo, err)
+		return err
+	}
+	defer gitRepo.Close()
+
+	store := lfs.NewContentStore()
+
+	return git_model.IterateLFSMetaObjectsForRepo(ctx, repo.ID, func(ctx context.Context, metaObject *git_model.LFSMetaObject, _ int64) error {
+		total++
+		// The meta object is in use if the blob holding its pointer text exists in the repository.
+		pointerSha := git.ComputeBlobHash([]byte(metaObject.Pointer.StringContent()))
+
+		if gitRepo.IsObjectExist(pointerSha.String()) {
+			return nil
+		}
+		orphaned++
+
+		if !autofix {
+			return nil
+		}
+		// Non-existent pointer file
+		if _, err := git_model.RemoveLFSMetaObjectByOidFn(repo.ID, metaObject.Oid, func(count int64) error {
+			// NOTE(review): count is presumably the number of meta objects still referencing
+			// this OID; the stored content is only deleted when none remain - confirm.
+			if count > 0 {
+				return nil
+			}
+
+			// Storage removal is best-effort: a failure is logged and the database removal
+			// still proceeds, but the object is not counted as removed from storage.
+			if err := store.Delete(metaObject.RelativePath()); err != nil {
+				log.Error("Unable to remove lfs metaobject %s from store: %v", metaObject.Oid, err)
+				return nil
+			}
+			deleted++
+			return nil
+		}); err != nil {
+			return fmt.Errorf("unable to remove meta-object %s in %s: %w", metaObject.Oid, repo.FullName(), err)
+		}
+		collected++
+
+		return nil
+	}, &git_model.IterateLFSMetaObjectsForRepoOptions{
+		// Only attempt to garbage collect lfs meta objects older than a week as the order of git lfs upload
+		// and git object upload is not necessarily guaranteed. It's possible to imagine a situation whereby
+		// an LFS object is uploaded but the git branch is not uploaded immediately, or there are some rapid
+		// changes in new branches that might lead to lfs objects becoming temporarily unassociated with git
+		// objects.
+		//
+		// A week is probably longer than necessary, but it should be long enough that any LFS object
+		// still unassociated after it is genuinely unassociated.
+		OlderThan: time.Now().Add(-24 * 7 * time.Hour),
+	})
+}