summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorzeripath <art27@cantab.net>2022-11-15 08:08:59 +0000
committerGitHub <noreply@github.com>2022-11-15 16:08:59 +0800
commitc772934ff623b3a76efbe306f597695330a71287 (patch)
tree4639644b2bcc0e8ee3573283c8c7190a80b24a59
parentde6dfb714153c17a06406c866805a17a476c63bd (diff)
downloadgitea-c772934ff623b3a76efbe306f597695330a71287.tar.gz
gitea-c772934ff623b3a76efbe306f597695330a71287.zip
Adjust gitea doctor --run storages to check all storage types (#21785)
The doctor check `storages` currently only checks the attachment storage. This PR adds some basic garbage collection functionality for the other types of storage. Signed-off-by: Andrew Thornton <art27@cantab.net> Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
-rw-r--r--models/git/lfs.go6
-rw-r--r--models/packages/package_blob.go7
-rw-r--r--models/repo/archiver.go36
-rw-r--r--models/repo/attachment.go6
-rw-r--r--models/repo/avatar.go7
-rw-r--r--models/user/avatar.go7
-rw-r--r--modules/doctor/storage.go232
-rw-r--r--modules/git/repo_archive.go12
-rw-r--r--modules/packages/content_store.go12
-rw-r--r--routers/web/repo/lfs.go2
10 files changed, 296 insertions, 31 deletions
diff --git a/models/git/lfs.go b/models/git/lfs.go
index 74721dabb1..87e07d7a5e 100644
--- a/models/git/lfs.go
+++ b/models/git/lfs.go
@@ -235,9 +235,9 @@ func LFSObjectAccessible(user *user_model.User, oid string) (bool, error) {
return count > 0, err
}
-// LFSObjectIsAssociated checks if a provided Oid is associated
-func LFSObjectIsAssociated(oid string) (bool, error) {
- return db.GetEngine(db.DefaultContext).Exist(&LFSMetaObject{Pointer: lfs.Pointer{Oid: oid}})
+// ExistsLFSObject checks if a provided Oid exists within the DB
+func ExistsLFSObject(ctx context.Context, oid string) (bool, error) {
+ return db.GetEngine(ctx).Exist(&LFSMetaObject{Pointer: lfs.Pointer{Oid: oid}})
}
// LFSAutoAssociate auto associates accessible LFSMetaObjects
diff --git a/models/packages/package_blob.go b/models/packages/package_blob.go
index 8c701d4285..fcb71a96ec 100644
--- a/models/packages/package_blob.go
+++ b/models/packages/package_blob.go
@@ -62,6 +62,13 @@ func GetBlobByID(ctx context.Context, blobID int64) (*PackageBlob, error) {
return pb, nil
}
+// ExistPackageBlobWithSHA returns if a package blob exists with the provided sha
+func ExistPackageBlobWithSHA(ctx context.Context, blobSha256 string) (bool, error) {
+ return db.GetEngine(ctx).Exist(&PackageBlob{
+ HashSHA256: blobSha256,
+ })
+}
+
// FindExpiredUnreferencedBlobs gets all blobs without associated files older than the specific duration
func FindExpiredUnreferencedBlobs(ctx context.Context, olderThan time.Duration) ([]*PackageBlob, error) {
pbs := make([]*PackageBlob, 0, 10)
diff --git a/models/repo/archiver.go b/models/repo/archiver.go
index 003911943f..84358ce0dc 100644
--- a/models/repo/archiver.go
+++ b/models/repo/archiver.go
@@ -7,11 +7,14 @@ package repo
import (
"context"
"fmt"
+ "strconv"
+ "strings"
"time"
"code.gitea.io/gitea/models/db"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/timeutil"
+ "code.gitea.io/gitea/modules/util"
"xorm.io/builder"
)
@@ -44,6 +47,28 @@ func (archiver *RepoArchiver) RelativePath() string {
return fmt.Sprintf("%d/%s/%s.%s", archiver.RepoID, archiver.CommitID[:2], archiver.CommitID, archiver.Type.String())
}
+// repoArchiverForRelativePath inverts (archiver *RepoArchiver) RelativePath() and creates a
+// shell RepoArchiver struct (RepoID, CommitID and Type only) representing the given path.
+//
+// RelativePath() produces "<repoID>/<commitID[:2]>/<commitID>.<type>", so the file name
+// already contains the complete commit ID and the middle directory is only its first two
+// characters. Any path that does not have this shape yields an error wrapping
+// util.ErrInvalidArgument.
+func repoArchiverForRelativePath(relativePath string) (*RepoArchiver, error) {
+	parts := strings.SplitN(relativePath, "/", 3)
+	if len(parts) != 3 {
+		return nil, util.SilentWrap{Message: fmt.Sprintf("invalid storage path: %s", relativePath), Err: util.ErrInvalidArgument}
+	}
+	repoID, err := strconv.ParseInt(parts[0], 10, 64)
+	if err != nil {
+		return nil, util.SilentWrap{Message: fmt.Sprintf("invalid storage path: %s", relativePath), Err: util.ErrInvalidArgument}
+	}
+	nameExts := strings.SplitN(parts[2], ".", 2)
+	if len(nameExts) != 2 {
+		return nil, util.SilentWrap{Message: fmt.Sprintf("invalid storage path: %s", relativePath), Err: util.ErrInvalidArgument}
+	}
+	// The directory component must be exactly the two character prefix of the commit ID,
+	// otherwise this path cannot have been generated by RelativePath().
+	if len(parts[1]) != 2 || !strings.HasPrefix(nameExts[0], parts[1]) {
+		return nil, util.SilentWrap{Message: fmt.Sprintf("invalid storage path: %s", relativePath), Err: util.ErrInvalidArgument}
+	}
+
+	return &RepoArchiver{
+		RepoID: repoID,
+		// nameExts[0] is already the full commit ID - do not prepend the directory prefix again
+		CommitID: nameExts[0],
+		Type:     git.ToArchiveType(nameExts[1]),
+	}, nil
+}
+
+
var delRepoArchiver = new(RepoArchiver)
// DeleteRepoArchiver delete archiver
@@ -65,6 +90,17 @@ func GetRepoArchiver(ctx context.Context, repoID int64, tp git.ArchiveType, comm
return nil, nil
}
+// ExistsRepoArchiverWithStoragePath checks if there is a RepoArchiver for a given storage path
+func ExistsRepoArchiverWithStoragePath(ctx context.Context, storagePath string) (bool, error) {
+ // We need to invert the path provided by func (archiver *RepoArchiver) RelativePath() above
+ archiver, err := repoArchiverForRelativePath(storagePath)
+ if err != nil {
+ return false, err
+ }
+
+ return db.GetEngine(ctx).Exist(archiver)
+}
+
// AddRepoArchiver adds an archiver
func AddRepoArchiver(ctx context.Context, archiver *RepoArchiver) error {
_, err := db.GetEngine(ctx).Insert(archiver)
diff --git a/models/repo/attachment.go b/models/repo/attachment.go
index 180d7730ba..df7528df09 100644
--- a/models/repo/attachment.go
+++ b/models/repo/attachment.go
@@ -122,9 +122,9 @@ func GetAttachmentsByUUIDs(ctx context.Context, uuids []string) ([]*Attachment,
return attachments, db.GetEngine(ctx).In("uuid", uuids).Find(&attachments)
}
-// ExistAttachmentsByUUID returns true if attachment is exist by given UUID
-func ExistAttachmentsByUUID(uuid string) (bool, error) {
- return db.GetEngine(db.DefaultContext).Where("`uuid`=?", uuid).Exist(new(Attachment))
+// ExistAttachmentsByUUID returns true if attachment exists with the given UUID
+func ExistAttachmentsByUUID(ctx context.Context, uuid string) (bool, error) {
+ return db.GetEngine(ctx).Where("`uuid`=?", uuid).Exist(new(Attachment))
}
// GetAttachmentsByIssueID returns all attachments of an issue.
diff --git a/models/repo/avatar.go b/models/repo/avatar.go
index 1bc37598fe..84b9f5ac21 100644
--- a/models/repo/avatar.go
+++ b/models/repo/avatar.go
@@ -24,6 +24,13 @@ func (repo *Repository) CustomAvatarRelativePath() string {
return repo.Avatar
}
+// ExistsWithAvatarAtStoragePath returns true if there is a repository with this Avatar
+func ExistsWithAvatarAtStoragePath(ctx context.Context, storagePath string) (bool, error) {
+ // See func (repo *Repository) CustomAvatarRelativePath()
+ // repo.Avatar is used directly as the storage path - therefore we can check for existence directly using the path
+ return db.GetEngine(ctx).Where("`avatar`=?", storagePath).Exist(new(Repository))
+}
+
// RelAvatarLink returns a relative link to the repository's avatar.
func (repo *Repository) RelAvatarLink() string {
return repo.relAvatarLink(db.DefaultContext)
diff --git a/models/user/avatar.go b/models/user/avatar.go
index 102206f3a2..f523766746 100644
--- a/models/user/avatar.go
+++ b/models/user/avatar.go
@@ -111,3 +111,10 @@ func (u *User) IsUploadAvatarChanged(data []byte) bool {
avatarID := fmt.Sprintf("%x", md5.Sum([]byte(fmt.Sprintf("%d-%x", u.ID, md5.Sum(data)))))
return u.Avatar != avatarID
}
+
+// ExistsWithAvatarAtStoragePath returns true if there is a user with this Avatar
+func ExistsWithAvatarAtStoragePath(ctx context.Context, storagePath string) (bool, error) {
+ // See func (u *User) CustomAvatarRelativePath()
+ // u.Avatar is used directly as the storage path - therefore we can check for existence directly using the path
+ return db.GetEngine(ctx).Where("`avatar`=?", storagePath).Exist(new(User))
+}
diff --git a/modules/doctor/storage.go b/modules/doctor/storage.go
index dafd989cf0..8ae9168ea6 100644
--- a/modules/doctor/storage.go
+++ b/modules/doctor/storage.go
@@ -6,71 +6,255 @@ package doctor
import (
"context"
+ "errors"
+ "io/fs"
+ "strings"
- repo_model "code.gitea.io/gitea/models/repo"
+ "code.gitea.io/gitea/models/git"
+ "code.gitea.io/gitea/models/packages"
+ "code.gitea.io/gitea/models/repo"
+ "code.gitea.io/gitea/models/user"
+ "code.gitea.io/gitea/modules/base"
"code.gitea.io/gitea/modules/log"
+ packages_module "code.gitea.io/gitea/modules/packages"
"code.gitea.io/gitea/modules/storage"
+ "code.gitea.io/gitea/modules/util"
)
-func checkAttachmentStorageFiles(logger log.Logger, autofix bool) error {
- var total, garbageNum int
- var deletePaths []string
- if err := storage.Attachments.IterateObjects(func(p string, obj storage.Object) error {
+type commonStorageCheckOptions struct {
+ storer storage.ObjectStorage
+ isOrphaned func(path string, obj storage.Object, stat fs.FileInfo) (bool, error)
+ name string
+}
+
+// commonCheckStorage iterates over every object in opts.storer, counting the objects and their
+// sizes and asking opts.isOrphaned whether each one is orphaned.
+// When autofix is set the orphaned objects are deleted once the iteration has finished;
+// otherwise only a summary is reported. opts.name is used to label the log messages.
+// An error is returned only if the iteration itself (Stat or isOrphaned) fails - failures to
+// delete individual objects are reported through logger and do not abort the check.
+func commonCheckStorage(ctx context.Context, logger log.Logger, autofix bool, opts *commonStorageCheckOptions) error {
+	totalCount, orphanedCount := 0, 0
+	totalSize, orphanedSize := int64(0), int64(0)
+
+	var pathsToDelete []string
+	if err := opts.storer.IterateObjects(func(p string, obj storage.Object) error {
 		defer obj.Close()
-		total++
+		totalCount++
 		stat, err := obj.Stat()
 		if err != nil {
 			return err
 		}
-		exist, err := repo_model.ExistAttachmentsByUUID(stat.Name())
+		totalSize += stat.Size()
+
+		orphaned, err := opts.isOrphaned(p, obj, stat)
 		if err != nil {
 			return err
 		}
-		if !exist {
-			garbageNum++
+		if orphaned {
+			orphanedCount++
+			orphanedSize += stat.Size()
 			if autofix {
-				deletePaths = append(deletePaths, p)
+				pathsToDelete = append(pathsToDelete, p)
 			}
 		}
 		return nil
 	}); err != nil {
-		logger.Error("storage.Attachments.IterateObjects failed: %v", err)
+		logger.Error("Error whilst iterating %s storage: %v", opts.name, err)
 		return err
 	}
-	if garbageNum > 0 {
+	if orphanedCount > 0 {
 		if autofix {
 			var deletedNum int
-			for _, p := range deletePaths {
-				if err := storage.Attachments.Delete(p); err != nil {
-					log.Error("Delete attachment %s failed: %v", p, err)
+			for _, p := range pathsToDelete {
+				if err := opts.storer.Delete(p); err != nil {
+					// Report through the doctor logger like every other message of this check
+					logger.Error("Error whilst deleting %s from %s storage: %v", p, opts.name, err)
 				} else {
 					deletedNum++
 				}
 			}
-			logger.Info("%d missed information attachment detected, %d deleted.", garbageNum, deletedNum)
+			logger.Info("Deleted %d/%d orphaned %s(s)", deletedNum, orphanedCount, opts.name)
 		} else {
-			logger.Warn("Checked %d attachment, %d missed information.", total, garbageNum)
+			logger.Warn("Found %d/%d (%s/%s) orphaned %s(s)", orphanedCount, totalCount, base.FileSize(orphanedSize), base.FileSize(totalSize), opts.name)
 		}
+	} else {
+		logger.Info("Found %d (%s) %s(s)", totalCount, base.FileSize(totalSize), opts.name)
 	}
 	return nil
 }
-func checkStorageFiles(ctx context.Context, logger log.Logger, autofix bool) error {
- if err := storage.Init(); err != nil {
- logger.Error("storage.Init failed: %v", err)
- return err
+type checkStorageOptions struct {
+ All bool
+ Attachments bool
+ LFS bool
+ Avatars bool
+ RepoAvatars bool
+ RepoArchives bool
+ Packages bool
+}
+
+// checkStorage will return a doctor check function to check the requested storage types for "orphaned" stored object/files and optionally delete them
+//
+// Each storage type is checked when its own flag or opts.All is set. The checks run in a fixed
+// order (attachments, LFS, avatars, repo avatars, repo archives, packages) and the returned
+// function stops at the first check that returns an error.
+func checkStorage(opts *checkStorageOptions) func(ctx context.Context, logger log.Logger, autofix bool) error {
+	return func(ctx context.Context, logger log.Logger, autofix bool) error {
+		if err := storage.Init(); err != nil {
+			logger.Error("storage.Init failed: %v", err)
+			return err
+		}
+
+		if opts.Attachments || opts.All {
+			if err := commonCheckStorage(ctx, logger, autofix,
+				&commonStorageCheckOptions{
+					storer: storage.Attachments,
+					isOrphaned: func(path string, obj storage.Object, stat fs.FileInfo) (bool, error) {
+						exists, err := repo.ExistAttachmentsByUUID(ctx, stat.Name())
+						return !exists, err
+					},
+					name: "attachment",
+				}); err != nil {
+				return err
+			}
+		}
+
+		if opts.LFS || opts.All {
+			if err := commonCheckStorage(ctx, logger, autofix,
+				&commonStorageCheckOptions{
+					storer: storage.LFS,
+					isOrphaned: func(path string, obj storage.Object, stat fs.FileInfo) (bool, error) {
+						// The oid of an LFS stored object is the name but with all the path.Separators removed
+						oid := strings.ReplaceAll(path, "/", "")
+						exists, err := git.ExistsLFSObject(ctx, oid)
+						return !exists, err
+					},
+					name: "LFS file",
+				}); err != nil {
+				return err
+			}
+		}
+
+		if opts.Avatars || opts.All {
+			if err := commonCheckStorage(ctx, logger, autofix,
+				&commonStorageCheckOptions{
+					storer: storage.Avatars,
+					isOrphaned: func(path string, obj storage.Object, stat fs.FileInfo) (bool, error) {
+						exists, err := user.ExistsWithAvatarAtStoragePath(ctx, path)
+						return !exists, err
+					},
+					name: "avatar",
+				}); err != nil {
+				return err
+			}
+		}
+
+		if opts.RepoAvatars || opts.All {
+			if err := commonCheckStorage(ctx, logger, autofix,
+				&commonStorageCheckOptions{
+					storer: storage.RepoAvatars,
+					isOrphaned: func(path string, obj storage.Object, stat fs.FileInfo) (bool, error) {
+						exists, err := repo.ExistsWithAvatarAtStoragePath(ctx, path)
+						return !exists, err
+					},
+					name: "repo avatar",
+				}); err != nil {
+				return err
+			}
+		}
+
+		if opts.RepoArchives || opts.All {
+			if err := commonCheckStorage(ctx, logger, autofix,
+				&commonStorageCheckOptions{
+					// The archives live in their own storage - iterating storage.RepoAvatars here
+					// would test (and with autofix delete) repository avatars as if they were archives
+					storer: storage.RepoArchives,
+					isOrphaned: func(path string, obj storage.Object, stat fs.FileInfo) (bool, error) {
+						exists, err := repo.ExistsRepoArchiverWithStoragePath(ctx, path)
+						if err == nil || errors.Is(err, util.ErrInvalidArgument) {
+							// invalid arguments mean that the object is not a valid repo archiver and it should be removed
+							return !exists, nil
+						}
+						return !exists, err
+					},
+					name: "repo archive",
+				}); err != nil {
+				return err
+			}
+		}
+
+		if opts.Packages || opts.All {
+			if err := commonCheckStorage(ctx, logger, autofix,
+				&commonStorageCheckOptions{
+					storer: storage.Packages,
+					isOrphaned: func(path string, obj storage.Object, stat fs.FileInfo) (bool, error) {
+						key, err := packages_module.RelativePathToKey(path)
+						if err != nil {
+							// If there is an error here then the relative path does not match a valid package
+							// Therefore it is orphaned by default
+							return true, nil
+						}
+
+						exists, err := packages.ExistPackageBlobWithSHA(ctx, string(key))
+
+						return !exists, err
+					},
+					name: "package blob",
+				}); err != nil {
+				return err
+			}
+		}
+
+		return nil
 	}
-	return checkAttachmentStorageFiles(logger, autofix)
 }
func init() {
Register(&Check{
- Title: "Check if there is garbage storage files",
+ Title: "Check if there are orphaned storage files",
Name: "storages",
IsDefault: false,
- Run: checkStorageFiles,
+ Run: checkStorage(&checkStorageOptions{All: true}),
+ AbortIfFailed: false,
+ SkipDatabaseInitialization: false,
+ Priority: 1,
+ })
+
+ Register(&Check{
+ Title: "Check if there are orphaned attachments in storage",
+ Name: "storage-attachments",
+ IsDefault: false,
+ Run: checkStorage(&checkStorageOptions{Attachments: true}),
+ AbortIfFailed: false,
+ SkipDatabaseInitialization: false,
+ Priority: 1,
+ })
+
+ Register(&Check{
+ Title: "Check if there are orphaned lfs files in storage",
+ Name: "storage-lfs",
+ IsDefault: false,
+ Run: checkStorage(&checkStorageOptions{LFS: true}),
+ AbortIfFailed: false,
+ SkipDatabaseInitialization: false,
+ Priority: 1,
+ })
+
+ Register(&Check{
+ Title: "Check if there are orphaned avatars in storage",
+ Name: "storage-avatars",
+ IsDefault: false,
+ Run: checkStorage(&checkStorageOptions{Avatars: true, RepoAvatars: true}),
+ AbortIfFailed: false,
+ SkipDatabaseInitialization: false,
+ Priority: 1,
+ })
+
+ Register(&Check{
+ Title: "Check if there are orphaned archives in storage",
+ Name: "storage-archives",
+ IsDefault: false,
+ Run: checkStorage(&checkStorageOptions{RepoArchives: true}),
+ AbortIfFailed: false,
+ SkipDatabaseInitialization: false,
+ Priority: 1,
+ })
+
+ Register(&Check{
+ Title: "Check if there are orphaned package blobs in storage",
+ Name: "storage-packages",
+ IsDefault: false,
+ Run: checkStorage(&checkStorageOptions{Packages: true}),
AbortIfFailed: false,
SkipDatabaseInitialization: false,
Priority: 1,
diff --git a/modules/git/repo_archive.go b/modules/git/repo_archive.go
index a0cbfba5d9..13be2004ca 100644
--- a/modules/git/repo_archive.go
+++ b/modules/git/repo_archive.go
@@ -38,6 +38,18 @@ func (a ArchiveType) String() string {
return "unknown"
}
+// ToArchiveType converts an archive extension ("zip", "tar.gz" or "bundle") to its ArchiveType.
+// Any other string yields 0, which is none of ZIP, TARGZ or BUNDLE.
+func ToArchiveType(s string) ArchiveType {
+ switch s {
+ case "zip":
+ return ZIP
+ case "tar.gz":
+ return TARGZ
+ case "bundle":
+ return BUNDLE
+ }
+ return 0
+}
+
// CreateArchive create archive content to the target path
func (repo *Repository) CreateArchive(ctx context.Context, format ArchiveType, target io.Writer, usePrefix bool, commitID string) error {
if format.String() == "unknown" {
diff --git a/modules/packages/content_store.go b/modules/packages/content_store.go
index a3a5d1a666..be416ac269 100644
--- a/modules/packages/content_store.go
+++ b/modules/packages/content_store.go
@@ -7,8 +7,10 @@ package packages
import (
"io"
"path"
+ "strings"
"code.gitea.io/gitea/modules/storage"
+ "code.gitea.io/gitea/modules/util"
)
// BlobHash256Key is the key to address a blob content
@@ -45,3 +47,13 @@ func (s *ContentStore) Delete(key BlobHash256Key) error {
func KeyToRelativePath(key BlobHash256Key) string {
return path.Join(string(key)[0:2], string(key)[2:4], string(key))
}
+
+// RelativePathToKey converts a relative path aa/bb/aabb000000... to the sha256 key aabb000000...
+func RelativePathToKey(relativePath string) (BlobHash256Key, error) {
+ parts := strings.SplitN(relativePath, "/", 3)
+ if len(parts) != 3 || len(parts[0]) != 2 || len(parts[1]) != 2 || len(parts[2]) < 4 || parts[0]+parts[1] != parts[2][0:4] {
+ return "", util.ErrInvalidArgument
+ }
+
+ return BlobHash256Key(parts[2]), nil
+}
diff --git a/routers/web/repo/lfs.go b/routers/web/repo/lfs.go
index 67cb6837a5..9bf4307bfe 100644
--- a/routers/web/repo/lfs.go
+++ b/routers/web/repo/lfs.go
@@ -478,7 +478,7 @@ func LFSPointerFiles(ctx *context.Context) {
return err
}
if !result.Associatable {
- associated, err := git_model.LFSObjectIsAssociated(pointerBlob.Oid)
+ associated, err := git_model.ExistsLFSObject(ctx, pointerBlob.Oid)
if err != nil {
return err
}