From c772934ff623b3a76efbe306f597695330a71287 Mon Sep 17 00:00:00 2001
From: zeripath
Date: Tue, 15 Nov 2022 08:08:59 +0000
Subject: Adjust gitea doctor --run storages to check all storage types (#21785)

The doctor check `storages` currently only checks the attachment storage. This PR adds some basic garbage collection functionality for the other types of storage.

Signed-off-by: Andrew Thornton
Co-authored-by: Lunny Xiao
---
 modules/doctor/storage.go         | 232 ++++++++++++++++++++++++++++++++++----
 modules/git/repo_archive.go       |  12 ++
 modules/packages/content_store.go |  12 ++
 3 files changed, 232 insertions(+), 24 deletions(-)

(limited to 'modules')

diff --git a/modules/doctor/storage.go b/modules/doctor/storage.go
index dafd989cf0..8ae9168ea6 100644
--- a/modules/doctor/storage.go
+++ b/modules/doctor/storage.go
@@ -6,71 +6,255 @@ package doctor
 
 import (
 	"context"
+	"errors"
+	"io/fs"
+	"strings"
 
-	repo_model "code.gitea.io/gitea/models/repo"
+	"code.gitea.io/gitea/models/git"
+	"code.gitea.io/gitea/models/packages"
+	"code.gitea.io/gitea/models/repo"
+	"code.gitea.io/gitea/models/user"
+	"code.gitea.io/gitea/modules/base"
 	"code.gitea.io/gitea/modules/log"
+	packages_module "code.gitea.io/gitea/modules/packages"
 	"code.gitea.io/gitea/modules/storage"
+	"code.gitea.io/gitea/modules/util"
 )
 
-func checkAttachmentStorageFiles(logger log.Logger, autofix bool) error {
-	var total, garbageNum int
-	var deletePaths []string
-	if err := storage.Attachments.IterateObjects(func(p string, obj storage.Object) error {
+type commonStorageCheckOptions struct {
+	storer     storage.ObjectStorage
+	isOrphaned func(path string, obj storage.Object, stat fs.FileInfo) (bool, error)
+	name       string
+}
+
+func commonCheckStorage(ctx context.Context, logger log.Logger, autofix bool, opts *commonStorageCheckOptions) error {
+	totalCount, orphanedCount := 0, 0
+	totalSize, orphanedSize := int64(0), int64(0)
+
+	var pathsToDelete []string
+	if err := opts.storer.IterateObjects(func(p string, obj storage.Object) error {
 		defer obj.Close()
 
-		total++
+		totalCount++
 		stat, err := obj.Stat()
 		if err != nil {
 			return err
 		}
-		exist, err := repo_model.ExistAttachmentsByUUID(stat.Name())
+		totalSize += stat.Size()
+
+		orphaned, err := opts.isOrphaned(p, obj, stat)
 		if err != nil {
 			return err
 		}
-		if !exist {
-			garbageNum++
+		if orphaned {
+			orphanedCount++
+			orphanedSize += stat.Size()
 			if autofix {
-				deletePaths = append(deletePaths, p)
+				pathsToDelete = append(pathsToDelete, p)
 			}
 		}
 		return nil
 	}); err != nil {
-		logger.Error("storage.Attachments.IterateObjects failed: %v", err)
+		logger.Error("Error whilst iterating %s storage: %v", opts.name, err)
 		return err
 	}
 
-	if garbageNum > 0 {
+	if orphanedCount > 0 {
 		if autofix {
 			var deletedNum int
-			for _, p := range deletePaths {
-				if err := storage.Attachments.Delete(p); err != nil {
-					log.Error("Delete attachment %s failed: %v", p, err)
+			for _, p := range pathsToDelete {
+				if err := opts.storer.Delete(p); err != nil {
+					logger.Error("Error whilst deleting %s from %s storage: %v", p, opts.name, err)
 				} else {
 					deletedNum++
 				}
 			}
-			logger.Info("%d missed information attachment detected, %d deleted.", garbageNum, deletedNum)
+			logger.Info("Deleted %d/%d orphaned %s(s)", deletedNum, orphanedCount, opts.name)
 		} else {
-			logger.Warn("Checked %d attachment, %d missed information.", total, garbageNum)
+			logger.Warn("Found %d/%d (%s/%s) orphaned %s(s)", orphanedCount, totalCount, base.FileSize(orphanedSize), base.FileSize(totalSize), opts.name)
 		}
+	} else {
+		logger.Info("Found %d (%s) %s(s)", totalCount, base.FileSize(totalSize), opts.name)
 	}
 	return nil
 }
 
-func checkStorageFiles(ctx context.Context, logger log.Logger, autofix bool) error {
-	if err := storage.Init(); err != nil {
-		logger.Error("storage.Init failed: %v", err)
-		return err
+type checkStorageOptions struct {
+	All          bool
+	Attachments  bool
+	LFS          bool
+	Avatars      bool
+	RepoAvatars  bool
+	RepoArchives bool
+	Packages     bool
+}
+
+// checkStorage will return a doctor check function to check the requested storage types for "orphaned" stored object/files and optionally delete them
+func checkStorage(opts *checkStorageOptions) func(ctx context.Context, logger log.Logger, autofix bool) error {
+	return func(ctx context.Context, logger log.Logger, autofix bool) error {
+		if err := storage.Init(); err != nil {
+			logger.Error("storage.Init failed: %v", err)
+			return err
+		}
+
+		if opts.Attachments || opts.All {
+			if err := commonCheckStorage(ctx, logger, autofix,
+				&commonStorageCheckOptions{
+					storer: storage.Attachments,
+					isOrphaned: func(path string, obj storage.Object, stat fs.FileInfo) (bool, error) {
+						exists, err := repo.ExistAttachmentsByUUID(ctx, stat.Name())
+						return !exists, err
+					},
+					name: "attachment",
+				}); err != nil {
+				return err
+			}
+		}
+
+		if opts.LFS || opts.All {
+			if err := commonCheckStorage(ctx, logger, autofix,
+				&commonStorageCheckOptions{
+					storer: storage.LFS,
+					isOrphaned: func(path string, obj storage.Object, stat fs.FileInfo) (bool, error) {
+						// The oid of an LFS stored object is the name but with all the path.Separators removed
+						oid := strings.ReplaceAll(path, "/", "")
+						exists, err := git.ExistsLFSObject(ctx, oid)
+						return !exists, err
+					},
+					name: "LFS file",
+				}); err != nil {
+				return err
+			}
+		}
+
+		if opts.Avatars || opts.All {
+			if err := commonCheckStorage(ctx, logger, autofix,
+				&commonStorageCheckOptions{
+					storer: storage.Avatars,
+					isOrphaned: func(path string, obj storage.Object, stat fs.FileInfo) (bool, error) {
+						exists, err := user.ExistsWithAvatarAtStoragePath(ctx, path)
+						return !exists, err
+					},
+					name: "avatar",
+				}); err != nil {
+				return err
+			}
+		}
+
+		if opts.RepoAvatars || opts.All {
+			if err := commonCheckStorage(ctx, logger, autofix,
+				&commonStorageCheckOptions{
+					storer: storage.RepoAvatars,
+					isOrphaned: func(path string, obj storage.Object, stat fs.FileInfo) (bool, error) {
+						exists, err := repo.ExistsWithAvatarAtStoragePath(ctx, path)
+						return !exists, err
+					},
+					name: "repo avatar",
+				}); err != nil {
+				return err
+			}
+		}
+
+		if opts.RepoArchives || opts.All {
+			if err := commonCheckStorage(ctx, logger, autofix,
+				&commonStorageCheckOptions{
+					storer: storage.RepoArchives,
+					isOrphaned: func(path string, obj storage.Object, stat fs.FileInfo) (bool, error) {
+						exists, err := repo.ExistsRepoArchiverWithStoragePath(ctx, path)
+						if err == nil || errors.Is(err, util.ErrInvalidArgument) {
+							// invalid arguments mean that the object is not a valid repo archiver and it should be removed
+							return !exists, nil
+						}
+						return !exists, err
+					},
+					name: "repo archive",
+				}); err != nil {
+				return err
+			}
+		}
+
+		if opts.Packages || opts.All {
+			if err := commonCheckStorage(ctx, logger, autofix,
+				&commonStorageCheckOptions{
+					storer: storage.Packages,
+					isOrphaned: func(path string, obj storage.Object, stat fs.FileInfo) (bool, error) {
+						key, err := packages_module.RelativePathToKey(path)
+						if err != nil {
+							// If there is an error here then the relative path does not match a valid package
+							// Therefore it is orphaned by default
+							return true, nil
+						}
+
+						exists, err := packages.ExistPackageBlobWithSHA(ctx, string(key))
+
+						return !exists, err
+					},
+					name: "package blob",
+				}); err != nil {
+				return err
+			}
+		}
+
+		return nil
 	}
-	return checkAttachmentStorageFiles(logger, autofix)
 }
 
 func init() {
 	Register(&Check{
-		Title:                      "Check if there is garbage storage files",
+		Title:                      "Check if there are orphaned storage files",
 		Name:                       "storages",
 		IsDefault:                  false,
-		Run:                        checkStorageFiles,
+		Run:                        checkStorage(&checkStorageOptions{All: true}),
+		AbortIfFailed:              false,
+		SkipDatabaseInitialization: false,
+		Priority:                   1,
+	})
+
+	Register(&Check{
+		Title:                      "Check if there are orphaned attachments in storage",
+		Name:                       "storage-attachments",
+		IsDefault:                  false,
+		Run:                        checkStorage(&checkStorageOptions{Attachments: true}),
+		AbortIfFailed:              false,
+		SkipDatabaseInitialization: false,
+		Priority:                   1,
+	})
+
+	Register(&Check{
+		Title:                      "Check if there are orphaned lfs files in storage",
+		Name:                       "storage-lfs",
+		IsDefault:                  false,
+		Run:                        checkStorage(&checkStorageOptions{LFS: true}),
+		AbortIfFailed:              false,
+		SkipDatabaseInitialization: false,
+		Priority:                   1,
+	})
+
+	Register(&Check{
+		Title:                      "Check if there are orphaned avatars in storage",
+		Name:                       "storage-avatars",
+		IsDefault:                  false,
+		Run:                        checkStorage(&checkStorageOptions{Avatars: true, RepoAvatars: true}),
+		AbortIfFailed:              false,
+		SkipDatabaseInitialization: false,
+		Priority:                   1,
+	})
+
+	Register(&Check{
+		Title:                      "Check if there are orphaned archives in storage",
+		Name:                       "storage-archives",
+		IsDefault:                  false,
+		Run:                        checkStorage(&checkStorageOptions{RepoArchives: true}),
+		AbortIfFailed:              false,
+		SkipDatabaseInitialization: false,
+		Priority:                   1,
+	})
+
+	Register(&Check{
+		Title:                      "Check if there are orphaned package blobs in storage",
+		Name:                       "storage-packages",
+		IsDefault:                  false,
+		Run:                        checkStorage(&checkStorageOptions{Packages: true}),
 		AbortIfFailed:              false,
 		SkipDatabaseInitialization: false,
 		Priority:                   1,
diff --git a/modules/git/repo_archive.go b/modules/git/repo_archive.go
index a0cbfba5d9..13be2004ca 100644
--- a/modules/git/repo_archive.go
+++ b/modules/git/repo_archive.go
@@ -38,6 +38,18 @@ func (a ArchiveType) String() string {
 	return "unknown"
 }
 
+func ToArchiveType(s string) ArchiveType {
+	switch s {
+	case "zip":
+		return ZIP
+	case "tar.gz":
+		return TARGZ
+	case "bundle":
+		return BUNDLE
+	}
+	return 0
+}
+
 // CreateArchive create archive content to the target path
 func (repo *Repository) CreateArchive(ctx context.Context, format ArchiveType, target io.Writer, usePrefix bool, commitID string) error {
 	if format.String() == "unknown" {
diff --git a/modules/packages/content_store.go b/modules/packages/content_store.go
index a3a5d1a666..be416ac269 100644
--- a/modules/packages/content_store.go
+++ b/modules/packages/content_store.go
@@ -7,8 +7,10 @@ package packages
 import (
 	"io"
 	"path"
+	"strings"
 
 	"code.gitea.io/gitea/modules/storage"
+	"code.gitea.io/gitea/modules/util"
 )
 
 // BlobHash256Key is the key to address a blob content
@@ -45,3 +47,13 @@ func (s *ContentStore) Delete(key BlobHash256Key) error {
 func KeyToRelativePath(key BlobHash256Key) string {
 	return path.Join(string(key)[0:2], string(key)[2:4], string(key))
 }
+
+// RelativePathToKey converts a relative path aa/bb/aabb000000... to the sha256 key aabb000000...
+func RelativePathToKey(relativePath string) (BlobHash256Key, error) {
+	parts := strings.SplitN(relativePath, "/", 3)
+	if len(parts) != 3 || len(parts[0]) != 2 || len(parts[1]) != 2 || len(parts[2]) < 4 || parts[0]+parts[1] != parts[2][0:4] {
+		return "", util.ErrInvalidArgument
+	}
+
+	return BlobHash256Key(parts[2]), nil
+}
-- 
cgit v1.2.3