aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLunny Xiao <xiaolunwen@gmail.com>2021-06-24 05:12:38 +0800
committerGitHub <noreply@github.com>2021-06-23 17:12:38 -0400
commitb223d361955f8b722f7dd0b358b2e57e6f359edf (patch)
treecaa934320b264b969df679508eb19458e0cc6029
parentc9c7afda1a80bda7b61ded222163db796132b78f (diff)
downloadgitea-b223d361955f8b722f7dd0b358b2e57e6f359edf.tar.gz
gitea-b223d361955f8b722f7dd0b358b2e57e6f359edf.zip
Rework repository archive (#14723)
* Use storage to store archive files * Fix backend lint * Add archiver table on database * Finish archive download * Fix test * Add database migrations * Add status for archiver * Fix lint * Add queue * Add doctor to check and delete old archives * Improve archive queue * Fix tests * improve archive storage * Delete repo archives * Add missing fixture * fix fixture * Fix fixture * Fix test * Fix archiver cleaning * Fix bug * Add docs for repository archive storage * remove repo-archive configuration * Fix test * Fix test * Fix lint Co-authored-by: 6543 <6543@obermui.de> Co-authored-by: techknowlogick <techknowlogick@gitea.io>
-rw-r--r--custom/conf/app.example.ini10
-rw-r--r--docs/content/doc/advanced/config-cheat-sheet.en-us.md17
-rw-r--r--docs/content/doc/advanced/config-cheat-sheet.zh-cn.md15
-rw-r--r--integrations/gitea-repositories-meta/user27/repo49.git/refs/heads/test/archive1
-rw-r--r--models/fixtures/repo_archiver.yml1
-rw-r--r--models/migrations/migrations.go2
-rw-r--r--models/migrations/v181.go1
-rw-r--r--models/migrations/v185.go22
-rw-r--r--models/models.go1
-rw-r--r--models/repo.go97
-rw-r--r--models/repo_archiver.go86
-rw-r--r--models/unit_tests.go2
-rw-r--r--modules/context/context.go15
-rw-r--r--modules/doctor/checkOldArchives.go59
-rw-r--r--modules/git/repo_archive.go (renamed from modules/git/commit_archive.go)31
-rw-r--r--modules/setting/repository.go6
-rw-r--r--modules/setting/storage.go4
-rw-r--r--modules/storage/storage.go15
-rw-r--r--routers/api/v1/repo/file.go3
-rw-r--r--routers/common/repo.go26
-rw-r--r--routers/init.go4
-rw-r--r--routers/web/repo/repo.go114
-rw-r--r--routers/web/web.go3
-rw-r--r--services/archiver/archiver.go394
-rw-r--r--services/archiver/archiver_test.go159
25 files changed, 628 insertions, 460 deletions
diff --git a/custom/conf/app.example.ini b/custom/conf/app.example.ini
index 38a27509f7..5adfb0546f 100644
--- a/custom/conf/app.example.ini
+++ b/custom/conf/app.example.ini
@@ -2050,6 +2050,16 @@ PATH =
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; settings for repository archives, will override storage setting
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;[storage.repo-archive]
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; storage type
+;STORAGE_TYPE = local
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; lfs storage will override storage
;;
;[lfs]
diff --git a/docs/content/doc/advanced/config-cheat-sheet.en-us.md b/docs/content/doc/advanced/config-cheat-sheet.en-us.md
index 8f1f9ce42d..5e976174fb 100644
--- a/docs/content/doc/advanced/config-cheat-sheet.en-us.md
+++ b/docs/content/doc/advanced/config-cheat-sheet.en-us.md
@@ -995,6 +995,23 @@ MINIO_USE_SSL = false
And used by `[attachment]`, `[lfs]` and etc. as `STORAGE_TYPE`.
+## Repository Archive Storage (`storage.repo-archive`)
+
+Configuration for repository archive storage. It will inherit from default `[storage]` or
+`[storage.xxx]` when set `STORAGE_TYPE` to `xxx`. The default of `PATH`
+is `data/repo-archive` and the default of `MINIO_BASE_PATH` is `repo-archive/`.
+
+- `STORAGE_TYPE`: **local**: Storage type for repo archive, `local` for local disk or `minio` for s3 compatible object storage service or other name defined with `[storage.xxx]`
+- `SERVE_DIRECT`: **false**: Allows the storage driver to redirect to authenticated URLs to serve files directly. Currently, only Minio/S3 is supported via signed URLs, local does nothing.
+- `PATH`: **./data/repo-archive**: Where to store archive files, only available when `STORAGE_TYPE` is `local`.
+- `MINIO_ENDPOINT`: **localhost:9000**: Minio endpoint to connect only available when `STORAGE_TYPE` is `minio`
+- `MINIO_ACCESS_KEY_ID`: Minio accessKeyID to connect only available when `STORAGE_TYPE` is `minio`
+- `MINIO_SECRET_ACCESS_KEY`: Minio secretAccessKey to connect only available when `STORAGE_TYPE is` `minio`
+- `MINIO_BUCKET`: **gitea**: Minio bucket to store the lfs only available when `STORAGE_TYPE` is `minio`
+- `MINIO_LOCATION`: **us-east-1**: Minio location to create bucket only available when `STORAGE_TYPE` is `minio`
+- `MINIO_BASE_PATH`: **repo-archive/**: Minio base path on the bucket only available when `STORAGE_TYPE` is `minio`
+- `MINIO_USE_SSL`: **false**: Minio enabled ssl only available when `STORAGE_TYPE` is `minio`
+
## Other (`other`)
- `SHOW_FOOTER_BRANDING`: **false**: Show Gitea branding in the footer.
diff --git a/docs/content/doc/advanced/config-cheat-sheet.zh-cn.md b/docs/content/doc/advanced/config-cheat-sheet.zh-cn.md
index 79cfd94cc7..2303a631d5 100644
--- a/docs/content/doc/advanced/config-cheat-sheet.zh-cn.md
+++ b/docs/content/doc/advanced/config-cheat-sheet.zh-cn.md
@@ -382,6 +382,21 @@ MINIO_USE_SSL = false
然后你在 `[attachment]`, `[lfs]` 等中可以把这个名字用作 `STORAGE_TYPE` 的值。
+## Repository Archive Storage (`storage.repo-archive`)
+
+Repository archive 的存储配置。 如果 `STORAGE_TYPE` 为空,则此配置将从 `[storage]` 继承。如果不为 `local` 或者 `minio` 而为 `xxx`, 则从 `[storage.xxx]` 继承。当继承时, `PATH` 默认为 `data/repo-archive`,`MINIO_BASE_PATH` 默认为 `repo-archive/`。
+
+- `STORAGE_TYPE`: **local**: Repository archive 的存储类型,`local` 将存储到磁盘,`minio` 将存储到 s3 兼容的对象服务。
+- `SERVE_DIRECT`: **false**: 允许直接重定向到存储系统。当前,仅 Minio/S3 是支持的。
+- `PATH`: 存放 Repository archive 上传的文件的地方,默认是 `data/repo-archive`。
+- `MINIO_ENDPOINT`: **localhost:9000**: Minio 地址,仅当 `STORAGE_TYPE` 为 `minio` 时有效。
+- `MINIO_ACCESS_KEY_ID`: Minio accessKeyID,仅当 `STORAGE_TYPE` 为 `minio` 时有效。
+- `MINIO_SECRET_ACCESS_KEY`: Minio secretAccessKey,仅当 `STORAGE_TYPE` 为 `minio` 时有效。
+- `MINIO_BUCKET`: **gitea**: Minio bucket,仅当 `STORAGE_TYPE` 为 `minio` 时有效。
+- `MINIO_LOCATION`: **us-east-1**: Minio location ,仅当 `STORAGE_TYPE` 为 `minio` 时有效。
+- `MINIO_BASE_PATH`: **repo-archive/**: Minio base path ,仅当 `STORAGE_TYPE` 为 `minio` 时有效。
+- `MINIO_USE_SSL`: **false**: Minio 是否启用 ssl ,仅当 `STORAGE_TYPE` 为 `minio` 时有效。
+
## Other (`other`)
- `SHOW_FOOTER_BRANDING`: 为真则在页面底部显示Gitea的字样。
diff --git a/integrations/gitea-repositories-meta/user27/repo49.git/refs/heads/test/archive b/integrations/gitea-repositories-meta/user27/repo49.git/refs/heads/test/archive
new file mode 100644
index 0000000000..0f13243bfd
--- /dev/null
+++ b/integrations/gitea-repositories-meta/user27/repo49.git/refs/heads/test/archive
@@ -0,0 +1 @@
+aacbdfe9e1c4b47f60abe81849045fa4e96f1d75
diff --git a/models/fixtures/repo_archiver.yml b/models/fixtures/repo_archiver.yml
new file mode 100644
index 0000000000..ca780a73aa
--- /dev/null
+++ b/models/fixtures/repo_archiver.yml
@@ -0,0 +1 @@
+[] # empty
diff --git a/models/migrations/migrations.go b/models/migrations/migrations.go
index 880f55092d..4e17a6a2c8 100644
--- a/models/migrations/migrations.go
+++ b/models/migrations/migrations.go
@@ -319,6 +319,8 @@ var migrations = []Migration{
NewMigration("Create PushMirror table", createPushMirrorTable),
// v184 -> v185
NewMigration("Rename Task errors to message", renameTaskErrorsToMessage),
+ // v185 -> v186
+ NewMigration("Add new table repo_archiver", addRepoArchiver),
}
// GetCurrentDBVersion returns the current db version
diff --git a/models/migrations/v181.go b/models/migrations/v181.go
index 6ba4edc155..65045593ad 100644
--- a/models/migrations/v181.go
+++ b/models/migrations/v181.go
@@ -1,3 +1,4 @@
+// Copyright 2021 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
diff --git a/models/migrations/v185.go b/models/migrations/v185.go
new file mode 100644
index 0000000000..0969948897
--- /dev/null
+++ b/models/migrations/v185.go
@@ -0,0 +1,22 @@
+// Copyright 2021 The Gitea Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package migrations
+
+import (
+ "xorm.io/xorm"
+)
+
+func addRepoArchiver(x *xorm.Engine) error {
+ // RepoArchiver represents all archivers
+ type RepoArchiver struct {
+ ID int64 `xorm:"pk autoincr"`
+ RepoID int64 `xorm:"index unique(s)"`
+ Type int `xorm:"unique(s)"`
+ Status int
+ CommitID string `xorm:"VARCHAR(40) unique(s)"`
+ CreatedUnix int64 `xorm:"INDEX NOT NULL created"`
+ }
+ return x.Sync2(new(RepoArchiver))
+}
diff --git a/models/models.go b/models/models.go
index c325fd3811..3266be0f4a 100644
--- a/models/models.go
+++ b/models/models.go
@@ -136,6 +136,7 @@ func init() {
new(RepoTransfer),
new(IssueIndex),
new(PushMirror),
+ new(RepoArchiver),
)
gonicNames := []string{"SSL", "UID"}
diff --git a/models/repo.go b/models/repo.go
index dc4e03a28a..2baf6e9bdd 100644
--- a/models/repo.go
+++ b/models/repo.go
@@ -1587,6 +1587,22 @@ func DeleteRepository(doer *User, uid, repoID int64) error {
return err
}
+ // Remove archives
+ var archives []*RepoArchiver
+ if err = sess.Where("repo_id=?", repoID).Find(&archives); err != nil {
+ return err
+ }
+
+ for _, v := range archives {
+ v.Repo = repo
+ p, _ := v.RelativePath()
+ removeStorageWithNotice(sess, storage.RepoArchives, "Delete repo archive file", p)
+ }
+
+ if _, err := sess.Delete(&RepoArchiver{RepoID: repoID}); err != nil {
+ return err
+ }
+
if repo.NumForks > 0 {
if _, err = sess.Exec("UPDATE `repository` SET fork_id=0,is_fork=? WHERE fork_id=?", false, repo.ID); err != nil {
log.Error("reset 'fork_id' and 'is_fork': %v", err)
@@ -1768,64 +1784,45 @@ func DeleteRepositoryArchives(ctx context.Context) error {
func DeleteOldRepositoryArchives(ctx context.Context, olderThan time.Duration) error {
log.Trace("Doing: ArchiveCleanup")
- if err := x.Where("id > 0").Iterate(new(Repository), func(idx int, bean interface{}) error {
- return deleteOldRepositoryArchives(ctx, olderThan, idx, bean)
- }); err != nil {
- log.Trace("Error: ArchiveClean: %v", err)
- return err
- }
-
- log.Trace("Finished: ArchiveCleanup")
- return nil
-}
-
-func deleteOldRepositoryArchives(ctx context.Context, olderThan time.Duration, idx int, bean interface{}) error {
- repo := bean.(*Repository)
- basePath := filepath.Join(repo.RepoPath(), "archives")
-
- for _, ty := range []string{"zip", "targz"} {
- select {
- case <-ctx.Done():
- return ErrCancelledf("before deleting old repository archives with filetype %s for %s", ty, repo.FullName())
- default:
- }
-
- path := filepath.Join(basePath, ty)
- file, err := os.Open(path)
- if err != nil {
- if !os.IsNotExist(err) {
- log.Warn("Unable to open directory %s: %v", path, err)
- return err
- }
-
- // If the directory doesn't exist, that's okay.
- continue
- }
-
- files, err := file.Readdir(0)
- file.Close()
+ for {
+ var archivers []RepoArchiver
+ err := x.Where("created_unix < ?", time.Now().Add(-olderThan).Unix()).
+ Asc("created_unix").
+ Limit(100).
+ Find(&archivers)
if err != nil {
- log.Warn("Unable to read directory %s: %v", path, err)
+ log.Trace("Error: ArchiveClean: %v", err)
return err
}
- minimumOldestTime := time.Now().Add(-olderThan)
- for _, info := range files {
- if info.ModTime().Before(minimumOldestTime) && !info.IsDir() {
- select {
- case <-ctx.Done():
- return ErrCancelledf("before deleting old repository archive file %s with filetype %s for %s", info.Name(), ty, repo.FullName())
- default:
- }
- toDelete := filepath.Join(path, info.Name())
- // This is a best-effort purge, so we do not check error codes to confirm removal.
- if err = util.Remove(toDelete); err != nil {
- log.Trace("Unable to delete %s, but proceeding: %v", toDelete, err)
- }
+ for _, archiver := range archivers {
+ if err := deleteOldRepoArchiver(ctx, &archiver); err != nil {
+ return err
}
}
+ if len(archivers) < 100 {
+ break
+ }
}
+ log.Trace("Finished: ArchiveCleanup")
+ return nil
+}
+
+var delRepoArchiver = new(RepoArchiver)
+
+func deleteOldRepoArchiver(ctx context.Context, archiver *RepoArchiver) error {
+ p, err := archiver.RelativePath()
+ if err != nil {
+ return err
+ }
+ _, err = x.ID(archiver.ID).Delete(delRepoArchiver)
+ if err != nil {
+ return err
+ }
+ if err := storage.RepoArchives.Delete(p); err != nil {
+ log.Error("delete repo archive file failed: %v", err)
+ }
return nil
}
diff --git a/models/repo_archiver.go b/models/repo_archiver.go
new file mode 100644
index 0000000000..833a22ee13
--- /dev/null
+++ b/models/repo_archiver.go
@@ -0,0 +1,86 @@
+// Copyright 2021 The Gitea Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package models
+
+import (
+ "fmt"
+
+ "code.gitea.io/gitea/modules/git"
+ "code.gitea.io/gitea/modules/timeutil"
+)
+
+// RepoArchiverStatus represents repo archive status
+type RepoArchiverStatus int
+
+// enumerate all repo archive statuses
+const (
+ RepoArchiverGenerating = iota // the archiver is generating
+ RepoArchiverReady // it's ready
+)
+
+// RepoArchiver represents all archivers
+type RepoArchiver struct {
+ ID int64 `xorm:"pk autoincr"`
+ RepoID int64 `xorm:"index unique(s)"`
+ Repo *Repository `xorm:"-"`
+ Type git.ArchiveType `xorm:"unique(s)"`
+ Status RepoArchiverStatus
+ CommitID string `xorm:"VARCHAR(40) unique(s)"`
+ CreatedUnix timeutil.TimeStamp `xorm:"INDEX NOT NULL created"`
+}
+
+// LoadRepo loads repository
+func (archiver *RepoArchiver) LoadRepo() (*Repository, error) {
+ if archiver.Repo != nil {
+ return archiver.Repo, nil
+ }
+
+ var repo Repository
+ has, err := x.ID(archiver.RepoID).Get(&repo)
+ if err != nil {
+ return nil, err
+ }
+ if !has {
+ return nil, ErrRepoNotExist{
+ ID: archiver.RepoID,
+ }
+ }
+ return &repo, nil
+}
+
+// RelativePath returns relative path
+func (archiver *RepoArchiver) RelativePath() (string, error) {
+ repo, err := archiver.LoadRepo()
+ if err != nil {
+ return "", err
+ }
+
+ return fmt.Sprintf("%s/%s/%s.%s", repo.FullName(), archiver.CommitID[:2], archiver.CommitID, archiver.Type.String()), nil
+}
+
+// GetRepoArchiver get an archiver
+func GetRepoArchiver(ctx DBContext, repoID int64, tp git.ArchiveType, commitID string) (*RepoArchiver, error) {
+ var archiver RepoArchiver
+ has, err := ctx.e.Where("repo_id=?", repoID).And("`type`=?", tp).And("commit_id=?", commitID).Get(&archiver)
+ if err != nil {
+ return nil, err
+ }
+ if has {
+ return &archiver, nil
+ }
+ return nil, nil
+}
+
+// AddRepoArchiver adds an archiver
+func AddRepoArchiver(ctx DBContext, archiver *RepoArchiver) error {
+ _, err := ctx.e.Insert(archiver)
+ return err
+}
+
+// UpdateRepoArchiverStatus updates archiver's status
+func UpdateRepoArchiverStatus(ctx DBContext, archiver *RepoArchiver) error {
+ _, err := ctx.e.ID(archiver.ID).Cols("status").Update(archiver)
+ return err
+}
diff --git a/models/unit_tests.go b/models/unit_tests.go
index 5a145fa2c0..f8d6819333 100644
--- a/models/unit_tests.go
+++ b/models/unit_tests.go
@@ -74,6 +74,8 @@ func MainTest(m *testing.M, pathToGiteaRoot string) {
setting.RepoAvatar.Storage.Path = filepath.Join(setting.AppDataPath, "repo-avatars")
+ setting.RepoArchive.Storage.Path = filepath.Join(setting.AppDataPath, "repo-archive")
+
if err = storage.Init(); err != nil {
fatalTestError("storage.Init: %v\n", err)
}
diff --git a/modules/context/context.go b/modules/context/context.go
index 7b3fd2899a..64f8b12084 100644
--- a/modules/context/context.go
+++ b/modules/context/context.go
@@ -380,6 +380,21 @@ func (ctx *Context) ServeFile(file string, names ...string) {
http.ServeFile(ctx.Resp, ctx.Req, file)
}
+// ServeStream serves file via io stream
+func (ctx *Context) ServeStream(rd io.Reader, name string) {
+ ctx.Resp.Header().Set("Content-Description", "File Transfer")
+ ctx.Resp.Header().Set("Content-Type", "application/octet-stream")
+ ctx.Resp.Header().Set("Content-Disposition", "attachment; filename="+name)
+ ctx.Resp.Header().Set("Content-Transfer-Encoding", "binary")
+ ctx.Resp.Header().Set("Expires", "0")
+ ctx.Resp.Header().Set("Cache-Control", "must-revalidate")
+ ctx.Resp.Header().Set("Pragma", "public")
+ _, err := io.Copy(ctx.Resp, rd)
+ if err != nil {
+ ctx.ServerError("Download file failed", err)
+ }
+}
+
// Error returned an error to web browser
func (ctx *Context) Error(status int, contents ...string) {
var v = http.StatusText(status)
diff --git a/modules/doctor/checkOldArchives.go b/modules/doctor/checkOldArchives.go
new file mode 100644
index 0000000000..a4e2ffbd1f
--- /dev/null
+++ b/modules/doctor/checkOldArchives.go
@@ -0,0 +1,59 @@
+// Copyright 2021 The Gitea Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package doctor
+
+import (
+ "os"
+ "path/filepath"
+
+ "code.gitea.io/gitea/models"
+ "code.gitea.io/gitea/modules/log"
+ "code.gitea.io/gitea/modules/util"
+)
+
+func checkOldArchives(logger log.Logger, autofix bool) error {
+ numRepos := 0
+ numReposUpdated := 0
+ err := iterateRepositories(func(repo *models.Repository) error {
+ if repo.IsEmpty {
+ return nil
+ }
+
+ p := filepath.Join(repo.RepoPath(), "archives")
+ isDir, err := util.IsDir(p)
+ if err != nil {
+ log.Warn("check if %s is directory failed: %v", p, err)
+ }
+ if isDir {
+ numRepos++
+ if autofix {
+ if err := os.RemoveAll(p); err == nil {
+ numReposUpdated++
+ } else {
+ log.Warn("remove %s failed: %v", p, err)
+ }
+ }
+ }
+ return nil
+ })
+
+ if autofix {
+ logger.Info("%d / %d old archives in repository deleted", numReposUpdated, numRepos)
+ } else {
+ logger.Info("%d old archives in repository need to be deleted", numRepos)
+ }
+
+ return err
+}
+
+func init() {
+ Register(&Check{
+ Title: "Check old archives",
+ Name: "check-old-archives",
+ IsDefault: false,
+ Run: checkOldArchives,
+ Priority: 7,
+ })
+}
diff --git a/modules/git/commit_archive.go b/modules/git/repo_archive.go
index d075ba0911..07003aa6b2 100644
--- a/modules/git/commit_archive.go
+++ b/modules/git/repo_archive.go
@@ -8,6 +8,7 @@ package git
import (
"context"
"fmt"
+ "io"
"path/filepath"
"strings"
)
@@ -33,32 +34,28 @@ func (a ArchiveType) String() string {
return "unknown"
}
-// CreateArchiveOpts represents options for creating an archive
-type CreateArchiveOpts struct {
- Format ArchiveType
- Prefix bool
-}
-
// CreateArchive create archive content to the target path
-func (c *Commit) CreateArchive(ctx context.Context, target string, opts CreateArchiveOpts) error {
- if opts.Format.String() == "unknown" {
- return fmt.Errorf("unknown format: %v", opts.Format)
+func (repo *Repository) CreateArchive(ctx context.Context, format ArchiveType, target io.Writer, usePrefix bool, commitID string) error {
+ if format.String() == "unknown" {
+ return fmt.Errorf("unknown format: %v", format)
}
args := []string{
"archive",
}
- if opts.Prefix {
- args = append(args, "--prefix="+filepath.Base(strings.TrimSuffix(c.repo.Path, ".git"))+"/")
+ if usePrefix {
+ args = append(args, "--prefix="+filepath.Base(strings.TrimSuffix(repo.Path, ".git"))+"/")
}
args = append(args,
- "--format="+opts.Format.String(),
- "-o",
- target,
- c.ID.String(),
+ "--format="+format.String(),
+ commitID,
)
- _, err := NewCommandContext(ctx, args...).RunInDir(c.repo.Path)
- return err
+ var stderr strings.Builder
+ err := NewCommandContext(ctx, args...).RunInDirPipeline(repo.Path, target, &stderr)
+ if err != nil {
+ return ConcatenateError(err, stderr.String())
+ }
+ return nil
}
diff --git a/modules/setting/repository.go b/modules/setting/repository.go
index a7666895e1..c2a6357d94 100644
--- a/modules/setting/repository.go
+++ b/modules/setting/repository.go
@@ -251,6 +251,10 @@ var (
}
RepoRootPath string
ScriptType = "bash"
+
+ RepoArchive = struct {
+ Storage
+ }{}
)
func newRepository() {
@@ -328,4 +332,6 @@ func newRepository() {
if !filepath.IsAbs(Repository.Upload.TempPath) {
Repository.Upload.TempPath = path.Join(AppWorkPath, Repository.Upload.TempPath)
}
+
+ RepoArchive.Storage = getStorage("repo-archive", "", nil)
}
diff --git a/modules/setting/storage.go b/modules/setting/storage.go
index 3ab08d8d2a..075152db59 100644
--- a/modules/setting/storage.go
+++ b/modules/setting/storage.go
@@ -43,6 +43,10 @@ func getStorage(name, typ string, targetSec *ini.Section) Storage {
sec.Key("MINIO_LOCATION").MustString("us-east-1")
sec.Key("MINIO_USE_SSL").MustBool(false)
+ if targetSec == nil {
+ targetSec, _ = Cfg.NewSection(name)
+ }
+
var storage Storage
storage.Section = targetSec
storage.Type = typ
diff --git a/modules/storage/storage.go b/modules/storage/storage.go
index 984f154db4..b3708908f8 100644
--- a/modules/storage/storage.go
+++ b/modules/storage/storage.go
@@ -114,6 +114,9 @@ var (
Avatars ObjectStorage
// RepoAvatars represents repository avatars storage
RepoAvatars ObjectStorage
+
+ // RepoArchives represents repository archives storage
+ RepoArchives ObjectStorage
)
// Init init the stoarge
@@ -130,7 +133,11 @@ func Init() error {
return err
}
- return initLFS()
+ if err := initLFS(); err != nil {
+ return err
+ }
+
+ return initRepoArchives()
}
// NewStorage takes a storage type and some config and returns an ObjectStorage or an error
@@ -169,3 +176,9 @@ func initRepoAvatars() (err error) {
RepoAvatars, err = NewStorage(setting.RepoAvatar.Storage.Type, &setting.RepoAvatar.Storage)
return
}
+
+func initRepoArchives() (err error) {
+ log.Info("Initialising Repository Archive storage with type: %s", setting.RepoArchive.Storage.Type)
+ RepoArchives, err = NewStorage(setting.RepoArchive.Storage.Type, &setting.RepoArchive.Storage)
+ return
+}
diff --git a/routers/api/v1/repo/file.go b/routers/api/v1/repo/file.go
index 39a60df33f..e6427ea4f4 100644
--- a/routers/api/v1/repo/file.go
+++ b/routers/api/v1/repo/file.go
@@ -18,6 +18,7 @@ import (
api "code.gitea.io/gitea/modules/structs"
"code.gitea.io/gitea/modules/web"
"code.gitea.io/gitea/routers/common"
+ "code.gitea.io/gitea/routers/web/repo"
)
// GetRawFile get a file by path on a repository
@@ -126,7 +127,7 @@ func GetArchive(ctx *context.APIContext) {
ctx.Repo.GitRepo = gitRepo
defer gitRepo.Close()
- common.Download(ctx.Context)
+ repo.Download(ctx.Context)
}
// GetEditorconfig get editor config of a repository
diff --git a/routers/common/repo.go b/routers/common/repo.go
index c61b5ec57f..22403da097 100644
--- a/routers/common/repo.go
+++ b/routers/common/repo.go
@@ -7,7 +7,6 @@ package common
import (
"fmt"
"io"
- "net/http"
"path"
"path/filepath"
"strings"
@@ -19,7 +18,6 @@ import (
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/typesniffer"
- "code.gitea.io/gitea/services/archiver"
)
// ServeBlob download a git.Blob
@@ -41,30 +39,6 @@ func ServeBlob(ctx *context.Context, blob *git.Blob) error {
return ServeData(ctx, ctx.Repo.TreePath, blob.Size(), dataRc)
}
-// Download an archive of a repository
-func Download(ctx *context.Context) {
- uri := ctx.Params("*")
- aReq := archiver.DeriveRequestFrom(ctx, uri)
-
- if aReq == nil {
- ctx.Error(http.StatusNotFound)
- return
- }
-
- downloadName := ctx.Repo.Repository.Name + "-" + aReq.GetArchiveName()
- complete := aReq.IsComplete()
- if !complete {
- aReq = archiver.ArchiveRepository(aReq)
- complete = aReq.WaitForCompletion(ctx)
- }
-
- if complete {
- ctx.ServeFile(aReq.GetArchivePath(), downloadName)
- } else {
- ctx.Error(http.StatusNotFound)
- }
-}
-
// ServeData download file from io.Reader
func ServeData(ctx *context.Context, name string, size int64, reader io.Reader) error {
buf := make([]byte, 1024)
diff --git a/routers/init.go b/routers/init.go
index 4c28a95395..bbf39a3f50 100644
--- a/routers/init.go
+++ b/routers/init.go
@@ -33,6 +33,7 @@ import (
"code.gitea.io/gitea/routers/common"
"code.gitea.io/gitea/routers/private"
web_routers "code.gitea.io/gitea/routers/web"
+ "code.gitea.io/gitea/services/archiver"
"code.gitea.io/gitea/services/auth"
"code.gitea.io/gitea/services/mailer"
mirror_service "code.gitea.io/gitea/services/mirror"
@@ -63,6 +64,9 @@ func NewServices() {
mailer.NewContext()
_ = cache.NewContext()
notification.NewContext()
+ if err := archiver.Init(); err != nil {
+ log.Fatal("archiver init failed: %v", err)
+ }
}
// GlobalInit is for global configuration reload-able.
diff --git a/routers/web/repo/repo.go b/routers/web/repo/repo.go
index f149e92a8b..919fd4620d 100644
--- a/routers/web/repo/repo.go
+++ b/routers/web/repo/repo.go
@@ -15,8 +15,10 @@ import (
"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/base"
"code.gitea.io/gitea/modules/context"
+ "code.gitea.io/gitea/modules/graceful"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
+ "code.gitea.io/gitea/modules/storage"
"code.gitea.io/gitea/modules/web"
archiver_service "code.gitea.io/gitea/services/archiver"
"code.gitea.io/gitea/services/forms"
@@ -364,25 +366,123 @@ func RedirectDownload(ctx *context.Context) {
ctx.Error(http.StatusNotFound)
}
+// Download an archive of a repository
+func Download(ctx *context.Context) {
+ uri := ctx.Params("*")
+ aReq, err := archiver_service.NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, uri)
+ if err != nil {
+ ctx.ServerError("archiver_service.NewRequest", err)
+ return
+ }
+ if aReq == nil {
+ ctx.Error(http.StatusNotFound)
+ return
+ }
+
+ archiver, err := models.GetRepoArchiver(models.DefaultDBContext(), aReq.RepoID, aReq.Type, aReq.CommitID)
+ if err != nil {
+ ctx.ServerError("models.GetRepoArchiver", err)
+ return
+ }
+ if archiver != nil && archiver.Status == models.RepoArchiverReady {
+ download(ctx, aReq.GetArchiveName(), archiver)
+ return
+ }
+
+ if err := archiver_service.StartArchive(aReq); err != nil {
+ ctx.ServerError("archiver_service.StartArchive", err)
+ return
+ }
+
+ var times int
+ var t = time.NewTicker(time.Second * 1)
+ defer t.Stop()
+
+ for {
+ select {
+ case <-graceful.GetManager().HammerContext().Done():
+ log.Warn("exit archive download because system stop")
+ return
+ case <-t.C:
+ if times > 20 {
+ ctx.ServerError("wait download timeout", nil)
+ return
+ }
+ times++
+ archiver, err = models.GetRepoArchiver(models.DefaultDBContext(), aReq.RepoID, aReq.Type, aReq.CommitID)
+ if err != nil {
+ ctx.ServerError("archiver_service.StartArchive", err)
+ return
+ }
+ if archiver != nil && archiver.Status == models.RepoArchiverReady {
+ download(ctx, aReq.GetArchiveName(), archiver)
+ return
+ }
+ }
+ }
+}
+
+func download(ctx *context.Context, archiveName string, archiver *models.RepoArchiver) {
+ downloadName := ctx.Repo.Repository.Name + "-" + archiveName
+
+ rPath, err := archiver.RelativePath()
+ if err != nil {
+ ctx.ServerError("archiver.RelativePath", err)
+ return
+ }
+
+ if setting.RepoArchive.ServeDirect {
+ //If we have a signed url (S3, object storage), redirect to this directly.
+ u, err := storage.RepoArchives.URL(rPath, downloadName)
+ if u != nil && err == nil {
+ ctx.Redirect(u.String())
+ return
+ }
+ }
+
+ //If we have matched and access to release or issue
+ fr, err := storage.RepoArchives.Open(rPath)
+ if err != nil {
+ ctx.ServerError("Open", err)
+ return
+ }
+ defer fr.Close()
+ ctx.ServeStream(fr, downloadName)
+}
+
// InitiateDownload will enqueue an archival request, as needed. It may submit
// a request that's already in-progress, but the archiver service will just
// kind of drop it on the floor if this is the case.
func InitiateDownload(ctx *context.Context) {
uri := ctx.Params("*")
- aReq := archiver_service.DeriveRequestFrom(ctx, uri)
-
+ aReq, err := archiver_service.NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, uri)
+ if err != nil {
+ ctx.ServerError("archiver_service.NewRequest", err)
+ return
+ }
if aReq == nil {
ctx.Error(http.StatusNotFound)
return
}
- complete := aReq.IsComplete()
- if !complete {
- aReq = archiver_service.ArchiveRepository(aReq)
- complete, _ = aReq.TimedWaitForCompletion(ctx, 2*time.Second)
+ archiver, err := models.GetRepoArchiver(models.DefaultDBContext(), aReq.RepoID, aReq.Type, aReq.CommitID)
+ if err != nil {
+ ctx.ServerError("archiver_service.StartArchive", err)
+ return
+ }
+ if archiver == nil || archiver.Status != models.RepoArchiverReady {
+ if err := archiver_service.StartArchive(aReq); err != nil {
+ ctx.ServerError("archiver_service.StartArchive", err)
+ return
+ }
+ }
+
+ var completed bool
+ if archiver != nil && archiver.Status == models.RepoArchiverReady {
+ completed = true
}
ctx.JSON(http.StatusOK, map[string]interface{}{
- "complete": complete,
+ "complete": completed,
})
}
diff --git a/routers/web/web.go b/routers/web/web.go
index 2c8a6411a1..883213479c 100644
--- a/routers/web/web.go
+++ b/routers/web/web.go
@@ -22,7 +22,6 @@ import (
"code.gitea.io/gitea/modules/validation"
"code.gitea.io/gitea/modules/web"
"code.gitea.io/gitea/routers/api/v1/misc"
- "code.gitea.io/gitea/routers/common"
"code.gitea.io/gitea/routers/web/admin"
"code.gitea.io/gitea/routers/web/dev"
"code.gitea.io/gitea/routers/web/events"
@@ -888,7 +887,7 @@ func RegisterRoutes(m *web.Route) {
}, context.RepoRef(), repo.MustBeNotEmpty, context.RequireRepoReaderOr(models.UnitTypeCode))
m.Group("/archive", func() {
- m.Get("/*", common.Download)
+ m.Get("/*", repo.Download)
m.Post("/*", repo.InitiateDownload)
}, repo.MustBeNotEmpty, reqRepoCodeReader)
diff --git a/services/archiver/archiver.go b/services/archiver/archiver.go
index dfa6334d95..00c0281306 100644
--- a/services/archiver/archiver.go
+++ b/services/archiver/archiver.go
@@ -6,22 +6,20 @@
package archiver
import (
+ "errors"
+ "fmt"
"io"
- "io/ioutil"
"os"
- "path"
"regexp"
"strings"
- "sync"
- "time"
- "code.gitea.io/gitea/modules/base"
- "code.gitea.io/gitea/modules/context"
+ "code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/graceful"
"code.gitea.io/gitea/modules/log"
+ "code.gitea.io/gitea/modules/queue"
"code.gitea.io/gitea/modules/setting"
- "code.gitea.io/gitea/modules/util"
+ "code.gitea.io/gitea/modules/storage"
)
// ArchiveRequest defines the parameters of an archive request, which notably
@@ -30,223 +28,174 @@ import (
// This is entirely opaque to external entities, though, and mostly used as a
// handle elsewhere.
type ArchiveRequest struct {
- uri string
- repo *git.Repository
- refName string
- ext string
- archivePath string
- archiveType git.ArchiveType
- archiveComplete bool
- commit *git.Commit
- cchan chan struct{}
+ RepoID int64
+ refName string
+ Type git.ArchiveType
+ CommitID string
}
-var archiveInProgress []*ArchiveRequest
-var archiveMutex sync.Mutex
-
// SHA1 hashes will only go up to 40 characters, but SHA256 hashes will go all
// the way to 64.
var shaRegex = regexp.MustCompile(`^[0-9a-f]{4,64}$`)
-// These facilitate testing, by allowing the unit tests to control (to some extent)
-// the goroutine used for processing the queue.
-var archiveQueueMutex *sync.Mutex
-var archiveQueueStartCond *sync.Cond
-var archiveQueueReleaseCond *sync.Cond
-
-// GetArchivePath returns the path from which we can serve this archive.
-func (aReq *ArchiveRequest) GetArchivePath() string {
- return aReq.archivePath
-}
-
-// GetArchiveName returns the name of the caller, based on the ref used by the
-// caller to create this request.
-func (aReq *ArchiveRequest) GetArchiveName() string {
- return aReq.refName + aReq.ext
-}
-
-// IsComplete returns the completion status of this request.
-func (aReq *ArchiveRequest) IsComplete() bool {
- return aReq.archiveComplete
-}
-
-// WaitForCompletion will wait for this request to complete, with no timeout.
-// It returns whether the archive was actually completed, as the channel could
-// have also been closed due to an error.
-func (aReq *ArchiveRequest) WaitForCompletion(ctx *context.Context) bool {
- select {
- case <-aReq.cchan:
- case <-ctx.Done():
- }
-
- return aReq.IsComplete()
-}
-
-// TimedWaitForCompletion will wait for this request to complete, with timeout
-// happening after the specified Duration. It returns whether the archive is
-// now complete and whether we hit the timeout or not. The latter may not be
-// useful if the request is complete or we started to shutdown.
-func (aReq *ArchiveRequest) TimedWaitForCompletion(ctx *context.Context, dur time.Duration) (bool, bool) {
- timeout := false
- select {
- case <-time.After(dur):
- timeout = true
- case <-aReq.cchan:
- case <-ctx.Done():
- }
-
- return aReq.IsComplete(), timeout
-}
-
-// The caller must hold the archiveMutex across calls to getArchiveRequest.
-func getArchiveRequest(repo *git.Repository, commit *git.Commit, archiveType git.ArchiveType) *ArchiveRequest {
- for _, r := range archiveInProgress {
- // Need to be referring to the same repository.
- if r.repo.Path == repo.Path && r.commit.ID == commit.ID && r.archiveType == archiveType {
- return r
- }
- }
- return nil
-}
-
-// DeriveRequestFrom creates an archival request, based on the URI. The
+// NewRequest creates an archival request, based on the URI. The
// resulting ArchiveRequest is suitable for being passed to ArchiveRepository()
// if it's determined that the request still needs to be satisfied.
-func DeriveRequestFrom(ctx *context.Context, uri string) *ArchiveRequest {
- if ctx.Repo == nil || ctx.Repo.GitRepo == nil {
- log.Trace("Repo not initialized")
- return nil
- }
+func NewRequest(repoID int64, repo *git.Repository, uri string) (*ArchiveRequest, error) {
r := &ArchiveRequest{
- uri: uri,
- repo: ctx.Repo.GitRepo,
+ RepoID: repoID,
}
+ var ext string
switch {
case strings.HasSuffix(uri, ".zip"):
- r.ext = ".zip"
- r.archivePath = path.Join(r.repo.Path, "archives/zip")
- r.archiveType = git.ZIP
+ ext = ".zip"
+ r.Type = git.ZIP
case strings.HasSuffix(uri, ".tar.gz"):
- r.ext = ".tar.gz"
- r.archivePath = path.Join(r.repo.Path, "archives/targz")
- r.archiveType = git.TARGZ
+ ext = ".tar.gz"
+ r.Type = git.TARGZ
default:
- log.Trace("Unknown format: %s", uri)
- return nil
+ return nil, fmt.Errorf("Unknown format: %s", uri)
}
- r.refName = strings.TrimSuffix(r.uri, r.ext)
- isDir, err := util.IsDir(r.archivePath)
- if err != nil {
- ctx.ServerError("Download -> util.IsDir(archivePath)", err)
- return nil
- }
- if !isDir {
- if err := os.MkdirAll(r.archivePath, os.ModePerm); err != nil {
- ctx.ServerError("Download -> os.MkdirAll(archivePath)", err)
- return nil
- }
- }
+ r.refName = strings.TrimSuffix(uri, ext)
+ var err error
// Get corresponding commit.
- if r.repo.IsBranchExist(r.refName) {
- r.commit, err = r.repo.GetBranchCommit(r.refName)
+ if repo.IsBranchExist(r.refName) {
+ r.CommitID, err = repo.GetBranchCommitID(r.refName)
if err != nil {
- ctx.ServerError("GetBranchCommit", err)
- return nil
+ return nil, err
}
- } else if r.repo.IsTagExist(r.refName) {
- r.commit, err = r.repo.GetTagCommit(r.refName)
+ } else if repo.IsTagExist(r.refName) {
+ r.CommitID, err = repo.GetTagCommitID(r.refName)
if err != nil {
- ctx.ServerError("GetTagCommit", err)
- return nil
+ return nil, err
}
} else if shaRegex.MatchString(r.refName) {
- r.commit, err = r.repo.GetCommit(r.refName)
- if err != nil {
- ctx.NotFound("GetCommit", nil)
- return nil
+ if repo.IsCommitExist(r.refName) {
+ r.CommitID = r.refName
+ } else {
+ return nil, git.ErrNotExist{
+ ID: r.refName,
+ }
}
} else {
- ctx.NotFound("DeriveRequestFrom", nil)
- return nil
+ return nil, fmt.Errorf("Unknow ref %s type", r.refName)
}
- archiveMutex.Lock()
- defer archiveMutex.Unlock()
- if rExisting := getArchiveRequest(r.repo, r.commit, r.archiveType); rExisting != nil {
- return rExisting
- }
+ return r, nil
+}
+
+// GetArchiveName returns the name of the caller, based on the ref used by the
+// caller to create this request.
+func (aReq *ArchiveRequest) GetArchiveName() string {
+ return strings.ReplaceAll(aReq.refName, "/", "-") + "." + aReq.Type.String()
+}
- r.archivePath = path.Join(r.archivePath, base.ShortSha(r.commit.ID.String())+r.ext)
- r.archiveComplete, err = util.IsFile(r.archivePath)
+func doArchive(r *ArchiveRequest) (*models.RepoArchiver, error) {
+ ctx, commiter, err := models.TxDBContext()
if err != nil {
- ctx.ServerError("util.IsFile", err)
- return nil
+ return nil, err
}
- return r
-}
+ defer commiter.Close()
-func doArchive(r *ArchiveRequest) {
- var (
- err error
- tmpArchive *os.File
- destArchive *os.File
- )
-
- // Close the channel to indicate to potential waiters that this request
- // has finished.
- defer close(r.cchan)
-
- // It could have happened that we enqueued two archival requests, due to
- // race conditions and difficulties in locking. Do one last check that
- // the archive we're referring to doesn't already exist. If it does exist,
- // then just mark the request as complete and move on.
- isFile, err := util.IsFile(r.archivePath)
+ archiver, err := models.GetRepoArchiver(ctx, r.RepoID, r.Type, r.CommitID)
if err != nil {
- log.Error("Unable to check if %s util.IsFile: %v. Will ignore and recreate.", r.archivePath, err)
+ return nil, err
}
- if isFile {
- r.archiveComplete = true
- return
+
+ if archiver != nil {
+ // FIXME: If another process are generating it, we think it's not ready and just return
+ // Or we should wait until the archive generated.
+ if archiver.Status == models.RepoArchiverGenerating {
+ return nil, nil
+ }
+ } else {
+ archiver = &models.RepoArchiver{
+ RepoID: r.RepoID,
+ Type: r.Type,
+ CommitID: r.CommitID,
+ Status: models.RepoArchiverGenerating,
+ }
+ if err := models.AddRepoArchiver(ctx, archiver); err != nil {
+ return nil, err
+ }
}
- // Create a temporary file to use while the archive is being built. We
- // will then copy it into place (r.archivePath) once it's fully
- // constructed.
- tmpArchive, err = ioutil.TempFile("", "archive")
+ rPath, err := archiver.RelativePath()
if err != nil {
- log.Error("Unable to create a temporary archive file! Error: %v", err)
- return
+ return nil, err
+ }
+
+ _, err = storage.RepoArchives.Stat(rPath)
+ if err == nil {
+ if archiver.Status == models.RepoArchiverGenerating {
+ archiver.Status = models.RepoArchiverReady
+ return archiver, models.UpdateRepoArchiverStatus(ctx, archiver)
+ }
+ return archiver, nil
+ }
+
+ if !errors.Is(err, os.ErrNotExist) {
+ return nil, fmt.Errorf("unable to stat archive: %v", err)
}
+
+ rd, w := io.Pipe()
defer func() {
- tmpArchive.Close()
- os.Remove(tmpArchive.Name())
+ w.Close()
+ rd.Close()
}()
+ var done = make(chan error)
+ repo, err := archiver.LoadRepo()
+ if err != nil {
+ return nil, fmt.Errorf("archiver.LoadRepo failed: %v", err)
+ }
- if err = r.commit.CreateArchive(graceful.GetManager().ShutdownContext(), tmpArchive.Name(), git.CreateArchiveOpts{
- Format: r.archiveType,
- Prefix: setting.Repository.PrefixArchiveFiles,
- }); err != nil {
- log.Error("Download -> CreateArchive "+tmpArchive.Name(), err)
- return
+ gitRepo, err := git.OpenRepository(repo.RepoPath())
+ if err != nil {
+ return nil, err
}
+ defer gitRepo.Close()
+
+ go func(done chan error, w *io.PipeWriter, archiver *models.RepoArchiver, gitRepo *git.Repository) {
+ defer func() {
+ if r := recover(); r != nil {
+ done <- fmt.Errorf("%v", r)
+ }
+ }()
+
+ err = gitRepo.CreateArchive(
+ graceful.GetManager().ShutdownContext(),
+ archiver.Type,
+ w,
+ setting.Repository.PrefixArchiveFiles,
+ archiver.CommitID,
+ )
+ _ = w.CloseWithError(err)
+ done <- err
+ }(done, w, archiver, gitRepo)
+
+ // TODO: add lfs data to zip
+ // TODO: add submodule data to zip
- // Now we copy it into place
- if destArchive, err = os.Create(r.archivePath); err != nil {
- log.Error("Unable to open archive " + r.archivePath)
- return
+ if _, err := storage.RepoArchives.Save(rPath, rd, -1); err != nil {
+ return nil, fmt.Errorf("unable to write archive: %v", err)
}
- _, err = io.Copy(destArchive, tmpArchive)
- destArchive.Close()
+
+ err = <-done
if err != nil {
- log.Error("Unable to write archive " + r.archivePath)
- return
+ return nil, err
+ }
+
+ if archiver.Status == models.RepoArchiverGenerating {
+ archiver.Status = models.RepoArchiverReady
+ if err = models.UpdateRepoArchiverStatus(ctx, archiver); err != nil {
+ return nil, err
+ }
}
- // Block any attempt to finalize creating a new request if we're marking
- r.archiveComplete = true
+ return archiver, commiter.Commit()
}
// ArchiveRepository satisfies the ArchiveRequest being passed in. Processing
@@ -255,65 +204,46 @@ func doArchive(r *ArchiveRequest) {
// anything. In all cases, the caller should be examining the *ArchiveRequest
// being returned for completion, as it may be different than the one they passed
// in.
-func ArchiveRepository(request *ArchiveRequest) *ArchiveRequest {
- // We'll return the request that's already been enqueued if it has been
- // enqueued, or we'll immediately enqueue it if it has not been enqueued
- // and it is not marked complete.
- archiveMutex.Lock()
- defer archiveMutex.Unlock()
- if rExisting := getArchiveRequest(request.repo, request.commit, request.archiveType); rExisting != nil {
- return rExisting
- }
- if request.archiveComplete {
- return request
- }
+func ArchiveRepository(request *ArchiveRequest) (*models.RepoArchiver, error) {
+ return doArchive(request)
+}
+
+var archiverQueue queue.UniqueQueue
- request.cchan = make(chan struct{})
- archiveInProgress = append(archiveInProgress, request)
- go func() {
- // Wait to start, if we have the Cond for it. This is currently only
- // useful for testing, so that the start and release of queued entries
- // can be controlled to examine the queue.
- if archiveQueueStartCond != nil {
- archiveQueueMutex.Lock()
- archiveQueueStartCond.Wait()
- archiveQueueMutex.Unlock()
+// Init initlize archive
+func Init() error {
+ handler := func(data ...queue.Data) {
+ for _, datum := range data {
+ archiveReq, ok := datum.(*ArchiveRequest)
+ if !ok {
+ log.Error("Unable to process provided datum: %v - not possible to cast to IndexerData", datum)
+ continue
+ }
+ log.Trace("ArchiverData Process: %#v", archiveReq)
+ if _, err := doArchive(archiveReq); err != nil {
+ log.Error("Archive %v faild: %v", datum, err)
+ }
}
+ }
- // Drop the mutex while we process the request. This may take a long
- // time, and it's not necessary now that we've added the reequest to
- // archiveInProgress.
- doArchive(request)
+ archiverQueue = queue.CreateUniqueQueue("repo-archive", handler, new(ArchiveRequest))
+ if archiverQueue == nil {
+ return errors.New("unable to create codes indexer queue")
+ }
- if archiveQueueReleaseCond != nil {
- archiveQueueMutex.Lock()
- archiveQueueReleaseCond.Wait()
- archiveQueueMutex.Unlock()
- }
+ go graceful.GetManager().RunWithShutdownFns(archiverQueue.Run)
- // Purge this request from the list. To do so, we'll just take the
- // index at which we ended up at and swap the final element into that
- // position, then chop off the now-redundant final element. The slice
- // may have change in between these two segments and we may have moved,
- // so we search for it here. We could perhaps avoid this search
- // entirely if len(archiveInProgress) == 1, but we should verify
- // correctness.
- archiveMutex.Lock()
- defer archiveMutex.Unlock()
-
- idx := -1
- for _idx, req := range archiveInProgress {
- if req == request {
- idx = _idx
- break
- }
- }
- if idx == -1 {
- log.Error("ArchiveRepository: Failed to find request for removal.")
- return
- }
- archiveInProgress = append(archiveInProgress[:idx], archiveInProgress[idx+1:]...)
- }()
+ return nil
+}
- return request
+// StartArchive push the archive request to the queue
+func StartArchive(request *ArchiveRequest) error {
+ has, err := archiverQueue.Has(request)
+ if err != nil {
+ return err
+ }
+ if has {
+ return nil
+ }
+ return archiverQueue.Push(request)
}
diff --git a/services/archiver/archiver_test.go b/services/archiver/archiver_test.go
index 6dcd942bf5..3f3f369987 100644
--- a/services/archiver/archiver_test.go
+++ b/services/archiver/archiver_test.go
@@ -6,108 +6,75 @@ package archiver
import (
"path/filepath"
- "sync"
"testing"
"time"
"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/test"
- "code.gitea.io/gitea/modules/util"
"github.com/stretchr/testify/assert"
)
-var queueMutex sync.Mutex
-
func TestMain(m *testing.M) {
models.MainTest(m, filepath.Join("..", ".."))
}
func waitForCount(t *testing.T, num int) {
- var numQueued int
-
- // Wait for up to 10 seconds for the queue to be impacted.
- timeout := time.Now().Add(10 * time.Second)
- for {
- numQueued = len(archiveInProgress)
- if numQueued == num || time.Now().After(timeout) {
- break
- }
- }
-
- assert.Len(t, archiveInProgress, num)
-}
-
-func releaseOneEntry(t *testing.T, inFlight []*ArchiveRequest) {
- var nowQueued, numQueued int
-
- numQueued = len(archiveInProgress)
-
- // Release one, then wait up to 10 seconds for it to complete.
- queueMutex.Lock()
- archiveQueueReleaseCond.Signal()
- queueMutex.Unlock()
- timeout := time.Now().Add(10 * time.Second)
- for {
- nowQueued = len(archiveInProgress)
- if nowQueued != numQueued || time.Now().After(timeout) {
- break
- }
- }
-
- // Make sure we didn't just timeout.
- assert.NotEqual(t, numQueued, nowQueued)
- // Also make sure that we released only one.
- assert.Equal(t, numQueued-1, nowQueued)
}
func TestArchive_Basic(t *testing.T) {
assert.NoError(t, models.PrepareTestDatabase())
- archiveQueueMutex = &queueMutex
- archiveQueueStartCond = sync.NewCond(&queueMutex)
- archiveQueueReleaseCond = sync.NewCond(&queueMutex)
- defer func() {
- archiveQueueMutex = nil
- archiveQueueStartCond = nil
- archiveQueueReleaseCond = nil
- }()
-
ctx := test.MockContext(t, "user27/repo49")
firstCommit, secondCommit := "51f84af23134", "aacbdfe9e1c4"
- bogusReq := DeriveRequestFrom(ctx, firstCommit+".zip")
- assert.Nil(t, bogusReq)
-
test.LoadRepo(t, ctx, 49)
- bogusReq = DeriveRequestFrom(ctx, firstCommit+".zip")
- assert.Nil(t, bogusReq)
-
test.LoadGitRepo(t, ctx)
defer ctx.Repo.GitRepo.Close()
+ bogusReq, err := NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, firstCommit+".zip")
+ assert.NoError(t, err)
+ assert.NotNil(t, bogusReq)
+ assert.EqualValues(t, firstCommit+".zip", bogusReq.GetArchiveName())
+
// Check a series of bogus requests.
// Step 1, valid commit with a bad extension.
- bogusReq = DeriveRequestFrom(ctx, firstCommit+".dilbert")
+ bogusReq, err = NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, firstCommit+".dilbert")
+ assert.Error(t, err)
assert.Nil(t, bogusReq)
// Step 2, missing commit.
- bogusReq = DeriveRequestFrom(ctx, "dbffff.zip")
+ bogusReq, err = NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, "dbffff.zip")
+ assert.Error(t, err)
assert.Nil(t, bogusReq)
// Step 3, doesn't look like branch/tag/commit.
- bogusReq = DeriveRequestFrom(ctx, "db.zip")
+ bogusReq, err = NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, "db.zip")
+ assert.Error(t, err)
assert.Nil(t, bogusReq)
+ bogusReq, err = NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, "master.zip")
+ assert.NoError(t, err)
+ assert.NotNil(t, bogusReq)
+ assert.EqualValues(t, "master.zip", bogusReq.GetArchiveName())
+
+ bogusReq, err = NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, "test/archive.zip")
+ assert.NoError(t, err)
+ assert.NotNil(t, bogusReq)
+ assert.EqualValues(t, "test-archive.zip", bogusReq.GetArchiveName())
+
// Now two valid requests, firstCommit with valid extensions.
- zipReq := DeriveRequestFrom(ctx, firstCommit+".zip")
+ zipReq, err := NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, firstCommit+".zip")
+ assert.NoError(t, err)
assert.NotNil(t, zipReq)
- tgzReq := DeriveRequestFrom(ctx, firstCommit+".tar.gz")
+ tgzReq, err := NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, firstCommit+".tar.gz")
+ assert.NoError(t, err)
assert.NotNil(t, tgzReq)
- secondReq := DeriveRequestFrom(ctx, secondCommit+".zip")
+ secondReq, err := NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, secondCommit+".zip")
+ assert.NoError(t, err)
assert.NotNil(t, secondReq)
inFlight := make([]*ArchiveRequest, 3)
@@ -128,41 +95,9 @@ func TestArchive_Basic(t *testing.T) {
// Sleep two seconds to make sure the queue doesn't change.
time.Sleep(2 * time.Second)
- assert.Len(t, archiveInProgress, 3)
-
- // Release them all, they'll then stall at the archiveQueueReleaseCond while
- // we examine the queue state.
- queueMutex.Lock()
- archiveQueueStartCond.Broadcast()
- queueMutex.Unlock()
-
- // Iterate through all of the in-flight requests and wait for their
- // completion.
- for _, req := range inFlight {
- req.WaitForCompletion(ctx)
- }
-
- for _, req := range inFlight {
- assert.True(t, req.IsComplete())
- exist, err := util.IsExist(req.GetArchivePath())
- assert.NoError(t, err)
- assert.True(t, exist)
- }
-
- arbitraryReq := inFlight[0]
- // Reopen the channel so we don't double-close, mark it incomplete. We're
- // going to run it back through the archiver, and it should get marked
- // complete again.
- arbitraryReq.cchan = make(chan struct{})
- arbitraryReq.archiveComplete = false
- doArchive(arbitraryReq)
- assert.True(t, arbitraryReq.IsComplete())
-
- // Queues should not have drained yet, because we haven't released them.
- // Do so now.
- assert.Len(t, archiveInProgress, 3)
-
- zipReq2 := DeriveRequestFrom(ctx, firstCommit+".zip")
+
+ zipReq2, err := NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, firstCommit+".zip")
+ assert.NoError(t, err)
// This zipReq should match what's sitting in the queue, as we haven't
// let it release yet. From the consumer's point of view, this looks like
// a long-running archive task.
@@ -173,46 +108,22 @@ func TestArchive_Basic(t *testing.T) {
// predecessor has cleared out of the queue.
ArchiveRepository(zipReq2)
- // Make sure the queue hasn't grown any.
- assert.Len(t, archiveInProgress, 3)
-
- // Make sure the queue drains properly
- releaseOneEntry(t, inFlight)
- assert.Len(t, archiveInProgress, 2)
- releaseOneEntry(t, inFlight)
- assert.Len(t, archiveInProgress, 1)
- releaseOneEntry(t, inFlight)
- assert.Empty(t, archiveInProgress)
-
// Now we'll submit a request and TimedWaitForCompletion twice, before and
// after we release it. We should trigger both the timeout and non-timeout
// cases.
- var completed, timedout bool
- timedReq := DeriveRequestFrom(ctx, secondCommit+".tar.gz")
+ timedReq, err := NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, secondCommit+".tar.gz")
+ assert.NoError(t, err)
assert.NotNil(t, timedReq)
ArchiveRepository(timedReq)
- // Guaranteed to timeout; we haven't signalled the request to start..
- completed, timedout = timedReq.TimedWaitForCompletion(ctx, 2*time.Second)
- assert.False(t, completed)
- assert.True(t, timedout)
-
- queueMutex.Lock()
- archiveQueueStartCond.Broadcast()
- queueMutex.Unlock()
-
- // Shouldn't timeout, we've now signalled it and it's a small request.
- completed, timedout = timedReq.TimedWaitForCompletion(ctx, 15*time.Second)
- assert.True(t, completed)
- assert.False(t, timedout)
-
- zipReq2 = DeriveRequestFrom(ctx, firstCommit+".zip")
+ zipReq2, err = NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, firstCommit+".zip")
+ assert.NoError(t, err)
// Now, we're guaranteed to have released the original zipReq from the queue.
// Ensure that we don't get handed back the released entry somehow, but they
// should remain functionally equivalent in all fields. The exception here
// is zipReq.cchan, which will be non-nil because it's a completed request.
// It's fine to go ahead and set it to nil now.
- zipReq.cchan = nil
+
assert.Equal(t, zipReq, zipReq2)
assert.False(t, zipReq == zipReq2)