summary refs log tree commit diff stats
diff options
context:
space:
mode:
author qwerty287 <80460567+qwerty287@users.noreply.github.com> 2022-06-04 15:17:53 +0200
committer GitHub <noreply@github.com> 2022-06-04 21:17:53 +0800
commit df9612bb535c0f2b4540641ce2fba2c3d65de284 (patch)
tree 8a56c639508c88992785619b76c35f0c3111d69d
parent 14d96ff7acacf8912b05f113b8ca35f67edebd1b (diff)
download gitea-df9612bb535c0f2b4540641ce2fba2c3d65de284.tar.gz
gitea-df9612bb535c0f2b4540641ce2fba2c3d65de284.zip
Add API to serve blob or LFS file content (#19689)
* Add LFS API * Update routers/api/v1/repo/file.go Co-authored-by: Gusted <williamzijl7@hotmail.com> * Apply suggestions * Apply suggestions * Update routers/api/v1/repo/file.go Co-authored-by: Gusted <williamzijl7@hotmail.com> * Report errors * ADd test * Use own repo for test * Use different repo name * Improve handling * Slight restructures 1. Avoid reading the blob data multiple times 2. Ensure that caching is only checked when about to serve the blob/lfs 3. Avoid nesting by returning early 4. Make log message a bit more clear 5. Ensure that the dataRc is closed by defer when passed to ServeData Signed-off-by: Andrew Thornton <art27@cantab.net> Co-authored-by: Gusted <williamzijl7@hotmail.com> Co-authored-by: Andrew Thornton <art27@cantab.net> Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
-rw-r--r-- integrations/api_repo_file_get_test.go 56
-rw-r--r-- routers/api/v1/api.go 1
-rw-r--r-- routers/api/v1/repo/file.go 142
-rw-r--r-- templates/swagger/v1_json.tmpl 46
4 files changed, 245 insertions, 0 deletions
diff --git a/integrations/api_repo_file_get_test.go b/integrations/api_repo_file_get_test.go
new file mode 100644
index 0000000000..8d1c4c4bcf
--- /dev/null
+++ b/integrations/api_repo_file_get_test.go
@@ -0,0 +1,56 @@
+// Copyright 2022 The Gitea Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package integrations
+
+import (
+ "net/http"
+ "net/url"
+ "os"
+ "testing"
+
+ api "code.gitea.io/gitea/modules/structs"
+ "code.gitea.io/gitea/modules/util"
+
+ "github.com/stretchr/testify/assert"
+)
+// TestAPIGetRawFileOrLFS requests the /media/ API endpoint once for a plain file and once for a file name returned by the LFS push helper.
+func TestAPIGetRawFileOrLFS(t *testing.T) {
+ defer prepareTestEnv(t)()
+
+ // Plain (non-LFS) file: the response body must equal the README content of repo1.
+ req := NewRequest(t, "GET", "/api/v1/repos/user2/repo1/media/README.md")
+ resp := MakeRequest(t, req, http.StatusOK)
+ assert.Equal(t, "# repo1\n\nDescription for repo1", resp.Body.String())
+
+ // LFS case: create a fresh repository, clone it, run the LFS commit-and-push helper, then fetch the returned file via /media/.
+ onGiteaRun(t, func(t *testing.T, u *url.URL) {
+ httpContext := NewAPITestContext(t, "user2", "repo-lfs-test")
+ doAPICreateRepository(httpContext, false, func(t *testing.T, repository api.Repository) {
+ u.Path = httpContext.GitPath()
+ dstPath, err := os.MkdirTemp("", httpContext.Reponame)
+ assert.NoError(t, err)
+ defer util.RemoveAll(dstPath)
+
+ u.Path = httpContext.GitPath()
+ u.User = url.UserPassword("user2", userPassword)
+
+ t.Run("Clone", doGitClone(dstPath, u))
+
+ dstPath2, err := os.MkdirTemp("", httpContext.Reponame)
+ assert.NoError(t, err)
+ defer util.RemoveAll(dstPath2)
+
+ t.Run("Partial Clone", doPartialGitClone(dstPath2, u))
+
+ lfs, _ := lfsCommitAndPushTest(t, dstPath)
+ // NOTE(review): dstPath is a clone made for the "repo-lfs-test" context, yet the request below targets "repo1" - confirm this is intended.
+ reqLFS := NewRequest(t, "GET", "/api/v1/repos/user2/repo1/media/"+lfs)
+ respLFS := MakeRequestNilResponseRecorder(t, reqLFS, http.StatusOK)
+ assert.Equal(t, littleSize, respLFS.Length)
+
+ doAPIDeleteRepository(httpContext)
+ })
+ })
+}
diff --git a/routers/api/v1/api.go b/routers/api/v1/api.go
index 62c4a8934c..1492ef07a7 100644
--- a/routers/api/v1/api.go
+++ b/routers/api/v1/api.go
@@ -826,6 +826,7 @@ func Routes() *web.Route {
Delete(reqAdmin(), repo.DeleteTeam)
}, reqToken())
m.Get("/raw/*", context.ReferencesGitRepo(), context.RepoRefForAPI, reqRepoReader(unit.TypeCode), repo.GetRawFile)
+ m.Get("/media/*", context.ReferencesGitRepo(), context.RepoRefForAPI, reqRepoReader(unit.TypeCode), repo.GetRawFileOrLFS)
m.Get("/archive/*", reqRepoReader(unit.TypeCode), repo.GetArchive)
m.Combo("/forks").Get(repo.ListForks).
Post(reqToken(), reqRepoReader(unit.TypeCode), bind(api.CreateForkOption{}), repo.CreateFork)
diff --git a/routers/api/v1/repo/file.go b/routers/api/v1/repo/file.go
index 1fdf70c13a..ab337e66e3 100644
--- a/routers/api/v1/repo/file.go
+++ b/routers/api/v1/repo/file.go
@@ -6,8 +6,10 @@
package repo
import (
+ "bytes"
"encoding/base64"
"fmt"
+ "io"
"net/http"
"path"
"time"
@@ -18,7 +20,11 @@ import (
"code.gitea.io/gitea/modules/cache"
"code.gitea.io/gitea/modules/context"
"code.gitea.io/gitea/modules/git"
+ "code.gitea.io/gitea/modules/httpcache"
+ "code.gitea.io/gitea/modules/lfs"
+ "code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
+ "code.gitea.io/gitea/modules/storage"
api "code.gitea.io/gitea/modules/structs"
"code.gitea.io/gitea/modules/web"
"code.gitea.io/gitea/routers/common"
@@ -75,6 +81,142 @@ func GetRawFile(ctx *context.APIContext) {
}
}
+// GetRawFileOrLFS serves the file at the requested tree path; when the blob is a valid LFS pointer with a stored meta object, the LFS object is served (or redirected to) instead of the pointer text.
+func GetRawFileOrLFS(ctx *context.APIContext) {
+ // swagger:operation GET /repos/{owner}/{repo}/media/{filepath} repository repoGetRawFileOrLFS
+ // ---
+ // summary: Get a file or it's LFS object from a repository
+ // parameters:
+ // - name: owner
+ // in: path
+ // description: owner of the repo
+ // type: string
+ // required: true
+ // - name: repo
+ // in: path
+ // description: name of the repo
+ // type: string
+ // required: true
+ // - name: filepath
+ // in: path
+ // description: filepath of the file to get
+ // type: string
+ // required: true
+ // - name: ref
+ // in: query
+ // description: "The name of the commit/branch/tag. Default the repository’s default branch (usually master)"
+ // type: string
+ // required: false
+ // responses:
+ // 200:
+ // description: Returns raw file content.
+ // "404":
+ // "$ref": "#/responses/notFound"
+
+ if ctx.Repo.Repository.IsEmpty {
+ ctx.NotFound()
+ return
+ }
+
+ blob, lastModified := getBlobForEntry(ctx)
+ if ctx.Written() {
+ return
+ }
+
+ // LFS Pointer files are at most 1024 bytes - so any blob greater than 1024 bytes cannot be an LFS file
+ if blob.Size() > 1024 {
+ // First handle caching for the blob
+ if httpcache.HandleGenericETagTimeCache(ctx.Req, ctx.Resp, `"`+blob.ID.String()+`"`, lastModified) {
+ return
+ }
+
+ // OK not cached - serve!
+ if err := common.ServeBlob(ctx.Context, blob, lastModified); err != nil {
+ ctx.ServerError("ServeBlob", err)
+ }
+ return
+ }
+
+ // The blob is now known to be at most 1024 bytes, so read it fully in one go (this avoids reading it twice)
+ dataRc, err := blob.DataAsync()
+ if err != nil {
+ ctx.ServerError("DataAsync", err)
+ return
+ }
+
+ buf, err := io.ReadAll(dataRc)
+ if err != nil {
+ _ = dataRc.Close()
+ ctx.ServerError("ReadAll", err)
+ return
+ }
+
+ if err := dataRc.Close(); err != nil {
+ log.Error("Error whilst closing blob %s reader in %-v. Error: %v", blob.ID, ctx.Context.Repo.Repository, err)
+ }
+
+ // Check if the blob represents a pointer
+ pointer, _ := lfs.ReadPointer(bytes.NewReader(buf))
+
+ // If it is not a valid pointer just serve the buffered data directly
+ if !pointer.IsValid() {
+ // First handle caching for the blob
+ if httpcache.HandleGenericETagTimeCache(ctx.Req, ctx.Resp, `"`+blob.ID.String()+`"`, lastModified) {
+ return
+ }
+
+ // OK not cached - serve!
+ if err := common.ServeData(ctx.Context, ctx.Repo.TreePath, blob.Size(), bytes.NewReader(buf)); err != nil {
+ ctx.ServerError("ServeData", err)
+ }
+ return
+ }
+
+ // Now check if there is a meta object for this pointer
+ meta, err := models.GetLFSMetaObjectByOid(ctx.Repo.Repository.ID, pointer.Oid)
+
+ // If there isn't one just serve the data directly
+ if err == models.ErrLFSObjectNotExist {
+ // Handle caching for the blob SHA (not the LFS object OID)
+ if httpcache.HandleGenericETagTimeCache(ctx.Req, ctx.Resp, `"`+blob.ID.String()+`"`, lastModified) {
+ return
+ }
+
+ if err := common.ServeData(ctx.Context, ctx.Repo.TreePath, blob.Size(), bytes.NewReader(buf)); err != nil {
+ ctx.ServerError("ServeData", err)
+ }
+ return
+ } else if err != nil {
+ ctx.ServerError("GetLFSMetaObjectByOid", err)
+ return
+ }
+
+ // Handle caching for the LFS object OID
+ if httpcache.HandleGenericETagCache(ctx.Req, ctx.Resp, `"`+pointer.Oid+`"`) {
+ return
+ }
+
+ if setting.LFS.ServeDirect {
+ // If we have a signed url (S3, object storage), redirect to this directly; on error or a nil URL fall through and stream the object below.
+ u, err := storage.LFS.URL(pointer.RelativePath(), blob.Name())
+ if u != nil && err == nil {
+ ctx.Redirect(u.String())
+ return
+ }
+ }
+
+ lfsDataRc, err := lfs.ReadMetaObject(meta.Pointer)
+ if err != nil {
+ ctx.ServerError("ReadMetaObject", err)
+ return
+ }
+ defer lfsDataRc.Close()
+
+ if err := common.ServeData(ctx.Context, ctx.Repo.TreePath, meta.Size, lfsDataRc); err != nil {
+ ctx.ServerError("ServeData", err)
+ }
+}
+
func getBlobForEntry(ctx *context.APIContext) (blob *git.Blob, lastModified time.Time) {
entry, err := ctx.Repo.Commit.GetTreeEntryByPath(ctx.Repo.TreePath)
if err != nil {
diff --git a/templates/swagger/v1_json.tmpl b/templates/swagger/v1_json.tmpl
index d63cde60ec..c23bcb2e9a 100644
--- a/templates/swagger/v1_json.tmpl
+++ b/templates/swagger/v1_json.tmpl
@@ -7150,6 +7150,52 @@
}
}
},
+ "/repos/{owner}/{repo}/media/{filepath}": {
+ "get": {
+ "tags": [
+ "repository"
+ ],
+ "summary": "Get a file or it's LFS object from a repository",
+ "operationId": "repoGetRawFileOrLFS",
+ "parameters": [
+ {
+ "type": "string",
+ "description": "owner of the repo",
+ "name": "owner",
+ "in": "path",
+ "required": true
+ },
+ {
+ "type": "string",
+ "description": "name of the repo",
+ "name": "repo",
+ "in": "path",
+ "required": true
+ },
+ {
+ "type": "string",
+ "description": "filepath of the file to get",
+ "name": "filepath",
+ "in": "path",
+ "required": true
+ },
+ {
+ "type": "string",
+ "description": "The name of the commit/branch/tag. Default the repository’s default branch (usually master)",
+ "name": "ref",
+ "in": "query"
+ }
+ ],
+ "responses": {
+ "200": {
+ "description": "Returns raw file content."
+ },
+ "404": {
+ "$ref": "#/responses/notFound"
+ }
+ }
+ }
+ },
"/repos/{owner}/{repo}/milestones": {
"get": {
"produces": [