summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorzeripath <art27@cantab.net>2019-01-23 08:56:51 +0000
committerLauris BH <lauris@nix.lv>2019-01-23 10:56:51 +0200
commit7d434376f1504ff64f249b879634ac2eec9440da (patch)
tree3a722ee5c0ceb64fff959d0431939f8b6b3f8662
parent075649572d4472b588a8484ce3d7c37df7621f85 (diff)
downloadgitea-7d434376f1504ff64f249b879634ac2eec9440da.tar.gz
gitea-7d434376f1504ff64f249b879634ac2eec9440da.zip
Pooled and buffered gzip implementation (#5722)
* Pooled and buffered gzip implementation * Add test for gzip * Add integration test * Ensure lfs check within transaction The previous code made it possible for a race condition to occur whereby a LFSMetaObject could be checked into the database twice. We should check if the LFSMetaObject is within the database and insert it if not in one transaction. * Try to avoid primary key problem in postgres The integration tests are being affected by https://github.com/go-testfixtures/testfixtures/issues/39 if we set the primary key high enough, keep a count of this and remove at the end of each test we shouldn't be affected by this.
-rw-r--r--integrations/lfs_getobject_test.go129
-rw-r--r--integrations/sqlite.ini1
-rw-r--r--models/lfs.go16
-rw-r--r--modules/gzip/gzip.go327
-rw-r--r--modules/gzip/gzip_test.go131
-rw-r--r--routers/routes/routes.go4
6 files changed, 598 insertions, 10 deletions
diff --git a/integrations/lfs_getobject_test.go b/integrations/lfs_getobject_test.go
new file mode 100644
index 0000000000..8f01d712a3
--- /dev/null
+++ b/integrations/lfs_getobject_test.go
@@ -0,0 +1,129 @@
+// Copyright 2019 The Gitea Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package integrations
+
+import (
+ "archive/zip"
+ "bytes"
+ "crypto/sha256"
+ "encoding/hex"
+ "io"
+ "io/ioutil"
+ "net/http"
+ "testing"
+
+ "code.gitea.io/gitea/models"
+ "code.gitea.io/gitea/modules/gzip"
+ "code.gitea.io/gitea/modules/lfs"
+ "code.gitea.io/gitea/modules/setting"
+ "github.com/stretchr/testify/assert"
+
+ gzipp "github.com/klauspost/compress/gzip"
+)
+
+func GenerateLFSOid(content io.Reader) (string, error) {
+ h := sha256.New()
+ if _, err := io.Copy(h, content); err != nil {
+ return "", err
+ }
+ sum := h.Sum(nil)
+ return hex.EncodeToString(sum), nil
+}
+
+var lfsID = int64(20000)
+
+func storeObjectInRepo(t *testing.T, repositoryID int64, content *[]byte) string {
+ oid, err := GenerateLFSOid(bytes.NewReader(*content))
+ assert.NoError(t, err)
+ var lfsMetaObject *models.LFSMetaObject
+
+ if setting.UsePostgreSQL {
+ lfsMetaObject = &models.LFSMetaObject{ID: lfsID, Oid: oid, Size: int64(len(*content)), RepositoryID: repositoryID}
+ } else {
+ lfsMetaObject = &models.LFSMetaObject{Oid: oid, Size: int64(len(*content)), RepositoryID: repositoryID}
+ }
+
+ lfsID = lfsID + 1
+ lfsMetaObject, err = models.NewLFSMetaObject(lfsMetaObject)
+ assert.NoError(t, err)
+ contentStore := &lfs.ContentStore{BasePath: setting.LFS.ContentPath}
+ if !contentStore.Exists(lfsMetaObject) {
+ err := contentStore.Put(lfsMetaObject, bytes.NewReader(*content))
+ assert.NoError(t, err)
+ }
+ return oid
+}
+
+func doLfs(t *testing.T, content *[]byte, expectGzip bool) {
+ prepareTestEnv(t)
+ repo, err := models.GetRepositoryByOwnerAndName("user2", "repo1")
+ assert.NoError(t, err)
+ oid := storeObjectInRepo(t, repo.ID, content)
+ defer repo.RemoveLFSMetaObjectByOid(oid)
+
+ session := loginUser(t, "user2")
+
+ // Request OID
+ req := NewRequest(t, "GET", "/user2/repo1.git/info/lfs/objects/"+oid+"/test")
+ req.Header.Set("Accept-Encoding", "gzip")
+ resp := session.MakeRequest(t, req, http.StatusOK)
+
+ contentEncoding := resp.Header().Get("Content-Encoding")
+ if !expectGzip || !setting.EnableGzip {
+ assert.NotContains(t, contentEncoding, "gzip")
+
+ result := resp.Body.Bytes()
+ assert.Equal(t, *content, result)
+ } else {
+ assert.Contains(t, contentEncoding, "gzip")
+ gzippReader, err := gzipp.NewReader(resp.Body)
+ assert.NoError(t, err)
+ result, err := ioutil.ReadAll(gzippReader)
+ assert.NoError(t, err)
+ assert.Equal(t, *content, result)
+ }
+
+}
+
+func TestGetLFSSmall(t *testing.T) {
+ content := []byte("A very small file\n")
+ doLfs(t, &content, false)
+}
+
+func TestGetLFSLarge(t *testing.T) {
+ content := make([]byte, gzip.MinSize*10)
+ for i := range content {
+ content[i] = byte(i % 256)
+ }
+ doLfs(t, &content, true)
+}
+
+func TestGetLFSGzip(t *testing.T) {
+ b := make([]byte, gzip.MinSize*10)
+ for i := range b {
+ b[i] = byte(i % 256)
+ }
+ outputBuffer := bytes.NewBuffer([]byte{})
+ gzippWriter := gzipp.NewWriter(outputBuffer)
+ gzippWriter.Write(b)
+ gzippWriter.Close()
+ content := outputBuffer.Bytes()
+ doLfs(t, &content, false)
+}
+
+func TestGetLFSZip(t *testing.T) {
+ b := make([]byte, gzip.MinSize*10)
+ for i := range b {
+ b[i] = byte(i % 256)
+ }
+ outputBuffer := bytes.NewBuffer([]byte{})
+ zipWriter := zip.NewWriter(outputBuffer)
+ fileWriter, err := zipWriter.Create("default")
+ assert.NoError(t, err)
+ fileWriter.Write(b)
+ zipWriter.Close()
+ content := outputBuffer.Bytes()
+ doLfs(t, &content, false)
+}
diff --git a/integrations/sqlite.ini b/integrations/sqlite.ini
index 857cfe4616..33c1015eca 100644
--- a/integrations/sqlite.ini
+++ b/integrations/sqlite.ini
@@ -30,6 +30,7 @@ LFS_CONTENT_PATH = data/lfs-sqlite
OFFLINE_MODE = false
LFS_JWT_SECRET = Tv_MjmZuHqpIY6GFl12ebgkRAMt4RlWt0v4EHKSXO0w
APP_DATA_PATH = integrations/gitea-integration-sqlite/data
+ENABLE_GZIP = true
[mailer]
ENABLED = false
diff --git a/models/lfs.go b/models/lfs.go
index 711e5b0494..39b0b2dd69 100644
--- a/models/lfs.go
+++ b/models/lfs.go
@@ -44,20 +44,20 @@ const (
func NewLFSMetaObject(m *LFSMetaObject) (*LFSMetaObject, error) {
var err error
- has, err := x.Get(m)
+ sess := x.NewSession()
+ defer sess.Close()
+ if err = sess.Begin(); err != nil {
+ return nil, err
+ }
+
+ has, err := sess.Get(m)
if err != nil {
return nil, err
}
if has {
m.Existing = true
- return m, nil
- }
-
- sess := x.NewSession()
- defer sess.Close()
- if err = sess.Begin(); err != nil {
- return nil, err
+ return m, sess.Commit()
}
if _, err = sess.Insert(m); err != nil {
diff --git a/modules/gzip/gzip.go b/modules/gzip/gzip.go
new file mode 100644
index 0000000000..4a4a797c7a
--- /dev/null
+++ b/modules/gzip/gzip.go
@@ -0,0 +1,327 @@
+// Copyright 2019 The Gitea Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package gzip
+
+import (
+ "bufio"
+ "fmt"
+ "io"
+ "net"
+ "net/http"
+ "regexp"
+ "strconv"
+ "strings"
+ "sync"
+
+ "github.com/klauspost/compress/gzip"
+ "gopkg.in/macaron.v1"
+)
+
+const (
+ acceptEncodingHeader = "Accept-Encoding"
+ contentEncodingHeader = "Content-Encoding"
+ contentLengthHeader = "Content-Length"
+ contentTypeHeader = "Content-Type"
+ rangeHeader = "Range"
+ varyHeader = "Vary"
+)
+
+const (
+ // MinSize is the minimum size of content we will compress
+ MinSize = 1400
+)
+
+// noopClosers are io.Writers with a shim to prevent early closure
+type noopCloser struct {
+ io.Writer
+}
+
+func (noopCloser) Close() error { return nil }
+
+// WriterPool is a gzip writer pool to reduce workload on creation of
+// gzip writers
+type WriterPool struct {
+ pool sync.Pool
+ compressionLevel int
+}
+
+// NewWriterPool creates a new pool
+func NewWriterPool(compressionLevel int) *WriterPool {
+ return &WriterPool{pool: sync.Pool{
+ // New will return nil, we'll manage the creation of new
+ // writers in the middleware
+ New: func() interface{} { return nil },
+ },
+ compressionLevel: compressionLevel}
+}
+
+// Get a writer from the pool - or create one if not available
+func (wp *WriterPool) Get(rw macaron.ResponseWriter) *gzip.Writer {
+ ret := wp.pool.Get()
+ if ret == nil {
+ ret, _ = gzip.NewWriterLevel(rw, wp.compressionLevel)
+ } else {
+ ret.(*gzip.Writer).Reset(rw)
+ }
+ return ret.(*gzip.Writer)
+}
+
+// Put returns a writer to the pool
+func (wp *WriterPool) Put(w *gzip.Writer) {
+ wp.pool.Put(w)
+}
+
+var writerPool WriterPool
+var regex regexp.Regexp
+
+// Options represents the configuration for the gzip middleware
+type Options struct {
+ CompressionLevel int
+}
+
+func validateCompressionLevel(level int) bool {
+ return level == gzip.DefaultCompression ||
+ level == gzip.ConstantCompression ||
+ (level >= gzip.BestSpeed && level <= gzip.BestCompression)
+}
+
+func validate(options []Options) Options {
+ // Default to level 4 compression (Best results seem to be between 4 and 6)
+ opt := Options{CompressionLevel: 4}
+ if len(options) > 0 {
+ opt = options[0]
+ }
+ if !validateCompressionLevel(opt.CompressionLevel) {
+ opt.CompressionLevel = 4
+ }
+ return opt
+}
+
+// Middleware creates a macaron.Handler to proxy the response
+func Middleware(options ...Options) macaron.Handler {
+ opt := validate(options)
+ writerPool = *NewWriterPool(opt.CompressionLevel)
+ regex := regexp.MustCompile(`bytes=(\d+)\-.*`)
+
+ return func(ctx *macaron.Context) {
+ // If the client won't accept gzip or x-gzip don't compress
+ if !strings.Contains(ctx.Req.Header.Get(acceptEncodingHeader), "gzip") &&
+ !strings.Contains(ctx.Req.Header.Get(acceptEncodingHeader), "x-gzip") {
+ return
+ }
+
+ // If the client is asking for a specific range of bytes - don't compress
+ if rangeHdr := ctx.Req.Header.Get(rangeHeader); rangeHdr != "" {
+
+ match := regex.FindStringSubmatch(rangeHdr)
+ if match != nil && len(match) > 1 {
+ return
+ }
+ }
+
+ // OK we should proxy the response writer
+ // We are still not necessarily going to compress...
+ proxyWriter := &ProxyResponseWriter{
+ ResponseWriter: ctx.Resp,
+ }
+ defer proxyWriter.Close()
+
+ ctx.Resp = proxyWriter
+ ctx.MapTo(proxyWriter, (*http.ResponseWriter)(nil))
+
+ // Check if render middleware has been registered,
+ // if yes, we need to modify ResponseWriter for it as well.
+ if _, ok := ctx.Render.(*macaron.DummyRender); !ok {
+ ctx.Render.SetResponseWriter(proxyWriter)
+ }
+
+ ctx.Next()
+ }
+}
+
+// ProxyResponseWriter is a wrapped macaron ResponseWriter that may compress its contents
+type ProxyResponseWriter struct {
+ writer io.WriteCloser
+ macaron.ResponseWriter
+ stopped bool
+
+ code int
+ buf []byte
+}
+
+// Write appends data to the proxied gzip writer.
+func (proxy *ProxyResponseWriter) Write(b []byte) (int, error) {
+ // if writer is initialized, use the writer
+ if proxy.writer != nil {
+ return proxy.writer.Write(b)
+ }
+
+ proxy.buf = append(proxy.buf, b...)
+
+ var (
+ contentLength, _ = strconv.Atoi(proxy.Header().Get(contentLengthHeader))
+ contentType = proxy.Header().Get(contentTypeHeader)
+ contentEncoding = proxy.Header().Get(contentEncodingHeader)
+ )
+
+ // OK if an encoding hasn't been chosen, and content length > 1400
+ // and content type isn't a compressed type
+ if contentEncoding == "" &&
+ (contentLength == 0 || contentLength >= MinSize) &&
+ (contentType == "" || !compressedContentType(contentType)) {
+ // If current buffer is less than the min size and a Content-Length isn't set, then wait
+ if len(proxy.buf) < MinSize && contentLength == 0 {
+ return len(b), nil
+ }
+
+ // If the Content-Length is larger than minSize or the current buffer is larger than minSize, then continue.
+ if contentLength >= MinSize || len(proxy.buf) >= MinSize {
+ // if we don't know the content type, infer it
+ if contentType == "" {
+ contentType = http.DetectContentType(proxy.buf)
+ proxy.Header().Set(contentTypeHeader, contentType)
+ }
+ // If the Content-Type is not compressed - Compress!
+ if !compressedContentType(contentType) {
+ if err := proxy.startGzip(); err != nil {
+ return 0, err
+ }
+ return len(b), nil
+ }
+ }
+ }
+ // If we got here, we should not GZIP this response.
+ if err := proxy.startPlain(); err != nil {
+ return 0, err
+ }
+ return len(b), nil
+}
+
+func (proxy *ProxyResponseWriter) startGzip() error {
+ // Set the content-encoding and vary headers.
+ proxy.Header().Set(contentEncodingHeader, "gzip")
+ proxy.Header().Set(varyHeader, acceptEncodingHeader)
+
+ // if the Content-Length is already set, then calls to Write on gzip
+ // will fail to set the Content-Length header since its already set
+ // See: https://github.com/golang/go/issues/14975.
+ proxy.Header().Del(contentLengthHeader)
+
+ // Write the header to gzip response.
+ if proxy.code != 0 {
+ proxy.ResponseWriter.WriteHeader(proxy.code)
+ // Ensure that no other WriteHeader's happen
+ proxy.code = 0
+ }
+
+ // Initialize and flush the buffer into the gzip response if there are any bytes.
+ // If there aren't any, we shouldn't initialize it yet because on Close it will
+ // write the gzip header even if nothing was ever written.
+ if len(proxy.buf) > 0 {
+ // Initialize the GZIP response.
+ proxy.writer = writerPool.Get(proxy.ResponseWriter)
+
+ return proxy.writeBuf()
+ }
+ return nil
+}
+
+func (proxy *ProxyResponseWriter) startPlain() error {
+ if proxy.code != 0 {
+ proxy.ResponseWriter.WriteHeader(proxy.code)
+ proxy.code = 0
+ }
+ proxy.stopped = true
+ proxy.writer = noopCloser{proxy.ResponseWriter}
+ return proxy.writeBuf()
+}
+
+func (proxy *ProxyResponseWriter) writeBuf() error {
+ if proxy.buf == nil {
+ return nil
+ }
+
+ n, err := proxy.writer.Write(proxy.buf)
+
+ // This should never happen (per io.Writer docs), but if the write didn't
+ // accept the entire buffer but returned no specific error, we have no clue
+ // what's going on, so abort just to be safe.
+ if err == nil && n < len(proxy.buf) {
+ err = io.ErrShortWrite
+ }
+ proxy.buf = nil
+ return err
+}
+
+// WriteHeader will ensure that we have setup the writer before we write the header
+func (proxy *ProxyResponseWriter) WriteHeader(code int) {
+ if proxy.code == 0 {
+ proxy.code = code
+ }
+}
+
+// Close the writer
+func (proxy *ProxyResponseWriter) Close() error {
+ if proxy.stopped {
+ return nil
+ }
+
+ if proxy.writer == nil {
+ err := proxy.startPlain()
+
+ if err != nil {
+ err = fmt.Errorf("GzipMiddleware: write to regular responseWriter at close gets error: %q", err.Error())
+ }
+ }
+
+ err := proxy.writer.Close()
+
+ if poolWriter, ok := proxy.writer.(*gzip.Writer); ok {
+ writerPool.Put(poolWriter)
+ }
+
+ proxy.writer = nil
+ proxy.stopped = true
+ return err
+}
+
+// Flush the writer
+func (proxy *ProxyResponseWriter) Flush() {
+ if proxy.writer == nil {
+ return
+ }
+
+ if gw, ok := proxy.writer.(*gzip.Writer); ok {
+ gw.Flush()
+ }
+
+ proxy.ResponseWriter.Flush()
+}
+
+// Hijack implements http.Hijacker. If the underlying ResponseWriter is a
+// Hijacker, its Hijack method is returned. Otherwise an error is returned.
+func (proxy *ProxyResponseWriter) Hijack() (net.Conn, *bufio.ReadWriter, error) {
+ hijacker, ok := proxy.ResponseWriter.(http.Hijacker)
+ if !ok {
+ return nil, nil, fmt.Errorf("the ResponseWriter doesn't support the Hijacker interface")
+ }
+ return hijacker.Hijack()
+}
+
+// verify Hijacker interface implementation
+var _ http.Hijacker = &ProxyResponseWriter{}
+
+func compressedContentType(contentType string) bool {
+ switch contentType {
+ case "application/zip":
+ return true
+ case "application/x-gzip":
+ return true
+ case "application/gzip":
+ return true
+ default:
+ return false
+ }
+}
diff --git a/modules/gzip/gzip_test.go b/modules/gzip/gzip_test.go
new file mode 100644
index 0000000000..d131e240af
--- /dev/null
+++ b/modules/gzip/gzip_test.go
@@ -0,0 +1,131 @@
+// Copyright 2019 The Gitea Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package gzip
+
+import (
+ "archive/zip"
+ "bytes"
+ "io/ioutil"
+ "net/http"
+ "net/http/httptest"
+ "testing"
+
+ gzipp "github.com/klauspost/compress/gzip"
+ "github.com/stretchr/testify/assert"
+ macaron "gopkg.in/macaron.v1"
+)
+
+func setup(sampleResponse []byte) (*macaron.Macaron, *[]byte) {
+ m := macaron.New()
+ m.Use(Middleware())
+ m.Get("/", func() *[]byte { return &sampleResponse })
+ return m, &sampleResponse
+}
+
+func reqNoAcceptGzip(t *testing.T, m *macaron.Macaron, sampleResponse *[]byte) {
+ // Request without accept gzip: Should not gzip
+ resp := httptest.NewRecorder()
+ req, err := http.NewRequest("GET", "/", nil)
+ assert.NoError(t, err)
+ m.ServeHTTP(resp, req)
+
+ _, ok := resp.HeaderMap[contentEncodingHeader]
+ assert.False(t, ok)
+
+ contentEncoding := resp.Header().Get(contentEncodingHeader)
+ assert.NotContains(t, contentEncoding, "gzip")
+
+ result := resp.Body.Bytes()
+ assert.Equal(t, *sampleResponse, result)
+}
+
+func reqAcceptGzip(t *testing.T, m *macaron.Macaron, sampleResponse *[]byte, expectGzip bool) {
+ // Request without accept gzip: Should not gzip
+ resp := httptest.NewRecorder()
+ req, err := http.NewRequest("GET", "/", nil)
+ assert.NoError(t, err)
+ req.Header.Set(acceptEncodingHeader, "gzip")
+ m.ServeHTTP(resp, req)
+
+ _, ok := resp.HeaderMap[contentEncodingHeader]
+ assert.Equal(t, ok, expectGzip)
+
+ contentEncoding := resp.Header().Get(contentEncodingHeader)
+ if expectGzip {
+ assert.Contains(t, contentEncoding, "gzip")
+ gzippReader, err := gzipp.NewReader(resp.Body)
+ assert.NoError(t, err)
+ result, err := ioutil.ReadAll(gzippReader)
+ assert.NoError(t, err)
+ assert.Equal(t, *sampleResponse, result)
+ } else {
+ assert.NotContains(t, contentEncoding, "gzip")
+ result := resp.Body.Bytes()
+ assert.Equal(t, *sampleResponse, result)
+ }
+}
+
+func TestMiddlewareSmall(t *testing.T) {
+ m, sampleResponse := setup([]byte("Small response"))
+
+ reqNoAcceptGzip(t, m, sampleResponse)
+
+ reqAcceptGzip(t, m, sampleResponse, false)
+}
+
+func TestMiddlewareLarge(t *testing.T) {
+ b := make([]byte, MinSize+1)
+ for i := range b {
+ b[i] = byte(i % 256)
+ }
+ m, sampleResponse := setup(b)
+
+ reqNoAcceptGzip(t, m, sampleResponse)
+
+ // This should be gzipped as we accept gzip
+ reqAcceptGzip(t, m, sampleResponse, true)
+}
+
+func TestMiddlewareGzip(t *testing.T) {
+ b := make([]byte, MinSize*10)
+ for i := range b {
+ b[i] = byte(i % 256)
+ }
+ outputBuffer := bytes.NewBuffer([]byte{})
+ gzippWriter := gzipp.NewWriter(outputBuffer)
+ gzippWriter.Write(b)
+ gzippWriter.Flush()
+ gzippWriter.Close()
+ output := outputBuffer.Bytes()
+
+ m, sampleResponse := setup(output)
+
+ reqNoAcceptGzip(t, m, sampleResponse)
+
+ // This should not be gzipped even though we accept gzip
+ reqAcceptGzip(t, m, sampleResponse, false)
+}
+
+func TestMiddlewareZip(t *testing.T) {
+ b := make([]byte, MinSize*10)
+ for i := range b {
+ b[i] = byte(i % 256)
+ }
+ outputBuffer := bytes.NewBuffer([]byte{})
+ zipWriter := zip.NewWriter(outputBuffer)
+ fileWriter, err := zipWriter.Create("default")
+ assert.NoError(t, err)
+ fileWriter.Write(b)
+ //fileWriter.Close()
+ zipWriter.Close()
+ output := outputBuffer.Bytes()
+
+ m, sampleResponse := setup(output)
+
+ reqNoAcceptGzip(t, m, sampleResponse)
+
+ // This should not be gzipped even though we accept gzip
+ reqAcceptGzip(t, m, sampleResponse, false)
+}
diff --git a/routers/routes/routes.go b/routers/routes/routes.go
index a3bf3f753a..c012d5c3c2 100644
--- a/routers/routes/routes.go
+++ b/routers/routes/routes.go
@@ -14,6 +14,7 @@ import (
"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/auth"
"code.gitea.io/gitea/modules/context"
+ "code.gitea.io/gitea/modules/gzip"
"code.gitea.io/gitea/modules/lfs"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/metrics"
@@ -36,7 +37,6 @@ import (
"github.com/go-macaron/cache"
"github.com/go-macaron/captcha"
"github.com/go-macaron/csrf"
- "github.com/go-macaron/gzip"
"github.com/go-macaron/i18n"
"github.com/go-macaron/session"
"github.com/go-macaron/toolbox"
@@ -54,7 +54,7 @@ func NewMacaron() *macaron.Macaron {
}
m.Use(macaron.Recovery())
if setting.EnableGzip {
- m.Use(gzip.Gziper())
+ m.Use(gzip.Middleware())
}
if setting.Protocol == setting.FCGI {
m.SetURLPrefix(setting.AppSubURL)