author    zeripath <art27@cantab.net>    2019-01-23 08:56:51 +0000
committer Lauris BH <lauris@nix.lv>      2019-01-23 10:56:51 +0200
commit    7d434376f1504ff64f249b879634ac2eec9440da (patch)
tree      3a722ee5c0ceb64fff959d0431939f8b6b3f8662 /modules/gzip
parent    075649572d4472b588a8484ce3d7c37df7621f85 (diff)
Pooled and buffered gzip implementation (#5722)
* Pooled and buffered gzip implementation
* Add test for gzip
* Add integration test
* Ensure lfs check within transaction

  The previous code made it possible for a race condition to occur whereby an LFSMetaObject could be checked into the database twice. We should check whether the LFSMetaObject is already in the database and, if not, insert it, all within one transaction.

* Try to avoid primary key problem in postgres

  The integration tests are affected by https://github.com/go-testfixtures/testfixtures/issues/39. If we set the primary key high enough, keep a count of it, and remove the inserted rows at the end of each test, we shouldn't be affected by this.
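The LFS and testfixtures changes described above fall outside this diffstat (which is limited to 'modules/gzip'). For the transaction fix, the check-then-insert pattern would look roughly like the sketch below; this is a hypothetical illustration using xorm-style session calls (x being the engine, LFSMetaObject the model), not code from this commit:

    // NewLFSMetaObject stores m only if it is not already present, doing the
    // existence check and the insert inside a single transaction so that two
    // concurrent uploads cannot insert the same object twice.
    func NewLFSMetaObject(m *LFSMetaObject) (*LFSMetaObject, error) {
        sess := x.NewSession()
        defer sess.Close()
        if err := sess.Begin(); err != nil {
            return nil, err
        }
        has, err := sess.Get(m) // check within the transaction
        if err != nil {
            return nil, err
        }
        if !has {
            if _, err = sess.Insert(m); err != nil {
                return nil, err
            }
        }
        return m, sess.Commit()
    }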
Diffstat (limited to 'modules/gzip')
-rw-r--r--  modules/gzip/gzip.go       327
-rw-r--r--  modules/gzip/gzip_test.go  131
2 files changed, 458 insertions(+), 0 deletions(-)
diff --git a/modules/gzip/gzip.go b/modules/gzip/gzip.go
new file mode 100644
index 0000000000..4a4a797c7a
--- /dev/null
+++ b/modules/gzip/gzip.go
@@ -0,0 +1,327 @@
+// Copyright 2019 The Gitea Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package gzip
+
+import (
+ "bufio"
+ "fmt"
+ "io"
+ "net"
+ "net/http"
+ "regexp"
+ "strconv"
+ "strings"
+ "sync"
+
+ "github.com/klauspost/compress/gzip"
+ "gopkg.in/macaron.v1"
+)
+
+const (
+ acceptEncodingHeader = "Accept-Encoding"
+ contentEncodingHeader = "Content-Encoding"
+ contentLengthHeader = "Content-Length"
+ contentTypeHeader = "Content-Type"
+ rangeHeader = "Range"
+ varyHeader = "Vary"
+)
+
+const (
+ // MinSize is the minimum size of content we will compress
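+ // (1400 bytes is roughly the payload of a single TCP packet, so
+ // compressing responses smaller than this rarely saves anything on the wire)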
+ MinSize = 1400
+)
+
+// noopCloser is an io.Writer wrapped with a no-op Close, preventing early closure of the underlying writer
+type noopCloser struct {
+ io.Writer
+}
+
+func (noopCloser) Close() error { return nil }
+
+// WriterPool is a pool of gzip writers, used to avoid the cost of
+// repeatedly allocating and initializing them
+type WriterPool struct {
+ pool sync.Pool
+ compressionLevel int
+}
+
+// NewWriterPool creates a new pool
+func NewWriterPool(compressionLevel int) *WriterPool {
+ return &WriterPool{pool: sync.Pool{
+ // New will return nil, we'll manage the creation of new
+ // writers in the middleware
+ New: func() interface{} { return nil },
+ },
+ compressionLevel: compressionLevel}
+}
+
+// Get a writer from the pool - or create one if not available
+func (wp *WriterPool) Get(rw macaron.ResponseWriter) *gzip.Writer {
+ ret := wp.pool.Get()
+ if ret == nil {
+ ret, _ = gzip.NewWriterLevel(rw, wp.compressionLevel)
+ } else {
+ ret.(*gzip.Writer).Reset(rw)
+ }
+ return ret.(*gzip.Writer)
+}
+
+// Put returns a writer to the pool
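+// Callers should Close the writer first (flushing the gzip trailer) before
+// handing it back, as ProxyResponseWriter.Close does below.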
+func (wp *WriterPool) Put(w *gzip.Writer) {
+ wp.pool.Put(w)
+}
+
+var writerPool *WriterPool
+var regex *regexp.Regexp
+
+// Options represents the configuration for the gzip middleware
+type Options struct {
+ CompressionLevel int
+}
+
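+// validateCompressionLevel reports whether level is one the gzip package
+// accepts: gzip.DefaultCompression (-1), gzip.ConstantCompression (-2,
+// Huffman-only), or an explicit level from gzip.BestSpeed (1) to
+// gzip.BestCompression (9).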
+func validateCompressionLevel(level int) bool {
+ return level == gzip.DefaultCompression ||
+ level == gzip.ConstantCompression ||
+ (level >= gzip.BestSpeed && level <= gzip.BestCompression)
+}
+
+func validate(options []Options) Options {
+ // Default to level 4 compression (best results seem to be between 4 and 6)
+ opt := Options{CompressionLevel: 4}
+ if len(options) > 0 {
+ opt = options[0]
+ }
+ if !validateCompressionLevel(opt.CompressionLevel) {
+ opt.CompressionLevel = 4
+ }
+ return opt
+}
+
+// Middleware creates a macaron.Handler that proxies the response writer and compresses the response where appropriate
+func Middleware(options ...Options) macaron.Handler {
+ opt := validate(options)
+ writerPool = NewWriterPool(opt.CompressionLevel)
+ regex = regexp.MustCompile(`bytes=(\d+)\-.*`)
+
+ return func(ctx *macaron.Context) {
+ // If the client won't accept gzip or x-gzip don't compress
+ if !strings.Contains(ctx.Req.Header.Get(acceptEncodingHeader), "gzip") &&
+ !strings.Contains(ctx.Req.Header.Get(acceptEncodingHeader), "x-gzip") {
+ return
+ }
+
+ // If the client is asking for a specific range of bytes - don't compress
+ if rangeHdr := ctx.Req.Header.Get(rangeHeader); rangeHdr != "" {
+ if match := regex.FindStringSubmatch(rangeHdr); len(match) > 1 {
+ return
+ }
+ }
+
+ // OK we should proxy the response writer
+ // We are still not necessarily going to compress...
+ proxyWriter := &ProxyResponseWriter{
+ ResponseWriter: ctx.Resp,
+ }
+ defer proxyWriter.Close()
+
+ ctx.Resp = proxyWriter
+ ctx.MapTo(proxyWriter, (*http.ResponseWriter)(nil))
+
+ // Check if render middleware has been registered,
+ // if yes, we need to modify ResponseWriter for it as well.
+ if _, ok := ctx.Render.(*macaron.DummyRender); !ok {
+ ctx.Render.SetResponseWriter(proxyWriter)
+ }
+
+ ctx.Next()
+ }
+}
+
+// ProxyResponseWriter is a wrapped macaron ResponseWriter that may compress its contents
+type ProxyResponseWriter struct {
+ writer io.WriteCloser
+ macaron.ResponseWriter
+ stopped bool
+
+ code int
+ buf []byte
+}
+
+// Write buffers data until we can decide whether to compress, then writes
+// through the chosen (gzip or plain) writer.
+func (proxy *ProxyResponseWriter) Write(b []byte) (int, error) {
+ // if writer is initialized, use the writer
+ if proxy.writer != nil {
+ return proxy.writer.Write(b)
+ }
+
+ proxy.buf = append(proxy.buf, b...)
+
+ var (
+ contentLength, _ = strconv.Atoi(proxy.Header().Get(contentLengthHeader))
+ contentType = proxy.Header().Get(contentTypeHeader)
+ contentEncoding = proxy.Header().Get(contentEncodingHeader)
+ )
+
+ // Consider compressing only if no encoding has been chosen yet, the
+ // content length is unknown (0) or at least MinSize (1400), and the
+ // content type isn't an already-compressed type
+ if contentEncoding == "" &&
+ (contentLength == 0 || contentLength >= MinSize) &&
+ (contentType == "" || !compressedContentType(contentType)) {
+ // If current buffer is less than the min size and a Content-Length isn't set, then wait
+ if len(proxy.buf) < MinSize && contentLength == 0 {
+ return len(b), nil
+ }
+
+ // If the Content-Length or the current buffer is at least MinSize, then continue.
+ if contentLength >= MinSize || len(proxy.buf) >= MinSize {
+ // if we don't know the content type, infer it
+ if contentType == "" {
+ contentType = http.DetectContentType(proxy.buf)
+ proxy.Header().Set(contentTypeHeader, contentType)
+ }
+ // If the Content-Type is not compressed - Compress!
+ if !compressedContentType(contentType) {
+ if err := proxy.startGzip(); err != nil {
+ return 0, err
+ }
+ return len(b), nil
+ }
+ }
+ }
+ // If we got here, we should not GZIP this response.
+ if err := proxy.startPlain(); err != nil {
+ return 0, err
+ }
+ return len(b), nil
+}
+
+func (proxy *ProxyResponseWriter) startGzip() error {
+ // Set the content-encoding and vary headers.
+ proxy.Header().Set(contentEncodingHeader, "gzip")
+ proxy.Header().Set(varyHeader, acceptEncodingHeader)
+
+ // if the Content-Length is already set, then calls to Write on the gzip
+ // writer will fail to set the Content-Length header since it's already set
+ // See: https://github.com/golang/go/issues/14975.
+ proxy.Header().Del(contentLengthHeader)
+
+ // Write the header to gzip response.
+ if proxy.code != 0 {
+ proxy.ResponseWriter.WriteHeader(proxy.code)
+ // Ensure that no further WriteHeader calls take effect
+ proxy.code = 0
+ }
+
+ // Initialize and flush the buffer into the gzip response if there are any bytes.
+ // If there aren't any, we shouldn't initialize it yet because on Close it will
+ // write the gzip header even if nothing was ever written.
+ if len(proxy.buf) > 0 {
+ // Initialize the GZIP response.
+ proxy.writer = writerPool.Get(proxy.ResponseWriter)
+
+ return proxy.writeBuf()
+ }
+ return nil
+}
+
+func (proxy *ProxyResponseWriter) startPlain() error {
+ if proxy.code != 0 {
+ proxy.ResponseWriter.WriteHeader(proxy.code)
+ proxy.code = 0
+ }
+ proxy.stopped = true
+ proxy.writer = noopCloser{proxy.ResponseWriter}
+ return proxy.writeBuf()
+}
+
+func (proxy *ProxyResponseWriter) writeBuf() error {
+ if proxy.buf == nil {
+ return nil
+ }
+
+ n, err := proxy.writer.Write(proxy.buf)
+
+ // This should never happen (per io.Writer docs), but if the write didn't
+ // accept the entire buffer but returned no specific error, we have no clue
+ // what's going on, so abort just to be safe.
+ if err == nil && n < len(proxy.buf) {
+ err = io.ErrShortWrite
+ }
+ proxy.buf = nil
+ return err
+}
+
+// WriteHeader records the status code and defers writing the header until we have decided whether to compress
+func (proxy *ProxyResponseWriter) WriteHeader(code int) {
+ if proxy.code == 0 {
+ proxy.code = code
+ }
+}
+
+// Close closes the writer, writing out any buffered content, and returns any pooled gzip writer to the pool
+func (proxy *ProxyResponseWriter) Close() error {
+ if proxy.stopped {
+ return nil
+ }
+
+ if proxy.writer == nil {
+ err := proxy.startPlain()
+ if err != nil {
+ return fmt.Errorf("GzipMiddleware: write to regular responseWriter at close gets error: %q", err.Error())
+ }
+ }
+
+ err := proxy.writer.Close()
+
+ if poolWriter, ok := proxy.writer.(*gzip.Writer); ok {
+ writerPool.Put(poolWriter)
+ }
+
+ proxy.writer = nil
+ proxy.stopped = true
+ return err
+}
+
+// Flush flushes any pending gzip output and then the underlying ResponseWriter
+func (proxy *ProxyResponseWriter) Flush() {
+ if proxy.writer == nil {
+ return
+ }
+
+ if gw, ok := proxy.writer.(*gzip.Writer); ok {
+ gw.Flush()
+ }
+
+ proxy.ResponseWriter.Flush()
+}
+
+// Hijack implements http.Hijacker. If the underlying ResponseWriter is a
+// Hijacker, its Hijack method is called. Otherwise an error is returned.
+func (proxy *ProxyResponseWriter) Hijack() (net.Conn, *bufio.ReadWriter, error) {
+ hijacker, ok := proxy.ResponseWriter.(http.Hijacker)
+ if !ok {
+ return nil, nil, fmt.Errorf("the ResponseWriter doesn't support the Hijacker interface")
+ }
+ return hijacker.Hijack()
+}
+
+// verify Hijacker interface implementation
+var _ http.Hijacker = &ProxyResponseWriter{}
+
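+// compressedContentType reports whether contentType is an already-compressed
+// format, which gzip would not shrink further.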
+func compressedContentType(contentType string) bool {
+ switch contentType {
+ case "application/zip", "application/x-gzip", "application/gzip":
+ return true
+ default:
+ return false
+ }
+}
diff --git a/modules/gzip/gzip_test.go b/modules/gzip/gzip_test.go
new file mode 100644
index 0000000000..d131e240af
--- /dev/null
+++ b/modules/gzip/gzip_test.go
@@ -0,0 +1,131 @@
+// Copyright 2019 The Gitea Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package gzip
+
+import (
+ "archive/zip"
+ "bytes"
+ "io/ioutil"
+ "net/http"
+ "net/http/httptest"
+ "testing"
+
+ gzipp "github.com/klauspost/compress/gzip"
+ "github.com/stretchr/testify/assert"
+ macaron "gopkg.in/macaron.v1"
+)
+
+func setup(sampleResponse []byte) (*macaron.Macaron, *[]byte) {
+ m := macaron.New()
+ m.Use(Middleware())
+ m.Get("/", func() *[]byte { return &sampleResponse })
+ return m, &sampleResponse
+}
+
+func reqNoAcceptGzip(t *testing.T, m *macaron.Macaron, sampleResponse *[]byte) {
+ // Request without accept gzip: Should not gzip
+ resp := httptest.NewRecorder()
+ req, err := http.NewRequest("GET", "/", nil)
+ assert.NoError(t, err)
+ m.ServeHTTP(resp, req)
+
+ _, ok := resp.HeaderMap[contentEncodingHeader]
+ assert.False(t, ok)
+
+ contentEncoding := resp.Header().Get(contentEncodingHeader)
+ assert.NotContains(t, contentEncoding, "gzip")
+
+ result := resp.Body.Bytes()
+ assert.Equal(t, *sampleResponse, result)
+}
+
+func reqAcceptGzip(t *testing.T, m *macaron.Macaron, sampleResponse *[]byte, expectGzip bool) {
+ // Request with Accept-Encoding: gzip - expectGzip says whether the response should be gzipped
+ resp := httptest.NewRecorder()
+ req, err := http.NewRequest("GET", "/", nil)
+ assert.NoError(t, err)
+ req.Header.Set(acceptEncodingHeader, "gzip")
+ m.ServeHTTP(resp, req)
+
+ _, ok := resp.HeaderMap[contentEncodingHeader]
+ assert.Equal(t, expectGzip, ok)
+
+ contentEncoding := resp.Header().Get(contentEncodingHeader)
+ if expectGzip {
+ assert.Contains(t, contentEncoding, "gzip")
+ gzippReader, err := gzipp.NewReader(resp.Body)
+ assert.NoError(t, err)
+ result, err := ioutil.ReadAll(gzippReader)
+ assert.NoError(t, err)
+ assert.Equal(t, *sampleResponse, result)
+ } else {
+ assert.NotContains(t, contentEncoding, "gzip")
+ result := resp.Body.Bytes()
+ assert.Equal(t, *sampleResponse, result)
+ }
+}
+
+func TestMiddlewareSmall(t *testing.T) {
+ m, sampleResponse := setup([]byte("Small response"))
+
+ reqNoAcceptGzip(t, m, sampleResponse)
+
+ reqAcceptGzip(t, m, sampleResponse, false)
+}
+
+func TestMiddlewareLarge(t *testing.T) {
+ b := make([]byte, MinSize+1)
+ for i := range b {
+ b[i] = byte(i % 256)
+ }
+ m, sampleResponse := setup(b)
+
+ reqNoAcceptGzip(t, m, sampleResponse)
+
+ // This should be gzipped as we accept gzip
+ reqAcceptGzip(t, m, sampleResponse, true)
+}
+
+func TestMiddlewareGzip(t *testing.T) {
+ b := make([]byte, MinSize*10)
+ for i := range b {
+ b[i] = byte(i % 256)
+ }
+ outputBuffer := bytes.NewBuffer([]byte{})
+ gzippWriter := gzipp.NewWriter(outputBuffer)
+ gzippWriter.Write(b)
+ gzippWriter.Flush()
+ gzippWriter.Close()
+ output := outputBuffer.Bytes()
+
+ m, sampleResponse := setup(output)
+
+ reqNoAcceptGzip(t, m, sampleResponse)
+
+ // This should not be gzipped even though we accept gzip
+ reqAcceptGzip(t, m, sampleResponse, false)
+}
+
+func TestMiddlewareZip(t *testing.T) {
+ b := make([]byte, MinSize*10)
+ for i := range b {
+ b[i] = byte(i % 256)
+ }
+ outputBuffer := bytes.NewBuffer([]byte{})
+ zipWriter := zip.NewWriter(outputBuffer)
+ fileWriter, err := zipWriter.Create("default")
+ assert.NoError(t, err)
+ fileWriter.Write(b)
+ // the file entry has no Close of its own; it is finalized by zipWriter.Close() below
+ zipWriter.Close()
+ output := outputBuffer.Bytes()
+
+ m, sampleResponse := setup(output)
+
+ reqNoAcceptGzip(t, m, sampleResponse)
+
+ // This should not be gzipped even though we accept gzip
+ reqAcceptGzip(t, m, sampleResponse, false)
+}
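For reference, wiring the middleware into a macaron app is a one-liner. A minimal sketch of a hypothetical setup follows (Gitea's real registration lives elsewhere in its routing code, and the import path is assumed here):

    package main

    import (
        "code.gitea.io/gitea/modules/gzip"
        macaron "gopkg.in/macaron.v1"
    )

    func main() {
        m := macaron.New()
        // Level 4 is the default; any level accepted by validateCompressionLevel works.
        m.Use(gzip.Middleware(gzip.Options{CompressionLevel: 4}))
        m.Get("/", func() string { return "hello" })
        m.Run()
    }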