From 7d434376f1504ff64f249b879634ac2eec9440da Mon Sep 17 00:00:00 2001 From: zeripath Date: Wed, 23 Jan 2019 08:56:51 +0000 Subject: Pooled and buffered gzip implementation (#5722) * Pooled and buffered gzip implementation * Add test for gzip * Add integration test * Ensure lfs check within transaction The previous code made it possible for a race condition to occur whereby a LFSMetaObject could be checked into the database twice. We should check if the LFSMetaObject is within the database and insert it if not in one transaction. * Try to avoid primary key problem in postgres The integration tests are being affected by https://github.com/go-testfixtures/testfixtures/issues/39 if we set the primary key high enough, keep a count of this and remove at the end of each test we shouldn't be affected by this. --- modules/gzip/gzip.go | 327 ++++++++++++++++++++++++++++++++++++++++++++++ modules/gzip/gzip_test.go | 131 +++++++++++++++++++ 2 files changed, 458 insertions(+) create mode 100644 modules/gzip/gzip.go create mode 100644 modules/gzip/gzip_test.go (limited to 'modules') diff --git a/modules/gzip/gzip.go b/modules/gzip/gzip.go new file mode 100644 index 0000000000..4a4a797c7a --- /dev/null +++ b/modules/gzip/gzip.go @@ -0,0 +1,327 @@ +// Copyright 2019 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package gzip + +import ( + "bufio" + "fmt" + "io" + "net" + "net/http" + "regexp" + "strconv" + "strings" + "sync" + + "github.com/klauspost/compress/gzip" + "gopkg.in/macaron.v1" +) + +const ( + acceptEncodingHeader = "Accept-Encoding" + contentEncodingHeader = "Content-Encoding" + contentLengthHeader = "Content-Length" + contentTypeHeader = "Content-Type" + rangeHeader = "Range" + varyHeader = "Vary" +) + +const ( + // MinSize is the minimum size of content we will compress + MinSize = 1400 +) + +// noopClosers are io.Writers with a shim to prevent early closure +type noopCloser struct { + io.Writer +} + +func (noopCloser) Close() error { return nil } + +// WriterPool is a gzip writer pool to reduce workload on creation of +// gzip writers +type WriterPool struct { + pool sync.Pool + compressionLevel int +} + +// NewWriterPool creates a new pool +func NewWriterPool(compressionLevel int) *WriterPool { + return &WriterPool{pool: sync.Pool{ + // New will return nil, we'll manage the creation of new + // writers in the middleware + New: func() interface{} { return nil }, + }, + compressionLevel: compressionLevel} +} + +// Get a writer from the pool - or create one if not available +func (wp *WriterPool) Get(rw macaron.ResponseWriter) *gzip.Writer { + ret := wp.pool.Get() + if ret == nil { + ret, _ = gzip.NewWriterLevel(rw, wp.compressionLevel) + } else { + ret.(*gzip.Writer).Reset(rw) + } + return ret.(*gzip.Writer) +} + +// Put returns a writer to the pool +func (wp *WriterPool) Put(w *gzip.Writer) { + wp.pool.Put(w) +} + +var writerPool WriterPool +var regex regexp.Regexp + +// Options represents the configuration for the gzip middleware +type Options struct { + CompressionLevel int +} + +func validateCompressionLevel(level int) bool { + return level == gzip.DefaultCompression || + level == gzip.ConstantCompression || + (level >= gzip.BestSpeed && level <= gzip.BestCompression) +} + +func validate(options []Options) Options { + // Default to level 4 compression (Best results seem to be between 4 and 6) + opt := Options{CompressionLevel: 4} + if len(options) > 0 { + opt = options[0] + } + if !validateCompressionLevel(opt.CompressionLevel) { + opt.CompressionLevel = 4 + } + return opt +} + +// Middleware creates a macaron.Handler to proxy the response +func Middleware(options ...Options) macaron.Handler { + opt := validate(options) + writerPool = *NewWriterPool(opt.CompressionLevel) + regex := regexp.MustCompile(`bytes=(\d+)\-.*`) + + return func(ctx *macaron.Context) { + // If the client won't accept gzip or x-gzip don't compress + if !strings.Contains(ctx.Req.Header.Get(acceptEncodingHeader), "gzip") && + !strings.Contains(ctx.Req.Header.Get(acceptEncodingHeader), "x-gzip") { + return + } + + // If the client is asking for a specific range of bytes - don't compress + if rangeHdr := ctx.Req.Header.Get(rangeHeader); rangeHdr != "" { + + match := regex.FindStringSubmatch(rangeHdr) + if match != nil && len(match) > 1 { + return + } + } + + // OK we should proxy the response writer + // We are still not necessarily going to compress... + proxyWriter := &ProxyResponseWriter{ + ResponseWriter: ctx.Resp, + } + defer proxyWriter.Close() + + ctx.Resp = proxyWriter + ctx.MapTo(proxyWriter, (*http.ResponseWriter)(nil)) + + // Check if render middleware has been registered, + // if yes, we need to modify ResponseWriter for it as well. + if _, ok := ctx.Render.(*macaron.DummyRender); !ok { + ctx.Render.SetResponseWriter(proxyWriter) + } + + ctx.Next() + } +} + +// ProxyResponseWriter is a wrapped macaron ResponseWriter that may compress its contents +type ProxyResponseWriter struct { + writer io.WriteCloser + macaron.ResponseWriter + stopped bool + + code int + buf []byte +} + +// Write appends data to the proxied gzip writer. +func (proxy *ProxyResponseWriter) Write(b []byte) (int, error) { + // if writer is initialized, use the writer + if proxy.writer != nil { + return proxy.writer.Write(b) + } + + proxy.buf = append(proxy.buf, b...) + + var ( + contentLength, _ = strconv.Atoi(proxy.Header().Get(contentLengthHeader)) + contentType = proxy.Header().Get(contentTypeHeader) + contentEncoding = proxy.Header().Get(contentEncodingHeader) + ) + + // OK if an encoding hasn't been chosen, and content length > 1400 + // and content type isn't a compressed type + if contentEncoding == "" && + (contentLength == 0 || contentLength >= MinSize) && + (contentType == "" || !compressedContentType(contentType)) { + // If current buffer is less than the min size and a Content-Length isn't set, then wait + if len(proxy.buf) < MinSize && contentLength == 0 { + return len(b), nil + } + + // If the Content-Length is larger than minSize or the current buffer is larger than minSize, then continue. + if contentLength >= MinSize || len(proxy.buf) >= MinSize { + // if we don't know the content type, infer it + if contentType == "" { + contentType = http.DetectContentType(proxy.buf) + proxy.Header().Set(contentTypeHeader, contentType) + } + // If the Content-Type is not compressed - Compress! + if !compressedContentType(contentType) { + if err := proxy.startGzip(); err != nil { + return 0, err + } + return len(b), nil + } + } + } + // If we got here, we should not GZIP this response. + if err := proxy.startPlain(); err != nil { + return 0, err + } + return len(b), nil +} + +func (proxy *ProxyResponseWriter) startGzip() error { + // Set the content-encoding and vary headers. + proxy.Header().Set(contentEncodingHeader, "gzip") + proxy.Header().Set(varyHeader, acceptEncodingHeader) + + // if the Content-Length is already set, then calls to Write on gzip + // will fail to set the Content-Length header since its already set + // See: https://github.com/golang/go/issues/14975. + proxy.Header().Del(contentLengthHeader) + + // Write the header to gzip response. + if proxy.code != 0 { + proxy.ResponseWriter.WriteHeader(proxy.code) + // Ensure that no other WriteHeader's happen + proxy.code = 0 + } + + // Initialize and flush the buffer into the gzip response if there are any bytes. + // If there aren't any, we shouldn't initialize it yet because on Close it will + // write the gzip header even if nothing was ever written. + if len(proxy.buf) > 0 { + // Initialize the GZIP response. + proxy.writer = writerPool.Get(proxy.ResponseWriter) + + return proxy.writeBuf() + } + return nil +} + +func (proxy *ProxyResponseWriter) startPlain() error { + if proxy.code != 0 { + proxy.ResponseWriter.WriteHeader(proxy.code) + proxy.code = 0 + } + proxy.stopped = true + proxy.writer = noopCloser{proxy.ResponseWriter} + return proxy.writeBuf() +} + +func (proxy *ProxyResponseWriter) writeBuf() error { + if proxy.buf == nil { + return nil + } + + n, err := proxy.writer.Write(proxy.buf) + + // This should never happen (per io.Writer docs), but if the write didn't + // accept the entire buffer but returned no specific error, we have no clue + // what's going on, so abort just to be safe. + if err == nil && n < len(proxy.buf) { + err = io.ErrShortWrite + } + proxy.buf = nil + return err +} + +// WriteHeader will ensure that we have setup the writer before we write the header +func (proxy *ProxyResponseWriter) WriteHeader(code int) { + if proxy.code == 0 { + proxy.code = code + } +} + +// Close the writer +func (proxy *ProxyResponseWriter) Close() error { + if proxy.stopped { + return nil + } + + if proxy.writer == nil { + err := proxy.startPlain() + + if err != nil { + err = fmt.Errorf("GzipMiddleware: write to regular responseWriter at close gets error: %q", err.Error()) + } + } + + err := proxy.writer.Close() + + if poolWriter, ok := proxy.writer.(*gzip.Writer); ok { + writerPool.Put(poolWriter) + } + + proxy.writer = nil + proxy.stopped = true + return err +} + +// Flush the writer +func (proxy *ProxyResponseWriter) Flush() { + if proxy.writer == nil { + return + } + + if gw, ok := proxy.writer.(*gzip.Writer); ok { + gw.Flush() + } + + proxy.ResponseWriter.Flush() +} + +// Hijack implements http.Hijacker. If the underlying ResponseWriter is a +// Hijacker, its Hijack method is returned. Otherwise an error is returned. +func (proxy *ProxyResponseWriter) Hijack() (net.Conn, *bufio.ReadWriter, error) { + hijacker, ok := proxy.ResponseWriter.(http.Hijacker) + if !ok { + return nil, nil, fmt.Errorf("the ResponseWriter doesn't support the Hijacker interface") + } + return hijacker.Hijack() +} + +// verify Hijacker interface implementation +var _ http.Hijacker = &ProxyResponseWriter{} + +func compressedContentType(contentType string) bool { + switch contentType { + case "application/zip": + return true + case "application/x-gzip": + return true + case "application/gzip": + return true + default: + return false + } +} diff --git a/modules/gzip/gzip_test.go b/modules/gzip/gzip_test.go new file mode 100644 index 0000000000..d131e240af --- /dev/null +++ b/modules/gzip/gzip_test.go @@ -0,0 +1,131 @@ +// Copyright 2019 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package gzip + +import ( + "archive/zip" + "bytes" + "io/ioutil" + "net/http" + "net/http/httptest" + "testing" + + gzipp "github.com/klauspost/compress/gzip" + "github.com/stretchr/testify/assert" + macaron "gopkg.in/macaron.v1" +) + +func setup(sampleResponse []byte) (*macaron.Macaron, *[]byte) { + m := macaron.New() + m.Use(Middleware()) + m.Get("/", func() *[]byte { return &sampleResponse }) + return m, &sampleResponse +} + +func reqNoAcceptGzip(t *testing.T, m *macaron.Macaron, sampleResponse *[]byte) { + // Request without accept gzip: Should not gzip + resp := httptest.NewRecorder() + req, err := http.NewRequest("GET", "/", nil) + assert.NoError(t, err) + m.ServeHTTP(resp, req) + + _, ok := resp.HeaderMap[contentEncodingHeader] + assert.False(t, ok) + + contentEncoding := resp.Header().Get(contentEncodingHeader) + assert.NotContains(t, contentEncoding, "gzip") + + result := resp.Body.Bytes() + assert.Equal(t, *sampleResponse, result) +} + +func reqAcceptGzip(t *testing.T, m *macaron.Macaron, sampleResponse *[]byte, expectGzip bool) { + // Request without accept gzip: Should not gzip + resp := httptest.NewRecorder() + req, err := http.NewRequest("GET", "/", nil) + assert.NoError(t, err) + req.Header.Set(acceptEncodingHeader, "gzip") + m.ServeHTTP(resp, req) + + _, ok := resp.HeaderMap[contentEncodingHeader] + assert.Equal(t, ok, expectGzip) + + contentEncoding := resp.Header().Get(contentEncodingHeader) + if expectGzip { + assert.Contains(t, contentEncoding, "gzip") + gzippReader, err := gzipp.NewReader(resp.Body) + assert.NoError(t, err) + result, err := ioutil.ReadAll(gzippReader) + assert.NoError(t, err) + assert.Equal(t, *sampleResponse, result) + } else { + assert.NotContains(t, contentEncoding, "gzip") + result := resp.Body.Bytes() + assert.Equal(t, *sampleResponse, result) + } +} + +func TestMiddlewareSmall(t *testing.T) { + m, sampleResponse := setup([]byte("Small response")) + + reqNoAcceptGzip(t, m, sampleResponse) + + reqAcceptGzip(t, m, sampleResponse, false) +} + +func TestMiddlewareLarge(t *testing.T) { + b := make([]byte, MinSize+1) + for i := range b { + b[i] = byte(i % 256) + } + m, sampleResponse := setup(b) + + reqNoAcceptGzip(t, m, sampleResponse) + + // This should be gzipped as we accept gzip + reqAcceptGzip(t, m, sampleResponse, true) +} + +func TestMiddlewareGzip(t *testing.T) { + b := make([]byte, MinSize*10) + for i := range b { + b[i] = byte(i % 256) + } + outputBuffer := bytes.NewBuffer([]byte{}) + gzippWriter := gzipp.NewWriter(outputBuffer) + gzippWriter.Write(b) + gzippWriter.Flush() + gzippWriter.Close() + output := outputBuffer.Bytes() + + m, sampleResponse := setup(output) + + reqNoAcceptGzip(t, m, sampleResponse) + + // This should not be gzipped even though we accept gzip + reqAcceptGzip(t, m, sampleResponse, false) +} + +func TestMiddlewareZip(t *testing.T) { + b := make([]byte, MinSize*10) + for i := range b { + b[i] = byte(i % 256) + } + outputBuffer := bytes.NewBuffer([]byte{}) + zipWriter := zip.NewWriter(outputBuffer) + fileWriter, err := zipWriter.Create("default") + assert.NoError(t, err) + fileWriter.Write(b) + //fileWriter.Close() + zipWriter.Close() + output := outputBuffer.Bytes() + + m, sampleResponse := setup(output) + + reqNoAcceptGzip(t, m, sampleResponse) + + // This should not be gzipped even though we accept gzip + reqAcceptGzip(t, m, sampleResponse, false) +} -- cgit v1.2.3