summaryrefslogtreecommitdiffstats
path: root/modules/gzip/gzip.go
blob: 4a4a797c7aac0f2ac43524880a166cacaf1d7212 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
// Copyright 2019 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package gzip

import (
	"bufio"
	"fmt"
	"io"
	"net"
	"net/http"
	"regexp"
	"strconv"
	"strings"
	"sync"

	"github.com/klauspost/compress/gzip"
	"gopkg.in/macaron.v1"
)

// Names of the HTTP headers this middleware reads from the request or
// reads/writes on the response.
const (
	acceptEncodingHeader  = "Accept-Encoding"
	contentEncodingHeader = "Content-Encoding"
	contentLengthHeader   = "Content-Length"
	contentTypeHeader     = "Content-Type"
	rangeHeader           = "Range"
	varyHeader            = "Vary"
)

const (
	// MinSize is the minimum size, in bytes, of content we will compress.
	// ProxyResponseWriter.Write compares both the declared Content-Length and
	// the number of bytes buffered so far against this threshold.
	MinSize = 1400
)

// noopCloser wraps an io.Writer and adds a Close method that does nothing, so
// the wrapper can be used where an io.WriteCloser is required without the
// wrapped writer ever being closed through it.
type noopCloser struct {
	io.Writer
}

// Close is a no-op and always reports success.
func (nc noopCloser) Close() error {
	return nil
}

// WriterPool is a gzip writer pool to reduce workload on creation of
// gzip writers
type WriterPool struct {
	// pool holds idle *gzip.Writer values handed back through Put.
	pool sync.Pool
	// compressionLevel is the level passed to gzip.NewWriterLevel when Get
	// has to create a new writer.
	compressionLevel int
}

// NewWriterPool returns an empty WriterPool that creates its writers at the
// given compression level.
func NewWriterPool(compressionLevel int) *WriterPool {
	wp := &WriterPool{compressionLevel: compressionLevel}
	// The pool's New hook intentionally yields nil: Get detects the empty
	// pool and constructs the writer itself, because it needs the target
	// response writer to do so.
	wp.pool.New = func() interface{} { return nil }
	return wp
}

// Get returns a gzip writer that writes to rw. A pooled writer is reused (after
// being Reset onto rw) when one is available; otherwise a new writer is created
// at the pool's compression level.
//
// If the configured level is rejected by gzip.NewWriterLevel, the writer is
// created with the library's default level instead, so Get never returns nil.
func (wp *WriterPool) Get(rw macaron.ResponseWriter) *gzip.Writer {
	// The type assertion fails for the nil interface an empty pool yields;
	// the extra nil check also skips a typed-nil pointer that may have been Put.
	if pooled, ok := wp.pool.Get().(*gzip.Writer); ok && pooled != nil {
		pooled.Reset(rw)
		return pooled
	}

	gw, err := gzip.NewWriterLevel(rw, wp.compressionLevel)
	if err != nil {
		// Invalid level: fall back rather than hand the caller a nil writer
		// that would panic on first use.
		gw = gzip.NewWriter(rw)
	}
	return gw
}

// Put returns a writer to the pool so a later Get can reuse it.
// A nil writer is ignored: pooling it would make a later Get call Reset on a
// nil pointer.
func (wp *WriterPool) Put(w *gzip.Writer) {
	if w == nil {
		return
	}
	wp.pool.Put(w)
}

// writerPool is the package-wide pool used by every ProxyResponseWriter.
// Middleware overwrites it each time it is called.
var writerPool WriterPool

// regex is not referenced in this file: Middleware declares a local variable
// of the same name that shadows it.
// NOTE(review): appears to be unused — confirm before removing.
var regex regexp.Regexp

// Options represents the configuration for the gzip middleware
type Options struct {
	// CompressionLevel is the gzip level used for new writers. Values that
	// fail validateCompressionLevel are replaced with level 4 by validate.
	CompressionLevel int
}

// validateCompressionLevel reports whether level is one the gzip package
// accepts: one of the two special levels (default, constant) or a value in
// the inclusive range BestSpeed..BestCompression.
func validateCompressionLevel(level int) bool {
	if level == gzip.DefaultCompression || level == gzip.ConstantCompression {
		return true
	}
	return gzip.BestSpeed <= level && level <= gzip.BestCompression
}

// validate picks the effective Options for the middleware. Only the first
// element of options is considered; when none is given, or when its
// compression level is not valid, level 4 is used (best results seem to be
// between 4 and 6).
func validate(options []Options) Options {
	const fallbackLevel = 4

	if len(options) == 0 {
		return Options{CompressionLevel: fallbackLevel}
	}

	chosen := options[0]
	if !validateCompressionLevel(chosen.CompressionLevel) {
		chosen.CompressionLevel = fallbackLevel
	}
	return chosen
}

// Middleware builds a macaron.Handler that swaps the context's response writer
// for a ProxyResponseWriter, which may gzip the response body. Only the first
// Options value, if any, is used. Calling Middleware also replaces the
// package-level writer pool.
func Middleware(options ...Options) macaron.Handler {
	cfg := validate(options)
	writerPool = *NewWriterPool(cfg.CompressionLevel)
	byteRange := regexp.MustCompile(`bytes=(\d+)\-.*`)

	return func(ctx *macaron.Context) {
		// Leave the response alone unless the client advertises gzip or x-gzip.
		accepted := ctx.Req.Header.Get(acceptEncodingHeader)
		if !(strings.Contains(accepted, "gzip") || strings.Contains(accepted, "x-gzip")) {
			return
		}

		// A request for a specific byte range must not be compressed.
		if requested := ctx.Req.Header.Get(rangeHeader); requested != "" {
			if groups := byteRange.FindStringSubmatch(requested); len(groups) > 1 {
				return
			}
		}

		// Install the proxy. Whether it actually compresses is decided later,
		// once the handler starts writing.
		proxyWriter := &ProxyResponseWriter{ResponseWriter: ctx.Resp}
		defer proxyWriter.Close()

		ctx.Resp = proxyWriter
		ctx.MapTo(proxyWriter, (*http.ResponseWriter)(nil))

		// A registered render middleware holds its own reference to the
		// response writer, so point it at the proxy as well.
		if _, isDummy := ctx.Render.(*macaron.DummyRender); !isDummy {
			ctx.Render.SetResponseWriter(proxyWriter)
		}

		ctx.Next()
	}
}

// ProxyResponseWriter is a wrapped macaron ResponseWriter that may compress its contents
type ProxyResponseWriter struct {
	// writer is the destination for body bytes once a decision has been made:
	// a pooled gzip writer (startGzip) or a noopCloser around the underlying
	// ResponseWriter (startPlain). It is nil while Write is still buffering.
	writer io.WriteCloser
	macaron.ResponseWriter
	// stopped is set by startPlain and by Close; Close returns immediately
	// when it is already set.
	stopped bool

	// code is the status recorded by WriteHeader and not yet forwarded to the
	// underlying ResponseWriter; 0 means nothing is pending.
	code int
	// buf accumulates body bytes written before the gzip/plain decision.
	buf []byte
}

// Write accepts response body bytes. Until a decision has been made it buffers
// them in proxy.buf; once the response headers and the amount of buffered data
// allow a decision, it starts either the gzip writer or the plain pass-through
// writer and flushes the buffer into it. Later calls go straight to that writer.
//
// While buffering (and when the buffer is flushed successfully) it reports
// len(b) bytes written; if starting the writer or flushing the buffer fails it
// returns 0 and the error.
func (proxy *ProxyResponseWriter) Write(b []byte) (int, error) {
	// if writer is initialized, use the writer
	if proxy.writer != nil {
		return proxy.writer.Write(b)
	}

	proxy.buf = append(proxy.buf, b...)

	var (
		// A missing or unparsable Content-Length yields 0, which is treated
		// below as "length unknown".
		contentLength, _ = strconv.Atoi(proxy.Header().Get(contentLengthHeader))
		contentType      = proxy.Header().Get(contentTypeHeader)
		contentEncoding  = proxy.Header().Get(contentEncodingHeader)
	)

	// OK if an encoding hasn't been chosen, and content length > 1400
	// and content type isn't a compressed type
	if contentEncoding == "" &&
		(contentLength == 0 || contentLength >= MinSize) &&
		(contentType == "" || !compressedContentType(contentType)) {
		// If current buffer is less than the min size and a Content-Length isn't set, then wait
		if len(proxy.buf) < MinSize && contentLength == 0 {
			return len(b), nil
		}

		// If the Content-Length is larger than minSize or the current buffer is larger than minSize, then continue.
		if contentLength >= MinSize || len(proxy.buf) >= MinSize {
			// if we don't know the content type, infer it
			if contentType == "" {
				contentType = http.DetectContentType(proxy.buf)
				proxy.Header().Set(contentTypeHeader, contentType)
			}
			// If the Content-Type is not compressed - Compress!
			// (Re-checked because the type may have just been sniffed above.)
			if !compressedContentType(contentType) {
				if err := proxy.startGzip(); err != nil {
					return 0, err
				}
				return len(b), nil
			}
		}
	}
	// If we got here, we should not GZIP this response.
	if err := proxy.startPlain(); err != nil {
		return 0, err
	}
	return len(b), nil
}

// startGzip commits the response to gzip encoding: it sets the response
// headers, forwards any pending status code to the underlying ResponseWriter
// and, if bytes are buffered, obtains a gzip writer from the pool and flushes
// the buffer through it. It returns the error from flushing the buffer, if any.
func (proxy *ProxyResponseWriter) startGzip() error {
	hdr := proxy.Header()

	hdr.Set(contentEncodingHeader, "gzip")

	// Append to Vary instead of replacing it, so a value the handler already
	// set (for example "Origin") is preserved. Skip the append when
	// Accept-Encoding is already listed, which also keeps repeated calls from
	// adding duplicates.
	listed := false
	for _, v := range hdr[varyHeader] {
		if strings.Contains(strings.ToLower(v), strings.ToLower(acceptEncodingHeader)) {
			listed = true
			break
		}
	}
	if !listed {
		hdr.Add(varyHeader, acceptEncodingHeader)
	}

	// if the Content-Length is already set, then calls to Write on gzip
	// will fail to set the Content-Length header since its already set
	// See: https://github.com/golang/go/issues/14975.
	hdr.Del(contentLengthHeader)

	// Write the header to gzip response.
	if proxy.code != 0 {
		proxy.ResponseWriter.WriteHeader(proxy.code)
		// Ensure that no other WriteHeader's happen
		proxy.code = 0
	}

	// Initialize and flush the buffer into the gzip response if there are any bytes.
	// If there aren't any, we shouldn't initialize it yet because on Close it will
	// write the gzip header even if nothing was ever written.
	if len(proxy.buf) > 0 {
		// Initialize the GZIP response.
		proxy.writer = writerPool.Get(proxy.ResponseWriter)

		return proxy.writeBuf()
	}
	return nil
}

// startPlain commits the response to being sent uncompressed. It forwards any
// pending status code, routes further writes straight to the underlying
// ResponseWriter, marks the proxy as stopped and flushes the buffered bytes.
// It returns the error from flushing the buffer, if any.
func (proxy *ProxyResponseWriter) startPlain() error {
	if status := proxy.code; status != 0 {
		proxy.ResponseWriter.WriteHeader(status)
		proxy.code = 0
	}

	// noopCloser keeps Close from touching the underlying ResponseWriter.
	proxy.writer = noopCloser{Writer: proxy.ResponseWriter}
	proxy.stopped = true

	return proxy.writeBuf()
}

// writeBuf sends the buffered bytes to proxy.writer and then drops the buffer,
// whether or not the write succeeded. It does nothing when the buffer is nil.
// The caller must have set proxy.writer beforehand.
func (proxy *ProxyResponseWriter) writeBuf() error {
	if proxy.buf == nil {
		return nil
	}

	pending := proxy.buf
	written, err := proxy.writer.Write(pending)
	proxy.buf = nil

	switch {
	case err != nil:
		return err
	case written < len(pending):
		// Per the io.Writer contract a short write must come with an error.
		// If it did not, we cannot tell what happened, so report it ourselves.
		return io.ErrShortWrite
	default:
		return nil
	}
}

// WriteHeader records the status code instead of sending it immediately; the
// code is forwarded to the underlying ResponseWriter by startGzip or
// startPlain. While a code is already pending, further calls are ignored.
func (proxy *ProxyResponseWriter) WriteHeader(code int) {
	if proxy.code != 0 {
		return
	}
	proxy.code = code
}

// Close finishes the response. If no writer was ever started (the body stayed
// below the buffering threshold, or nothing was written), the buffered bytes
// are sent uncompressed first. The active writer is then closed and, when it
// is a gzip writer, returned to the pool.
//
// Close is a no-op once the proxy has been stopped. It returns the error from
// the final uncompressed write if that failed, otherwise the error from
// closing the writer.
func (proxy *ProxyResponseWriter) Close() error {
	if proxy.stopped {
		return nil
	}

	// Keep the start error in the outer scope so it is reported to the caller
	// rather than dropped.
	var startErr error
	if proxy.writer == nil {
		if err := proxy.startPlain(); err != nil {
			startErr = fmt.Errorf("GzipMiddleware: write to regular responseWriter at close gets error: %q", err.Error())
		}
	}

	// startPlain always installs a writer, so proxy.writer is non-nil here.
	closeErr := proxy.writer.Close()

	if poolWriter, ok := proxy.writer.(*gzip.Writer); ok {
		writerPool.Put(poolWriter)
	}

	proxy.writer = nil
	proxy.stopped = true

	if startErr != nil {
		return startErr
	}
	return closeErr
}

// Flush pushes pending output to the client. While no writer has been started
// (data is still only buffered) it does nothing. Otherwise it flushes the gzip
// writer, if that is what is active, and then the underlying ResponseWriter.
func (proxy *ProxyResponseWriter) Flush() {
	switch active := proxy.writer.(type) {
	case nil:
		return
	case *gzip.Writer:
		active.Flush()
	}

	proxy.ResponseWriter.Flush()
}

// Hijack implements http.Hijacker by delegating to the underlying
// ResponseWriter when it supports hijacking; otherwise it returns an error.
func (proxy *ProxyResponseWriter) Hijack() (net.Conn, *bufio.ReadWriter, error) {
	if hijacker, ok := proxy.ResponseWriter.(http.Hijacker); ok {
		return hijacker.Hijack()
	}
	return nil, nil, fmt.Errorf("the ResponseWriter doesn't support the Hijacker interface")
}

// Compile-time check that *ProxyResponseWriter satisfies http.Hijacker.
var _ http.Hijacker = (*ProxyResponseWriter)(nil)

// compressedContentType reports whether contentType names a format that is
// already compressed (zip or gzip) and therefore should not be gzipped again.
//
// Only the media type is compared: any parameters after ';' (for example
// "; charset=binary") and surrounding whitespace are ignored, and the
// comparison is case-insensitive.
func compressedContentType(contentType string) bool {
	mediaType := contentType
	if i := strings.IndexByte(mediaType, ';'); i >= 0 {
		mediaType = mediaType[:i]
	}

	switch strings.ToLower(strings.TrimSpace(mediaType)) {
	case "application/zip", "application/x-gzip", "application/gzip":
		return true
	default:
		return false
	}
}