123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532 |
- package gziphandler // import "github.com/NYTimes/gziphandler"
-
- import (
- "bufio"
- "compress/gzip"
- "fmt"
- "io"
- "mime"
- "net"
- "net/http"
- "strconv"
- "strings"
- "sync"
- )
-
// Names of the HTTP headers that this package reads or writes.
const (
	// vary is the response header extended with Accept-Encoding.
	vary = "Vary"
	// acceptEncoding is the request header listing the codings a client accepts.
	acceptEncoding = "Accept-Encoding"
	// contentEncoding is the response header set to "gzip" when compressing.
	contentEncoding = "Content-Encoding"
	// contentType is the response header checked against the configured types.
	contentType = "Content-Type"
	// contentLength is the response header compared against the minimum size.
	contentLength = "Content-Length"
)
-
// codings maps a content-coding name (for example "gzip") to the qvalue
// associated with it in an Accept-Encoding header.
type codings map[string]float64
-
const (
	// DefaultQValue is the qvalue assigned to an encoding that does not carry
	// an explicit one. RFC 2616 is somewhat ambiguous on this point, but its
	// examples suggest that this is the intended default.
	DefaultQValue = 1.0

	// DefaultMinSize is the default minimum response size, in bytes, before
	// gzip compression is enabled.
	//
	// 1500 bytes is the MTU size for the internet, since that is the largest
	// size allowed at the network layer. A 1300 byte file compressed down to
	// 800 bytes is still transmitted in that same 1500 byte packet, so nothing
	// is gained. Compression should therefore be restricted to responses
	// larger than a single packet; 1400 bytes (1.4KB) is a safe value.
	DefaultMinSize = 1400
)
-
// gzipWriterPools holds one sync.Pool of reusable gzip.Writers per supported
// compression level: a slot for every level from gzip.BestSpeed through
// gzip.BestCompression, plus one extra slot for gzip.DefaultCompression.
// Use poolIndex to convert a compression level into an index into this array.
var gzipWriterPools [gzip.BestCompression - gzip.BestSpeed + 2]*sync.Pool
-
- func init() {
- for i := gzip.BestSpeed; i <= gzip.BestCompression; i++ {
- addLevelPool(i)
- }
- addLevelPool(gzip.DefaultCompression)
- }
-
// poolIndex returns the index into gzipWriterPools that corresponds to the
// given compression level. The level is assumed to be a valid gzip
// compression level; no validation is performed here.
func poolIndex(level int) int {
	if level != gzip.DefaultCompression {
		// Explicit levels are stored in order starting at gzip.BestSpeed.
		return level - gzip.BestSpeed
	}
	// gzip.DefaultCompression is -1, so it cannot be used as an offset and is
	// given the slot after the last explicit level instead.
	return gzip.BestCompression - gzip.BestSpeed + 1
}
-
- func addLevelPool(level int) {
- gzipWriterPools[poolIndex(level)] = &sync.Pool{
- New: func() interface{} {
- // NewWriterLevel only returns error on a bad level, we are guaranteeing
- // that this will be a valid level so it is okay to ignore the returned
- // error.
- w, _ := gzip.NewWriterLevel(nil, level)
- return w
- },
- }
- }
-
- // GzipResponseWriter provides an http.ResponseWriter interface, which gzips
- // bytes before writing them to the underlying response. This doesn't close the
- // writers, so don't forget to do that.
- // It can be configured to skip response smaller than minSize.
- type GzipResponseWriter struct {
- http.ResponseWriter
- index int // Index for gzipWriterPools.
- gw *gzip.Writer
-
- code int // Saves the WriteHeader value.
-
- minSize int // Specifed the minimum response size to gzip. If the response length is bigger than this value, it is compressed.
- buf []byte // Holds the first part of the write before reaching the minSize or the end of the write.
- ignore bool // If true, then we immediately passthru writes to the underlying ResponseWriter.
-
- contentTypes []parsedContentType // Only compress if the response is one of these content-types. All are accepted if empty.
- }
-
- type GzipResponseWriterWithCloseNotify struct {
- *GzipResponseWriter
- }
-
- func (w GzipResponseWriterWithCloseNotify) CloseNotify() <-chan bool {
- return w.ResponseWriter.(http.CloseNotifier).CloseNotify()
- }
-
- // Write appends data to the gzip writer.
- func (w *GzipResponseWriter) Write(b []byte) (int, error) {
- // GZIP responseWriter is initialized. Use the GZIP responseWriter.
- if w.gw != nil {
- return w.gw.Write(b)
- }
-
- // If we have already decided not to use GZIP, immediately passthrough.
- if w.ignore {
- return w.ResponseWriter.Write(b)
- }
-
- // Save the write into a buffer for later use in GZIP responseWriter (if content is long enough) or at close with regular responseWriter.
- // On the first write, w.buf changes from nil to a valid slice
- w.buf = append(w.buf, b...)
-
- var (
- cl, _ = strconv.Atoi(w.Header().Get(contentLength))
- ct = w.Header().Get(contentType)
- ce = w.Header().Get(contentEncoding)
- )
- // Only continue if they didn't already choose an encoding or a known unhandled content length or type.
- if ce == "" && (cl == 0 || cl >= w.minSize) && (ct == "" || handleContentType(w.contentTypes, ct)) {
- // If the current buffer is less than minSize and a Content-Length isn't set, then wait until we have more data.
- if len(w.buf) < w.minSize && cl == 0 {
- return len(b), nil
- }
- // If the Content-Length is larger than minSize or the current buffer is larger than minSize, then continue.
- if cl >= w.minSize || len(w.buf) >= w.minSize {
- // If a Content-Type wasn't specified, infer it from the current buffer.
- if ct == "" {
- ct = http.DetectContentType(w.buf)
- w.Header().Set(contentType, ct)
- }
- // If the Content-Type is acceptable to GZIP, initialize the GZIP writer.
- if handleContentType(w.contentTypes, ct) {
- if err := w.startGzip(); err != nil {
- return 0, err
- }
- return len(b), nil
- }
- }
- }
- // If we got here, we should not GZIP this response.
- if err := w.startPlain(); err != nil {
- return 0, err
- }
- return len(b), nil
- }
-
- // startGzip initializes a GZIP writer and writes the buffer.
- func (w *GzipResponseWriter) startGzip() error {
- // Set the GZIP header.
- w.Header().Set(contentEncoding, "gzip")
-
- // if the Content-Length is already set, then calls to Write on gzip
- // will fail to set the Content-Length header since its already set
- // See: https://github.com/golang/go/issues/14975.
- w.Header().Del(contentLength)
-
- // Write the header to gzip response.
- if w.code != 0 {
- w.ResponseWriter.WriteHeader(w.code)
- // Ensure that no other WriteHeader's happen
- w.code = 0
- }
-
- // Initialize and flush the buffer into the gzip response if there are any bytes.
- // If there aren't any, we shouldn't initialize it yet because on Close it will
- // write the gzip header even if nothing was ever written.
- if len(w.buf) > 0 {
- // Initialize the GZIP response.
- w.init()
- n, err := w.gw.Write(w.buf)
-
- // This should never happen (per io.Writer docs), but if the write didn't
- // accept the entire buffer but returned no specific error, we have no clue
- // what's going on, so abort just to be safe.
- if err == nil && n < len(w.buf) {
- err = io.ErrShortWrite
- }
- return err
- }
- return nil
- }
-
- // startPlain writes to sent bytes and buffer the underlying ResponseWriter without gzip.
- func (w *GzipResponseWriter) startPlain() error {
- if w.code != 0 {
- w.ResponseWriter.WriteHeader(w.code)
- // Ensure that no other WriteHeader's happen
- w.code = 0
- }
- w.ignore = true
- // If Write was never called then don't call Write on the underlying ResponseWriter.
- if w.buf == nil {
- return nil
- }
- n, err := w.ResponseWriter.Write(w.buf)
- w.buf = nil
- // This should never happen (per io.Writer docs), but if the write didn't
- // accept the entire buffer but returned no specific error, we have no clue
- // what's going on, so abort just to be safe.
- if err == nil && n < len(w.buf) {
- err = io.ErrShortWrite
- }
- return err
- }
-
- // WriteHeader just saves the response code until close or GZIP effective writes.
- func (w *GzipResponseWriter) WriteHeader(code int) {
- if w.code == 0 {
- w.code = code
- }
- }
-
- // init graps a new gzip writer from the gzipWriterPool and writes the correct
- // content encoding header.
- func (w *GzipResponseWriter) init() {
- // Bytes written during ServeHTTP are redirected to this gzip writer
- // before being written to the underlying response.
- gzw := gzipWriterPools[w.index].Get().(*gzip.Writer)
- gzw.Reset(w.ResponseWriter)
- w.gw = gzw
- }
-
- // Close will close the gzip.Writer and will put it back in the gzipWriterPool.
- func (w *GzipResponseWriter) Close() error {
- if w.ignore {
- return nil
- }
-
- if w.gw == nil {
- // GZIP not triggered yet, write out regular response.
- err := w.startPlain()
- // Returns the error if any at write.
- if err != nil {
- err = fmt.Errorf("gziphandler: write to regular responseWriter at close gets error: %q", err.Error())
- }
- return err
- }
-
- err := w.gw.Close()
- gzipWriterPools[w.index].Put(w.gw)
- w.gw = nil
- return err
- }
-
- // Flush flushes the underlying *gzip.Writer and then the underlying
- // http.ResponseWriter if it is an http.Flusher. This makes GzipResponseWriter
- // an http.Flusher.
- func (w *GzipResponseWriter) Flush() {
- if w.gw == nil && !w.ignore {
- // Only flush once startGzip or startPlain has been called.
- //
- // Flush is thus a no-op until we're certain whether a plain
- // or gzipped response will be served.
- return
- }
-
- if w.gw != nil {
- w.gw.Flush()
- }
-
- if fw, ok := w.ResponseWriter.(http.Flusher); ok {
- fw.Flush()
- }
- }
-
- // Hijack implements http.Hijacker. If the underlying ResponseWriter is a
- // Hijacker, its Hijack method is returned. Otherwise an error is returned.
- func (w *GzipResponseWriter) Hijack() (net.Conn, *bufio.ReadWriter, error) {
- if hj, ok := w.ResponseWriter.(http.Hijacker); ok {
- return hj.Hijack()
- }
- return nil, nil, fmt.Errorf("http.Hijacker interface is not supported")
- }
-
- // verify Hijacker interface implementation
- var _ http.Hijacker = &GzipResponseWriter{}
-
- // MustNewGzipLevelHandler behaves just like NewGzipLevelHandler except that in
- // an error case it panics rather than returning an error.
- func MustNewGzipLevelHandler(level int) func(http.Handler) http.Handler {
- wrap, err := NewGzipLevelHandler(level)
- if err != nil {
- panic(err)
- }
- return wrap
- }
-
- // NewGzipLevelHandler returns a wrapper function (often known as middleware)
- // which can be used to wrap an HTTP handler to transparently gzip the response
- // body if the client supports it (via the Accept-Encoding header). Responses will
- // be encoded at the given gzip compression level. An error will be returned only
- // if an invalid gzip compression level is given, so if one can ensure the level
- // is valid, the returned error can be safely ignored.
- func NewGzipLevelHandler(level int) (func(http.Handler) http.Handler, error) {
- return NewGzipLevelAndMinSize(level, DefaultMinSize)
- }
-
- // NewGzipLevelAndMinSize behave as NewGzipLevelHandler except it let the caller
- // specify the minimum size before compression.
- func NewGzipLevelAndMinSize(level, minSize int) (func(http.Handler) http.Handler, error) {
- return GzipHandlerWithOpts(CompressionLevel(level), MinSize(minSize))
- }
-
- func GzipHandlerWithOpts(opts ...option) (func(http.Handler) http.Handler, error) {
- c := &config{
- level: gzip.DefaultCompression,
- minSize: DefaultMinSize,
- }
-
- for _, o := range opts {
- o(c)
- }
-
- if err := c.validate(); err != nil {
- return nil, err
- }
-
- return func(h http.Handler) http.Handler {
- index := poolIndex(c.level)
-
- return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
- w.Header().Add(vary, acceptEncoding)
- if acceptsGzip(r) {
- gw := &GzipResponseWriter{
- ResponseWriter: w,
- index: index,
- minSize: c.minSize,
- contentTypes: c.contentTypes,
- }
- defer gw.Close()
-
- if _, ok := w.(http.CloseNotifier); ok {
- gwcn := GzipResponseWriterWithCloseNotify{gw}
- h.ServeHTTP(gwcn, r)
- } else {
- h.ServeHTTP(gw, r)
- }
-
- } else {
- h.ServeHTTP(w, r)
- }
- })
- }, nil
- }
-
// parsedContentType is the parsed representation of one of the inputs to
// ContentTypes, as produced by mime.ParseMediaType.
// See https://golang.org/pkg/mime/#ParseMediaType
type parsedContentType struct {
	// mediaType is the media type, for example "text/html".
	mediaType string
	// params holds the media type parameters, for example charset.
	params map[string]string
}
-
- // equals returns whether this content type matches another content type.
- func (pct parsedContentType) equals(mediaType string, params map[string]string) bool {
- if pct.mediaType != mediaType {
- return false
- }
- // if pct has no params, don't care about other's params
- if len(pct.params) == 0 {
- return true
- }
-
- // if pct has any params, they must be identical to other's.
- if len(pct.params) != len(params) {
- return false
- }
- for k, v := range pct.params {
- if w, ok := params[k]; !ok || v != w {
- return false
- }
- }
- return true
- }
-
- // Used for functional configuration.
- type config struct {
- minSize int
- level int
- contentTypes []parsedContentType
- }
-
- func (c *config) validate() error {
- if c.level != gzip.DefaultCompression && (c.level < gzip.BestSpeed || c.level > gzip.BestCompression) {
- return fmt.Errorf("invalid compression level requested: %d", c.level)
- }
-
- if c.minSize < 0 {
- return fmt.Errorf("minimum size must be more than zero")
- }
-
- return nil
- }
-
- type option func(c *config)
-
- func MinSize(size int) option {
- return func(c *config) {
- c.minSize = size
- }
- }
-
- func CompressionLevel(level int) option {
- return func(c *config) {
- c.level = level
- }
- }
-
- // ContentTypes specifies a list of content types to compare
- // the Content-Type header to before compressing. If none
- // match, the response will be returned as-is.
- //
- // Content types are compared in a case-insensitive, whitespace-ignored
- // manner.
- //
- // A MIME type without any other directive will match a content type
- // that has the same MIME type, regardless of that content type's other
- // directives. I.e., "text/html" will match both "text/html" and
- // "text/html; charset=utf-8".
- //
- // A MIME type with any other directive will only match a content type
- // that has the same MIME type and other directives. I.e.,
- // "text/html; charset=utf-8" will only match "text/html; charset=utf-8".
- //
- // By default, responses are gzipped regardless of
- // Content-Type.
- func ContentTypes(types []string) option {
- return func(c *config) {
- c.contentTypes = []parsedContentType{}
- for _, v := range types {
- mediaType, params, err := mime.ParseMediaType(v)
- if err == nil {
- c.contentTypes = append(c.contentTypes, parsedContentType{mediaType, params})
- }
- }
- }
- }
-
- // GzipHandler wraps an HTTP handler, to transparently gzip the response body if
- // the client supports it (via the Accept-Encoding header). This will compress at
- // the default compression level.
- func GzipHandler(h http.Handler) http.Handler {
- wrapper, _ := NewGzipLevelHandler(gzip.DefaultCompression)
- return wrapper(h)
- }
-
- // acceptsGzip returns true if the given HTTP request indicates that it will
- // accept a gzipped response.
- func acceptsGzip(r *http.Request) bool {
- acceptedEncodings, _ := parseEncodings(r.Header.Get(acceptEncoding))
- return acceptedEncodings["gzip"] > 0.0
- }
-
- // returns true if we've been configured to compress the specific content type.
- func handleContentType(contentTypes []parsedContentType, ct string) bool {
- // If contentTypes is empty we handle all content types.
- if len(contentTypes) == 0 {
- return true
- }
-
- mediaType, params, err := mime.ParseMediaType(ct)
- if err != nil {
- return false
- }
-
- for _, c := range contentTypes {
- if c.equals(mediaType, params) {
- return true
- }
- }
-
- return false
- }
-
- // parseEncodings attempts to parse a list of codings, per RFC 2616, as might
- // appear in an Accept-Encoding header. It returns a map of content-codings to
- // quality values, and an error containing the errors encountered. It's probably
- // safe to ignore those, because silently ignoring errors is how the internet
- // works.
- //
- // See: http://tools.ietf.org/html/rfc2616#section-14.3.
- func parseEncodings(s string) (codings, error) {
- c := make(codings)
- var e []string
-
- for _, ss := range strings.Split(s, ",") {
- coding, qvalue, err := parseCoding(ss)
-
- if err != nil {
- e = append(e, err.Error())
- } else {
- c[coding] = qvalue
- }
- }
-
- // TODO (adammck): Use a proper multi-error struct, so the individual errors
- // can be extracted if anyone cares.
- if len(e) > 0 {
- return c, fmt.Errorf("errors while parsing encodings: %s", strings.Join(e, ", "))
- }
-
- return c, nil
- }
-
- // parseCoding parses a single conding (content-coding with an optional qvalue),
- // as might appear in an Accept-Encoding header. It attempts to forgive minor
- // formatting errors.
- func parseCoding(s string) (coding string, qvalue float64, err error) {
- for n, part := range strings.Split(s, ";") {
- part = strings.TrimSpace(part)
- qvalue = DefaultQValue
-
- if n == 0 {
- coding = strings.ToLower(part)
- } else if strings.HasPrefix(part, "q=") {
- qvalue, err = strconv.ParseFloat(strings.TrimPrefix(part, "q="), 64)
-
- if qvalue < 0.0 {
- qvalue = 0.0
- } else if qvalue > 1.0 {
- qvalue = 1.0
- }
- }
- }
-
- if coding == "" {
- err = fmt.Errorf("empty content-coding")
- }
-
- return
- }
|