You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

gzip.go 16KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532
  1. package gziphandler // import "github.com/NYTimes/gziphandler"
  2. import (
  3. "bufio"
  4. "compress/gzip"
  5. "fmt"
  6. "io"
  7. "mime"
  8. "net"
  9. "net/http"
  10. "strconv"
  11. "strings"
  12. "sync"
  13. )
  // Names of the HTTP headers this package reads or writes.
  const (
  	vary            = "Vary"
  	acceptEncoding  = "Accept-Encoding"
  	contentEncoding = "Content-Encoding"
  	contentType     = "Content-Type"
  	contentLength   = "Content-Length"
  )
  // codings maps a content-coding name to a float64 weight.
  // NOTE(review): presumably the weight is the qvalue parsed from an
  // Accept-Encoding header — confirm against parseEncodings.
  type codings map[string]float64
  const (
  	// DefaultQValue is the default qvalue to assign to an encoding if no
  	// explicit qvalue is set. This is actually kind of ambiguous in RFC 2616,
  	// so hopefully it's correct. The examples seem to indicate that it is.
  	DefaultQValue = 1.0

  	// DefaultMinSize is the default minimum response size, in bytes, before
  	// gzip compression is enabled.
  	//
  	// 1500 bytes is the MTU size for the internet since that is the largest
  	// size allowed at the network layer. If you take a file that is 1300
  	// bytes and compress it to 800 bytes, it's still transmitted in that same
  	// 1500 byte packet regardless, so you've gained nothing. That being the
  	// case, you should restrict the gzip compression to files with a size
  	// greater than a single packet; 1400 bytes (1.4KB) is a safe value.
  	DefaultMinSize = 1400
  )
  // gzipWriterPools stores a sync.Pool for each compression level for reuse of
  // gzip.Writers. Use poolIndex to convert a compression level to an index into
  // gzipWriterPools.
  //
  // The array has one slot per level from gzip.BestSpeed through
  // gzip.BestCompression, plus one extra slot for gzip.DefaultCompression.
  var gzipWriterPools [gzip.BestCompression - gzip.BestSpeed + 2]*sync.Pool
  37. func init() {
  38. for i := gzip.BestSpeed; i <= gzip.BestCompression; i++ {
  39. addLevelPool(i)
  40. }
  41. addLevelPool(gzip.DefaultCompression)
  42. }
  // poolIndex translates a gzip compression level into the slot of
  // gzipWriterPools that holds writers for that level. The level is assumed to
  // be valid; no range checking is performed here.
  func poolIndex(level int) int {
  	switch level {
  	case gzip.DefaultCompression:
  		// gzip.DefaultCompression == -1, so it cannot be mapped by simple
  		// subtraction; it gets the slot after the explicit levels.
  		return gzip.BestCompression - gzip.BestSpeed + 1
  	default:
  		return level - gzip.BestSpeed
  	}
  }
  52. func addLevelPool(level int) {
  53. gzipWriterPools[poolIndex(level)] = &sync.Pool{
  54. New: func() interface{} {
  55. // NewWriterLevel only returns error on a bad level, we are guaranteeing
  56. // that this will be a valid level so it is okay to ignore the returned
  57. // error.
  58. w, _ := gzip.NewWriterLevel(nil, level)
  59. return w
  60. },
  61. }
  62. }
  // GzipResponseWriter provides an http.ResponseWriter interface, which gzips
  // bytes before writing them to the underlying response. This doesn't close the
  // writers, so don't forget to do that.
  // It can be configured to skip responses smaller than minSize.
  type GzipResponseWriter struct {
  	http.ResponseWriter
  	index int // Index for gzipWriterPools.
  	gw    *gzip.Writer

  	code int // Saves the WriteHeader value.

  	minSize int    // Specifies the minimum response size to gzip. If the response length is bigger than this value, it is compressed.
  	buf     []byte // Holds the first part of the write before reaching the minSize or the end of the write.
  	ignore  bool   // If true, then we immediately pass writes through to the underlying ResponseWriter.

  	contentTypes []parsedContentType // Only compress if the response is one of these content-types. All are accepted if empty.
  }
  // GzipResponseWriterWithCloseNotify embeds a *GzipResponseWriter so that a
  // CloseNotify method can be attached to it without adding that method to
  // GzipResponseWriter itself.
  type GzipResponseWriterWithCloseNotify struct {
  	*GzipResponseWriter
  }
  80. func (w GzipResponseWriterWithCloseNotify) CloseNotify() <-chan bool {
  81. return w.ResponseWriter.(http.CloseNotifier).CloseNotify()
  82. }
  // Write accepts response body bytes and routes them to one of three places:
  // the gzip writer (once compression has started), the underlying
  // ResponseWriter (once compression has been ruled out), or an internal buffer
  // (while that decision is still pending).
  //
  // While buffering, and after a successful switch to either mode, it reports
  // len(b) bytes written. If starting the gzip or plain writer fails, it
  // returns 0 and that error.
  func (w *GzipResponseWriter) Write(b []byte) (int, error) {
  	// The gzip writer is already initialized: use it.
  	if w.gw != nil {
  		return w.gw.Write(b)
  	}

  	// If we have already decided not to use GZIP, immediately pass through.
  	if w.ignore {
  		return w.ResponseWriter.Write(b)
  	}

  	// Save the write into a buffer for later use in the gzip writer (if the
  	// content is long enough) or at close with the regular ResponseWriter.
  	// On the first write, w.buf changes from nil to a valid slice.
  	w.buf = append(w.buf, b...)

  	var (
  		// The Atoi error is ignored, so a missing or non-numeric
  		// Content-Length yields cl == 0, which is treated below as "unknown".
  		cl, _ = strconv.Atoi(w.Header().Get(contentLength))
  		ct    = w.Header().Get(contentType)
  		ce    = w.Header().Get(contentEncoding)
  	)
  	// Only continue if the handler didn't already choose an encoding, and
  	// neither the declared length nor the declared type rules out compression.
  	if ce == "" && (cl == 0 || cl >= w.minSize) && (ct == "" || handleContentType(w.contentTypes, ct)) {
  		// If the current buffer is less than minSize and a Content-Length
  		// isn't set, then wait until we have more data.
  		if len(w.buf) < w.minSize && cl == 0 {
  			return len(b), nil
  		}
  		// If the Content-Length is at least minSize or the current buffer is
  		// at least minSize, then continue.
  		if cl >= w.minSize || len(w.buf) >= w.minSize {
  			// If a Content-Type wasn't specified, infer it from the current
  			// buffer and record it on the response.
  			if ct == "" {
  				ct = http.DetectContentType(w.buf)
  				w.Header().Set(contentType, ct)
  			}
  			// If the Content-Type is acceptable to GZIP, initialize the GZIP
  			// writer. The check is repeated because ct may just have been
  			// inferred above.
  			if handleContentType(w.contentTypes, ct) {
  				if err := w.startGzip(); err != nil {
  					return 0, err
  				}
  				return len(b), nil
  			}
  		}
  	}
  	// If we got here, we should not GZIP this response.
  	if err := w.startPlain(); err != nil {
  		return 0, err
  	}
  	return len(b), nil
  }
  129. // startGzip initializes a GZIP writer and writes the buffer.
  130. func (w *GzipResponseWriter) startGzip() error {
  131. // Set the GZIP header.
  132. w.Header().Set(contentEncoding, "gzip")
  133. // if the Content-Length is already set, then calls to Write on gzip
  134. // will fail to set the Content-Length header since its already set
  135. // See: https://github.com/golang/go/issues/14975.
  136. w.Header().Del(contentLength)
  137. // Write the header to gzip response.
  138. if w.code != 0 {
  139. w.ResponseWriter.WriteHeader(w.code)
  140. // Ensure that no other WriteHeader's happen
  141. w.code = 0
  142. }
  143. // Initialize and flush the buffer into the gzip response if there are any bytes.
  144. // If there aren't any, we shouldn't initialize it yet because on Close it will
  145. // write the gzip header even if nothing was ever written.
  146. if len(w.buf) > 0 {
  147. // Initialize the GZIP response.
  148. w.init()
  149. n, err := w.gw.Write(w.buf)
  150. // This should never happen (per io.Writer docs), but if the write didn't
  151. // accept the entire buffer but returned no specific error, we have no clue
  152. // what's going on, so abort just to be safe.
  153. if err == nil && n < len(w.buf) {
  154. err = io.ErrShortWrite
  155. }
  156. return err
  157. }
  158. return nil
  159. }
  160. // startPlain writes to sent bytes and buffer the underlying ResponseWriter without gzip.
  161. func (w *GzipResponseWriter) startPlain() error {
  162. if w.code != 0 {
  163. w.ResponseWriter.WriteHeader(w.code)
  164. // Ensure that no other WriteHeader's happen
  165. w.code = 0
  166. }
  167. w.ignore = true
  168. // If Write was never called then don't call Write on the underlying ResponseWriter.
  169. if w.buf == nil {
  170. return nil
  171. }
  172. n, err := w.ResponseWriter.Write(w.buf)
  173. w.buf = nil
  174. // This should never happen (per io.Writer docs), but if the write didn't
  175. // accept the entire buffer but returned no specific error, we have no clue
  176. // what's going on, so abort just to be safe.
  177. if err == nil && n < len(w.buf) {
  178. err = io.ErrShortWrite
  179. }
  180. return err
  181. }
  182. // WriteHeader just saves the response code until close or GZIP effective writes.
  183. func (w *GzipResponseWriter) WriteHeader(code int) {
  184. if w.code == 0 {
  185. w.code = code
  186. }
  187. }
  188. // init graps a new gzip writer from the gzipWriterPool and writes the correct
  189. // content encoding header.
  190. func (w *GzipResponseWriter) init() {
  191. // Bytes written during ServeHTTP are redirected to this gzip writer
  192. // before being written to the underlying response.
  193. gzw := gzipWriterPools[w.index].Get().(*gzip.Writer)
  194. gzw.Reset(w.ResponseWriter)
  195. w.gw = gzw
  196. }
  197. // Close will close the gzip.Writer and will put it back in the gzipWriterPool.
  198. func (w *GzipResponseWriter) Close() error {
  199. if w.ignore {
  200. return nil
  201. }
  202. if w.gw == nil {
  203. // GZIP not triggered yet, write out regular response.
  204. err := w.startPlain()
  205. // Returns the error if any at write.
  206. if err != nil {
  207. err = fmt.Errorf("gziphandler: write to regular responseWriter at close gets error: %q", err.Error())
  208. }
  209. return err
  210. }
  211. err := w.gw.Close()
  212. gzipWriterPools[w.index].Put(w.gw)
  213. w.gw = nil
  214. return err
  215. }
  216. // Flush flushes the underlying *gzip.Writer and then the underlying
  217. // http.ResponseWriter if it is an http.Flusher. This makes GzipResponseWriter
  218. // an http.Flusher.
  219. func (w *GzipResponseWriter) Flush() {
  220. if w.gw == nil && !w.ignore {
  221. // Only flush once startGzip or startPlain has been called.
  222. //
  223. // Flush is thus a no-op until we're certain whether a plain
  224. // or gzipped response will be served.
  225. return
  226. }
  227. if w.gw != nil {
  228. w.gw.Flush()
  229. }
  230. if fw, ok := w.ResponseWriter.(http.Flusher); ok {
  231. fw.Flush()
  232. }
  233. }
  234. // Hijack implements http.Hijacker. If the underlying ResponseWriter is a
  235. // Hijacker, its Hijack method is returned. Otherwise an error is returned.
  236. func (w *GzipResponseWriter) Hijack() (net.Conn, *bufio.ReadWriter, error) {
  237. if hj, ok := w.ResponseWriter.(http.Hijacker); ok {
  238. return hj.Hijack()
  239. }
  240. return nil, nil, fmt.Errorf("http.Hijacker interface is not supported")
  241. }
  242. // verify Hijacker interface implementation
  243. var _ http.Hijacker = &GzipResponseWriter{}
  244. // MustNewGzipLevelHandler behaves just like NewGzipLevelHandler except that in
  245. // an error case it panics rather than returning an error.
  246. func MustNewGzipLevelHandler(level int) func(http.Handler) http.Handler {
  247. wrap, err := NewGzipLevelHandler(level)
  248. if err != nil {
  249. panic(err)
  250. }
  251. return wrap
  252. }
  253. // NewGzipLevelHandler returns a wrapper function (often known as middleware)
  254. // which can be used to wrap an HTTP handler to transparently gzip the response
  255. // body if the client supports it (via the Accept-Encoding header). Responses will
  256. // be encoded at the given gzip compression level. An error will be returned only
  257. // if an invalid gzip compression level is given, so if one can ensure the level
  258. // is valid, the returned error can be safely ignored.
  259. func NewGzipLevelHandler(level int) (func(http.Handler) http.Handler, error) {
  260. return NewGzipLevelAndMinSize(level, DefaultMinSize)
  261. }
  262. // NewGzipLevelAndMinSize behave as NewGzipLevelHandler except it let the caller
  263. // specify the minimum size before compression.
  264. func NewGzipLevelAndMinSize(level, minSize int) (func(http.Handler) http.Handler, error) {
  265. return GzipHandlerWithOpts(CompressionLevel(level), MinSize(minSize))
  266. }
  267. func GzipHandlerWithOpts(opts ...option) (func(http.Handler) http.Handler, error) {
  268. c := &config{
  269. level: gzip.DefaultCompression,
  270. minSize: DefaultMinSize,
  271. }
  272. for _, o := range opts {
  273. o(c)
  274. }
  275. if err := c.validate(); err != nil {
  276. return nil, err
  277. }
  278. return func(h http.Handler) http.Handler {
  279. index := poolIndex(c.level)
  280. return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
  281. w.Header().Add(vary, acceptEncoding)
  282. if acceptsGzip(r) {
  283. gw := &GzipResponseWriter{
  284. ResponseWriter: w,
  285. index: index,
  286. minSize: c.minSize,
  287. contentTypes: c.contentTypes,
  288. }
  289. defer gw.Close()
  290. if _, ok := w.(http.CloseNotifier); ok {
  291. gwcn := GzipResponseWriterWithCloseNotify{gw}
  292. h.ServeHTTP(gwcn, r)
  293. } else {
  294. h.ServeHTTP(gw, r)
  295. }
  296. } else {
  297. h.ServeHTTP(w, r)
  298. }
  299. })
  300. }, nil
  301. }
  // parsedContentType is the parsed representation of one of the inputs to
  // ContentTypes: a media type together with its parameters.
  // See https://golang.org/pkg/mime/#ParseMediaType
  type parsedContentType struct {
  	mediaType string
  	params    map[string]string
  }
  308. // equals returns whether this content type matches another content type.
  309. func (pct parsedContentType) equals(mediaType string, params map[string]string) bool {
  310. if pct.mediaType != mediaType {
  311. return false
  312. }
  313. // if pct has no params, don't care about other's params
  314. if len(pct.params) == 0 {
  315. return true
  316. }
  317. // if pct has any params, they must be identical to other's.
  318. if len(pct.params) != len(params) {
  319. return false
  320. }
  321. for k, v := range pct.params {
  322. if w, ok := params[k]; !ok || v != w {
  323. return false
  324. }
  325. }
  326. return true
  327. }
  // config collects the settings that functional options can change.
  type config struct {
  	minSize      int                 // Minimum response size before compressing.
  	level        int                 // gzip compression level.
  	contentTypes []parsedContentType // Content types to compress.
  }
  334. func (c *config) validate() error {
  335. if c.level != gzip.DefaultCompression && (c.level < gzip.BestSpeed || c.level > gzip.BestCompression) {
  336. return fmt.Errorf("invalid compression level requested: %d", c.level)
  337. }
  338. if c.minSize < 0 {
  339. return fmt.Errorf("minimum size must be more than zero")
  340. }
  341. return nil
  342. }
  // option is a function that modifies a config in place.
  type option func(c *config)
  344. func MinSize(size int) option {
  345. return func(c *config) {
  346. c.minSize = size
  347. }
  348. }
  349. func CompressionLevel(level int) option {
  350. return func(c *config) {
  351. c.level = level
  352. }
  353. }
  354. // ContentTypes specifies a list of content types to compare
  355. // the Content-Type header to before compressing. If none
  356. // match, the response will be returned as-is.
  357. //
  358. // Content types are compared in a case-insensitive, whitespace-ignored
  359. // manner.
  360. //
  361. // A MIME type without any other directive will match a content type
  362. // that has the same MIME type, regardless of that content type's other
  363. // directives. I.e., "text/html" will match both "text/html" and
  364. // "text/html; charset=utf-8".
  365. //
  366. // A MIME type with any other directive will only match a content type
  367. // that has the same MIME type and other directives. I.e.,
  368. // "text/html; charset=utf-8" will only match "text/html; charset=utf-8".
  369. //
  370. // By default, responses are gzipped regardless of
  371. // Content-Type.
  372. func ContentTypes(types []string) option {
  373. return func(c *config) {
  374. c.contentTypes = []parsedContentType{}
  375. for _, v := range types {
  376. mediaType, params, err := mime.ParseMediaType(v)
  377. if err == nil {
  378. c.contentTypes = append(c.contentTypes, parsedContentType{mediaType, params})
  379. }
  380. }
  381. }
  382. }
  383. // GzipHandler wraps an HTTP handler, to transparently gzip the response body if
  384. // the client supports it (via the Accept-Encoding header). This will compress at
  385. // the default compression level.
  386. func GzipHandler(h http.Handler) http.Handler {
  387. wrapper, _ := NewGzipLevelHandler(gzip.DefaultCompression)
  388. return wrapper(h)
  389. }
  390. // acceptsGzip returns true if the given HTTP request indicates that it will
  391. // accept a gzipped response.
  392. func acceptsGzip(r *http.Request) bool {
  393. acceptedEncodings, _ := parseEncodings(r.Header.Get(acceptEncoding))
  394. return acceptedEncodings["gzip"] > 0.0
  395. }
  396. // returns true if we've been configured to compress the specific content type.
  397. func handleContentType(contentTypes []parsedContentType, ct string) bool {
  398. // If contentTypes is empty we handle all content types.
  399. if len(contentTypes) == 0 {
  400. return true
  401. }
  402. mediaType, params, err := mime.ParseMediaType(ct)
  403. if err != nil {
  404. return false
  405. }
  406. for _, c := range contentTypes {
  407. if c.equals(mediaType, params) {
  408. return true
  409. }
  410. }
  411. return false
  412. }
  413. // parseEncodings attempts to parse a list of codings, per RFC 2616, as might
  414. // appear in an Accept-Encoding header. It returns a map of content-codings to
  415. // quality values, and an error containing the errors encountered. It's probably
  416. // safe to ignore those, because silently ignoring errors is how the internet
  417. // works.
  418. //
  419. // See: http://tools.ietf.org/html/rfc2616#section-14.3.
  420. func parseEncodings(s string) (codings, error) {
  421. c := make(codings)
  422. var e []string
  423. for _, ss := range strings.Split(s, ",") {
  424. coding, qvalue, err := parseCoding(ss)
  425. if err != nil {
  426. e = append(e, err.Error())
  427. } else {
  428. c[coding] = qvalue
  429. }
  430. }
  431. // TODO (adammck): Use a proper multi-error struct, so the individual errors
  432. // can be extracted if anyone cares.
  433. if len(e) > 0 {
  434. return c, fmt.Errorf("errors while parsing encodings: %s", strings.Join(e, ", "))
  435. }
  436. return c, nil
  437. }
  438. // parseCoding parses a single conding (content-coding with an optional qvalue),
  439. // as might appear in an Accept-Encoding header. It attempts to forgive minor
  440. // formatting errors.
  441. func parseCoding(s string) (coding string, qvalue float64, err error) {
  442. for n, part := range strings.Split(s, ";") {
  443. part = strings.TrimSpace(part)
  444. qvalue = DefaultQValue
  445. if n == 0 {
  446. coding = strings.ToLower(part)
  447. } else if strings.HasPrefix(part, "q=") {
  448. qvalue, err = strconv.ParseFloat(strings.TrimPrefix(part, "q="), 64)
  449. if qvalue < 0.0 {
  450. qvalue = 0.0
  451. } else if qvalue > 1.0 {
  452. qvalue = 1.0
  453. }
  454. }
  455. }
  456. if coding == "" {
  457. err = fmt.Errorf("empty content-coding")
  458. }
  459. return
  460. }