diff options
author | KN4CK3R <KN4CK3R@users.noreply.github.com> | 2021-06-05 14:32:19 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-06-05 15:32:19 +0300 |
commit | 8e262104c25d1c2578f683109e1b373aade3a17c (patch) | |
tree | 04b8fda8516498b74350bb695f230e0e1089a48d /modules/typesniffer | |
parent | 7979c3654eb91adce4fd9717d9ff891496a56ff3 (diff) | |
download | gitea-8e262104c25d1c2578f683109e1b373aade3a17c.tar.gz gitea-8e262104c25d1c2578f683109e1b373aade3a17c.zip |
Add Image Diff for SVG files (#14867)
* Added type sniffer.
* Switched content detection from base to typesniffer.
* Added GuessContentType to Blob.
* Moved image info logic to client.
Added support for SVG images in diff.
* Restore old blocked svg behaviour.
* Added missing image formats.
* Execute image diff only when container is visible.
* add margin to spinner
* improve BIN tag on image diffs
* Default to render view.
* Show image diff on incomplete diff.
Co-authored-by: silverwind <me@silverwind.io>
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
Co-authored-by: Lauris BH <lauris@nix.lv>
Diffstat (limited to 'modules/typesniffer')
-rw-r--r-- | modules/typesniffer/typesniffer.go | 96 | ||||
-rw-r--r-- | modules/typesniffer/typesniffer_test.go | 97 |
2 files changed, 193 insertions, 0 deletions
diff --git a/modules/typesniffer/typesniffer.go b/modules/typesniffer/typesniffer.go new file mode 100644 index 0000000000..7c89f66699 --- /dev/null +++ b/modules/typesniffer/typesniffer.go @@ -0,0 +1,96 @@ +// Copyright 2021 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package typesniffer + +import ( + "fmt" + "io" + "net/http" + "regexp" + "strings" +) + +// Use at most this many bytes to determine Content Type. +const sniffLen = 1024 + +// SvgMimeType MIME type of SVG images. +const SvgMimeType = "image/svg+xml" + +var svgTagRegex = regexp.MustCompile(`(?si)\A\s*(?:(<!--.*?-->|<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg[\s>\/]`) +var svgTagInXMLRegex = regexp.MustCompile(`(?si)\A<\?xml\b.*?\?>\s*(?:(<!--.*?-->|<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg[\s>\/]`) + +// SniffedType contains information about a blob's type. +type SniffedType struct { + contentType string +} + +// IsText detects if content format is plain text. +func (ct SniffedType) IsText() bool { + return strings.Contains(ct.contentType, "text/") +} + +// IsImage detects if data is an image format +func (ct SniffedType) IsImage() bool { + return strings.Contains(ct.contentType, "image/") +} + +// IsSvgImage detects if data is an SVG image format +func (ct SniffedType) IsSvgImage() bool { + return strings.Contains(ct.contentType, SvgMimeType) +} + +// IsPDF detects if data is a PDF format +func (ct SniffedType) IsPDF() bool { + return strings.Contains(ct.contentType, "application/pdf") +} + +// IsVideo detects if data is a video format +func (ct SniffedType) IsVideo() bool { + return strings.Contains(ct.contentType, "video/") +} + +// IsAudio detects if data is an audio format +func (ct SniffedType) IsAudio() bool { + return strings.Contains(ct.contentType, "audio/") +} + +// IsRepresentableAsText returns true if file content can be represented as +// plain text or is empty. 
+func (ct SniffedType) IsRepresentableAsText() bool { + return ct.IsText() || ct.IsSvgImage() +} + +// DetectContentType extends http.DetectContentType with more content types. Defaults to text/unknown if input is empty. Only the first sniffLen bytes are matched against the SVG patterns. +func DetectContentType(data []byte) SniffedType { + if len(data) == 0 { + return SniffedType{"text/unknown"} + } + + ct := http.DetectContentType(data) + + if len(data) > sniffLen { + data = data[:sniffLen] + } + + if (strings.Contains(ct, "text/plain") || strings.Contains(ct, "text/html")) && svgTagRegex.Match(data) || + strings.Contains(ct, "text/xml") && svgTagInXMLRegex.Match(data) { + // SVG is not detected by http.DetectContentType. https://github.com/golang/go/issues/15888 + ct = SvgMimeType + } + + return SniffedType{ct} +} + +// DetectContentTypeFromReader guesses the content type contained in the reader. It inspects only what a single Read call returns (at most sniffLen bytes). +func DetectContentTypeFromReader(r io.Reader) (SniffedType, error) { + buf := make([]byte, sniffLen) + n, err := r.Read(buf) + if err != nil && err != io.EOF { + return SniffedType{}, fmt.Errorf("DetectContentTypeFromReader io error: %w", err) + } + buf = buf[:n] + + return DetectContentType(buf), nil +} diff --git a/modules/typesniffer/typesniffer_test.go b/modules/typesniffer/typesniffer_test.go new file mode 100644 index 0000000000..a3b47c4598 --- /dev/null +++ b/modules/typesniffer/typesniffer_test.go @@ -0,0 +1,97 @@ +// Copyright 2021 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package typesniffer + +import ( + "bytes" + "encoding/base64" + "strings" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestDetectContentTypeLongerThanSniffLen(t *testing.T) { + // Pre-condition: Shorter than sniffLen detects SVG. + assert.Equal(t, "image/svg+xml", DetectContentType([]byte(`<!-- Comment --><svg></svg>`)).contentType) + // Longer than sniffLen detects something else. 
+ assert.NotEqual(t, "image/svg+xml", DetectContentType([]byte(`<!-- `+strings.Repeat("x", sniffLen)+` --><svg></svg>`)).contentType) +} + +func TestIsTextFile(t *testing.T) { + assert.True(t, DetectContentType([]byte{}).IsText()) + assert.True(t, DetectContentType([]byte("lorem ipsum")).IsText()) +} + +func TestIsSvgImage(t *testing.T) { + assert.True(t, DetectContentType([]byte("<svg></svg>")).IsSvgImage()) + assert.True(t, DetectContentType([]byte(" <svg></svg>")).IsSvgImage()) + assert.True(t, DetectContentType([]byte(`<svg width="100"></svg>`)).IsSvgImage()) + assert.True(t, DetectContentType([]byte("<svg/>")).IsSvgImage()) + assert.True(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?><svg></svg>`)).IsSvgImage()) + assert.True(t, DetectContentType([]byte(`<!-- Comment --> + <svg></svg>`)).IsSvgImage()) + assert.True(t, DetectContentType([]byte(`<!-- Multiple --> + <!-- Comments --> + <svg></svg>`)).IsSvgImage()) + assert.True(t, DetectContentType([]byte(`<!-- Multiline + Comment --> + <svg></svg>`)).IsSvgImage()) + assert.True(t, DetectContentType([]byte(`<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1 Basic//EN" + "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11-basic.dtd"> + <svg></svg>`)).IsSvgImage()) + assert.True(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?> + <!-- Comment --> + <svg></svg>`)).IsSvgImage()) + assert.True(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?> + <!-- Multiple --> + <!-- Comments --> + <svg></svg>`)).IsSvgImage()) + assert.True(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?> + <!-- Multline + Comment --> + <svg></svg>`)).IsSvgImage()) + assert.True(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?> + <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"> + <!-- Multline + Comment --> + <svg></svg>`)).IsSvgImage()) + assert.False(t, DetectContentType([]byte{}).IsSvgImage()) + assert.False(t, 
DetectContentType([]byte("svg")).IsSvgImage()) + assert.False(t, DetectContentType([]byte("<svgfoo></svgfoo>")).IsSvgImage()) + assert.False(t, DetectContentType([]byte("text<svg></svg>")).IsSvgImage()) + assert.False(t, DetectContentType([]byte("<html><body><svg></svg></body></html>")).IsSvgImage()) + assert.False(t, DetectContentType([]byte(`<script>"<svg></svg>"</script>`)).IsSvgImage()) + assert.False(t, DetectContentType([]byte(`<!-- <svg></svg> inside comment --> + <foo></foo>`)).IsSvgImage()) + assert.False(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?> + <!-- <svg></svg> inside comment --> + <foo></foo>`)).IsSvgImage()) +} + +func TestIsPDF(t *testing.T) { + pdf, _ := base64.StdEncoding.DecodeString("JVBERi0xLjYKJcOkw7zDtsOfCjIgMCBvYmoKPDwvTGVuZ3RoIDMgMCBSL0ZpbHRlci9GbGF0ZURlY29kZT4+CnN0cmVhbQp4nF3NPwsCMQwF8D2f4s2CNYk1baF0EHRwOwg4iJt/NsFb/PpevUE4Mjwe") + assert.True(t, DetectContentType(pdf).IsPDF()) + assert.False(t, DetectContentType([]byte("plain text")).IsPDF()) +} + +func TestIsVideo(t *testing.T) { + mp4, _ := base64.StdEncoding.DecodeString("AAAAGGZ0eXBtcDQyAAAAAGlzb21tcDQyAAEI721vb3YAAABsbXZoZAAAAADaBlwX2gZcFwAAA+gA") + assert.True(t, DetectContentType(mp4).IsVideo()) + assert.False(t, DetectContentType([]byte("plain text")).IsVideo()) +} + +func TestIsAudio(t *testing.T) { + mp3, _ := base64.StdEncoding.DecodeString("SUQzBAAAAAABAFRYWFgAAAASAAADbWFqb3JfYnJhbmQAbXA0MgBUWFhYAAAAEQAAA21pbm9yX3Zl") + assert.True(t, DetectContentType(mp3).IsAudio()) + assert.False(t, DetectContentType([]byte("plain text")).IsAudio()) +} + +func TestDetectContentTypeFromReader(t *testing.T) { + mp3, _ := base64.StdEncoding.DecodeString("SUQzBAAAAAABAFRYWFgAAAASAAADbWFqb3JfYnJhbmQAbXA0MgBUWFhYAAAAEQAAA21pbm9yX3Zl") + st, err := DetectContentTypeFromReader(bytes.NewReader(mp3)) + assert.NoError(t, err) + assert.True(t, st.IsAudio()) +} |