summary refs log tree commit diff stats
path: root/modules/typesniffer
diff options
context:
space:
mode:
author wxiaoguang <wxiaoguang@gmail.com> 2023-03-07 20:11:24 +0800
committer GitHub <noreply@github.com> 2023-03-07 20:11:24 +0800
commit 4c59c8c7682da31410decba3bd868fde5116e073 (patch)
tree 3503efe79ceb5a266edf86ed23ce5ab3ba07975e /modules/typesniffer
parent c84238800bb743181582f043ece9b44fef233c95 (diff)
download gitea-4c59c8c7682da31410decba3bd868fde5116e073.tar.gz
gitea-4c59c8c7682da31410decba3bd868fde5116e073.zip
Fix various ImageDiff/SVG bugs (#23312)
Replace #23310, Close #19733

And fix various UI problems, including regressions from #22959 #22950 and more.

## SVG Detection

The old regexp may mismatch non-SVG files. This PR adds new tests for those cases.

## UI Changes

### Before

![image](https://user-images.githubusercontent.com/2114189/222967716-f6ad8721-f46a-4a3f-9eb0-a89e488d3436.png)
![image](https://user-images.githubusercontent.com/2114189/222967780-8af8981a-e69d-4304-9dc4-0235582fa4f4.png)

### After

![image](https://user-images.githubusercontent.com/2114189/222967575-c21c23d4-0200-4e09-aac3-57895e853000.png)
![image](https://user-images.githubusercontent.com/2114189/222967585-8b8da262-bc96-441a-9851-8d3845f2659d.png)
![image](https://user-images.githubusercontent.com/2114189/222967595-58d9bea5-6df4-41fa-bf8a-86704117959d.png)
![image](https://user-images.githubusercontent.com/2114189/222967608-38757c1a-b8bd-4ebf-b7a8-3b30edb7f303.png)
![image](https://user-images.githubusercontent.com/2114189/222967623-9849a339-6fae-4484-8fa5-939e2fdacbf5.png)
![image](https://user-images.githubusercontent.com/2114189/222967633-4383d7dd-62ba-47a3-8c10-86f7ca7757ae.png)

---------

Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
Diffstat (limited to 'modules/typesniffer')
-rw-r--r-- modules/typesniffer/typesniffer.go 21
-rw-r--r-- modules/typesniffer/typesniffer_test.go 25
2 files changed, 39 insertions, 7 deletions
diff --git a/modules/typesniffer/typesniffer.go b/modules/typesniffer/typesniffer.go
index c9fef953ce..5b215496b8 100644
--- a/modules/typesniffer/typesniffer.go
+++ b/modules/typesniffer/typesniffer.go
@@ -4,6 +4,7 @@
package typesniffer
import (
+ "bytes"
"fmt"
"io"
"net/http"
@@ -24,8 +25,9 @@ const (
)
var (
- svgTagRegex = regexp.MustCompile(`(?si)\A\s*(?:(<!--.*?-->|<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg[\s>\/]`)
- svgTagInXMLRegex = regexp.MustCompile(`(?si)\A<\?xml\b.*?\?>\s*(?:(<!--.*?-->|<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg[\s>\/]`)
+ svgComment = regexp.MustCompile(`(?s)<!--.*?-->`)
+ svgTagRegex = regexp.MustCompile(`(?si)\A\s*(?:(<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg\b`)
+ svgTagInXMLRegex = regexp.MustCompile(`(?si)\A<\?xml\b.*?\?>\s*(?:(<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg\b`)
)
// SniffedType contains information about a blobs type.
@@ -91,10 +93,17 @@ func DetectContentType(data []byte) SniffedType {
data = data[:sniffLen]
}
- if (strings.Contains(ct, "text/plain") || strings.Contains(ct, "text/html")) && svgTagRegex.Match(data) ||
- strings.Contains(ct, "text/xml") && svgTagInXMLRegex.Match(data) {
- // SVG is unsupported. https://github.com/golang/go/issues/15888
- ct = SvgMimeType
+ // SVG is unsupported by http.DetectContentType, https://github.com/golang/go/issues/15888
+
+ detectByHTML := strings.Contains(ct, "text/plain") || strings.Contains(ct, "text/html")
+ detectByXML := strings.Contains(ct, "text/xml")
+ if detectByHTML || detectByXML {
+ dataProcessed := svgComment.ReplaceAll(data, nil)
+ dataProcessed = bytes.TrimSpace(dataProcessed)
+ if detectByHTML && svgTagRegex.Match(dataProcessed) ||
+ detectByXML && svgTagInXMLRegex.Match(dataProcessed) {
+ ct = SvgMimeType
+ }
}
return SniffedType{ct}
diff --git a/modules/typesniffer/typesniffer_test.go b/modules/typesniffer/typesniffer_test.go
index dbce94fc3b..2bafdffd14 100644
--- a/modules/typesniffer/typesniffer_test.go
+++ b/modules/typesniffer/typesniffer_test.go
@@ -28,7 +28,6 @@ func TestIsSvgImage(t *testing.T) {
assert.True(t, DetectContentType([]byte("<svg></svg>")).IsSvgImage())
assert.True(t, DetectContentType([]byte(" <svg></svg>")).IsSvgImage())
assert.True(t, DetectContentType([]byte(`<svg width="100"></svg>`)).IsSvgImage())
- assert.True(t, DetectContentType([]byte("<svg/>")).IsSvgImage())
assert.True(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?><svg></svg>`)).IsSvgImage())
assert.True(t, DetectContentType([]byte(`<!-- Comment -->
<svg></svg>`)).IsSvgImage())
@@ -57,6 +56,10 @@ func TestIsSvgImage(t *testing.T) {
<!-- Multline
Comment -->
<svg></svg>`)).IsSvgImage())
+
+ // the DetectContentType should work for incomplete data, because only beginning bytes are used for detection
+ assert.True(t, DetectContentType([]byte(`<svg>....`)).IsSvgImage())
+
assert.False(t, DetectContentType([]byte{}).IsSvgImage())
assert.False(t, DetectContentType([]byte("svg")).IsSvgImage())
assert.False(t, DetectContentType([]byte("<svgfoo></svgfoo>")).IsSvgImage())
@@ -68,6 +71,26 @@ func TestIsSvgImage(t *testing.T) {
assert.False(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?>
<!-- <svg></svg> inside comment -->
<foo></foo>`)).IsSvgImage())
+
+ assert.False(t, DetectContentType([]byte(`
+<!-- comment1 -->
+<div>
+ <!-- comment2 -->
+ <svg></svg>
+</div>
+`)).IsSvgImage())
+
+ assert.False(t, DetectContentType([]byte(`
+<!-- comment1
+-->
+<div>
+ <!-- comment2
+-->
+ <svg></svg>
+</div>
+`)).IsSvgImage())
+ assert.False(t, DetectContentType([]byte(`<html><body><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg></svg></body></html>`)).IsSvgImage())
+ assert.False(t, DetectContentType([]byte(`<html><body><?xml version="1.0" encoding="UTF-8"?><svg></svg></body></html>`)).IsSvgImage())
}
func TestIsPDF(t *testing.T) {