summaryrefslogtreecommitdiffstats
path: root/modules
diff options
context:
space:
mode:
authorGiteabot <teabot@gitea.io>2023-03-08 03:03:05 -0500
committerGitHub <noreply@github.com>2023-03-08 16:03:05 +0800
commitcf80f829b49b35fd91854798f9ead913145782f1 (patch)
treeb77d5eedd32a9dae77cdfa2a8476c2d495f971b0 /modules
parented25e094abfc93e83c6fecb5d82ce64d0e220717 (diff)
downloadgitea-cf80f829b49b35fd91854798f9ead913145782f1.tar.gz
gitea-cf80f829b49b35fd91854798f9ead913145782f1.zip
Do not recognize text files as audio (#23355) (#23368)
Backport #23355 Close #17108 This PR uses a trick (removing the ID3 tag) to detect the content again to see whether the content is a text type. Co-authored-by: wxiaoguang <wxiaoguang@gmail.com> Co-authored-by: delvh <dev.lh@web.de> Co-authored-by: techknowlogick <techknowlogick@gitea.io> Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
Diffstat (limited to 'modules')
-rw-r--r--modules/typesniffer/typesniffer.go10
-rw-r--r--modules/typesniffer/typesniffer_test.go4
2 files changed, 14 insertions, 0 deletions
diff --git a/modules/typesniffer/typesniffer.go b/modules/typesniffer/typesniffer.go
index 5b215496b8..7887fd42b7 100644
--- a/modules/typesniffer/typesniffer.go
+++ b/modules/typesniffer/typesniffer.go
@@ -106,6 +106,16 @@ func DetectContentType(data []byte) SniffedType {
}
}
+ if strings.HasPrefix(ct, "audio/") && bytes.HasPrefix(data, []byte("ID3")) {
+ // The MP3 detection is quite inaccurate, any content with "ID3" prefix will result in "audio/mpeg".
+ // So remove the "ID3" prefix and detect again, if result is text, then it must be text content.
+ // This works especially because audio files contain many unprintable/invalid characters like `0x00`
+ ct2 := http.DetectContentType(data[3:])
+ if strings.HasPrefix(ct2, "text/") {
+ ct = ct2
+ }
+ }
+
return SniffedType{ct}
}
diff --git a/modules/typesniffer/typesniffer_test.go b/modules/typesniffer/typesniffer_test.go
index 2bafdffd14..6c6da34aa0 100644
--- a/modules/typesniffer/typesniffer_test.go
+++ b/modules/typesniffer/typesniffer_test.go
@@ -109,6 +109,10 @@ func TestIsAudio(t *testing.T) {
mp3, _ := base64.StdEncoding.DecodeString("SUQzBAAAAAABAFRYWFgAAAASAAADbWFqb3JfYnJhbmQAbXA0MgBUWFhYAAAAEQAAA21pbm9yX3Zl")
assert.True(t, DetectContentType(mp3).IsAudio())
assert.False(t, DetectContentType([]byte("plain text")).IsAudio())
+
+ assert.True(t, DetectContentType([]byte("ID3Toy\000")).IsAudio())
+ assert.True(t, DetectContentType([]byte("ID3Toy\n====\t* hi 🌞, ...")).IsText()) // test ID3 tag for plain text
+ assert.True(t, DetectContentType([]byte("ID3Toy\n====\t* hi 🌞, ..."+"🌛"[0:2])).IsText()) // test ID3 tag with incomplete UTF8 char
}
func TestDetectContentTypeFromReader(t *testing.T) {