aboutsummaryrefslogtreecommitdiffstats
path: root/modules/typesniffer/typesniffer.go
diff options
context:
space:
mode:
Diffstat (limited to 'modules/typesniffer/typesniffer.go')
-rw-r--r--modules/typesniffer/typesniffer.go65
1 files changed, 29 insertions, 36 deletions
diff --git a/modules/typesniffer/typesniffer.go b/modules/typesniffer/typesniffer.go
index 8cb3d278ce..2e8d9c4a1e 100644
--- a/modules/typesniffer/typesniffer.go
+++ b/modules/typesniffer/typesniffer.go
@@ -6,18 +6,14 @@ package typesniffer
import (
"bytes"
"encoding/binary"
- "fmt"
- "io"
"net/http"
"regexp"
"slices"
"strings"
-
- "code.gitea.io/gitea/modules/util"
+ "sync"
)
-// Use at most this many bytes to determine Content Type.
-const sniffLen = 1024
+const SniffContentSize = 1024
const (
MimeTypeImageSvg = "image/svg+xml"
@@ -26,22 +22,30 @@ const (
MimeTypeApplicationOctetStream = "application/octet-stream"
)
-var (
- svgComment = regexp.MustCompile(`(?s)<!--.*?-->`)
- svgTagRegex = regexp.MustCompile(`(?si)\A\s*(?:(<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg\b`)
- svgTagInXMLRegex = regexp.MustCompile(`(?si)\A<\?xml\b.*?\?>\s*(?:(<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg\b`)
-)
-
-// SniffedType contains information about a blobs type.
+var globalVars = sync.OnceValue(func() (ret struct {
+ svgComment, svgTagRegex, svgTagInXMLRegex *regexp.Regexp
+},
+) {
+ ret.svgComment = regexp.MustCompile(`(?s)<!--.*?-->`)
+ ret.svgTagRegex = regexp.MustCompile(`(?si)\A\s*(?:(<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg\b`)
+ ret.svgTagInXMLRegex = regexp.MustCompile(`(?si)\A<\?xml\b.*?\?>\s*(?:(<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg\b`)
+ return ret
+})
+
+// SniffedType contains information about a blob's type.
type SniffedType struct {
contentType string
}
-// IsText etects if content format is plain text.
+// IsText detects if the content type belongs to the text family, including text/plain, text/html, text/css, etc.
func (ct SniffedType) IsText() bool {
return strings.Contains(ct.contentType, "text/")
}
+func (ct SniffedType) IsTextPlain() bool {
+ return strings.Contains(ct.contentType, "text/plain")
+}
+
// IsImage detects if data is an image format
func (ct SniffedType) IsImage() bool {
return strings.Contains(ct.contentType, "image/")
@@ -57,12 +61,12 @@ func (ct SniffedType) IsPDF() bool {
return strings.Contains(ct.contentType, "application/pdf")
}
-// IsVideo detects if data is an video format
+// IsVideo detects if data is a video format
func (ct SniffedType) IsVideo() bool {
return strings.Contains(ct.contentType, "video/")
}
-// IsAudio detects if data is an video format
+// IsAudio detects if data is an audio format
func (ct SniffedType) IsAudio() bool {
return strings.Contains(ct.contentType, "audio/")
}
@@ -103,33 +107,34 @@ func detectFileTypeBox(data []byte) (brands []string, found bool) {
return brands, true
}
-// DetectContentType extends http.DetectContentType with more content types. Defaults to text/unknown if input is empty.
+// DetectContentType extends http.DetectContentType with more content types. Defaults to text/plain if input is empty.
func DetectContentType(data []byte) SniffedType {
if len(data) == 0 {
- return SniffedType{"text/unknown"}
+ return SniffedType{"text/plain"}
}
ct := http.DetectContentType(data)
- if len(data) > sniffLen {
- data = data[:sniffLen]
+ if len(data) > SniffContentSize {
+ data = data[:SniffContentSize]
}
+ vars := globalVars()
// SVG is unsupported by http.DetectContentType, https://github.com/golang/go/issues/15888
detectByHTML := strings.Contains(ct, "text/plain") || strings.Contains(ct, "text/html")
detectByXML := strings.Contains(ct, "text/xml")
if detectByHTML || detectByXML {
- dataProcessed := svgComment.ReplaceAll(data, nil)
+ dataProcessed := vars.svgComment.ReplaceAll(data, nil)
dataProcessed = bytes.TrimSpace(dataProcessed)
- if detectByHTML && svgTagRegex.Match(dataProcessed) ||
- detectByXML && svgTagInXMLRegex.Match(dataProcessed) {
+ if detectByHTML && vars.svgTagRegex.Match(dataProcessed) ||
+ detectByXML && vars.svgTagInXMLRegex.Match(dataProcessed) {
ct = MimeTypeImageSvg
}
}
if strings.HasPrefix(ct, "audio/") && bytes.HasPrefix(data, []byte("ID3")) {
// The MP3 detection is quite inaccurate, any content with "ID3" prefix will result in "audio/mpeg".
- // So remove the "ID3" prefix and detect again, if result is text, then it must be text content.
+ // So remove the "ID3" prefix and detect again, then if the result is "text", it must be text content.
// This works especially because audio files contain many unprintable/invalid characters like `0x00`
ct2 := http.DetectContentType(data[3:])
if strings.HasPrefix(ct2, "text/") {
@@ -155,15 +160,3 @@ func DetectContentType(data []byte) SniffedType {
}
return SniffedType{ct}
}
-
-// DetectContentTypeFromReader guesses the content type contained in the reader.
-func DetectContentTypeFromReader(r io.Reader) (SniffedType, error) {
- buf := make([]byte, sniffLen)
- n, err := util.ReadAtMost(r, buf)
- if err != nil {
- return SniffedType{}, fmt.Errorf("DetectContentTypeFromReader io error: %w", err)
- }
- buf = buf[:n]
-
- return DetectContentType(buf), nil
-}