Refactor renders (#15175)

* Refactor renders * Some performance optimization * Fix comment * Transform reader * Fix csv test * Fix test * Fix tests * Improve optimization * Fix test * Fix test * Detect file encoding with reader * Improve optimization * reduce memory usage * improve code * fix build * Fix test * Fix for go1.15 * Fix render * Fix comment * Fix lint * Fix test * Don't use NormalEOF when unnecessary * revert change on util.go * Apply suggestions from code review Co-authored-by: zeripath <art27@cantab.net> * rename function * Take NormalEOF back Co-authored-by: zeripath <art27@cantab.net>
author: Lunny Xiao <xiaolunwen@gmail.com> 2021-04-20 06:25:08 +0800
committer: GitHub <noreply@github.com> 2021-04-19 18:25:08 -0400
commit: 9d99f6ab19ac3f97af3ca126720e9075c127a652 (patch)
tree: b817b4582a871f83b91ad7977fe772fc3501c1e8 /modules/charset
parent: c9cc6698d2172625854cd063301e63602204a2a1 (diff)
download: gitea-9d99f6ab19ac3f97af3ca126720e9075c127a652.tar.gz
gitea-9d99f6ab19ac3f97af3ca126720e9075c127a652.zip
1 files changed, 31 insertions, 18 deletions
diff --git a/modules/charset/charset.go b/modules/charset/charset.go
index a7e427db99..3000864c2e 100644
--- a/modules/charset/charset.go
+++ b/modules/charset/charset.go
@@ -7,6 +7,8 @@ package charset
 import (
 	"bytes"
 	"fmt"
+	"io"
+	"io/ioutil"
 	"strings"
 	"unicode/utf8"
 
@@ -21,6 +23,33 @@ import (
 // UTF8BOM is the utf-8 byte-order marker
 var UTF8BOM = []byte{'\xef', '\xbb', '\xbf'}
 
+// ToUTF8WithFallbackReader detects the encoding of content and converts it to a UTF-8 reader if possible
+func ToUTF8WithFallbackReader(rd io.Reader) io.Reader {
+	var buf = make([]byte, 2048)
+	n, err := rd.Read(buf)
+	if err != nil {
+		return rd
+	}
+
+	charsetLabel, err := DetectEncoding(buf[:n])
+	if err != nil || charsetLabel == "UTF-8" {
+		return io.MultiReader(bytes.NewReader(RemoveBOMIfPresent(buf[:n])), rd)
+	}
+
+	encoding, _ := charset.Lookup(charsetLabel)
+	if encoding == nil {
+		return io.MultiReader(bytes.NewReader(buf[:n]), rd)
+	}
+
+	return transform.NewReader(
+		io.MultiReader(
+			bytes.NewReader(RemoveBOMIfPresent(buf[:n])),
+			rd,
+		),
+		encoding.NewDecoder(),
+	)
+}
+
 // ToUTF8WithErr converts content to UTF8 encoding
 func ToUTF8WithErr(content []byte) (string, error) {
 	charsetLabel, err := DetectEncoding(content)
@@ -49,24 +78,8 @@ func ToUTF8WithErr(content []byte) (string, error) {
 
 // ToUTF8WithFallback detects the encoding of content and coverts to UTF-8 if possible
 func ToUTF8WithFallback(content []byte) []byte {
-	charsetLabel, err := DetectEncoding(content)
-	if err != nil || charsetLabel == "UTF-8" {
-		return RemoveBOMIfPresent(content)
-	}
-
-	encoding, _ := charset.Lookup(charsetLabel)
-	if encoding == nil {
-		return content
-	}
-
-	// If there is an error, we concatenate the nicely decoded part and the
-	// original left over. This way we won't lose data.
-	result, n, err := transform.Bytes(encoding.NewDecoder(), content)
-	if err != nil {
-		return append(result, content[n:]...)
-	}
-
-	return RemoveBOMIfPresent(result)
+	bs, _ := ioutil.ReadAll(ToUTF8WithFallbackReader(bytes.NewReader(content)))
+	return bs
 }
 
 // ToUTF8 converts content to UTF8 encoding and ignore error
author	Lunny Xiao <xiaolunwen@gmail.com>	2021-04-20 06:25:08 +0800
committer	GitHub <noreply@github.com>	2021-04-19 18:25:08 -0400
commit	9d99f6ab19ac3f97af3ca126720e9075c127a652 (patch)
tree	b817b4582a871f83b91ad7977fe772fc3501c1e8 /modules/charset
parent	c9cc6698d2172625854cd063301e63602204a2a1 (diff)
download	gitea-9d99f6ab19ac3f97af3ca126720e9075c127a652.tar.gz gitea-9d99f6ab19ac3f97af3ca126720e9075c127a652.zip