diff options
author | Lunny Xiao <xiaolunwen@gmail.com> | 2021-04-20 06:25:08 +0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-04-19 18:25:08 -0400 |
commit | 9d99f6ab19ac3f97af3ca126720e9075c127a652 (patch) | |
tree | b817b4582a871f83b91ad7977fe772fc3501c1e8 /modules/charset | |
parent | c9cc6698d2172625854cd063301e63602204a2a1 (diff) | |
download | gitea-9d99f6ab19ac3f97af3ca126720e9075c127a652.tar.gz gitea-9d99f6ab19ac3f97af3ca126720e9075c127a652.zip |
Refactor renders (#15175)
* Refactor renders
* Some performance optimization
* Fix comment
* Transform reader
* Fix csv test
* Fix test
* Fix tests
* Improve optimization
* Fix test
* Fix test
* Detect file encoding with reader
* Improve optimization
* reduce memory usage
* improve code
* fix build
* Fix test
* Fix for go1.15
* Fix render
* Fix comment
* Fix lint
* Fix test
* Don't use NormalEOF when unnecessary
* revert change on util.go
* Apply suggestions from code review
Co-authored-by: zeripath <art27@cantab.net>
* rename function
* Take NormalEOF back
Co-authored-by: zeripath <art27@cantab.net>
Diffstat (limited to 'modules/charset')
-rw-r--r-- | modules/charset/charset.go | 49 |
1 files changed, 31 insertions, 18 deletions
diff --git a/modules/charset/charset.go b/modules/charset/charset.go index a7e427db99..3000864c2e 100644 --- a/modules/charset/charset.go +++ b/modules/charset/charset.go @@ -7,6 +7,8 @@ package charset import ( "bytes" "fmt" + "io" + "io/ioutil" "strings" "unicode/utf8" @@ -21,6 +23,33 @@ import ( // UTF8BOM is the utf-8 byte-order marker var UTF8BOM = []byte{'\xef', '\xbb', '\xbf'} +// ToUTF8WithFallbackReader detects the encoding of content and coverts to UTF-8 reader if possible +func ToUTF8WithFallbackReader(rd io.Reader) io.Reader { + var buf = make([]byte, 2048) + n, err := rd.Read(buf) + if err != nil { + return rd + } + + charsetLabel, err := DetectEncoding(buf[:n]) + if err != nil || charsetLabel == "UTF-8" { + return io.MultiReader(bytes.NewReader(RemoveBOMIfPresent(buf[:n])), rd) + } + + encoding, _ := charset.Lookup(charsetLabel) + if encoding == nil { + return io.MultiReader(bytes.NewReader(buf[:n]), rd) + } + + return transform.NewReader( + io.MultiReader( + bytes.NewReader(RemoveBOMIfPresent(buf[:n])), + rd, + ), + encoding.NewDecoder(), + ) +} + // ToUTF8WithErr converts content to UTF8 encoding func ToUTF8WithErr(content []byte) (string, error) { charsetLabel, err := DetectEncoding(content) @@ -49,24 +78,8 @@ func ToUTF8WithErr(content []byte) (string, error) { // ToUTF8WithFallback detects the encoding of content and coverts to UTF-8 if possible func ToUTF8WithFallback(content []byte) []byte { - charsetLabel, err := DetectEncoding(content) - if err != nil || charsetLabel == "UTF-8" { - return RemoveBOMIfPresent(content) - } - - encoding, _ := charset.Lookup(charsetLabel) - if encoding == nil { - return content - } - - // If there is an error, we concatenate the nicely decoded part and the - // original left over. This way we won't lose data. - result, n, err := transform.Bytes(encoding.NewDecoder(), content) - if err != nil { - return append(result, content[n:]...) 
- } - - return RemoveBOMIfPresent(result) + bs, _ := ioutil.ReadAll(ToUTF8WithFallbackReader(bytes.NewReader(content))) + return bs } // ToUTF8 converts content to UTF8 encoding and ignore error |