summaryrefslogtreecommitdiffstats
path: root/modules/charset
diff options
context:
space:
mode:
Diffstat (limited to 'modules/charset')
-rw-r--r--modules/charset/charset.go49
1 files changed, 31 insertions, 18 deletions
diff --git a/modules/charset/charset.go b/modules/charset/charset.go
index a7e427db99..3000864c2e 100644
--- a/modules/charset/charset.go
+++ b/modules/charset/charset.go
@@ -7,6 +7,8 @@ package charset
import (
"bytes"
"fmt"
+ "io"
+ "io/ioutil"
"strings"
"unicode/utf8"
@@ -21,6 +23,33 @@ import (
// UTF8BOM is the utf-8 byte-order marker
var UTF8BOM = []byte{'\xef', '\xbb', '\xbf'}
+// ToUTF8WithFallbackReader detects the encoding of content and coverts to UTF-8 reader if possible
+func ToUTF8WithFallbackReader(rd io.Reader) io.Reader {
+ var buf = make([]byte, 2048)
+ n, err := rd.Read(buf)
+ if err != nil {
+ return rd
+ }
+
+ charsetLabel, err := DetectEncoding(buf[:n])
+ if err != nil || charsetLabel == "UTF-8" {
+ return io.MultiReader(bytes.NewReader(RemoveBOMIfPresent(buf[:n])), rd)
+ }
+
+ encoding, _ := charset.Lookup(charsetLabel)
+ if encoding == nil {
+ return io.MultiReader(bytes.NewReader(buf[:n]), rd)
+ }
+
+ return transform.NewReader(
+ io.MultiReader(
+ bytes.NewReader(RemoveBOMIfPresent(buf[:n])),
+ rd,
+ ),
+ encoding.NewDecoder(),
+ )
+}
+
// ToUTF8WithErr converts content to UTF8 encoding
func ToUTF8WithErr(content []byte) (string, error) {
charsetLabel, err := DetectEncoding(content)
@@ -49,24 +78,8 @@ func ToUTF8WithErr(content []byte) (string, error) {
// ToUTF8WithFallback detects the encoding of content and coverts to UTF-8 if possible
func ToUTF8WithFallback(content []byte) []byte {
- charsetLabel, err := DetectEncoding(content)
- if err != nil || charsetLabel == "UTF-8" {
- return RemoveBOMIfPresent(content)
- }
-
- encoding, _ := charset.Lookup(charsetLabel)
- if encoding == nil {
- return content
- }
-
- // If there is an error, we concatenate the nicely decoded part and the
- // original left over. This way we won't lose data.
- result, n, err := transform.Bytes(encoding.NewDecoder(), content)
- if err != nil {
- return append(result, content[n:]...)
- }
-
- return RemoveBOMIfPresent(result)
+ bs, _ := ioutil.ReadAll(ToUTF8WithFallbackReader(bytes.NewReader(content)))
+ return bs
}
// ToUTF8 converts content to UTF8 encoding and ignore error