summaryrefslogtreecommitdiffstats
path: root/modules/templates/helper.go
diff options
context:
space:
mode:
authorLauris BH <lauris@nix.lv>2018-09-29 11:33:54 +0300
committerLunny Xiao <xiaolunwen@gmail.com>2018-09-29 16:33:54 +0800
commit81702e6ec9d5bd9d2185a8cb5a021047314baee9 (patch)
treebd0dbdbb7a1d7505809457c72882d00818a50875 /modules/templates/helper.go
parent67806611923eee071513a74671bc00429c3a89fa (diff)
downloadgitea-81702e6ec9d5bd9d2185a8cb5a021047314baee9.tar.gz
gitea-81702e6ec9d5bd9d2185a8cb5a021047314baee9.zip
Detect charset and convert non UTF-8 files for display (#4950)
* Detect charset and convert non UTF-8 files for display * Refactor and move function to correct module * Revert unrelated changes * More unrelated changes * Duplicate content for small text to have better encoding detection * Check if original content is valid before duplicating it
Diffstat (limited to 'modules/templates/helper.go')
-rw-r--r--modules/templates/helper.go25
1 files changed, 24 insertions, 1 deletions
diff --git a/modules/templates/helper.go b/modules/templates/helper.go
index d55c122df0..ce077d1a92 100644
--- a/modules/templates/helper.go
+++ b/modules/templates/helper.go
@@ -1,3 +1,4 @@
+// Copyright 2018 The Gitea Authors. All rights reserved.
// Copyright 2014 The Gogs Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
@@ -275,7 +276,7 @@ func ToUTF8WithErr(content []byte) (string, error) {
}
// If there is an error, we concatenate the nicely decoded part and the
- // original left over. This way we won't loose data.
+ // original left over. This way we won't lose data.
result, n, err := transform.String(encoding.NewDecoder(), string(content))
if err != nil {
result = result + string(content[n:])
@@ -284,6 +285,28 @@ func ToUTF8WithErr(content []byte) (string, error) {
return result, err
}
+// ToUTF8WithFallback detects the encoding of content and converts it to UTF-8 if possible; content is returned unchanged when detection fails, when it is already UTF-8, or when no decoder is found for the detected charset.
+func ToUTF8WithFallback(content []byte) []byte {
+ charsetLabel, err := base.DetectEncoding(content)
+ if err != nil || charsetLabel == "UTF-8" {
+ return content
+ }
+
+ encoding, _ := charset.Lookup(charsetLabel)
+ if encoding == nil {
+ return content
+ }
+
+ // If decoding fails partway, return the successfully decoded part followed
+ // by the original bytes from offset n onward, so that no data is lost.
+ result, n, err := transform.Bytes(encoding.NewDecoder(), content)
+ if err != nil {
+ return append(result, content[n:]...)
+ }
+
+ return result
+}
+
// ToUTF8 converts content to UTF8 encoding and ignore error
func ToUTF8(content string) string {
res, _ := ToUTF8WithErr([]byte(content))