diff options
author | Lauris BH <lauris@nix.lv> | 2018-09-29 11:33:54 +0300 |
---|---|---|
committer | Lunny Xiao <xiaolunwen@gmail.com> | 2018-09-29 16:33:54 +0800 |
commit | 81702e6ec9d5bd9d2185a8cb5a021047314baee9 (patch) | |
tree | bd0dbdbb7a1d7505809457c72882d00818a50875 /modules/templates | |
parent | 67806611923eee071513a74671bc00429c3a89fa (diff) | |
download | gitea-81702e6ec9d5bd9d2185a8cb5a021047314baee9.tar.gz gitea-81702e6ec9d5bd9d2185a8cb5a021047314baee9.zip |
Detect charset and convert non UTF-8 files for display (#4950)
* Detect charset and convert non UTF-8 files for display
* Refactor and move function to correct module
* Revert unrelated changes
* More unrelated changes
* Duplicate content for small text to have better encoding detection
* Check if original content is valid before duplicating it
Diffstat (limited to 'modules/templates')
-rw-r--r-- | modules/templates/helper.go | 25 |
1 files changed, 24 insertions, 1 deletions
diff --git a/modules/templates/helper.go b/modules/templates/helper.go index d55c122df0..ce077d1a92 100644 --- a/modules/templates/helper.go +++ b/modules/templates/helper.go @@ -1,3 +1,4 @@ +// Copyright 2018 The Gitea Authors. All rights reserved. // Copyright 2014 The Gogs Authors. All rights reserved. // Use of this source code is governed by a MIT-style // license that can be found in the LICENSE file. @@ -275,7 +276,7 @@ func ToUTF8WithErr(content []byte) (string, error) { } // If there is an error, we concatenate the nicely decoded part and the - // original left over. This way we won't loose data. + // original left over. This way we won't lose data. result, n, err := transform.String(encoding.NewDecoder(), string(content)) if err != nil { result = result + string(content[n:]) @@ -284,6 +285,28 @@ func ToUTF8WithErr(content []byte) (string, error) { return result, err } +// ToUTF8WithFallback detects the encoding of content and converts to UTF-8 if possible +func ToUTF8WithFallback(content []byte) []byte { + charsetLabel, err := base.DetectEncoding(content) + if err != nil || charsetLabel == "UTF-8" { + return content + } + + encoding, _ := charset.Lookup(charsetLabel) + if encoding == nil { + return content + } + + // If there is an error, we concatenate the nicely decoded part and the + // original left over. This way we won't lose data. + result, n, err := transform.Bytes(encoding.NewDecoder(), content) + if err != nil { + return append(result, content[n:]...) + } + + return result +} + // ToUTF8 converts content to UTF8 encoding and ignore error func ToUTF8(content string) string { res, _ := ToUTF8WithErr([]byte(content)) |