diff options
author | Lauris BH <lauris@nix.lv> | 2018-09-29 11:33:54 +0300 |
---|---|---|
committer | Lunny Xiao <xiaolunwen@gmail.com> | 2018-09-29 16:33:54 +0800 |
commit | 81702e6ec9d5bd9d2185a8cb5a021047314baee9 (patch) | |
tree | bd0dbdbb7a1d7505809457c72882d00818a50875 /modules/base | |
parent | 67806611923eee071513a74671bc00429c3a89fa (diff) | |
download | gitea-81702e6ec9d5bd9d2185a8cb5a021047314baee9.tar.gz gitea-81702e6ec9d5bd9d2185a8cb5a021047314baee9.zip |
Detect charset and convert non UTF-8 files for display (#4950)
* Detect charset and convert non UTF-8 files for display
* Refactor and move function to correct module
* Revert unrelated changes
* More unrelated changes
* Duplicate content for small text to have better encoding detection
* Check if original content is valid before duplicating it
Diffstat (limited to 'modules/base')
-rw-r--r-- | modules/base/tool.go | 17 |
1 files changed, 16 insertions, 1 deletions
```diff
diff --git a/modules/base/tool.go b/modules/base/tool.go
index 2dfd8ffec0..d5ec9e83fc 100644
--- a/modules/base/tool.go
+++ b/modules/base/tool.go
@@ -59,7 +59,22 @@ func DetectEncoding(content []byte) (string, error) {
 		return "UTF-8", nil
 	}
 
-	result, err := chardet.NewTextDetector().DetectBest(content)
+	textDetector := chardet.NewTextDetector()
+	var detectContent []byte
+	if len(content) < 1024 {
+		// Check if original content is valid
+		if _, err := textDetector.DetectBest(content); err != nil {
+			return "", err
+		}
+		times := 1024 / len(content)
+		detectContent = make([]byte, 0, times*len(content))
+		for i := 0; i < times; i++ {
+			detectContent = append(detectContent, content...)
+		}
+	} else {
+		detectContent = content
+	}
+	result, err := textDetector.DetectBest(detectContent)
 	if err != nil {
 		return "", err
 	}
```