summary refs log tree commit diff stats
path: root/modules/base
diff options
context:
space:
mode:
author Lauris BH <lauris@nix.lv> 2018-09-29 11:33:54 +0300
committer Lunny Xiao <xiaolunwen@gmail.com> 2018-09-29 16:33:54 +0800
commit 81702e6ec9d5bd9d2185a8cb5a021047314baee9 (patch)
tree bd0dbdbb7a1d7505809457c72882d00818a50875 /modules/base
parent 67806611923eee071513a74671bc00429c3a89fa (diff)
download gitea-81702e6ec9d5bd9d2185a8cb5a021047314baee9.tar.gz
gitea-81702e6ec9d5bd9d2185a8cb5a021047314baee9.zip
Detect charset and convert non UTF-8 files for display (#4950)
* Detect charset and convert non UTF-8 files for display
* Refactor and move function to correct module
* Revert unrelated changes
* More unrelated changes
* Duplicate content for small text to have better encoding detection
* Check if original content is valid before duplicating it
Diffstat (limited to 'modules/base')
-rw-r--r-- modules/base/tool.go 17
1 files changed, 16 insertions, 1 deletions
diff --git a/modules/base/tool.go b/modules/base/tool.go
index 2dfd8ffec0..d5ec9e83fc 100644
--- a/modules/base/tool.go
+++ b/modules/base/tool.go
@@ -59,7 +59,22 @@ func DetectEncoding(content []byte) (string, error) {
return "UTF-8", nil
}
- result, err := chardet.NewTextDetector().DetectBest(content)
+ textDetector := chardet.NewTextDetector()
+ var detectContent []byte
+ if len(content) < 1024 {
+ // Check if original content is valid
+ if _, err := textDetector.DetectBest(content); err != nil {
+ return "", err
+ }
+ times := 1024 / len(content)
+ detectContent = make([]byte, 0, times*len(content))
+ for i := 0; i < times; i++ {
+ detectContent = append(detectContent, content...)
+ }
+ } else {
+ detectContent = content
+ }
+ result, err := textDetector.DetectBest(detectContent)
if err != nil {
return "", err
}