diff options
author | Unknwon <u@gogs.io> | 2015-12-31 22:13:47 -0500 |
---|---|---|
committer | Unknwon <u@gogs.io> | 2015-12-31 22:13:47 -0500 |
commit | 4993ab1a767bea5645a65b5639cc00bb107fdfd3 (patch) | |
tree | 47f5859ff0a7516d56e0155a9234c57e79cac6d8 /modules/base | |
parent | a62290de52b5258b8f301c2f56ff84aa96e5f6d2 (diff) | |
download | gitea-4993ab1a767bea5645a65b5639cc00bb107fdfd3.tar.gz gitea-4993ab1a767bea5645a65b5639cc00bb107fdfd3.zip |
#2185 fall back to use custom chardet lib
Diffstat (limited to 'modules/base')
-rw-r--r-- | modules/base/tool.go | 20 |
1 files changed, 11 insertions, 9 deletions
diff --git a/modules/base/tool.go b/modules/base/tool.go
index 5927dad01c..255c34ef5a 100644
--- a/modules/base/tool.go
+++ b/modules/base/tool.go
@@ -23,7 +23,8 @@ import (
 	"github.com/Unknwon/com"
 	"github.com/Unknwon/i18n"
 	"github.com/microcosm-cc/bluemonday"
-	"golang.org/x/net/html/charset"
+
+	"github.com/gogits/chardet"
 
 	"github.com/gogits/gogs/modules/avatar"
 	"github.com/gogits/gogs/modules/log"
@@ -53,19 +54,20 @@ func ShortSha(sha1 string) string {
 	return sha1
 }
 
-func DetectEncoding(content []byte) string {
-	if utf8.Valid(content[:1024]) {
+func DetectEncoding(content []byte) (string, error) {
+	if utf8.Valid(content) {
 		log.Debug("Detected encoding: utf-8 (fast)")
-		return "utf-8"
+		return "UTF-8", nil
 	}
 
-	_, name, certain := charset.DetermineEncoding(content, "")
-	if name != "utf-8" && len(setting.Repository.AnsiCharset) > 0 {
+	result, err := chardet.NewTextDetector().DetectBest(content)
+	if result.Charset != "UTF-8" && len(setting.Repository.AnsiCharset) > 0 {
 		log.Debug("Using default AnsiCharset: %s", setting.Repository.AnsiCharset)
-		return setting.Repository.AnsiCharset
+		return setting.Repository.AnsiCharset, err
 	}
-	log.Debug("Detected encoding: %s (%v)", name, certain)
-	return name
+
+	log.Debug("Detected encoding: %s", result.Charset)
+	return result.Charset, err
 }
 
 func BasicAuthDecode(encoded string) (string, string, error) {