diff options
author | guillep2k <18600385+guillep2k@users.noreply.github.com> | 2019-08-15 09:07:28 -0300 |
---|---|---|
committer | Lunny Xiao <xiaolunwen@gmail.com> | 2019-08-15 20:07:28 +0800 |
commit | 5a44be627c055d3e9eb406ec4a91579de78b6910 (patch) | |
tree | 1a9d4ce34f16bdc02e0ac79a63a81d955ead8e6e /models | |
parent | c2c35d169c819439b78c8c2c7f8877e7bf053cd1 (diff) | |
download | gitea-5a44be627c055d3e9eb406ec4a91579de78b6910.tar.gz gitea-5a44be627c055d3e9eb406ec4a91579de78b6910.zip |
Convert files to utf-8 for indexing (#7814)
* Convert files to utf-8 for indexing
* Move utf8 functions to modules/base
* Bump repoIndexerLatestVersion to 3
* Add tests for base/encoding.go
* Changes to pass gosimple
* Move UTF8 funcs into new modules/charset package
Diffstat (limited to 'models')
-rw-r--r-- | models/git_diff.go | 8 | ||||
-rw-r--r-- | models/repo_indexer.go | 4 |
2 files changed, 7 insertions, 5 deletions
diff --git a/models/git_diff.go b/models/git_diff.go
index 518d543e7b..62bb35be07 100644
--- a/models/git_diff.go
+++ b/models/git_diff.go
@@ -19,7 +19,7 @@ import (
 	"strconv"
 	"strings"
 
-	"code.gitea.io/gitea/modules/base"
+	"code.gitea.io/gitea/modules/charset"
 	"code.gitea.io/gitea/modules/git"
 	"code.gitea.io/gitea/modules/highlight"
 	"code.gitea.io/gitea/modules/log"
@@ -27,7 +27,7 @@ import (
 	"code.gitea.io/gitea/modules/setting"
 
 	"github.com/Unknwon/com"
 	"github.com/sergi/go-diff/diffmatchpatch"
-	"golang.org/x/net/html/charset"
+	stdcharset "golang.org/x/net/html/charset"
 	"golang.org/x/text/transform"
 )
@@ -641,9 +641,9 @@ func ParsePatch(maxLines, maxLineCharacters, maxFiles int, reader io.Reader) (*D
 				buf.WriteString("\n")
 			}
 		}
-		charsetLabel, err := base.DetectEncoding(buf.Bytes())
+		charsetLabel, err := charset.DetectEncoding(buf.Bytes())
 		if charsetLabel != "UTF-8" && err == nil {
-			encoding, _ := charset.Lookup(charsetLabel)
+			encoding, _ := stdcharset.Lookup(charsetLabel)
 			if encoding != nil {
 				d := encoding.NewDecoder()
 				for _, sec := range f.Sections {
diff --git a/models/repo_indexer.go b/models/repo_indexer.go
index c991b1aac9..f625b80389 100644
--- a/models/repo_indexer.go
+++ b/models/repo_indexer.go
@@ -10,6 +10,7 @@ import (
 	"strings"
 
 	"code.gitea.io/gitea/modules/base"
+	"code.gitea.io/gitea/modules/charset"
 	"code.gitea.io/gitea/modules/git"
 	"code.gitea.io/gitea/modules/indexer"
 	"code.gitea.io/gitea/modules/log"
@@ -207,6 +208,7 @@ func addUpdate(update fileUpdate, repo *Repository, batch rupture.FlushingBatch)
 	if err != nil {
 		return err
 	} else if !base.IsTextFile(fileContents) {
+		// FIXME: UTF-16 files will probably fail here
 		return nil
 	}
 	indexerUpdate := indexer.RepoIndexerUpdate{
@@ -214,7 +216,7 @@ func addUpdate(update fileUpdate, repo *Repository, batch rupture.FlushingBatch)
 		Op:       indexer.RepoIndexerOpUpdate,
 		Data: &indexer.RepoIndexerData{
 			RepoID:  repo.ID,
-			Content: string(fileContents),
+			Content: string(charset.ToUTF8DropErrors(fileContents)),
 		},
 	}
 	return indexerUpdate.AddToFlushingBatch(batch)