aboutsummaryrefslogtreecommitdiffstats
path: root/models
diff options
context:
space:
mode:
author guillep2k <18600385+guillep2k@users.noreply.github.com> 2019-08-15 09:07:28 -0300
committer Lunny Xiao <xiaolunwen@gmail.com> 2019-08-15 20:07:28 +0800
commit 5a44be627c055d3e9eb406ec4a91579de78b6910 (patch)
tree 1a9d4ce34f16bdc02e0ac79a63a81d955ead8e6e /models
parent c2c35d169c819439b78c8c2c7f8877e7bf053cd1 (diff)
download gitea-5a44be627c055d3e9eb406ec4a91579de78b6910.tar.gz
gitea-5a44be627c055d3e9eb406ec4a91579de78b6910.zip
Convert files to utf-8 for indexing (#7814)
* Convert files to utf-8 for indexing
* Move utf8 functions to modules/base
* Bump repoIndexerLatestVersion to 3
* Add tests for base/encoding.go
* Changes to pass gosimple
* Move UTF8 funcs into new modules/charset package
Diffstat (limited to 'models')
-rw-r--r-- models/git_diff.go 8
-rw-r--r-- models/repo_indexer.go 4
2 files changed, 7 insertions, 5 deletions
diff --git a/models/git_diff.go b/models/git_diff.go
index 518d543e7b..62bb35be07 100644
--- a/models/git_diff.go
+++ b/models/git_diff.go
@@ -19,7 +19,7 @@ import (
"strconv"
"strings"
- "code.gitea.io/gitea/modules/base"
+ "code.gitea.io/gitea/modules/charset"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/highlight"
"code.gitea.io/gitea/modules/log"
@@ -27,7 +27,7 @@ import (
"code.gitea.io/gitea/modules/setting"
"github.com/Unknwon/com"
"github.com/sergi/go-diff/diffmatchpatch"
- "golang.org/x/net/html/charset"
+ stdcharset "golang.org/x/net/html/charset"
"golang.org/x/text/transform"
)
@@ -641,9 +641,9 @@ func ParsePatch(maxLines, maxLineCharacters, maxFiles int, reader io.Reader) (*D
buf.WriteString("\n")
}
}
- charsetLabel, err := base.DetectEncoding(buf.Bytes())
+ charsetLabel, err := charset.DetectEncoding(buf.Bytes())
if charsetLabel != "UTF-8" && err == nil {
- encoding, _ := charset.Lookup(charsetLabel)
+ encoding, _ := stdcharset.Lookup(charsetLabel)
if encoding != nil {
d := encoding.NewDecoder()
for _, sec := range f.Sections {
diff --git a/models/repo_indexer.go b/models/repo_indexer.go
index c991b1aac9..f625b80389 100644
--- a/models/repo_indexer.go
+++ b/models/repo_indexer.go
@@ -10,6 +10,7 @@ import (
"strings"
"code.gitea.io/gitea/modules/base"
+ "code.gitea.io/gitea/modules/charset"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/indexer"
"code.gitea.io/gitea/modules/log"
@@ -207,6 +208,7 @@ func addUpdate(update fileUpdate, repo *Repository, batch rupture.FlushingBatch)
if err != nil {
return err
} else if !base.IsTextFile(fileContents) {
+ // FIXME: UTF-16 files will probably fail here
return nil
}
indexerUpdate := indexer.RepoIndexerUpdate{
@@ -214,7 +216,7 @@ func addUpdate(update fileUpdate, repo *Repository, batch rupture.FlushingBatch)
Op: indexer.RepoIndexerOpUpdate,
Data: &indexer.RepoIndexerData{
RepoID: repo.ID,
- Content: string(fileContents),
+ Content: string(charset.ToUTF8DropErrors(fileContents)),
},
}
return indexerUpdate.AddToFlushingBatch(batch)