diff options
author | zeripath <art27@cantab.net> | 2019-04-26 13:00:30 +0100 |
---|---|---|
committer | Lauris BH <lauris@nix.lv> | 2019-04-26 15:00:30 +0300 |
commit | f6eedd4dc8fb10df869750c69e2bead0521ec0eb (patch) | |
tree | 52ee0f6f7848be01bb3752152f81521dc4c24c33 /modules/repofiles | |
parent | 4c34bc111ce020161a2fbd962a19a9123b3e2dc4 (diff) | |
download | gitea-f6eedd4dc8fb10df869750c69e2bead0521ec0eb.tar.gz gitea-f6eedd4dc8fb10df869750c69e2bead0521ec0eb.zip |
UI: Detect and restore encoding and BOM in content (#6727)
* detect and remove a decoded BOM
Signed-off-by: Andrew Thornton <art27@cantab.net>
* Restore the previous encoding and BOM
* On error keep as UTF-8
Signed-off-by: Andrew Thornton <art27@cantab.net>
* create remove BOM function
* Deal with LFSed content
* Update modules/repofiles/update.go
* Fix final LFS bug
* Keep LFS sections referring to opts.Content
Diffstat (limited to 'modules/repofiles')
-rw-r--r-- | modules/repofiles/update.go | 95 |
1 files changed, 94 insertions, 1 deletions
diff --git a/modules/repofiles/update.go b/modules/repofiles/update.go
index e9b3077535..8bc3b50ae0 100644
--- a/modules/repofiles/update.go
+++ b/modules/repofiles/update.go
@@ -5,13 +5,19 @@
 package repofiles
 
 import (
+	"bytes"
 	"fmt"
 	"path"
 	"strings"
 
+	"golang.org/x/net/html/charset"
+	"golang.org/x/text/transform"
+
 	"code.gitea.io/gitea/models"
+	"code.gitea.io/gitea/modules/base"
 	"code.gitea.io/gitea/modules/git"
 	"code.gitea.io/gitea/modules/lfs"
+	"code.gitea.io/gitea/modules/log"
 	"code.gitea.io/gitea/modules/setting"
 	"code.gitea.io/sdk/gitea"
 )
@@ -37,6 +43,70 @@ type UpdateRepoFileOptions struct {
 	Committer *IdentityOptions
 }
 
+func detectEncodingAndBOM(entry *git.TreeEntry, repo *models.Repository) (string, bool) {
+	reader, err := entry.Blob().DataAsync()
+	if err != nil {
+		// return default
+		return "UTF-8", false
+	}
+	defer reader.Close()
+	buf := make([]byte, 1024)
+	n, err := reader.Read(buf)
+	if err != nil {
+		// return default
+		return "UTF-8", false
+	}
+	buf = buf[:n]
+
+	if setting.LFS.StartServer {
+		meta := lfs.IsPointerFile(&buf)
+		if meta != nil {
+			meta, err = repo.GetLFSMetaObjectByOid(meta.Oid)
+			if err != nil && err != models.ErrLFSObjectNotExist {
+				// return default
+				return "UTF-8", false
+			}
+		}
+		if meta != nil {
+			dataRc, err := lfs.ReadMetaObject(meta)
+			if err != nil {
+				// return default
+				return "UTF-8", false
+			}
+			defer dataRc.Close()
+			buf = make([]byte, 1024)
+			n, err = dataRc.Read(buf)
+			if err != nil {
+				// return default
+				return "UTF-8", false
+			}
+			buf = buf[:n]
+		}
+
+	}
+
+	encoding, err := base.DetectEncoding(buf)
+	if err != nil {
+		// just default to utf-8 and no bom
+		return "UTF-8", false
+	}
+	if encoding == "UTF-8" {
+		return encoding, bytes.Equal(buf[0:3], base.UTF8BOM)
+	}
+	charsetEncoding, _ := charset.Lookup(encoding)
+	if charsetEncoding == nil {
+		return "UTF-8", false
+	}
+
+	result, n, err := transform.String(charsetEncoding.NewDecoder(), string(buf))
+
+	if n > 2 {
+		return encoding, bytes.Equal([]byte(result)[0:3], base.UTF8BOM)
+	}
+
+	return encoding, false
+}
+
 // CreateOrUpdateRepoFile adds or updates a file in the given repository
 func CreateOrUpdateRepoFile(repo *models.Repository, doer *models.User, opts *UpdateRepoFileOptions) (*gitea.FileResponse, error) {
 	// If no branch name is set, assume master
@@ -118,6 +188,9 @@ func CreateOrUpdateRepoFile(repo *models.Repository, doer *models.User, opts *Up
 		opts.LastCommitID = commit.ID.String()
 	}
 
+	encoding := "UTF-8"
+	bom := false
+
 	if !opts.IsNewFile {
 		fromEntry, err := commit.GetTreeEntryByPath(fromTreePath)
 		if err != nil {
@@ -151,6 +224,7 @@ func CreateOrUpdateRepoFile(repo *models.Repository, doer *models.User, opts *Up
 			// haven't been made. We throw an error if one wasn't provided.
 			return nil, models.ErrSHAOrCommitIDNotProvided{}
 		}
+		encoding, bom = detectEncodingAndBOM(fromEntry, repo)
 	}
 
 	// For the path where this file will be created/updated, we need to make
@@ -235,9 +309,28 @@ func CreateOrUpdateRepoFile(repo *models.Repository, doer *models.User, opts *Up
 	}
 
 	content := opts.Content
+	if bom {
+		content = string(base.UTF8BOM) + content
+	}
+	if encoding != "UTF-8" {
+		charsetEncoding, _ := charset.Lookup(encoding)
+		if charsetEncoding != nil {
+			result, _, err := transform.String(charsetEncoding.NewEncoder(), string(content))
+			if err != nil {
+				// Look if we can't encode back in to the original we should just stick with utf-8
+				log.Error("Error re-encoding %s (%s) as %s - will stay as UTF-8: %v", opts.TreePath, opts.FromTreePath, encoding, err)
+				result = content
+			}
+			content = result
+		} else {
+			log.Error("Unknown encoding: %s", encoding)
+		}
+	}
+	// Reset the opts.Content to our adjusted content to ensure that LFS gets the correct content
+	opts.Content = content
 	var lfsMetaObject *models.LFSMetaObject
 
-	if filename2attribute2info[treePath] != nil && filename2attribute2info[treePath]["filter"] == "lfs" {
+	if setting.LFS.StartServer && filename2attribute2info[treePath] != nil && filename2attribute2info[treePath]["filter"] == "lfs" {
 		// OK so we are supposed to LFS this data!
 		oid, err := models.GenerateLFSOid(strings.NewReader(opts.Content))
 		if err != nil {