summary | refs | log | tree | commit | diff | stats
path: root/modules/repofiles
diff options
context:
space:
mode:
author: zeripath <art27@cantab.net> 2019-04-26 13:00:30 +0100
committer: Lauris BH <lauris@nix.lv> 2019-04-26 15:00:30 +0300
commit: f6eedd4dc8fb10df869750c69e2bead0521ec0eb (patch)
tree: 52ee0f6f7848be01bb3752152f81521dc4c24c33 /modules/repofiles
parent: 4c34bc111ce020161a2fbd962a19a9123b3e2dc4 (diff)
download: gitea-f6eedd4dc8fb10df869750c69e2bead0521ec0eb.tar.gz
download: gitea-f6eedd4dc8fb10df869750c69e2bead0521ec0eb.zip
UI: Detect and restore encoding and BOM in content (#6727)
* detect and remove a decoded BOM
  Signed-off-by: Andrew Thornton <art27@cantab.net>
* Restore the previous encoding and BOM
* On error keep as UTF-8
  Signed-off-by: Andrew Thornton <art27@cantab.net>
* create remove BOM function
* Deal with LFSed content
* Update modules/repofiles/update.go
* Fix final LFS bug
* Keep LFS sections referring to opts.Content
Diffstat (limited to 'modules/repofiles')
-rw-r--r-- modules/repofiles/update.go | 95
1 files changed, 94 insertions, 1 deletions
diff --git a/modules/repofiles/update.go b/modules/repofiles/update.go
index e9b3077535..8bc3b50ae0 100644
--- a/modules/repofiles/update.go
+++ b/modules/repofiles/update.go
@@ -5,13 +5,19 @@
package repofiles
import (
+ "bytes"
"fmt"
"path"
"strings"
+ "golang.org/x/net/html/charset"
+ "golang.org/x/text/transform"
+
"code.gitea.io/gitea/models"
+ "code.gitea.io/gitea/modules/base"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/lfs"
+ "code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/sdk/gitea"
)
@@ -37,6 +43,70 @@ type UpdateRepoFileOptions struct {
Committer *IdentityOptions
}
+func detectEncodingAndBOM(entry *git.TreeEntry, repo *models.Repository) (string, bool) {
+ reader, err := entry.Blob().DataAsync()
+ if err != nil {
+ // return default
+ return "UTF-8", false
+ }
+ defer reader.Close()
+ buf := make([]byte, 1024)
+ n, err := reader.Read(buf)
+ if err != nil {
+ // return default
+ return "UTF-8", false
+ }
+ buf = buf[:n]
+
+ if setting.LFS.StartServer {
+ meta := lfs.IsPointerFile(&buf)
+ if meta != nil {
+ meta, err = repo.GetLFSMetaObjectByOid(meta.Oid)
+ if err != nil && err != models.ErrLFSObjectNotExist {
+ // return default
+ return "UTF-8", false
+ }
+ }
+ if meta != nil {
+ dataRc, err := lfs.ReadMetaObject(meta)
+ if err != nil {
+ // return default
+ return "UTF-8", false
+ }
+ defer dataRc.Close()
+ buf = make([]byte, 1024)
+ n, err = dataRc.Read(buf)
+ if err != nil {
+ // return default
+ return "UTF-8", false
+ }
+ buf = buf[:n]
+ }
+
+ }
+
+ encoding, err := base.DetectEncoding(buf)
+ if err != nil {
+ // just default to utf-8 and no bom
+ return "UTF-8", false
+ }
+ if encoding == "UTF-8" {
+ return encoding, bytes.Equal(buf[0:3], base.UTF8BOM)
+ }
+ charsetEncoding, _ := charset.Lookup(encoding)
+ if charsetEncoding == nil {
+ return "UTF-8", false
+ }
+
+ result, n, err := transform.String(charsetEncoding.NewDecoder(), string(buf))
+
+ if n > 2 {
+ return encoding, bytes.Equal([]byte(result)[0:3], base.UTF8BOM)
+ }
+
+ return encoding, false
+}
+
// CreateOrUpdateRepoFile adds or updates a file in the given repository
func CreateOrUpdateRepoFile(repo *models.Repository, doer *models.User, opts *UpdateRepoFileOptions) (*gitea.FileResponse, error) {
// If no branch name is set, assume master
@@ -118,6 +188,9 @@ func CreateOrUpdateRepoFile(repo *models.Repository, doer *models.User, opts *Up
opts.LastCommitID = commit.ID.String()
}
+ encoding := "UTF-8"
+ bom := false
+
if !opts.IsNewFile {
fromEntry, err := commit.GetTreeEntryByPath(fromTreePath)
if err != nil {
@@ -151,6 +224,7 @@ func CreateOrUpdateRepoFile(repo *models.Repository, doer *models.User, opts *Up
// haven't been made. We throw an error if one wasn't provided.
return nil, models.ErrSHAOrCommitIDNotProvided{}
}
+ encoding, bom = detectEncodingAndBOM(fromEntry, repo)
}
// For the path where this file will be created/updated, we need to make
@@ -235,9 +309,28 @@ func CreateOrUpdateRepoFile(repo *models.Repository, doer *models.User, opts *Up
}
content := opts.Content
+ if bom {
+ content = string(base.UTF8BOM) + content
+ }
+ if encoding != "UTF-8" {
+ charsetEncoding, _ := charset.Lookup(encoding)
+ if charsetEncoding != nil {
+ result, _, err := transform.String(charsetEncoding.NewEncoder(), string(content))
+ if err != nil {
+ // Look if we can't encode back in to the original we should just stick with utf-8
+ log.Error("Error re-encoding %s (%s) as %s - will stay as UTF-8: %v", opts.TreePath, opts.FromTreePath, encoding, err)
+ result = content
+ }
+ content = result
+ } else {
+ log.Error("Unknown encoding: %s", encoding)
+ }
+ }
+ // Reset the opts.Content to our adjusted content to ensure that LFS gets the correct content
+ opts.Content = content
var lfsMetaObject *models.LFSMetaObject
- if filename2attribute2info[treePath] != nil && filename2attribute2info[treePath]["filter"] == "lfs" {
+ if setting.LFS.StartServer && filename2attribute2info[treePath] != nil && filename2attribute2info[treePath]["filter"] == "lfs" {
// OK so we are supposed to LFS this data!
oid, err := models.GenerateLFSOid(strings.NewReader(opts.Content))
if err != nil {