about | summary | refs | log | tree | commit | diff | stats
path: root/services
diff options
context:
space:
mode:
author: wxiaoguang <wxiaoguang@gmail.com> 2023-07-12 17:58:27 +0800
committer: GitHub <noreply@github.com> 2023-07-12 09:58:27 +0000
commit: 22eeede885327fca0328b7d5b153e7a6c4211ffa (patch)
tree: cc130a4864921f227783a53da5249e3926051f02 /services
parent: d1e066f5d6e1ba91f45118de835c3777eee0811f (diff)
download: gitea-22eeede885327fca0328b7d5b153e7a6c4211ffa.tar.gz
download: gitea-22eeede885327fca0328b7d5b153e7a6c4211ffa.zip
Do not "guess" the file encoding/BOM when using API to upload files (#25828)
Related issue: #18368. It doesn't seem right to "guess" the file encoding/BOM when using the API to upload files. The API should save the uploaded content as-is.
Diffstat (limited to 'services')
-rw-r--r-- services/repository/files/update.go 105
1 files changed, 3 insertions, 102 deletions
diff --git a/services/repository/files/update.go b/services/repository/files/update.go
index 737f914dd6..1d5f10a3f2 100644
--- a/services/repository/files/update.go
+++ b/services/repository/files/update.go
@@ -4,7 +4,6 @@
package files
import (
- "bytes"
"context"
"fmt"
"path"
@@ -12,21 +11,15 @@ import (
"time"
"code.gitea.io/gitea/models"
- "code.gitea.io/gitea/models/db"
git_model "code.gitea.io/gitea/models/git"
repo_model "code.gitea.io/gitea/models/repo"
user_model "code.gitea.io/gitea/models/user"
- "code.gitea.io/gitea/modules/charset"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/lfs"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/structs"
- "code.gitea.io/gitea/modules/util"
asymkey_service "code.gitea.io/gitea/services/asymkey"
-
- stdcharset "golang.org/x/net/html/charset"
- "golang.org/x/text/transform"
)
// IdentityOptions for a person's identity like an author or committer
@@ -66,78 +59,9 @@ type ChangeRepoFilesOptions struct {
type RepoFileOptions struct {
treePath string
fromTreePath string
- encoding string
- bom bool
executable bool
}
-func detectEncodingAndBOM(entry *git.TreeEntry, repo *repo_model.Repository) (string, bool) {
- reader, err := entry.Blob().DataAsync()
- if err != nil {
- // return default
- return "UTF-8", false
- }
- defer reader.Close()
- buf := make([]byte, 1024)
- n, err := util.ReadAtMost(reader, buf)
- if err != nil {
- // return default
- return "UTF-8", false
- }
- buf = buf[:n]
-
- if setting.LFS.StartServer {
- pointer, _ := lfs.ReadPointerFromBuffer(buf)
- if pointer.IsValid() {
- meta, err := git_model.GetLFSMetaObjectByOid(db.DefaultContext, repo.ID, pointer.Oid)
- if err != nil && err != git_model.ErrLFSObjectNotExist {
- // return default
- return "UTF-8", false
- }
- if meta != nil {
- dataRc, err := lfs.ReadMetaObject(pointer)
- if err != nil {
- // return default
- return "UTF-8", false
- }
- defer dataRc.Close()
- buf = make([]byte, 1024)
- n, err = util.ReadAtMost(dataRc, buf)
- if err != nil {
- // return default
- return "UTF-8", false
- }
- buf = buf[:n]
- }
- }
- }
-
- encoding, err := charset.DetectEncoding(buf)
- if err != nil {
- // just default to utf-8 and no bom
- return "UTF-8", false
- }
- if encoding == "UTF-8" {
- return encoding, bytes.Equal(buf[0:3], charset.UTF8BOM)
- }
- charsetEncoding, _ := stdcharset.Lookup(encoding)
- if charsetEncoding == nil {
- return "UTF-8", false
- }
-
- result, n, err := transform.String(charsetEncoding.NewDecoder(), string(buf))
- if err != nil {
- // return default
- return "UTF-8", false
- }
-
- if n > 2 {
- return encoding, bytes.Equal([]byte(result)[0:3], charset.UTF8BOM)
- }
-
- return encoding, false
-}
-
// ChangeRepoFiles adds, updates or removes multiple files in the given repository
func ChangeRepoFiles(ctx context.Context, repo *repo_model.Repository, doer *user_model.User, opts *ChangeRepoFilesOptions) (*structs.FilesResponse, error) {
// If no branch name is set, assume default branch
@@ -184,8 +108,6 @@ func ChangeRepoFiles(ctx context.Context, repo *repo_model.Repository, doer *use
file.Options = &RepoFileOptions{
treePath: treePath,
fromTreePath: fromTreePath,
- encoding: "UTF-8",
- bom: false,
executable: false,
}
treePaths = append(treePaths, treePath)
@@ -381,7 +303,6 @@ func handleCheckErrors(file *ChangeRepoFile, commit *git.Commit, opts *ChangeRep
// haven't been made. We throw an error if one wasn't provided.
return models.ErrSHAOrCommitIDNotProvided{}
}
- file.Options.encoding, file.Options.bom = detectEncodingAndBOM(fromEntry, repo)
file.Options.executable = fromEntry.IsExecutable()
}
if file.Operation == "create" || file.Operation == "update" {
@@ -466,28 +387,8 @@ func CreateOrUpdateFile(ctx context.Context, t *TemporaryUploadRepository, file
}
}
- content := file.Content
- if file.Options.bom {
- content = string(charset.UTF8BOM) + content
- }
- if file.Options.encoding != "UTF-8" {
- charsetEncoding, _ := stdcharset.Lookup(file.Options.encoding)
- if charsetEncoding != nil {
- result, _, err := transform.String(charsetEncoding.NewEncoder(), content)
- if err != nil {
- // Look if we can't encode back in to the original we should just stick with utf-8
- log.Error("Error re-encoding %s (%s) as %s - will stay as UTF-8: %v", file.TreePath, file.FromTreePath, file.Options.encoding, err)
- result = content
- }
- content = result
- } else {
- log.Error("Unknown encoding: %s", file.Options.encoding)
- }
- }
- // Reset the opts.Content to our adjusted content to ensure that LFS gets the correct content
- file.Content = content
+ treeObjectContent := file.Content
var lfsMetaObject *git_model.LFSMetaObject
-
if setting.LFS.StartServer && hasOldBranch {
// Check there is no way this can return multiple infos
filename2attribute2info, err := t.gitRepo.CheckAttribute(git.CheckAttributeOpts{
@@ -506,12 +407,12 @@ func CreateOrUpdateFile(ctx context.Context, t *TemporaryUploadRepository, file
return err
}
lfsMetaObject = &git_model.LFSMetaObject{Pointer: pointer, RepositoryID: repoID}
- content = pointer.StringContent()
+ treeObjectContent = pointer.StringContent()
}
}
// Add the object to the database
- objectHash, err := t.HashObject(strings.NewReader(content))
+ objectHash, err := t.HashObject(strings.NewReader(treeObjectContent))
if err != nil {
return err
}