diff options
author | zeripath <art27@cantab.net> | 2020-12-17 14:00:47 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-12-17 22:00:47 +0800 |
commit | 511f6138d4b5b7a464a8fa3d7f8fc52bec3789a4 (patch) | |
tree | 126d29951a505dfe499357131b31b0bde57a7896 /modules/git/pipeline | |
parent | 0851a895819e0a5a1a79dcbd596d4c93d4d47a76 (diff) | |
download | gitea-511f6138d4b5b7a464a8fa3d7f8fc52bec3789a4.tar.gz gitea-511f6138d4b5b7a464a8fa3d7f8fc52bec3789a4.zip |
Use native git variants by default with go-git variants as build tag (#13673)
* Move last commit cache back into modules/git
Signed-off-by: Andrew Thornton <art27@cantab.net>
* Remove go-git from the interface for last commit cache
Signed-off-by: Andrew Thornton <art27@cantab.net>
* move cacheref to last_commit_cache
Signed-off-by: Andrew Thornton <art27@cantab.net>
* Remove go-git from routers/private/hook
Signed-off-by: Andrew Thornton <art27@cantab.net>
* Move FindLFSFiles to pipeline
Signed-off-by: Andrew Thornton <art27@cantab.net>
* Make no-go-git variants
Signed-off-by: Andrew Thornton <art27@cantab.net>
* Submodule RefID
Signed-off-by: Andrew Thornton <art27@cantab.net>
* fix issue with GetCommitsInfo
Signed-off-by: Andrew Thornton <art27@cantab.net>
* fix GetLastCommitForPaths
Signed-off-by: Andrew Thornton <art27@cantab.net>
* Improve efficiency
Signed-off-by: Andrew Thornton <art27@cantab.net>
* More efficiency
Signed-off-by: Andrew Thornton <art27@cantab.net>
* even faster
Signed-off-by: Andrew Thornton <art27@cantab.net>
* Reduce duplication
* As per @lunny
Signed-off-by: Andrew Thornton <art27@cantab.net>
* attempt to fix drone
Signed-off-by: Andrew Thornton <art27@cantab.net>
* fix test-tags
Signed-off-by: Andrew Thornton <art27@cantab.net>
* default to use no-go-git variants and add gogit build tag
Signed-off-by: Andrew Thornton <art27@cantab.net>
* placate lint
Signed-off-by: Andrew Thornton <art27@cantab.net>
* as per @6543
Signed-off-by: Andrew Thornton <art27@cantab.net>
Co-authored-by: 6543 <6543@obermui.de>
Co-authored-by: techknowlogick <techknowlogick@gitea.io>
Diffstat (limited to 'modules/git/pipeline')
-rw-r--r-- | modules/git/pipeline/lfs.go | 159 | ||||
-rw-r--r-- | modules/git/pipeline/lfs_nogogit.go | 266 |
2 files changed, 425 insertions, 0 deletions
diff --git a/modules/git/pipeline/lfs.go b/modules/git/pipeline/lfs.go new file mode 100644 index 0000000000..d47b7d91ea --- /dev/null +++ b/modules/git/pipeline/lfs.go @@ -0,0 +1,159 @@ +// Copyright 2020 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +// +build gogit + +package pipeline + +import ( + "bufio" + "fmt" + "io" + "sort" + "strings" + "sync" + "time" + + "code.gitea.io/gitea/modules/git" + gogit "github.com/go-git/go-git/v5" + "github.com/go-git/go-git/v5/plumbing/object" +) + +// LFSResult represents commits found using a provided pointer file hash +type LFSResult struct { + Name string + SHA string + Summary string + When time.Time + ParentHashes []git.SHA1 + BranchName string + FullCommitName string +} + +type lfsResultSlice []*LFSResult + +func (a lfsResultSlice) Len() int { return len(a) } +func (a lfsResultSlice) Swap(i, j int) { a[i], a[j] = a[j], a[i] } +func (a lfsResultSlice) Less(i, j int) bool { return a[j].When.After(a[i].When) } + +// FindLFSFile finds commits that contain a provided pointer file hash +func FindLFSFile(repo *git.Repository, hash git.SHA1) ([]*LFSResult, error) { + resultsMap := map[string]*LFSResult{} + results := make([]*LFSResult, 0) + + basePath := repo.Path + gogitRepo := repo.GoGitRepo() + + commitsIter, err := gogitRepo.Log(&gogit.LogOptions{ + Order: gogit.LogOrderCommitterTime, + All: true, + }) + if err != nil { + return nil, fmt.Errorf("Failed to get GoGit CommitsIter. Error: %w", err) + } + + err = commitsIter.ForEach(func(gitCommit *object.Commit) error { + tree, err := gitCommit.Tree() + if err != nil { + return err + } + treeWalker := object.NewTreeWalker(tree, true, nil) + defer treeWalker.Close() + for { + name, entry, err := treeWalker.Next() + if err == io.EOF { + break + } + if entry.Hash == hash { + result := LFSResult{ + Name: name, + SHA: gitCommit.Hash.String(), + Summary: strings.Split(strings.TrimSpace(gitCommit.Message), "\n")[0], + When: gitCommit.Author.When, + ParentHashes: gitCommit.ParentHashes, + } + resultsMap[gitCommit.Hash.String()+":"+name] = &result + } + } + return nil + }) + if err != nil && err != io.EOF { + return nil, fmt.Errorf("Failure in CommitIter.ForEach: %w", err) + } + + for _, result := range resultsMap { + hasParent := false + for _, parentHash := range result.ParentHashes { + if _, hasParent = resultsMap[parentHash.String()+":"+result.Name]; hasParent { + break + } + } + if !hasParent { + results = append(results, result) + } + } + + sort.Sort(lfsResultSlice(results)) + + // Should really use a go-git function here but name-rev is not completed and recapitulating it is not simple + shasToNameReader, shasToNameWriter := io.Pipe() + nameRevStdinReader, nameRevStdinWriter := io.Pipe() + errChan := make(chan error, 1) + wg := sync.WaitGroup{} + wg.Add(3) + + go func() { + defer wg.Done() + scanner := bufio.NewScanner(nameRevStdinReader) + i := 0 + for scanner.Scan() { + line := scanner.Text() + if len(line) == 0 { + continue + } + result := results[i] + result.FullCommitName = line + result.BranchName = strings.Split(line, "~")[0] + i++ + } + }() + go NameRevStdin(shasToNameReader, nameRevStdinWriter, &wg, basePath) + go func() { + defer wg.Done() + defer shasToNameWriter.Close() + for _, result := range results { + i := 0 + if i < len(result.SHA) { + n, err := shasToNameWriter.Write([]byte(result.SHA)[i:]) + if err != nil { + errChan <- err + break + } + i += n + } + n := 0 + for n < 1 { + n, err = shasToNameWriter.Write([]byte{'\n'}) + if err != nil { + errChan <- err + break + } + + } + + } + }() + + wg.Wait() + + select { + case err, has := <-errChan: + if has { + return nil, fmt.Errorf("Unable to obtain name for LFS files. Error: %w", err) + } + default: + } + + return results, nil +} diff --git a/modules/git/pipeline/lfs_nogogit.go b/modules/git/pipeline/lfs_nogogit.go new file mode 100644 index 0000000000..30d33e27e0 --- /dev/null +++ b/modules/git/pipeline/lfs_nogogit.go @@ -0,0 +1,266 @@ +// Copyright 2020 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +// +build !gogit + +package pipeline + +import ( + "bufio" + "bytes" + "fmt" + "io" + "sort" + "strings" + "sync" + "time" + + "code.gitea.io/gitea/modules/git" +) + +// LFSResult represents commits found using a provided pointer file hash +type LFSResult struct { + Name string + SHA string + Summary string + When time.Time + ParentHashes []git.SHA1 + BranchName string + FullCommitName string +} + +type lfsResultSlice []*LFSResult + +func (a lfsResultSlice) Len() int { return len(a) } +func (a lfsResultSlice) Swap(i, j int) { a[i], a[j] = a[j], a[i] } +func (a lfsResultSlice) Less(i, j int) bool { return a[j].When.After(a[i].When) } + +// FindLFSFile finds commits that contain a provided pointer file hash +func FindLFSFile(repo *git.Repository, hash git.SHA1) ([]*LFSResult, error) { + resultsMap := map[string]*LFSResult{} + results := make([]*LFSResult, 0) + + basePath := repo.Path + + hashStr := hash.String() + + // Use rev-list to provide us with all commits in order + revListReader, revListWriter := io.Pipe() + defer func() { + _ = revListWriter.Close() + _ = revListReader.Close() + }() + + go func() { + stderr := strings.Builder{} + err := git.NewCommand("rev-list", "--all").RunInDirPipeline(repo.Path, revListWriter, &stderr) + if err != nil { + _ = revListWriter.CloseWithError(git.ConcatenateError(err, (&stderr).String())) + } else { + _ = revListWriter.Close() + } + }() + + // Next feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary. + // so let's create a batch stdin and stdout + batchStdinReader, batchStdinWriter := io.Pipe() + batchStdoutReader, batchStdoutWriter := io.Pipe() + defer func() { + _ = batchStdinReader.Close() + _ = batchStdinWriter.Close() + _ = batchStdoutReader.Close() + _ = batchStdoutWriter.Close() + }() + + go func() { + stderr := strings.Builder{} + err := git.NewCommand("cat-file", "--batch").RunInDirFullPipeline(repo.Path, batchStdoutWriter, &stderr, batchStdinReader) + if err != nil { + _ = revListWriter.CloseWithError(git.ConcatenateError(err, (&stderr).String())) + } else { + _ = revListWriter.Close() + } + }() + + // For simplicities sake we'll us a buffered reader to read from the cat-file --batch + batchReader := bufio.NewReader(batchStdoutReader) + + // We'll use a scanner for the revList because it's simpler than a bufio.Reader + scan := bufio.NewScanner(revListReader) + trees := [][]byte{} + paths := []string{} + + fnameBuf := make([]byte, 4096) + modeBuf := make([]byte, 40) + workingShaBuf := make([]byte, 40) + + for scan.Scan() { + // Get the next commit ID + commitID := scan.Bytes() + + // push the commit to the cat-file --batch process + _, err := batchStdinWriter.Write(commitID) + if err != nil { + return nil, err + } + _, err = batchStdinWriter.Write([]byte{'\n'}) + if err != nil { + return nil, err + } + + var curCommit *git.Commit + curPath := "" + + commitReadingLoop: + for { + _, typ, size, err := git.ReadBatchLine(batchReader) + if err != nil { + return nil, err + } + + switch typ { + case "tag": + // This shouldn't happen but if it does well just get the commit and try again + id, err := git.ReadTagObjectID(batchReader, size) + if err != nil { + return nil, err + } + _, err = batchStdinWriter.Write([]byte(id + "\n")) + if err != nil { + return nil, err + } + continue + case "commit": + // Read in the commit to get its tree and in case this is one of the last used commits + curCommit, err = git.CommitFromReader(repo, git.MustIDFromString(string(commitID)), io.LimitReader(batchReader, int64(size))) + if err != nil { + return nil, err + } + + _, err := batchStdinWriter.Write([]byte(curCommit.Tree.ID.String() + "\n")) + if err != nil { + return nil, err + } + curPath = "" + case "tree": + var n int64 + for n < size { + mode, fname, sha, count, err := git.ParseTreeLine(batchReader, modeBuf, fnameBuf, workingShaBuf) + if err != nil { + return nil, err + } + n += int64(count) + if bytes.Equal(sha, []byte(hashStr)) { + result := LFSResult{ + Name: curPath + string(fname), + SHA: curCommit.ID.String(), + Summary: strings.Split(strings.TrimSpace(curCommit.CommitMessage), "\n")[0], + When: curCommit.Author.When, + ParentHashes: curCommit.Parents, + } + resultsMap[curCommit.ID.String()+":"+curPath+string(fname)] = &result + } else if string(mode) == git.EntryModeTree.String() { + trees = append(trees, sha) + paths = append(paths, curPath+string(fname)+"/") + } + } + if len(trees) > 0 { + _, err := batchStdinWriter.Write(trees[len(trees)-1]) + if err != nil { + return nil, err + } + _, err = batchStdinWriter.Write([]byte("\n")) + if err != nil { + return nil, err + } + curPath = paths[len(paths)-1] + trees = trees[:len(trees)-1] + paths = paths[:len(paths)-1] + } else { + break commitReadingLoop + } + } + } + } + + if err := scan.Err(); err != nil { + return nil, err + } + + for _, result := range resultsMap { + hasParent := false + for _, parentHash := range result.ParentHashes { + if _, hasParent = resultsMap[parentHash.String()+":"+result.Name]; hasParent { + break + } + } + if !hasParent { + results = append(results, result) + } + } + + sort.Sort(lfsResultSlice(results)) + + // Should really use a go-git function here but name-rev is not completed and recapitulating it is not simple + shasToNameReader, shasToNameWriter := io.Pipe() + nameRevStdinReader, nameRevStdinWriter := io.Pipe() + errChan := make(chan error, 1) + wg := sync.WaitGroup{} + wg.Add(3) + + go func() { + defer wg.Done() + scanner := bufio.NewScanner(nameRevStdinReader) + i := 0 + for scanner.Scan() { + line := scanner.Text() + if len(line) == 0 { + continue + } + result := results[i] + result.FullCommitName = line + result.BranchName = strings.Split(line, "~")[0] + i++ + } + }() + go NameRevStdin(shasToNameReader, nameRevStdinWriter, &wg, basePath) + go func() { + defer wg.Done() + defer shasToNameWriter.Close() + for _, result := range results { + i := 0 + if i < len(result.SHA) { + n, err := shasToNameWriter.Write([]byte(result.SHA)[i:]) + if err != nil { + errChan <- err + break + } + i += n + } + var err error + n := 0 + for n < 1 { + n, err = shasToNameWriter.Write([]byte{'\n'}) + if err != nil { + errChan <- err + break + } + + } + + } + }() + + wg.Wait() + + select { + case err, has := <-errChan: + if has { + return nil, fmt.Errorf("Unable to obtain name for LFS files. Error: %w", err) + } + default: + } + + return results, nil +} |