From 511f6138d4b5b7a464a8fa3d7f8fc52bec3789a4 Mon Sep 17 00:00:00 2001 From: zeripath Date: Thu, 17 Dec 2020 14:00:47 +0000 Subject: Use native git variants by default with go-git variants as build tag (#13673) * Move last commit cache back into modules/git Signed-off-by: Andrew Thornton * Remove go-git from the interface for last commit cache Signed-off-by: Andrew Thornton * move cacheref to last_commit_cache Signed-off-by: Andrew Thornton * Remove go-git from routers/private/hook Signed-off-by: Andrew Thornton * Move FindLFSFiles to pipeline Signed-off-by: Andrew Thornton * Make no-go-git variants Signed-off-by: Andrew Thornton * Submodule RefID Signed-off-by: Andrew Thornton * fix issue with GetCommitsInfo Signed-off-by: Andrew Thornton * fix GetLastCommitForPaths Signed-off-by: Andrew Thornton * Improve efficiency Signed-off-by: Andrew Thornton * More efficiency Signed-off-by: Andrew Thornton * even faster Signed-off-by: Andrew Thornton * Reduce duplication * As per @lunny Signed-off-by: Andrew Thornton * attempt to fix drone Signed-off-by: Andrew Thornton * fix test-tags Signed-off-by: Andrew Thornton * default to use no-go-git variants and add gogit build tag Signed-off-by: Andrew Thornton * placate lint Signed-off-by: Andrew Thornton * as per @6543 Signed-off-by: Andrew Thornton Co-authored-by: 6543 <6543@obermui.de> Co-authored-by: techknowlogick --- modules/git/pipeline/lfs.go | 159 +++++++++++++++++++++ modules/git/pipeline/lfs_nogogit.go | 266 ++++++++++++++++++++++++++++++++++++ 2 files changed, 425 insertions(+) create mode 100644 modules/git/pipeline/lfs.go create mode 100644 modules/git/pipeline/lfs_nogogit.go (limited to 'modules/git/pipeline') diff --git a/modules/git/pipeline/lfs.go b/modules/git/pipeline/lfs.go new file mode 100644 index 0000000000..d47b7d91ea --- /dev/null +++ b/modules/git/pipeline/lfs.go @@ -0,0 +1,159 @@ +// Copyright 2020 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +// +build gogit + +package pipeline + +import ( + "bufio" + "fmt" + "io" + "sort" + "strings" + "sync" + "time" + + "code.gitea.io/gitea/modules/git" + gogit "github.com/go-git/go-git/v5" + "github.com/go-git/go-git/v5/plumbing/object" +) + +// LFSResult represents commits found using a provided pointer file hash +type LFSResult struct { + Name string + SHA string + Summary string + When time.Time + ParentHashes []git.SHA1 + BranchName string + FullCommitName string +} + +type lfsResultSlice []*LFSResult + +func (a lfsResultSlice) Len() int { return len(a) } +func (a lfsResultSlice) Swap(i, j int) { a[i], a[j] = a[j], a[i] } +func (a lfsResultSlice) Less(i, j int) bool { return a[j].When.After(a[i].When) } + +// FindLFSFile finds commits that contain a provided pointer file hash +func FindLFSFile(repo *git.Repository, hash git.SHA1) ([]*LFSResult, error) { + resultsMap := map[string]*LFSResult{} + results := make([]*LFSResult, 0) + + basePath := repo.Path + gogitRepo := repo.GoGitRepo() + + commitsIter, err := gogitRepo.Log(&gogit.LogOptions{ + Order: gogit.LogOrderCommitterTime, + All: true, + }) + if err != nil { + return nil, fmt.Errorf("Failed to get GoGit CommitsIter. Error: %w", err) + } + + err = commitsIter.ForEach(func(gitCommit *object.Commit) error { + tree, err := gitCommit.Tree() + if err != nil { + return err + } + treeWalker := object.NewTreeWalker(tree, true, nil) + defer treeWalker.Close() + for { + name, entry, err := treeWalker.Next() + if err == io.EOF { + break + } + if entry.Hash == hash { + result := LFSResult{ + Name: name, + SHA: gitCommit.Hash.String(), + Summary: strings.Split(strings.TrimSpace(gitCommit.Message), "\n")[0], + When: gitCommit.Author.When, + ParentHashes: gitCommit.ParentHashes, + } + resultsMap[gitCommit.Hash.String()+":"+name] = &result + } + } + return nil + }) + if err != nil && err != io.EOF { + return nil, fmt.Errorf("Failure in CommitIter.ForEach: %w", err) + } + + for _, result := range resultsMap { + hasParent := false + for _, parentHash := range result.ParentHashes { + if _, hasParent = resultsMap[parentHash.String()+":"+result.Name]; hasParent { + break + } + } + if !hasParent { + results = append(results, result) + } + } + + sort.Sort(lfsResultSlice(results)) + + // Should really use a go-git function here but name-rev is not completed and recapitulating it is not simple + shasToNameReader, shasToNameWriter := io.Pipe() + nameRevStdinReader, nameRevStdinWriter := io.Pipe() + errChan := make(chan error, 1) + wg := sync.WaitGroup{} + wg.Add(3) + + go func() { + defer wg.Done() + scanner := bufio.NewScanner(nameRevStdinReader) + i := 0 + for scanner.Scan() { + line := scanner.Text() + if len(line) == 0 { + continue + } + result := results[i] + result.FullCommitName = line + result.BranchName = strings.Split(line, "~")[0] + i++ + } + }() + go NameRevStdin(shasToNameReader, nameRevStdinWriter, &wg, basePath) + go func() { + defer wg.Done() + defer shasToNameWriter.Close() + for _, result := range results { + i := 0 + if i < len(result.SHA) { + n, err := shasToNameWriter.Write([]byte(result.SHA)[i:]) + if err != nil { + errChan <- err + break + } + i += n + } + n := 0 + for n < 1 { + n, err = shasToNameWriter.Write([]byte{'\n'}) + if err != nil { + errChan <- err + break + } + + } + + } + }() + + wg.Wait() + + select { + case err, has := <-errChan: + if has { + return nil, fmt.Errorf("Unable to obtain name for LFS files. Error: %w", err) + } + default: + } + + return results, nil +} diff --git a/modules/git/pipeline/lfs_nogogit.go b/modules/git/pipeline/lfs_nogogit.go new file mode 100644 index 0000000000..30d33e27e0 --- /dev/null +++ b/modules/git/pipeline/lfs_nogogit.go @@ -0,0 +1,266 @@ +// Copyright 2020 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +// +build !gogit + +package pipeline + +import ( + "bufio" + "bytes" + "fmt" + "io" + "sort" + "strings" + "sync" + "time" + + "code.gitea.io/gitea/modules/git" +) + +// LFSResult represents commits found using a provided pointer file hash +type LFSResult struct { + Name string + SHA string + Summary string + When time.Time + ParentHashes []git.SHA1 + BranchName string + FullCommitName string +} + +type lfsResultSlice []*LFSResult + +func (a lfsResultSlice) Len() int { return len(a) } +func (a lfsResultSlice) Swap(i, j int) { a[i], a[j] = a[j], a[i] } +func (a lfsResultSlice) Less(i, j int) bool { return a[j].When.After(a[i].When) } + +// FindLFSFile finds commits that contain a provided pointer file hash +func FindLFSFile(repo *git.Repository, hash git.SHA1) ([]*LFSResult, error) { + resultsMap := map[string]*LFSResult{} + results := make([]*LFSResult, 0) + + basePath := repo.Path + + hashStr := hash.String() + + // Use rev-list to provide us with all commits in order + revListReader, revListWriter := io.Pipe() + defer func() { + _ = revListWriter.Close() + _ = revListReader.Close() + }() + + go func() { + stderr := strings.Builder{} + err := git.NewCommand("rev-list", "--all").RunInDirPipeline(repo.Path, revListWriter, &stderr) + if err != nil { + _ = revListWriter.CloseWithError(git.ConcatenateError(err, (&stderr).String())) + } else { + _ = revListWriter.Close() + } + }() + + // Next feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary. + // so let's create a batch stdin and stdout + batchStdinReader, batchStdinWriter := io.Pipe() + batchStdoutReader, batchStdoutWriter := io.Pipe() + defer func() { + _ = batchStdinReader.Close() + _ = batchStdinWriter.Close() + _ = batchStdoutReader.Close() + _ = batchStdoutWriter.Close() + }() + + go func() { + stderr := strings.Builder{} + err := git.NewCommand("cat-file", "--batch").RunInDirFullPipeline(repo.Path, batchStdoutWriter, &stderr, batchStdinReader) + if err != nil { + _ = revListWriter.CloseWithError(git.ConcatenateError(err, (&stderr).String())) + } else { + _ = revListWriter.Close() + } + }() + + // For simplicities sake we'll us a buffered reader to read from the cat-file --batch + batchReader := bufio.NewReader(batchStdoutReader) + + // We'll use a scanner for the revList because it's simpler than a bufio.Reader + scan := bufio.NewScanner(revListReader) + trees := [][]byte{} + paths := []string{} + + fnameBuf := make([]byte, 4096) + modeBuf := make([]byte, 40) + workingShaBuf := make([]byte, 40) + + for scan.Scan() { + // Get the next commit ID + commitID := scan.Bytes() + + // push the commit to the cat-file --batch process + _, err := batchStdinWriter.Write(commitID) + if err != nil { + return nil, err + } + _, err = batchStdinWriter.Write([]byte{'\n'}) + if err != nil { + return nil, err + } + + var curCommit *git.Commit + curPath := "" + + commitReadingLoop: + for { + _, typ, size, err := git.ReadBatchLine(batchReader) + if err != nil { + return nil, err + } + + switch typ { + case "tag": + // This shouldn't happen but if it does well just get the commit and try again + id, err := git.ReadTagObjectID(batchReader, size) + if err != nil { + return nil, err + } + _, err = batchStdinWriter.Write([]byte(id + "\n")) + if err != nil { + return nil, err + } + continue + case "commit": + // Read in the commit to get its tree and in case this is one of the last used commits + curCommit, err = git.CommitFromReader(repo, git.MustIDFromString(string(commitID)), io.LimitReader(batchReader, int64(size))) + if err != nil { + return nil, err + } + + _, err := batchStdinWriter.Write([]byte(curCommit.Tree.ID.String() + "\n")) + if err != nil { + return nil, err + } + curPath = "" + case "tree": + var n int64 + for n < size { + mode, fname, sha, count, err := git.ParseTreeLine(batchReader, modeBuf, fnameBuf, workingShaBuf) + if err != nil { + return nil, err + } + n += int64(count) + if bytes.Equal(sha, []byte(hashStr)) { + result := LFSResult{ + Name: curPath + string(fname), + SHA: curCommit.ID.String(), + Summary: strings.Split(strings.TrimSpace(curCommit.CommitMessage), "\n")[0], + When: curCommit.Author.When, + ParentHashes: curCommit.Parents, + } + resultsMap[curCommit.ID.String()+":"+curPath+string(fname)] = &result + } else if string(mode) == git.EntryModeTree.String() { + trees = append(trees, sha) + paths = append(paths, curPath+string(fname)+"/") + } + } + if len(trees) > 0 { + _, err := batchStdinWriter.Write(trees[len(trees)-1]) + if err != nil { + return nil, err + } + _, err = batchStdinWriter.Write([]byte("\n")) + if err != nil { + return nil, err + } + curPath = paths[len(paths)-1] + trees = trees[:len(trees)-1] + paths = paths[:len(paths)-1] + } else { + break commitReadingLoop + } + } + } + } + + if err := scan.Err(); err != nil { + return nil, err + } + + for _, result := range resultsMap { + hasParent := false + for _, parentHash := range result.ParentHashes { + if _, hasParent = resultsMap[parentHash.String()+":"+result.Name]; hasParent { + break + } + } + if !hasParent { + results = append(results, result) + } + } + + sort.Sort(lfsResultSlice(results)) + + // Should really use a go-git function here but name-rev is not completed and recapitulating it is not simple + shasToNameReader, shasToNameWriter := io.Pipe() + nameRevStdinReader, nameRevStdinWriter := io.Pipe() + errChan := make(chan error, 1) + wg := sync.WaitGroup{} + wg.Add(3) + + go func() { + defer wg.Done() + scanner := bufio.NewScanner(nameRevStdinReader) + i := 0 + for scanner.Scan() { + line := scanner.Text() + if len(line) == 0 { + continue + } + result := results[i] + result.FullCommitName = line + result.BranchName = strings.Split(line, "~")[0] + i++ + } + }() + go NameRevStdin(shasToNameReader, nameRevStdinWriter, &wg, basePath) + go func() { + defer wg.Done() + defer shasToNameWriter.Close() + for _, result := range results { + i := 0 + if i < len(result.SHA) { + n, err := shasToNameWriter.Write([]byte(result.SHA)[i:]) + if err != nil { + errChan <- err + break + } + i += n + } + var err error + n := 0 + for n < 1 { + n, err = shasToNameWriter.Write([]byte{'\n'}) + if err != nil { + errChan <- err + break + } + + } + + } + }() + + wg.Wait() + + select { + case err, has := <-errChan: + if has { + return nil, fmt.Errorf("Unable to obtain name for LFS files. Error: %w", err) + } + default: + } + + return results, nil +} -- cgit v1.2.3