From 2af67f6044af1cad7136ce8c123e37ab090ca9bc Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Fri, 19 Apr 2019 14:17:27 +0200 Subject: Improve listing performance by using go-git (#6478) * Use go-git for tree reading and commit info lookup. Signed-off-by: Filip Navara * Use TreeEntry.IsRegular() instead of ObjectType that was removed. Signed-off-by: Filip Navara * Use the treePath to optimize commit info search. Signed-off-by: Filip Navara * Extract the latest commit at treePath along with the other commits. Signed-off-by: Filip Navara * Fix listing commit info for a directory that was created in one commit and never modified after. Signed-off-by: Filip Navara * Avoid nearly all external 'git' invocations when doing directory listing (.editorconfig code path is still hit). Signed-off-by: Filip Navara * Use go-git for reading blobs. Signed-off-by: Filip Navara * Make SHA1 type alias for plumbing.Hash in go-git. Signed-off-by: Filip Navara * Make Signature type alias for object.Signature in go-git. Signed-off-by: Filip Navara * Fix GetCommitsInfo for repository with only one commit. Signed-off-by: Filip Navara * Fix PGP signature verification. Signed-off-by: Filip Navara * Fix issues with walking commit graph across merges. Signed-off-by: Filip Navara * Fix typo in condition. Signed-off-by: Filip Navara * Speed up loading branch list by keeping the repository reference (and thus all the loaded packfile indexes). Signed-off-by: Filip Navara * Fix listing submodules. 
Signed-off-by: Filip Navara * Fix build Signed-off-by: Filip Navara * Add back commit cache because of name-rev Signed-off-by: Filip Navara * Fix tests Signed-off-by: Filip Navara * Fix code style * Fix spelling * Address PR feedback Signed-off-by: Filip Navara * Update vendor module list Signed-off-by: Filip Navara * Fix getting trees by commit id Signed-off-by: Filip Navara * Fix remaining unit test failures * Fix GetTreeBySHA * Avoid running `git name-rev` if not necessary Signed-off-by: Filip Navara * Move Branch code to git module * Clean up GPG signature verification and fix it for tagged commits * Address PR feedback (import formatting, copyright headers) * Make blob lookup by SHA working * Update tests to use public API * Allow getting content from any type of object through the blob interface * Change test to actually expect the object content that is in the GIT repository * Change one more test to actually expect the object content that is in the GIT repository * Add comments --- modules/git/blob.go | 66 ++++++++++++----------------------------------------- 1 file changed, 15 insertions(+), 51 deletions(-) (limited to 'modules/git/blob.go') diff --git a/modules/git/blob.go b/modules/git/blob.go index e194b973db..171b4a1010 100644 --- a/modules/git/blob.go +++ b/modules/git/blob.go @@ -1,76 +1,40 @@ // Copyright 2015 The Gogs Authors. All rights reserved. +// Copyright 2019 The Gitea Authors. All rights reserved. // Use of this source code is governed by a MIT-style // license that can be found in the LICENSE file. package git import ( - "bytes" "encoding/base64" - "fmt" "io" "io/ioutil" - "os" - "os/exec" + + "gopkg.in/src-d/go-git.v4/plumbing" ) // Blob represents a Git object. type Blob struct { - repo *Repository - *TreeEntry -} - -// Data gets content of blob all at once and wrap it as io.Reader. -// This can be very slow and memory consuming for huge content. 
-func (b *Blob) Data() (io.Reader, error) { - stdout := new(bytes.Buffer) - stderr := new(bytes.Buffer) - - // Preallocate memory to save ~50% memory usage on big files. - stdout.Grow(int(b.Size() + 2048)) - - if err := b.DataPipeline(stdout, stderr); err != nil { - return nil, concatenateError(err, stderr.String()) - } - return stdout, nil -} + ID SHA1 -// DataPipeline gets content of blob and write the result or error to stdout or stderr -func (b *Blob) DataPipeline(stdout, stderr io.Writer) error { - return NewCommand("show", b.ID.String()).RunInDirPipeline(b.repo.Path, stdout, stderr) -} - -type cmdReadCloser struct { - cmd *exec.Cmd - stdout io.Reader -} - -func (c cmdReadCloser) Read(p []byte) (int, error) { - return c.stdout.Read(p) -} - -func (c cmdReadCloser) Close() error { - io.Copy(ioutil.Discard, c.stdout) - return c.cmd.Wait() + gogitEncodedObj plumbing.EncodedObject + name string } // DataAsync gets a ReadCloser for the contents of a blob without reading it all. // Calling the Close function on the result will discard all unread output. func (b *Blob) DataAsync() (io.ReadCloser, error) { - cmd := exec.Command("git", "show", b.ID.String()) - cmd.Dir = b.repo.Path - cmd.Stderr = os.Stderr - - stdout, err := cmd.StdoutPipe() - if err != nil { - return nil, fmt.Errorf("StdoutPipe: %v", err) - } + return b.gogitEncodedObj.Reader() +} - if err = cmd.Start(); err != nil { - return nil, fmt.Errorf("Start: %v", err) - } +// Size returns the uncompressed size of the blob +func (b *Blob) Size() int64 { + return b.gogitEncodedObj.Size() +} - return cmdReadCloser{stdout: stdout, cmd: cmd}, nil +// Name returns name of the tree entry this blob object was created from (or empty string) +func (b *Blob) Name() string { + return b.name } // GetBlobContentBase64 Reads the content of the blob with a base64 encode and returns the encoded string -- cgit v1.2.3