* Reduce calls to `git cat-file -s`. There are multiple places where `git cat-file -s` is called repeatedly because the blobs are not created with their size. Through judicious use of `git ls-tree -l` and slight adjustments to the indexer code, we can avoid a lot of these calls.
* Simplify by always expecting the long format.
* Also always set the sized field and tell the indexer the update is sized.

tags/v1.15.0-dev
@@ -10,12 +10,13 @@ import ( | |||
"bytes" | |||
"fmt" | |||
"strconv" | |||
"strings" | |||
"github.com/go-git/go-git/v5/plumbing/filemode" | |||
"github.com/go-git/go-git/v5/plumbing/object" | |||
) | |||
// ParseTreeEntries parses the output of a `git ls-tree` command. | |||
// ParseTreeEntries parses the output of a `git ls-tree -l` command. | |||
func ParseTreeEntries(data []byte) ([]*TreeEntry, error) { | |||
return parseTreeEntries(data, nil) | |||
} | |||
@@ -23,7 +24,7 @@ func ParseTreeEntries(data []byte) ([]*TreeEntry, error) { | |||
func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) { | |||
entries := make([]*TreeEntry, 0, 10) | |||
for pos := 0; pos < len(data); { | |||
// expect line to be of the form "<mode> <type> <sha>\t<filename>" | |||
// expect line to be of the form "<mode> <type> <sha> <space-padded-size>\t<filename>" | |||
entry := new(TreeEntry) | |||
entry.gogitTreeEntry = &object.TreeEntry{} | |||
entry.ptree = ptree | |||
@@ -61,7 +62,16 @@ func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) { | |||
entry.gogitTreeEntry.Hash = id | |||
pos += 41 // skip over sha and trailing space | |||
end := pos + bytes.IndexByte(data[pos:], '\n') | |||
end := pos + bytes.IndexByte(data[pos:], '\t') | |||
if end < pos { | |||
return nil, fmt.Errorf("Invalid ls-tree -l output: %s", string(data)) | |||
} | |||
entry.size, _ = strconv.ParseInt(strings.TrimSpace(string(data[pos:end])), 10, 64) | |||
entry.sized = true | |||
pos = end + 1 | |||
end = pos + bytes.IndexByte(data[pos:], '\n') | |||
if end < pos { | |||
return nil, fmt.Errorf("Invalid ls-tree output: %s", string(data)) | |||
} |
@@ -24,7 +24,7 @@ func TestParseTreeEntries(t *testing.T) { | |||
Expected: []*TreeEntry{}, | |||
}, | |||
{ | |||
Input: "100644 blob 61ab7345a1a3bbc590068ccae37b8515cfc5843c\texample/file2.txt\n", | |||
Input: "100644 blob 61ab7345a1a3bbc590068ccae37b8515cfc5843c 1022\texample/file2.txt\n", | |||
Expected: []*TreeEntry{ | |||
{ | |||
ID: MustIDFromString("61ab7345a1a3bbc590068ccae37b8515cfc5843c"), | |||
@@ -33,12 +33,14 @@ func TestParseTreeEntries(t *testing.T) { | |||
Name: "example/file2.txt", | |||
Mode: filemode.Regular, | |||
}, | |||
size: 1022, | |||
sized: true, | |||
}, | |||
}, | |||
}, | |||
{ | |||
Input: "120000 blob 61ab7345a1a3bbc590068ccae37b8515cfc5843c\t\"example/\\n.txt\"\n" + | |||
"040000 tree 1d01fb729fb0db5881daaa6030f9f2d3cd3d5ae8\texample\n", | |||
Input: "120000 blob 61ab7345a1a3bbc590068ccae37b8515cfc5843c 234131\t\"example/\\n.txt\"\n" + | |||
"040000 tree 1d01fb729fb0db5881daaa6030f9f2d3cd3d5ae8 -\texample\n", | |||
Expected: []*TreeEntry{ | |||
{ | |||
ID: MustIDFromString("61ab7345a1a3bbc590068ccae37b8515cfc5843c"), | |||
@@ -47,9 +49,12 @@ func TestParseTreeEntries(t *testing.T) { | |||
Name: "example/\n.txt", | |||
Mode: filemode.Symlink, | |||
}, | |||
size: 234131, | |||
sized: true, | |||
}, | |||
{ | |||
ID: MustIDFromString("1d01fb729fb0db5881daaa6030f9f2d3cd3d5ae8"), | |||
ID: MustIDFromString("1d01fb729fb0db5881daaa6030f9f2d3cd3d5ae8"), | |||
sized: true, | |||
gogitTreeEntry: &object.TreeEntry{ | |||
Hash: MustIDFromString("1d01fb729fb0db5881daaa6030f9f2d3cd3d5ae8"), | |||
Name: "example", |
@@ -10,9 +10,10 @@ import ( | |||
"bytes" | |||
"fmt" | |||
"strconv" | |||
"strings" | |||
) | |||
// ParseTreeEntries parses the output of a `git ls-tree` command. | |||
// ParseTreeEntries parses the output of a `git ls-tree -l` command. | |||
func ParseTreeEntries(data []byte) ([]*TreeEntry, error) { | |||
return parseTreeEntries(data, nil) | |||
} | |||
@@ -20,7 +21,7 @@ func ParseTreeEntries(data []byte) ([]*TreeEntry, error) { | |||
func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) { | |||
entries := make([]*TreeEntry, 0, 10) | |||
for pos := 0; pos < len(data); { | |||
// expect line to be of the form "<mode> <type> <sha>\t<filename>" | |||
// expect line to be of the form "<mode> <type> <sha> <space-padded-size>\t<filename>" | |||
entry := new(TreeEntry) | |||
entry.ptree = ptree | |||
if pos+6 > len(data) { | |||
@@ -56,7 +57,16 @@ func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) { | |||
entry.ID = id | |||
pos += 41 // skip over sha and trailing space | |||
end := pos + bytes.IndexByte(data[pos:], '\n') | |||
end := pos + bytes.IndexByte(data[pos:], '\t') | |||
if end < pos { | |||
return nil, fmt.Errorf("Invalid ls-tree -l output: %s", string(data)) | |||
} | |||
entry.size, _ = strconv.ParseInt(strings.TrimSpace(string(data[pos:end])), 10, 64) | |||
entry.sized = true | |||
pos = end + 1 | |||
end = pos + bytes.IndexByte(data[pos:], '\n') | |||
if end < pos { | |||
return nil, fmt.Errorf("Invalid ls-tree output: %s", string(data)) | |||
} |
@@ -0,0 +1,70 @@ | |||
// Copyright 2021 The Gitea Authors. All rights reserved. | |||
// Use of this source code is governed by a MIT-style | |||
// license that can be found in the LICENSE file. | |||
// +build !gogit | |||
package git | |||
import ( | |||
"testing" | |||
"github.com/stretchr/testify/assert" | |||
) | |||
func TestParseTreeEntries(t *testing.T) { | |||
testCases := []struct { | |||
Input string | |||
Expected []*TreeEntry | |||
}{ | |||
{ | |||
Input: `100644 blob ea0d83c9081af9500ac9f804101b3fd0a5c293af 8218 README.md | |||
100644 blob 037f27dc9d353ae4fd50f0474b2194c593914e35 4681 README_ZH.md | |||
100644 blob 9846a94f7e8350a916632929d0fda38c90dd2ca8 429 SECURITY.md | |||
040000 tree 84b90550547016f73c5dd3f50dea662389e67b6d - assets | |||
`, | |||
Expected: []*TreeEntry{ | |||
{ | |||
ID: MustIDFromString("ea0d83c9081af9500ac9f804101b3fd0a5c293af"), | |||
name: "README.md", | |||
entryMode: EntryModeBlob, | |||
size: 8218, | |||
sized: true, | |||
}, | |||
{ | |||
ID: MustIDFromString("037f27dc9d353ae4fd50f0474b2194c593914e35"), | |||
name: "README_ZH.md", | |||
entryMode: EntryModeBlob, | |||
size: 4681, | |||
sized: true, | |||
}, | |||
{ | |||
ID: MustIDFromString("9846a94f7e8350a916632929d0fda38c90dd2ca8"), | |||
name: "SECURITY.md", | |||
entryMode: EntryModeBlob, | |||
size: 429, | |||
sized: true, | |||
}, | |||
{ | |||
ID: MustIDFromString("84b90550547016f73c5dd3f50dea662389e67b6d"), | |||
name: "assets", | |||
entryMode: EntryModeTree, | |||
sized: true, | |||
}, | |||
}, | |||
}, | |||
} | |||
for _, testCase := range testCases { | |||
entries, err := ParseTreeEntries([]byte(testCase.Input)) | |||
assert.NoError(t, err) | |||
assert.EqualValues(t, len(testCase.Expected), len(entries)) | |||
for i, entry := range entries { | |||
assert.EqualValues(t, testCase.Expected[i].ID, entry.ID) | |||
assert.EqualValues(t, testCase.Expected[i].name, entry.name) | |||
assert.EqualValues(t, testCase.Expected[i].entryMode, entry.entryMode) | |||
assert.EqualValues(t, testCase.Expected[i].sized, entry.sized) | |||
assert.EqualValues(t, testCase.Expected[i].size, entry.size) | |||
} | |||
} | |||
} |
@@ -87,5 +87,7 @@ func (te *TreeEntry) Blob() *Blob { | |||
ID: te.ID, | |||
repoPath: te.ptree.repo.Path, | |||
name: te.Name(), | |||
size: te.size, | |||
gotSize: te.sized, | |||
} | |||
} |
@@ -32,7 +32,7 @@ func (t *Tree) ListEntries() (Entries, error) { | |||
return t.entries, nil | |||
} | |||
stdout, err := NewCommand("ls-tree", t.ID.String()).RunInDirBytes(t.repo.Path) | |||
stdout, err := NewCommand("ls-tree", "-l", t.ID.String()).RunInDirBytes(t.repo.Path) | |||
if err != nil { | |||
if strings.Contains(err.Error(), "fatal: Not a valid object name") || strings.Contains(err.Error(), "fatal: not a tree object") { | |||
return nil, ErrNotExist{ | |||
@@ -55,7 +55,7 @@ func (t *Tree) ListEntriesRecursive() (Entries, error) { | |||
if t.entriesRecursiveParsed { | |||
return t.entriesRecursive, nil | |||
} | |||
stdout, err := NewCommand("ls-tree", "-t", "-r", t.ID.String()).RunInDirBytes(t.repo.Path) | |||
stdout, err := NewCommand("ls-tree", "-t", "-l", "-r", t.ID.String()).RunInDirBytes(t.repo.Path) | |||
if err != nil { | |||
return nil, err | |||
} |
@@ -179,14 +179,20 @@ func (b *BleveIndexer) addUpdate(commitSha string, update fileUpdate, repo *mode | |||
return nil | |||
} | |||
stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha). | |||
RunInDir(repo.RepoPath()) | |||
if err != nil { | |||
return err | |||
size := update.Size | |||
if !update.Sized { | |||
stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha). | |||
RunInDir(repo.RepoPath()) | |||
if err != nil { | |||
return err | |||
} | |||
if size, err = strconv.ParseInt(strings.TrimSpace(stdout), 10, 64); err != nil { | |||
return fmt.Errorf("Misformatted git cat-file output: %v", err) | |||
} | |||
} | |||
if size, err := strconv.Atoi(strings.TrimSpace(stdout)); err != nil { | |||
return fmt.Errorf("Misformatted git cat-file output: %v", err) | |||
} else if int64(size) > setting.Indexer.MaxIndexerFileSize { | |||
if size > setting.Indexer.MaxIndexerFileSize { | |||
return b.addDelete(update.Filename, repo, batch) | |||
} | |||
@@ -178,14 +178,20 @@ func (b *ElasticSearchIndexer) addUpdate(sha string, update fileUpdate, repo *mo | |||
return nil, nil | |||
} | |||
stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha). | |||
RunInDir(repo.RepoPath()) | |||
if err != nil { | |||
return nil, err | |||
size := update.Size | |||
if !update.Sized { | |||
stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha). | |||
RunInDir(repo.RepoPath()) | |||
if err != nil { | |||
return nil, err | |||
} | |||
if size, err = strconv.ParseInt(strings.TrimSpace(stdout), 10, 64); err != nil { | |||
return nil, fmt.Errorf("Misformatted git cat-file output: %v", err) | |||
} | |||
} | |||
if size, err := strconv.Atoi(strings.TrimSpace(stdout)); err != nil { | |||
return nil, fmt.Errorf("Misformatted git cat-file output: %v", err) | |||
} else if int64(size) > setting.Indexer.MaxIndexerFileSize { | |||
if size > setting.Indexer.MaxIndexerFileSize { | |||
return []elastic.BulkableRequest{b.addDelete(update.Filename, repo)}, nil | |||
} | |||
@@ -17,6 +17,8 @@ import ( | |||
type fileUpdate struct { | |||
Filename string | |||
BlobSha string | |||
Size int64 | |||
Sized bool | |||
} | |||
// repoChanges changes (file additions/updates/removals) to a repo | |||
@@ -77,6 +79,8 @@ func parseGitLsTreeOutput(stdout []byte) ([]fileUpdate, error) { | |||
updates[idxCount] = fileUpdate{ | |||
Filename: entry.Name(), | |||
BlobSha: entry.ID.String(), | |||
Size: entry.Size(), | |||
Sized: true, | |||
} | |||
idxCount++ | |||
} | |||
@@ -87,7 +91,7 @@ func parseGitLsTreeOutput(stdout []byte) ([]fileUpdate, error) { | |||
// genesisChanges get changes to add repo to the indexer for the first time | |||
func genesisChanges(repo *models.Repository, revision string) (*repoChanges, error) { | |||
var changes repoChanges | |||
stdout, err := git.NewCommand("ls-tree", "--full-tree", "-r", revision). | |||
stdout, err := git.NewCommand("ls-tree", "--full-tree", "-l", "-r", revision). | |||
RunInDirBytes(repo.RepoPath()) | |||
if err != nil { | |||
return nil, err | |||
@@ -162,7 +166,7 @@ func nonGenesisChanges(repo *models.Repository, revision string) (*repoChanges, | |||
} | |||
} | |||
cmd := git.NewCommand("ls-tree", "--full-tree", revision, "--") | |||
cmd := git.NewCommand("ls-tree", "--full-tree", "-l", revision, "--") | |||
cmd.AddArguments(updatedFilenames...) | |||
lsTreeStdout, err := cmd.RunInDirBytes(repo.RepoPath()) | |||
if err != nil { |