summaryrefslogtreecommitdiffstats
path: root/modules/git
diff options
context:
space:
mode:
authorwxiaoguang <wxiaoguang@gmail.com>2022-10-08 01:20:53 +0800
committerGitHub <noreply@github.com>2022-10-08 01:20:53 +0800
commitc08e42c47ef2a32b3b7ee422c73d6929c93b199e (patch)
tree7425d3fa27e7ac65de4f251ed22db1511b21adc4 /modules/git
parent69fc510d6dcf9cda7993eae8cd5c7725b345a9a1 (diff)
downloadgitea-c08e42c47ef2a32b3b7ee422c73d6929c93b199e.tar.gz
gitea-c08e42c47ef2a32b3b7ee422c73d6929c93b199e.zip
Refactor parseTreeEntries, speed up tree list (#21368)
Close #20315 (fix the panic when parsing invalid input), Speed up #20231 (use ls-tree without size field) Introduce ListEntriesRecursiveFast (ls-tree without size) and ListEntriesRecursiveWithSize (ls-tree with size)
Diffstat (limited to 'modules/git')
-rw-r--r--modules/git/parse_nogogit.go78
-rw-r--r--modules/git/parse_nogogit_test.go48
-rw-r--r--modules/git/repo_language_stats_nogogit.go2
-rw-r--r--modules/git/tree_gogit.go9
-rw-r--r--modules/git/tree_nogogit.go19
5 files changed, 106 insertions, 50 deletions
diff --git a/modules/git/parse_nogogit.go b/modules/git/parse_nogogit.go
index c8f0f994fc..fb5b63def9 100644
--- a/modules/git/parse_nogogit.go
+++ b/modules/git/parse_nogogit.go
@@ -22,70 +22,72 @@ func ParseTreeEntries(data []byte) ([]*TreeEntry, error) {
return parseTreeEntries(data, nil)
}
+var sepSpace = []byte{' '}
+
func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) {
- entries := make([]*TreeEntry, 0, 10)
+ var err error
+ entries := make([]*TreeEntry, 0, bytes.Count(data, []byte{'\n'})+1)
for pos := 0; pos < len(data); {
- // expect line to be of the form "<mode> <type> <sha> <space-padded-size>\t<filename>"
+ // expect line to be of the form:
+ // <mode> <type> <sha> <space-padded-size>\t<filename>
+ // <mode> <type> <sha>\t<filename>
+ posEnd := bytes.IndexByte(data[pos:], '\n')
+ if posEnd == -1 {
+ posEnd = len(data)
+ } else {
+ posEnd += pos
+ }
+ line := data[pos:posEnd]
+ posTab := bytes.IndexByte(line, '\t')
+ if posTab == -1 {
+ return nil, fmt.Errorf("invalid ls-tree output (no tab): %q", line)
+ }
+
entry := new(TreeEntry)
entry.ptree = ptree
- if pos+6 > len(data) {
- return nil, fmt.Errorf("Invalid ls-tree output: %s", string(data))
+
+ entryAttrs := line[:posTab]
+ entryName := line[posTab+1:]
+
+ entryMode, entryAttrs, _ := bytes.Cut(entryAttrs, sepSpace)
+ _ /* entryType */, entryAttrs, _ = bytes.Cut(entryAttrs, sepSpace) // the type is not used, the mode is enough to determine the type
+ entryObjectID, entryAttrs, _ := bytes.Cut(entryAttrs, sepSpace)
+ if len(entryAttrs) > 0 {
+ entrySize := entryAttrs // the last field is the space-padded-size
+ entry.size, _ = strconv.ParseInt(strings.TrimSpace(string(entrySize)), 10, 64)
+ entry.sized = true
}
- switch string(data[pos : pos+6]) {
+
+ switch string(entryMode) {
case "100644":
entry.entryMode = EntryModeBlob
- pos += 12 // skip over "100644 blob "
case "100755":
entry.entryMode = EntryModeExec
- pos += 12 // skip over "100755 blob "
case "120000":
entry.entryMode = EntryModeSymlink
- pos += 12 // skip over "120000 blob "
case "160000":
entry.entryMode = EntryModeCommit
- pos += 14 // skip over "160000 object "
case "040000", "040755": // git uses 040000 for tree object, but some users may get 040755 for unknown reasons
entry.entryMode = EntryModeTree
- pos += 12 // skip over "040000 tree "
default:
- return nil, fmt.Errorf("unknown type: %v", string(data[pos:pos+6]))
+ return nil, fmt.Errorf("unknown type: %v", string(entryMode))
}
- if pos+40 > len(data) {
- return nil, fmt.Errorf("Invalid ls-tree output: %s", string(data))
- }
- id, err := NewIDFromString(string(data[pos : pos+40]))
+ entry.ID, err = NewIDFromString(string(entryObjectID))
if err != nil {
- return nil, fmt.Errorf("Invalid ls-tree output: %v", err)
- }
- entry.ID = id
- pos += 41 // skip over sha and trailing space
-
- end := pos + bytes.IndexByte(data[pos:], '\t')
- if end < pos {
- return nil, fmt.Errorf("Invalid ls-tree -l output: %s", string(data))
- }
- entry.size, _ = strconv.ParseInt(strings.TrimSpace(string(data[pos:end])), 10, 64)
- entry.sized = true
-
- pos = end + 1
-
- end = pos + bytes.IndexByte(data[pos:], '\n')
- if end < pos {
- return nil, fmt.Errorf("Invalid ls-tree output: %s", string(data))
+ return nil, fmt.Errorf("invalid ls-tree output (invalid object id): %q, err: %w", line, err)
}
- // In case entry name is surrounded by double quotes(it happens only in git-shell).
- if data[pos] == '"' {
- entry.name, err = strconv.Unquote(string(data[pos:end]))
+ if len(entryName) > 0 && entryName[0] == '"' {
+ entry.name, err = strconv.Unquote(string(entryName))
if err != nil {
- return nil, fmt.Errorf("Invalid ls-tree output: %v", err)
+ return nil, fmt.Errorf("invalid ls-tree output (invalid name): %q, err: %w", line, err)
}
} else {
- entry.name = string(data[pos:end])
+ entry.name = string(entryName)
}
- pos = end + 1
+ pos = posEnd + 1
entries = append(entries, entry)
}
return entries, nil
diff --git a/modules/git/parse_nogogit_test.go b/modules/git/parse_nogogit_test.go
index 483f96e9a7..cecd3960da 100644
--- a/modules/git/parse_nogogit_test.go
+++ b/modules/git/parse_nogogit_test.go
@@ -12,7 +12,7 @@ import (
"github.com/stretchr/testify/assert"
)
-func TestParseTreeEntries(t *testing.T) {
+func TestParseTreeEntriesLong(t *testing.T) {
testCases := []struct {
Input string
Expected []*TreeEntry
@@ -59,11 +59,47 @@ func TestParseTreeEntries(t *testing.T) {
assert.NoError(t, err)
assert.Len(t, entries, len(testCase.Expected))
for i, entry := range entries {
- assert.EqualValues(t, testCase.Expected[i].ID, entry.ID)
- assert.EqualValues(t, testCase.Expected[i].name, entry.name)
- assert.EqualValues(t, testCase.Expected[i].entryMode, entry.entryMode)
- assert.EqualValues(t, testCase.Expected[i].sized, entry.sized)
- assert.EqualValues(t, testCase.Expected[i].size, entry.size)
+ assert.EqualValues(t, testCase.Expected[i], entry)
}
}
}
+
+func TestParseTreeEntriesShort(t *testing.T) {
+ testCases := []struct {
+ Input string
+ Expected []*TreeEntry
+ }{
+ {
+ Input: `100644 blob ea0d83c9081af9500ac9f804101b3fd0a5c293af README.md
+040000 tree 84b90550547016f73c5dd3f50dea662389e67b6d assets
+`,
+ Expected: []*TreeEntry{
+ {
+ ID: MustIDFromString("ea0d83c9081af9500ac9f804101b3fd0a5c293af"),
+ name: "README.md",
+ entryMode: EntryModeBlob,
+ },
+ {
+ ID: MustIDFromString("84b90550547016f73c5dd3f50dea662389e67b6d"),
+ name: "assets",
+ entryMode: EntryModeTree,
+ },
+ },
+ },
+ }
+ for _, testCase := range testCases {
+ entries, err := ParseTreeEntries([]byte(testCase.Input))
+ assert.NoError(t, err)
+ assert.Len(t, entries, len(testCase.Expected))
+ for i, entry := range entries {
+ assert.EqualValues(t, testCase.Expected[i], entry)
+ }
+ }
+}
+
+func TestParseTreeEntriesInvalid(t *testing.T) {
+ // there was a panic: "runtime error: slice bounds out of range" when the input was invalid: #20315
+ entries, err := ParseTreeEntries([]byte("100644 blob ea0d83c9081af9500ac9f804101b3fd0a5c293af"))
+ assert.Error(t, err)
+ assert.Len(t, entries, 0)
+}
diff --git a/modules/git/repo_language_stats_nogogit.go b/modules/git/repo_language_stats_nogogit.go
index d237924f92..7388ef403b 100644
--- a/modules/git/repo_language_stats_nogogit.go
+++ b/modules/git/repo_language_stats_nogogit.go
@@ -57,7 +57,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
tree := commit.Tree
- entries, err := tree.ListEntriesRecursive()
+ entries, err := tree.ListEntriesRecursiveWithSize()
if err != nil {
return nil, err
}
diff --git a/modules/git/tree_gogit.go b/modules/git/tree_gogit.go
index 54f8e140fb..480c5e44da 100644
--- a/modules/git/tree_gogit.go
+++ b/modules/git/tree_gogit.go
@@ -57,8 +57,8 @@ func (t *Tree) ListEntries() (Entries, error) {
return entries, nil
}
-// ListEntriesRecursive returns all entries of current tree recursively including all subtrees
-func (t *Tree) ListEntriesRecursive() (Entries, error) {
+// ListEntriesRecursiveWithSize returns all entries of current tree recursively including all subtrees
+func (t *Tree) ListEntriesRecursiveWithSize() (Entries, error) {
if t.gogitTree == nil {
err := t.loadTreeObject()
if err != nil {
@@ -92,3 +92,8 @@ func (t *Tree) ListEntriesRecursive() (Entries, error) {
return entries, nil
}
+
+// ListEntriesRecursiveFast is the alias of ListEntriesRecursiveWithSize for the gogit version
+func (t *Tree) ListEntriesRecursiveFast() (Entries, error) {
+ return t.ListEntriesRecursiveWithSize()
+}
diff --git a/modules/git/tree_nogogit.go b/modules/git/tree_nogogit.go
index 7defb064a4..d61d19e4c2 100644
--- a/modules/git/tree_nogogit.go
+++ b/modules/git/tree_nogogit.go
@@ -99,13 +99,16 @@ func (t *Tree) ListEntries() (Entries, error) {
return t.entries, err
}
-// ListEntriesRecursive returns all entries of current tree recursively including all subtrees
-func (t *Tree) ListEntriesRecursive() (Entries, error) {
+// listEntriesRecursive returns all entries of current tree recursively including all subtrees
+// extraArgs could be "-l" to get the size, which is slower
+func (t *Tree) listEntriesRecursive(extraArgs ...string) (Entries, error) {
if t.entriesRecursiveParsed {
return t.entriesRecursive, nil
}
- stdout, _, runErr := NewCommand(t.repo.Ctx, "ls-tree", "-t", "-l", "-r", t.ID.String()).RunStdBytes(&RunOpts{Dir: t.repo.Path})
+ args := append([]string{"ls-tree", "-t", "-r"}, extraArgs...)
+ args = append(args, t.ID.String())
+ stdout, _, runErr := NewCommand(t.repo.Ctx, args...).RunStdBytes(&RunOpts{Dir: t.repo.Path})
if runErr != nil {
return nil, runErr
}
@@ -118,3 +121,13 @@ func (t *Tree) ListEntriesRecursive() (Entries, error) {
return t.entriesRecursive, err
}
+
+// ListEntriesRecursiveFast returns all entries of current tree recursively including all subtrees, no size
+func (t *Tree) ListEntriesRecursiveFast() (Entries, error) {
+ return t.listEntriesRecursive()
+}
+
+// ListEntriesRecursiveWithSize returns all entries of current tree recursively including all subtrees, with size
+func (t *Tree) ListEntriesRecursiveWithSize() (Entries, error) {
+ return t.listEntriesRecursive("--long")
+}