]> source.dussan.org Git - gitea.git/commitdiff
Reduce calls to git cat-file -s (#14682)
authorzeripath <art27@cantab.net>
Wed, 17 Feb 2021 21:32:25 +0000 (21:32 +0000)
committerGitHub <noreply@github.com>
Wed, 17 Feb 2021 21:32:25 +0000 (22:32 +0100)
* Reduce calls to git cat-file -s

There are multiple places where there are repeated calls to git cat-file
-s due to the blobs not being created with their size.

Through judicious use of git ls-tree -l and slight adjustments to the
indexer code we can avoid a lot of these calls.

* simplify by always expecting the long format

* Also always set the sized field and tell the indexer the update is sized

modules/git/parse_gogit.go
modules/git/parse_gogit_test.go
modules/git/parse_nogogit.go
modules/git/parse_nogogit_test.go [new file with mode: 0644]
modules/git/tree_entry_nogogit.go
modules/git/tree_nogogit.go
modules/indexer/code/bleve.go
modules/indexer/code/elastic_search.go
modules/indexer/code/git.go

index 434fb4160f75b19b517ad3316bb0a37464142f23..a50ebec3dd72be3b4235754bc5977444451ba841 100644 (file)
@@ -10,12 +10,13 @@ import (
        "bytes"
        "fmt"
        "strconv"
+       "strings"
 
        "github.com/go-git/go-git/v5/plumbing/filemode"
        "github.com/go-git/go-git/v5/plumbing/object"
 )
 
-// ParseTreeEntries parses the output of a `git ls-tree` command.
+// ParseTreeEntries parses the output of a `git ls-tree -l` command.
 func ParseTreeEntries(data []byte) ([]*TreeEntry, error) {
        return parseTreeEntries(data, nil)
 }
@@ -23,7 +24,7 @@ func ParseTreeEntries(data []byte) ([]*TreeEntry, error) {
 func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) {
        entries := make([]*TreeEntry, 0, 10)
        for pos := 0; pos < len(data); {
-               // expect line to be of the form "<mode> <type> <sha>\t<filename>"
+               // expect line to be of the form "<mode> <type> <sha> <space-padded-size>\t<filename>"
                entry := new(TreeEntry)
                entry.gogitTreeEntry = &object.TreeEntry{}
                entry.ptree = ptree
@@ -61,7 +62,16 @@ func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) {
                entry.gogitTreeEntry.Hash = id
                pos += 41 // skip over sha and trailing space
 
-               end := pos + bytes.IndexByte(data[pos:], '\n')
+               end := pos + bytes.IndexByte(data[pos:], '\t')
+               if end < pos {
+                       return nil, fmt.Errorf("Invalid ls-tree -l output: %s", string(data))
+               }
+               entry.size, _ = strconv.ParseInt(strings.TrimSpace(string(data[pos:end])), 10, 64)
+               entry.sized = true
+
+               pos = end + 1
+
+               end = pos + bytes.IndexByte(data[pos:], '\n')
                if end < pos {
                        return nil, fmt.Errorf("Invalid ls-tree output: %s", string(data))
                }
index cf38c29932f5aabc6bc8e795467b41d2d3302ef6..c6374133c0951f0f1b4ceacee8caf354e2eefdc2 100644 (file)
@@ -24,7 +24,7 @@ func TestParseTreeEntries(t *testing.T) {
                        Expected: []*TreeEntry{},
                },
                {
-                       Input: "100644 blob 61ab7345a1a3bbc590068ccae37b8515cfc5843c\texample/file2.txt\n",
+                       Input: "100644 blob 61ab7345a1a3bbc590068ccae37b8515cfc5843c    1022\texample/file2.txt\n",
                        Expected: []*TreeEntry{
                                {
                                        ID: MustIDFromString("61ab7345a1a3bbc590068ccae37b8515cfc5843c"),
@@ -33,12 +33,14 @@ func TestParseTreeEntries(t *testing.T) {
                                                Name: "example/file2.txt",
                                                Mode: filemode.Regular,
                                        },
+                                       size:  1022,
+                                       sized: true,
                                },
                        },
                },
                {
-                       Input: "120000 blob 61ab7345a1a3bbc590068ccae37b8515cfc5843c\t\"example/\\n.txt\"\n" +
-                               "040000 tree 1d01fb729fb0db5881daaa6030f9f2d3cd3d5ae8\texample\n",
+                       Input: "120000 blob 61ab7345a1a3bbc590068ccae37b8515cfc5843c  234131\t\"example/\\n.txt\"\n" +
+                               "040000 tree 1d01fb729fb0db5881daaa6030f9f2d3cd3d5ae8       -\texample\n",
                        Expected: []*TreeEntry{
                                {
                                        ID: MustIDFromString("61ab7345a1a3bbc590068ccae37b8515cfc5843c"),
@@ -47,9 +49,12 @@ func TestParseTreeEntries(t *testing.T) {
                                                Name: "example/\n.txt",
                                                Mode: filemode.Symlink,
                                        },
+                                       size:  234131,
+                                       sized: true,
                                },
                                {
-                                       ID: MustIDFromString("1d01fb729fb0db5881daaa6030f9f2d3cd3d5ae8"),
+                                       ID:    MustIDFromString("1d01fb729fb0db5881daaa6030f9f2d3cd3d5ae8"),
+                                       sized: true,
                                        gogitTreeEntry: &object.TreeEntry{
                                                Hash: MustIDFromString("1d01fb729fb0db5881daaa6030f9f2d3cd3d5ae8"),
                                                Name: "example",
index 26dd700af732a72e7963291bb88f7eb9330d0efd..e9e93f66fdc1128c1d83e1a5c5530fbd4745f575 100644 (file)
@@ -10,9 +10,10 @@ import (
        "bytes"
        "fmt"
        "strconv"
+       "strings"
 )
 
-// ParseTreeEntries parses the output of a `git ls-tree` command.
+// ParseTreeEntries parses the output of a `git ls-tree -l` command.
 func ParseTreeEntries(data []byte) ([]*TreeEntry, error) {
        return parseTreeEntries(data, nil)
 }
@@ -20,7 +21,7 @@ func ParseTreeEntries(data []byte) ([]*TreeEntry, error) {
 func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) {
        entries := make([]*TreeEntry, 0, 10)
        for pos := 0; pos < len(data); {
-               // expect line to be of the form "<mode> <type> <sha>\t<filename>"
+               // expect line to be of the form "<mode> <type> <sha> <space-padded-size>\t<filename>"
                entry := new(TreeEntry)
                entry.ptree = ptree
                if pos+6 > len(data) {
@@ -56,7 +57,16 @@ func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) {
                entry.ID = id
                pos += 41 // skip over sha and trailing space
 
-               end := pos + bytes.IndexByte(data[pos:], '\n')
+               end := pos + bytes.IndexByte(data[pos:], '\t')
+               if end < pos {
+                       return nil, fmt.Errorf("Invalid ls-tree -l output: %s", string(data))
+               }
+               entry.size, _ = strconv.ParseInt(strings.TrimSpace(string(data[pos:end])), 10, 64)
+               entry.sized = true
+
+               pos = end + 1
+
+               end = pos + bytes.IndexByte(data[pos:], '\n')
                if end < pos {
                        return nil, fmt.Errorf("Invalid ls-tree output: %s", string(data))
                }
diff --git a/modules/git/parse_nogogit_test.go b/modules/git/parse_nogogit_test.go
new file mode 100644 (file)
index 0000000..a9e7dcc
--- /dev/null
@@ -0,0 +1,70 @@
+// Copyright 2021 The Gitea Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+// +build !gogit
+
+package git
+
+import (
+       "testing"
+
+       "github.com/stretchr/testify/assert"
+)
+
+func TestParseTreeEntries(t *testing.T) {
+
+       testCases := []struct {
+               Input    string
+               Expected []*TreeEntry
+       }{
+               {
+                       Input: `100644 blob ea0d83c9081af9500ac9f804101b3fd0a5c293af    8218    README.md
+100644 blob 037f27dc9d353ae4fd50f0474b2194c593914e35    4681   README_ZH.md
+100644 blob 9846a94f7e8350a916632929d0fda38c90dd2ca8     429   SECURITY.md
+040000 tree 84b90550547016f73c5dd3f50dea662389e67b6d       -   assets
+`,
+                       Expected: []*TreeEntry{
+                               {
+                                       ID:        MustIDFromString("ea0d83c9081af9500ac9f804101b3fd0a5c293af"),
+                                       name:      "README.md",
+                                       entryMode: EntryModeBlob,
+                                       size:      8218,
+                                       sized:     true,
+                               },
+                               {
+                                       ID:        MustIDFromString("037f27dc9d353ae4fd50f0474b2194c593914e35"),
+                                       name:      "README_ZH.md",
+                                       entryMode: EntryModeBlob,
+                                       size:      4681,
+                                       sized:     true,
+                               },
+                               {
+                                       ID:        MustIDFromString("9846a94f7e8350a916632929d0fda38c90dd2ca8"),
+                                       name:      "SECURITY.md",
+                                       entryMode: EntryModeBlob,
+                                       size:      429,
+                                       sized:     true,
+                               },
+                               {
+                                       ID:        MustIDFromString("84b90550547016f73c5dd3f50dea662389e67b6d"),
+                                       name:      "assets",
+                                       entryMode: EntryModeTree,
+                                       sized:     true,
+                               },
+                       },
+               },
+       }
+       for _, testCase := range testCases {
+               entries, err := ParseTreeEntries([]byte(testCase.Input))
+               assert.NoError(t, err)
+               assert.EqualValues(t, len(testCase.Expected), len(entries))
+               for i, entry := range entries {
+                       assert.EqualValues(t, testCase.Expected[i].ID, entry.ID)
+                       assert.EqualValues(t, testCase.Expected[i].name, entry.name)
+                       assert.EqualValues(t, testCase.Expected[i].entryMode, entry.entryMode)
+                       assert.EqualValues(t, testCase.Expected[i].sized, entry.sized)
+                       assert.EqualValues(t, testCase.Expected[i].size, entry.size)
+               }
+       }
+}
index f18daee7788f83ad68bd3c9fce71dae10277adee..fd60de36f528dc8b74e91a8b21a160fb32fbf279 100644 (file)
@@ -87,5 +87,7 @@ func (te *TreeEntry) Blob() *Blob {
                ID:       te.ID,
                repoPath: te.ptree.repo.Path,
                name:     te.Name(),
+               size:     te.size,
+               gotSize:  te.sized,
        }
 }
index e78115b77765ff3d76b294c3e55ab7fb7384b689..3ebdf10631dbcd3360ce5980602e8d23748207e6 100644 (file)
@@ -32,7 +32,7 @@ func (t *Tree) ListEntries() (Entries, error) {
                return t.entries, nil
        }
 
-       stdout, err := NewCommand("ls-tree", t.ID.String()).RunInDirBytes(t.repo.Path)
+       stdout, err := NewCommand("ls-tree", "-l", t.ID.String()).RunInDirBytes(t.repo.Path)
        if err != nil {
                if strings.Contains(err.Error(), "fatal: Not a valid object name") || strings.Contains(err.Error(), "fatal: not a tree object") {
                        return nil, ErrNotExist{
@@ -55,7 +55,7 @@ func (t *Tree) ListEntriesRecursive() (Entries, error) {
        if t.entriesRecursiveParsed {
                return t.entriesRecursive, nil
        }
-       stdout, err := NewCommand("ls-tree", "-t", "-r", t.ID.String()).RunInDirBytes(t.repo.Path)
+       stdout, err := NewCommand("ls-tree", "-t", "-l", "-r", t.ID.String()).RunInDirBytes(t.repo.Path)
        if err != nil {
                return nil, err
        }
index 826efde4c1da5b2a021d3f67d8e3094fc8ed1332..1ebc74c43aa5f44efcc210d705044fc159dda196 100644 (file)
@@ -179,14 +179,20 @@ func (b *BleveIndexer) addUpdate(commitSha string, update fileUpdate, repo *mode
                return nil
        }
 
-       stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha).
-               RunInDir(repo.RepoPath())
-       if err != nil {
-               return err
+       size := update.Size
+
+       if !update.Sized {
+               stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha).
+                       RunInDir(repo.RepoPath())
+               if err != nil {
+                       return err
+               }
+               if size, err = strconv.ParseInt(strings.TrimSpace(stdout), 10, 64); err != nil {
+                       return fmt.Errorf("Misformatted git cat-file output: %v", err)
+               }
        }
-       if size, err := strconv.Atoi(strings.TrimSpace(stdout)); err != nil {
-               return fmt.Errorf("Misformatted git cat-file output: %v", err)
-       } else if int64(size) > setting.Indexer.MaxIndexerFileSize {
+
+       if size > setting.Indexer.MaxIndexerFileSize {
                return b.addDelete(update.Filename, repo, batch)
        }
 
index f81dbb34d42be57cdf64dd275aab00b8f2954d73..c9d604b694b898522e8548f557c8b0c26acac773 100644 (file)
@@ -178,14 +178,20 @@ func (b *ElasticSearchIndexer) addUpdate(sha string, update fileUpdate, repo *mo
                return nil, nil
        }
 
-       stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha).
-               RunInDir(repo.RepoPath())
-       if err != nil {
-               return nil, err
+       size := update.Size
+
+       if !update.Sized {
+               stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha).
+                       RunInDir(repo.RepoPath())
+               if err != nil {
+                       return nil, err
+               }
+               if size, err = strconv.ParseInt(strings.TrimSpace(stdout), 10, 64); err != nil {
+                       return nil, fmt.Errorf("Misformatted git cat-file output: %v", err)
+               }
        }
-       if size, err := strconv.Atoi(strings.TrimSpace(stdout)); err != nil {
-               return nil, fmt.Errorf("Misformatted git cat-file output: %v", err)
-       } else if int64(size) > setting.Indexer.MaxIndexerFileSize {
+
+       if size > setting.Indexer.MaxIndexerFileSize {
                return []elastic.BulkableRequest{b.addDelete(update.Filename, repo)}, nil
        }
 
index 37ab5ac3d32625ca03031e81733328698ea4dd56..919d7854062f4ca268e1b847bc39dc22e2020d98 100644 (file)
@@ -17,6 +17,8 @@ import (
 type fileUpdate struct {
        Filename string
        BlobSha  string
+       Size     int64
+       Sized    bool
 }
 
 // repoChanges changes (file additions/updates/removals) to a repo
@@ -77,6 +79,8 @@ func parseGitLsTreeOutput(stdout []byte) ([]fileUpdate, error) {
                        updates[idxCount] = fileUpdate{
                                Filename: entry.Name(),
                                BlobSha:  entry.ID.String(),
+                               Size:     entry.Size(),
+                               Sized:    true,
                        }
                        idxCount++
                }
@@ -87,7 +91,7 @@ func parseGitLsTreeOutput(stdout []byte) ([]fileUpdate, error) {
 // genesisChanges get changes to add repo to the indexer for the first time
 func genesisChanges(repo *models.Repository, revision string) (*repoChanges, error) {
        var changes repoChanges
-       stdout, err := git.NewCommand("ls-tree", "--full-tree", "-r", revision).
+       stdout, err := git.NewCommand("ls-tree", "--full-tree", "-l", "-r", revision).
                RunInDirBytes(repo.RepoPath())
        if err != nil {
                return nil, err
@@ -162,7 +166,7 @@ func nonGenesisChanges(repo *models.Repository, revision string) (*repoChanges,
                }
        }
 
-       cmd := git.NewCommand("ls-tree", "--full-tree", revision, "--")
+       cmd := git.NewCommand("ls-tree", "--full-tree", "-l", revision, "--")
        cmd.AddArguments(updatedFilenames...)
        lsTreeStdout, err := cmd.RunInDirBytes(repo.RepoPath())
        if err != nil {