about | summary | refs | log | tree | commit | diff | stats
path: root/models/repo_indexer.go
diff options
context:
space:
mode:
author: Ethan Koenig <ethantkoenig@gmail.com> 2018-02-05 00:39:51 -0800
committer: Lauris BH <lauris@nix.lv> 2018-02-05 10:39:51 +0200
commit: b16c84de7b402c03ffda2dc9dd1ebc4f89928d0f (patch)
tree: 5c8e07d80e8c781751a7dc985ca4cb1ab855824d /models/repo_indexer.go
parent: 17655cdf1b409521262d5d54eb19884d307c47ce (diff)
download: gitea-b16c84de7b402c03ffda2dc9dd1ebc4f89928d0f.tar.gz
download: gitea-b16c84de7b402c03ffda2dc9dd1ebc4f89928d0f.zip
Fix synchronization bug in repo indexer (#3455)
Diffstat (limited to 'models/repo_indexer.go')
-rw-r--r-- models/repo_indexer.go 162
1 files changed, 100 insertions, 62 deletions
diff --git a/models/repo_indexer.go b/models/repo_indexer.go
index be409f5162..fee4784799 100644
--- a/models/repo_indexer.go
+++ b/models/repo_indexer.go
@@ -5,9 +5,7 @@
package models
import (
- "io/ioutil"
- "os"
- "path"
+ "fmt"
"strconv"
"strings"
@@ -16,8 +14,6 @@ import (
"code.gitea.io/gitea/modules/indexer"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
-
- "github.com/Unknwon/com"
)
// RepoIndexerStatus status of a repo's entry in the repo indexer
@@ -132,7 +128,11 @@ func populateRepoIndexer(maxRepoID int64) {
}
func updateRepoIndexer(repo *Repository) error {
- changes, err := getRepoChanges(repo)
+ sha, err := getDefaultBranchSha(repo)
+ if err != nil {
+ return err
+ }
+ changes, err := getRepoChanges(repo, sha)
if err != nil {
return err
} else if changes == nil {
@@ -140,12 +140,12 @@ func updateRepoIndexer(repo *Repository) error {
}
batch := indexer.RepoIndexerBatch()
- for _, filename := range changes.UpdatedFiles {
- if err := addUpdate(filename, repo, batch); err != nil {
+ for _, update := range changes.Updates {
+ if err := addUpdate(update, repo, batch); err != nil {
return err
}
}
- for _, filename := range changes.RemovedFiles {
+ for _, filename := range changes.RemovedFilenames {
if err := addDelete(filename, repo, batch); err != nil {
return err
}
@@ -153,56 +153,61 @@ func updateRepoIndexer(repo *Repository) error {
if err = batch.Flush(); err != nil {
return err
}
- return updateLastIndexSync(repo)
+ return repo.updateIndexerStatus(sha)
}
// repoChanges changes (file additions/updates/removals) to a repo
type repoChanges struct {
- UpdatedFiles []string
- RemovedFiles []string
+ Updates []fileUpdate
+ RemovedFilenames []string
}
-// getRepoChanges returns changes to repo since last indexer update
-func getRepoChanges(repo *Repository) (*repoChanges, error) {
- repoWorkingPool.CheckIn(com.ToStr(repo.ID))
- defer repoWorkingPool.CheckOut(com.ToStr(repo.ID))
+type fileUpdate struct {
+ Filename string
+ BlobSha string
+}
- if err := repo.UpdateLocalCopyBranch(""); err != nil {
- return nil, err
- } else if !git.IsBranchExist(repo.LocalCopyPath(), repo.DefaultBranch) {
- // repo does not have any commits yet, so nothing to update
- return nil, nil
- } else if err = repo.UpdateLocalCopyBranch(repo.DefaultBranch); err != nil {
- return nil, err
- } else if err = repo.getIndexerStatus(); err != nil {
+func getDefaultBranchSha(repo *Repository) (string, error) {
+ stdout, err := git.NewCommand("show-ref", "-s", repo.DefaultBranch).RunInDir(repo.RepoPath())
+ if err != nil {
+ return "", err
+ }
+ return strings.TrimSpace(stdout), nil
+}
+
+// getRepoChanges returns changes to repo since last indexer update
+func getRepoChanges(repo *Repository, revision string) (*repoChanges, error) {
+ if err := repo.getIndexerStatus(); err != nil {
return nil, err
}
if len(repo.IndexerStatus.CommitSha) == 0 {
- return genesisChanges(repo)
+ return genesisChanges(repo, revision)
}
- return nonGenesisChanges(repo)
+ return nonGenesisChanges(repo, revision)
}
-func addUpdate(filename string, repo *Repository, batch *indexer.Batch) error {
- filepath := path.Join(repo.LocalCopyPath(), filename)
- if stat, err := os.Stat(filepath); err != nil {
+func addUpdate(update fileUpdate, repo *Repository, batch *indexer.Batch) error {
+ stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha).
+ RunInDir(repo.RepoPath())
+ if err != nil {
return err
- } else if stat.Size() > setting.Indexer.MaxIndexerFileSize {
- return nil
- } else if stat.IsDir() {
- // file could actually be a directory, if it is the root of a submodule.
- // We do not index submodule contents, so don't do anything.
+ }
+ if size, err := strconv.Atoi(strings.TrimSpace(stdout)); err != nil {
+ return fmt.Errorf("Misformatted git cat-file output: %v", err)
+ } else if int64(size) > setting.Indexer.MaxIndexerFileSize {
return nil
}
- fileContents, err := ioutil.ReadFile(filepath)
+
+ fileContents, err := git.NewCommand("cat-file", "blob", update.BlobSha).
+ RunInDirBytes(repo.RepoPath())
if err != nil {
return err
} else if !base.IsTextFile(fileContents) {
return nil
}
return batch.Add(indexer.RepoIndexerUpdate{
- Filepath: filename,
+ Filepath: update.Filename,
Op: indexer.RepoIndexerOpUpdate,
Data: &indexer.RepoIndexerData{
RepoID: repo.ID,
@@ -221,42 +226,76 @@ func addDelete(filename string, repo *Repository, batch *indexer.Batch) error {
})
}
-// genesisChanges get changes to add repo to the indexer for the first time
-func genesisChanges(repo *Repository) (*repoChanges, error) {
- var changes repoChanges
- stdout, err := git.NewCommand("ls-files").RunInDir(repo.LocalCopyPath())
- if err != nil {
- return nil, err
- }
- for _, line := range strings.Split(stdout, "\n") {
- filename := strings.TrimSpace(line)
- if len(filename) == 0 {
+// parseGitLsTreeOutput parses the output of a `git ls-tree -r --full-name` command
+func parseGitLsTreeOutput(stdout string) ([]fileUpdate, error) {
+ lines := strings.Split(stdout, "\n")
+ updates := make([]fileUpdate, 0, len(lines))
+ for _, line := range lines {
+ // expect line to be "<mode> <object-type> <object-sha>\t<filename>"
+ line = strings.TrimSpace(line)
+ if len(line) == 0 {
continue
- } else if filename[0] == '"' {
+ }
+ firstSpaceIndex := strings.IndexByte(line, ' ')
+ if firstSpaceIndex < 0 {
+ log.Error(4, "Misformatted git ls-tree output: %s", line)
+ continue
+ }
+ tabIndex := strings.IndexByte(line, '\t')
+ if tabIndex < 42+firstSpaceIndex || tabIndex == len(line)-1 {
+ log.Error(4, "Misformatted git ls-tree output: %s", line)
+ continue
+ }
+ if objectType := line[firstSpaceIndex+1 : tabIndex-41]; objectType != "blob" {
+ // submodules appear as commit objects, we do not index submodules
+ continue
+ }
+
+ blobSha := line[tabIndex-40 : tabIndex]
+ filename := line[tabIndex+1:]
+ if filename[0] == '"' {
+ var err error
filename, err = strconv.Unquote(filename)
if err != nil {
return nil, err
}
}
- changes.UpdatedFiles = append(changes.UpdatedFiles, filename)
+ updates = append(updates, fileUpdate{
+ Filename: filename,
+ BlobSha: blobSha,
+ })
+ }
+ return updates, nil
+}
+
+// genesisChanges get changes to add repo to the indexer for the first time
+func genesisChanges(repo *Repository, revision string) (*repoChanges, error) {
+ var changes repoChanges
+ stdout, err := git.NewCommand("ls-tree", "--full-tree", "-r", revision).
+ RunInDir(repo.RepoPath())
+ if err != nil {
+ return nil, err
}
- return &changes, nil
+ changes.Updates, err = parseGitLsTreeOutput(stdout)
+ return &changes, err
}
// nonGenesisChanges get changes since the previous indexer update
-func nonGenesisChanges(repo *Repository) (*repoChanges, error) {
+func nonGenesisChanges(repo *Repository, revision string) (*repoChanges, error) {
diffCmd := git.NewCommand("diff", "--name-status",
- repo.IndexerStatus.CommitSha, "HEAD")
- stdout, err := diffCmd.RunInDir(repo.LocalCopyPath())
+ repo.IndexerStatus.CommitSha, revision)
+ stdout, err := diffCmd.RunInDir(repo.RepoPath())
if err != nil {
// previous commit sha may have been removed by a force push, so
// try rebuilding from scratch
+ log.Warn("git diff: %v", err)
if err = indexer.DeleteRepoFromIndexer(repo.ID); err != nil {
return nil, err
}
- return genesisChanges(repo)
+ return genesisChanges(repo, revision)
}
var changes repoChanges
+ updatedFilenames := make([]string, 0, 10)
for _, line := range strings.Split(stdout, "\n") {
line = strings.TrimSpace(line)
if len(line) == 0 {
@@ -274,23 +313,22 @@ func nonGenesisChanges(repo *Repository) (*repoChanges, error) {
switch status := line[0]; status {
case 'M', 'A':
- changes.UpdatedFiles = append(changes.UpdatedFiles, filename)
+ updatedFilenames = append(updatedFilenames, filename)
case 'D':
- changes.RemovedFiles = append(changes.RemovedFiles, filename)
+ changes.RemovedFilenames = append(changes.RemovedFilenames, filename)
default:
log.Warn("Unrecognized status: %c (line=%s)", status, line)
}
}
- return &changes, nil
-}
-func updateLastIndexSync(repo *Repository) error {
- stdout, err := git.NewCommand("rev-parse", "HEAD").RunInDir(repo.LocalCopyPath())
+ cmd := git.NewCommand("ls-tree", "--full-tree", revision, "--")
+ cmd.AddArguments(updatedFilenames...)
+ stdout, err = cmd.RunInDir(repo.RepoPath())
if err != nil {
- return err
+ return nil, err
}
- sha := strings.TrimSpace(stdout)
- return repo.updateIndexerStatus(sha)
+ changes.Updates, err = parseGitLsTreeOutput(stdout)
+ return &changes, err
}
func processRepoIndexerOperationQueue() {