diff options
author | Ethan Koenig <ethantkoenig@gmail.com> | 2017-10-26 23:10:54 -0700 |
---|---|---|
committer | Lauris BH <lauris@nix.lv> | 2017-10-27 09:10:54 +0300 |
commit | 5866eb23217de4d29b181e30c26cee28ebc6aedc (patch) | |
tree | f8f67462544c709e8dd6988ca4d55a22cfc3a22c /models | |
parent | 762f1d7237de5727815ebda9593f7f9a20a5a077 (diff) | |
download | gitea-5866eb23217de4d29b181e30c26cee28ebc6aedc.tar.gz gitea-5866eb23217de4d29b181e30c26cee28ebc6aedc.zip |
Code/repo search (#2582)
Indexed search of repository contents (for default branch only)
Diffstat (limited to 'models')
-rw-r--r-- | models/fixtures/repo_indexer_status.yml | 1 |
-rw-r--r-- | models/migrations/migrations.go | 2 |
-rw-r--r-- | models/migrations/v48.go | 25 |
-rw-r--r-- | models/models.go | 24 |
-rw-r--r-- | models/repo.go | 17 |
-rw-r--r-- | models/repo_editor.go | 2 |
-rw-r--r-- | models/repo_indexer.go | 302 |
-rw-r--r-- | models/update.go | 4 |
8 files changed, 367 insertions, 10 deletions
diff --git a/models/fixtures/repo_indexer_status.yml b/models/fixtures/repo_indexer_status.yml new file mode 100644 index 0000000000..ca780a73aa --- /dev/null +++ b/models/fixtures/repo_indexer_status.yml @@ -0,0 +1 @@ +[] # empty diff --git a/models/migrations/migrations.go b/models/migrations/migrations.go index f1cb1a67ed..ba27568fd4 100644 --- a/models/migrations/migrations.go +++ b/models/migrations/migrations.go @@ -144,6 +144,8 @@ var migrations = []Migration{ NewMigration("remove organization watch repositories", removeOrganizationWatchRepo), // v47 -> v48 NewMigration("add deleted branches", addDeletedBranch), + // v48 -> v49 + NewMigration("add repo indexer status", addRepoIndexerStatus), } // Migrate database to current version diff --git a/models/migrations/v48.go b/models/migrations/v48.go new file mode 100644 index 0000000000..6cea66b5ac --- /dev/null +++ b/models/migrations/v48.go @@ -0,0 +1,25 @@ +// Copyright 2017 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. 
+ +package migrations + +import ( + "fmt" + + "github.com/go-xorm/xorm" +) + +func addRepoIndexerStatus(x *xorm.Engine) error { + // RepoIndexerStatus see models/repo_indexer.go + type RepoIndexerStatus struct { + ID int64 `xorm:"pk autoincr"` + RepoID int64 `xorm:"INDEX NOT NULL"` + CommitSha string `xorm:"VARCHAR(40)"` + } + + if err := x.Sync2(new(RepoIndexerStatus)); err != nil { + return fmt.Errorf("Sync2: %v", err) + } + return nil +} diff --git a/models/models.go b/models/models.go index 66a7d97872..853b9799ed 100644 --- a/models/models.go +++ b/models/models.go @@ -13,6 +13,10 @@ import ( "path" "strings" + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/modules/util" + // Needed for the MySQL driver _ "github.com/go-sql-driver/mysql" "github.com/go-xorm/core" @@ -23,9 +27,6 @@ import ( // Needed for the MSSSQL driver _ "github.com/denisenkom/go-mssqldb" - - "code.gitea.io/gitea/modules/log" - "code.gitea.io/gitea/modules/setting" ) // Engine represents a xorm engine or session. 
@@ -115,6 +116,7 @@ func init() { new(Stopwatch), new(TrackedTime), new(DeletedBranch), + new(RepoIndexerStatus), ) gonicNames := []string{"SSL", "UID"} @@ -150,8 +152,13 @@ func LoadConfigs() { DbCfg.Timeout = sec.Key("SQLITE_TIMEOUT").MustInt(500) sec = setting.Cfg.Section("indexer") - setting.Indexer.IssuePath = sec.Key("ISSUE_INDEXER_PATH").MustString("indexers/issues.bleve") + setting.Indexer.IssuePath = absolutePath( + sec.Key("ISSUE_INDEXER_PATH").MustString("indexers/issues.bleve")) + setting.Indexer.RepoIndexerEnabled = sec.Key("REPO_INDEXER_ENABLED").MustBool(false) + setting.Indexer.RepoPath = absolutePath( + sec.Key("REPO_INDEXER_PATH").MustString("indexers/repos.bleve")) setting.Indexer.UpdateQueueLength = sec.Key("UPDATE_BUFFER_LEN").MustInt(20) + setting.Indexer.MaxIndexerFileSize = sec.Key("MAX_FILE_SIZE").MustInt64(512 * 1024 * 1024) } // parsePostgreSQLHostPort parses given input in various forms defined in @@ -336,3 +343,12 @@ func DumpDatabase(filePath string, dbType string) error { } return x.DumpTablesToFile(tbs, filePath) } + +// absolutePath make path absolute if it is relative +func absolutePath(path string) string { + workDir, err := setting.WorkDir() + if err != nil { + log.Fatal(4, "Failed to get work directory: %v", err) + } + return util.EnsureAbsolutePath(path, workDir) +} diff --git a/models/repo.go b/models/repo.go index eca71568ee..8d57ae51a5 100644 --- a/models/repo.go +++ b/models/repo.go @@ -205,10 +205,11 @@ type Repository struct { ExternalMetas map[string]string `xorm:"-"` Units []*RepoUnit `xorm:"-"` - IsFork bool `xorm:"INDEX NOT NULL DEFAULT false"` - ForkID int64 `xorm:"INDEX"` - BaseRepo *Repository `xorm:"-"` - Size int64 `xorm:"NOT NULL DEFAULT 0"` + IsFork bool `xorm:"INDEX NOT NULL DEFAULT false"` + ForkID int64 `xorm:"INDEX"` + BaseRepo *Repository `xorm:"-"` + Size int64 `xorm:"NOT NULL DEFAULT 0"` + IndexerStatus *RepoIndexerStatus `xorm:"-"` Created time.Time `xorm:"-"` CreatedUnix int64 `xorm:"INDEX created"` @@ 
-782,8 +783,10 @@ func UpdateLocalCopyBranch(repoPath, localPath, branch string) error { if err != nil { return fmt.Errorf("git fetch origin: %v", err) } - if err := git.ResetHEAD(localPath, true, "origin/"+branch); err != nil { - return fmt.Errorf("git reset --hard origin/%s: %v", branch, err) + if len(branch) > 0 { + if err := git.ResetHEAD(localPath, true, "origin/"+branch); err != nil { + return fmt.Errorf("git reset --hard origin/%s: %v", branch, err) + } } } return nil @@ -989,6 +992,7 @@ func MigrateRepository(doer, u *User, opts MigrateRepoOptions) (*Repository, err if err = SyncReleasesWithTags(repo, gitRepo); err != nil { log.Error(4, "Failed to synchronize tags to releases for repository: %v", err) } + UpdateRepoIndexer(repo) } if err = repo.UpdateSize(); err != nil { @@ -1883,6 +1887,7 @@ func DeleteRepository(doer *User, uid, repoID int64) error { go HookQueue.Add(repo.ID) } + DeleteRepoFromIndexer(repo) return nil } diff --git a/models/repo_editor.go b/models/repo_editor.go index 692fe8c51b..1adaa2c955 100644 --- a/models/repo_editor.go +++ b/models/repo_editor.go @@ -178,6 +178,8 @@ func (repo *Repository) UpdateRepoFile(doer *User, opts UpdateRepoFileOptions) ( if err != nil { return fmt.Errorf("PushUpdate: %v", err) } + UpdateRepoIndexer(repo) + return nil } diff --git a/models/repo_indexer.go b/models/repo_indexer.go new file mode 100644 index 0000000000..a6b049e081 --- /dev/null +++ b/models/repo_indexer.go @@ -0,0 +1,302 @@ +// Copyright 2017 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. 
+ +package models + +import ( + "io/ioutil" + "os" + "path" + "strconv" + "strings" + + "code.gitea.io/git" + "code.gitea.io/gitea/modules/base" + "code.gitea.io/gitea/modules/indexer" + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/setting" + + "github.com/Unknwon/com" +) + +// RepoIndexerStatus status of a repo's entry in the repo indexer +// For now, implicitly refers to default branch +type RepoIndexerStatus struct { + ID int64 `xorm:"pk autoincr"` + RepoID int64 `xorm:"INDEX"` + CommitSha string `xorm:"VARCHAR(40)"` +} + +func (repo *Repository) getIndexerStatus() error { + if repo.IndexerStatus != nil { + return nil + } + status := &RepoIndexerStatus{RepoID: repo.ID} + has, err := x.Get(status) + if err != nil { + return err + } else if !has { + status.CommitSha = "" + } + repo.IndexerStatus = status + return nil +} + +func (repo *Repository) updateIndexerStatus(sha string) error { + if err := repo.getIndexerStatus(); err != nil { + return err + } + if len(repo.IndexerStatus.CommitSha) == 0 { + repo.IndexerStatus.CommitSha = sha + _, err := x.Insert(repo.IndexerStatus) + return err + } + repo.IndexerStatus.CommitSha = sha + _, err := x.ID(repo.IndexerStatus.ID).Cols("commit_sha"). 
+ Update(repo.IndexerStatus) + return err +} + +type repoIndexerOperation struct { + repo *Repository + deleted bool +} + +var repoIndexerOperationQueue chan repoIndexerOperation + +// InitRepoIndexer initialize the repo indexer +func InitRepoIndexer() { + if !setting.Indexer.RepoIndexerEnabled { + return + } + indexer.InitRepoIndexer(populateRepoIndexer) + repoIndexerOperationQueue = make(chan repoIndexerOperation, setting.Indexer.UpdateQueueLength) + go processRepoIndexerOperationQueue() +} + +// populateRepoIndexer populate the repo indexer with data +func populateRepoIndexer() error { + log.Info("Populating repository indexer (this may take a while)") + for page := 1; ; page++ { + repos, _, err := SearchRepositoryByName(&SearchRepoOptions{ + Page: page, + PageSize: 10, + OrderBy: SearchOrderByID, + Private: true, + }) + if err != nil { + return err + } else if len(repos) == 0 { + return nil + } + for _, repo := range repos { + if err = updateRepoIndexer(repo); err != nil { + // only log error, since this should not prevent + // gitea from starting up + log.Error(4, "updateRepoIndexer: repoID=%d, %v", repo.ID, err) + } + } + } +} + +type updateBatch struct { + updates []indexer.RepoIndexerUpdate +} + +func updateRepoIndexer(repo *Repository) error { + changes, err := getRepoChanges(repo) + if err != nil { + return err + } else if changes == nil { + return nil + } + + batch := indexer.RepoIndexerBatch() + for _, filename := range changes.UpdatedFiles { + if err := addUpdate(filename, repo, batch); err != nil { + return err + } + } + for _, filename := range changes.RemovedFiles { + if err := addDelete(filename, repo, batch); err != nil { + return err + } + } + if err = batch.Flush(); err != nil { + return err + } + return updateLastIndexSync(repo) +} + +// repoChanges changes (file additions/updates/removals) to a repo +type repoChanges struct { + UpdatedFiles []string + RemovedFiles []string +} + +// getRepoChanges returns changes to repo since last indexer 
update +func getRepoChanges(repo *Repository) (*repoChanges, error) { + repoWorkingPool.CheckIn(com.ToStr(repo.ID)) + defer repoWorkingPool.CheckOut(com.ToStr(repo.ID)) + + if err := repo.UpdateLocalCopyBranch(""); err != nil { + return nil, err + } else if !git.IsBranchExist(repo.LocalCopyPath(), repo.DefaultBranch) { + // repo does not have any commits yet, so nothing to update + return nil, nil + } else if err = repo.UpdateLocalCopyBranch(repo.DefaultBranch); err != nil { + return nil, err + } else if err = repo.getIndexerStatus(); err != nil { + return nil, err + } + + if len(repo.IndexerStatus.CommitSha) == 0 { + return genesisChanges(repo) + } + return nonGenesisChanges(repo) +} + +func addUpdate(filename string, repo *Repository, batch *indexer.Batch) error { + filepath := path.Join(repo.LocalCopyPath(), filename) + if stat, err := os.Stat(filepath); err != nil { + return err + } else if stat.Size() > setting.Indexer.MaxIndexerFileSize { + return nil + } + fileContents, err := ioutil.ReadFile(filepath) + if err != nil { + return err + } else if !base.IsTextFile(fileContents) { + return nil + } + return batch.Add(indexer.RepoIndexerUpdate{ + Filepath: filename, + Op: indexer.RepoIndexerOpUpdate, + Data: &indexer.RepoIndexerData{ + RepoID: repo.ID, + Content: string(fileContents), + }, + }) +} + +func addDelete(filename string, repo *Repository, batch *indexer.Batch) error { + return batch.Add(indexer.RepoIndexerUpdate{ + Filepath: filename, + Op: indexer.RepoIndexerOpDelete, + Data: &indexer.RepoIndexerData{ + RepoID: repo.ID, + }, + }) +} + +// genesisChanges get changes to add repo to the indexer for the first time +func genesisChanges(repo *Repository) (*repoChanges, error) { + var changes repoChanges + stdout, err := git.NewCommand("ls-files").RunInDir(repo.LocalCopyPath()) + if err != nil { + return nil, err + } + for _, line := range strings.Split(stdout, "\n") { + filename := strings.TrimSpace(line) + if len(filename) == 0 { + continue + } else if 
filename[0] == '"' { + filename, err = strconv.Unquote(filename) + if err != nil { + return nil, err + } + } + changes.UpdatedFiles = append(changes.UpdatedFiles, filename) + } + return &changes, nil +} + +// nonGenesisChanges get changes since the previous indexer update +func nonGenesisChanges(repo *Repository) (*repoChanges, error) { + diffCmd := git.NewCommand("diff", "--name-status", + repo.IndexerStatus.CommitSha, "HEAD") + stdout, err := diffCmd.RunInDir(repo.LocalCopyPath()) + if err != nil { + // previous commit sha may have been removed by a force push, so + // try rebuilding from scratch + if err = indexer.DeleteRepoFromIndexer(repo.ID); err != nil { + return nil, err + } + return genesisChanges(repo) + } + var changes repoChanges + for _, line := range strings.Split(stdout, "\n") { + line = strings.TrimSpace(line) + if len(line) == 0 { + continue + } + filename := strings.TrimSpace(line[1:]) + if len(filename) == 0 { + continue + } else if filename[0] == '"' { + filename, err = strconv.Unquote(filename) + if err != nil { + return nil, err + } + } + + switch status := line[0]; status { + case 'M', 'A': + changes.UpdatedFiles = append(changes.UpdatedFiles, filename) + case 'D': + changes.RemovedFiles = append(changes.RemovedFiles, filename) + default: + log.Warn("Unrecognized status: %c (line=%s)", status, line) + } + } + return &changes, nil +} + +func updateLastIndexSync(repo *Repository) error { + stdout, err := git.NewCommand("rev-parse", "HEAD").RunInDir(repo.LocalCopyPath()) + if err != nil { + return err + } + sha := strings.TrimSpace(stdout) + return repo.updateIndexerStatus(sha) +} + +func processRepoIndexerOperationQueue() { + for { + op := <-repoIndexerOperationQueue + if op.deleted { + if err := indexer.DeleteRepoFromIndexer(op.repo.ID); err != nil { + log.Error(4, "DeleteRepoFromIndexer: %v", err) + } + } else { + if err := updateRepoIndexer(op.repo); err != nil { + log.Error(4, "updateRepoIndexer: %v", err) + } + } + } +} + +// 
DeleteRepoFromIndexer remove all of a repository's entries from the indexer +func DeleteRepoFromIndexer(repo *Repository) { + addOperationToQueue(repoIndexerOperation{repo: repo, deleted: true}) +} + +// UpdateRepoIndexer update a repository's entries in the indexer +func UpdateRepoIndexer(repo *Repository) { + addOperationToQueue(repoIndexerOperation{repo: repo, deleted: false}) +} + +func addOperationToQueue(op repoIndexerOperation) { + if !setting.Indexer.RepoIndexerEnabled { + return + } + select { + case repoIndexerOperationQueue <- op: + break + default: + go func() { + repoIndexerOperationQueue <- op + }() + } +} diff --git a/models/update.go b/models/update.go index f3bd6cce13..f91559d9ec 100644 --- a/models/update.go +++ b/models/update.go @@ -263,6 +263,10 @@ func pushUpdate(opts PushUpdateOptions) (repo *Repository, err error) { commits = ListToPushCommits(l) } + if opts.RefFullName == git.BranchPrefix+repo.DefaultBranch { + UpdateRepoIndexer(repo) + } + if err := CommitRepoAction(CommitRepoActionOptions{ PusherName: opts.PusherName, RepoOwnerID: owner.ID, |