summaryrefslogtreecommitdiffstats
path: root/models
diff options
context:
space:
mode:
authorEthan Koenig <ethantkoenig@gmail.com>2017-10-26 23:10:54 -0700
committerLauris BH <lauris@nix.lv>2017-10-27 09:10:54 +0300
commit5866eb23217de4d29b181e30c26cee28ebc6aedc (patch)
treef8f67462544c709e8dd6988ca4d55a22cfc3a22c /models
parent762f1d7237de5727815ebda9593f7f9a20a5a077 (diff)
downloadgitea-5866eb23217de4d29b181e30c26cee28ebc6aedc.tar.gz
gitea-5866eb23217de4d29b181e30c26cee28ebc6aedc.zip
Code/repo search (#2582)
Indexed search of repository contents (for default branch only)
Diffstat (limited to 'models')
-rw-r--r--models/fixtures/repo_indexer_status.yml1
-rw-r--r--models/migrations/migrations.go2
-rw-r--r--models/migrations/v48.go25
-rw-r--r--models/models.go24
-rw-r--r--models/repo.go17
-rw-r--r--models/repo_editor.go2
-rw-r--r--models/repo_indexer.go302
-rw-r--r--models/update.go4
8 files changed, 367 insertions, 10 deletions
diff --git a/models/fixtures/repo_indexer_status.yml b/models/fixtures/repo_indexer_status.yml
new file mode 100644
index 0000000000..ca780a73aa
--- /dev/null
+++ b/models/fixtures/repo_indexer_status.yml
@@ -0,0 +1 @@
+[] # empty
diff --git a/models/migrations/migrations.go b/models/migrations/migrations.go
index f1cb1a67ed..ba27568fd4 100644
--- a/models/migrations/migrations.go
+++ b/models/migrations/migrations.go
@@ -144,6 +144,8 @@ var migrations = []Migration{
NewMigration("remove organization watch repositories", removeOrganizationWatchRepo),
// v47 -> v48
NewMigration("add deleted branches", addDeletedBranch),
+ // v48 -> v49
+ NewMigration("add repo indexer status", addRepoIndexerStatus),
}
// Migrate database to current version
diff --git a/models/migrations/v48.go b/models/migrations/v48.go
new file mode 100644
index 0000000000..6cea66b5ac
--- /dev/null
+++ b/models/migrations/v48.go
@@ -0,0 +1,25 @@
+// Copyright 2017 The Gitea Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package migrations
+
+import (
+ "fmt"
+
+ "github.com/go-xorm/xorm"
+)
+
+// addRepoIndexerStatus syncs the database schema for the RepoIndexerStatus
+// table. Any failure reported by Sync2 is returned with a "Sync2: " prefix.
+func addRepoIndexerStatus(x *xorm.Engine) error {
+	// RepoIndexerStatus is a migration-local copy of the model; see
+	// models/repo_indexer.go
+	type RepoIndexerStatus struct {
+		ID        int64  `xorm:"pk autoincr"`
+		RepoID    int64  `xorm:"INDEX NOT NULL"`
+		CommitSha string `xorm:"VARCHAR(40)"`
+	}
+
+	err := x.Sync2(new(RepoIndexerStatus))
+	if err == nil {
+		return nil
+	}
+	return fmt.Errorf("Sync2: %v", err)
+}
diff --git a/models/models.go b/models/models.go
index 66a7d97872..853b9799ed 100644
--- a/models/models.go
+++ b/models/models.go
@@ -13,6 +13,10 @@ import (
"path"
"strings"
+ "code.gitea.io/gitea/modules/log"
+ "code.gitea.io/gitea/modules/setting"
+ "code.gitea.io/gitea/modules/util"
+
// Needed for the MySQL driver
_ "github.com/go-sql-driver/mysql"
"github.com/go-xorm/core"
@@ -23,9 +27,6 @@ import (
// Needed for the MSSSQL driver
_ "github.com/denisenkom/go-mssqldb"
-
- "code.gitea.io/gitea/modules/log"
- "code.gitea.io/gitea/modules/setting"
)
// Engine represents a xorm engine or session.
@@ -115,6 +116,7 @@ func init() {
new(Stopwatch),
new(TrackedTime),
new(DeletedBranch),
+ new(RepoIndexerStatus),
)
gonicNames := []string{"SSL", "UID"}
@@ -150,8 +152,13 @@ func LoadConfigs() {
DbCfg.Timeout = sec.Key("SQLITE_TIMEOUT").MustInt(500)
sec = setting.Cfg.Section("indexer")
- setting.Indexer.IssuePath = sec.Key("ISSUE_INDEXER_PATH").MustString("indexers/issues.bleve")
+ setting.Indexer.IssuePath = absolutePath(
+ sec.Key("ISSUE_INDEXER_PATH").MustString("indexers/issues.bleve"))
+ setting.Indexer.RepoIndexerEnabled = sec.Key("REPO_INDEXER_ENABLED").MustBool(false)
+ setting.Indexer.RepoPath = absolutePath(
+ sec.Key("REPO_INDEXER_PATH").MustString("indexers/repos.bleve"))
setting.Indexer.UpdateQueueLength = sec.Key("UPDATE_BUFFER_LEN").MustInt(20)
+ setting.Indexer.MaxIndexerFileSize = sec.Key("MAX_FILE_SIZE").MustInt64(512 * 1024 * 1024)
}
// parsePostgreSQLHostPort parses given input in various forms defined in
@@ -336,3 +343,12 @@ func DumpDatabase(filePath string, dbType string) error {
}
return x.DumpTablesToFile(tbs, filePath)
}
+
+// absolutePath returns path made absolute by util.EnsureAbsolutePath, using
+// the application work directory as the base. Failing to determine the work
+// directory is reported through log.Fatal.
+func absolutePath(path string) string {
+	baseDir, err := setting.WorkDir()
+	if err != nil {
+		log.Fatal(4, "Failed to get work directory: %v", err)
+	}
+	resolved := util.EnsureAbsolutePath(path, baseDir)
+	return resolved
+}
diff --git a/models/repo.go b/models/repo.go
index eca71568ee..8d57ae51a5 100644
--- a/models/repo.go
+++ b/models/repo.go
@@ -205,10 +205,11 @@ type Repository struct {
ExternalMetas map[string]string `xorm:"-"`
Units []*RepoUnit `xorm:"-"`
- IsFork bool `xorm:"INDEX NOT NULL DEFAULT false"`
- ForkID int64 `xorm:"INDEX"`
- BaseRepo *Repository `xorm:"-"`
- Size int64 `xorm:"NOT NULL DEFAULT 0"`
+ IsFork bool `xorm:"INDEX NOT NULL DEFAULT false"`
+ ForkID int64 `xorm:"INDEX"`
+ BaseRepo *Repository `xorm:"-"`
+ Size int64 `xorm:"NOT NULL DEFAULT 0"`
+ IndexerStatus *RepoIndexerStatus `xorm:"-"`
Created time.Time `xorm:"-"`
CreatedUnix int64 `xorm:"INDEX created"`
@@ -782,8 +783,10 @@ func UpdateLocalCopyBranch(repoPath, localPath, branch string) error {
if err != nil {
return fmt.Errorf("git fetch origin: %v", err)
}
- if err := git.ResetHEAD(localPath, true, "origin/"+branch); err != nil {
- return fmt.Errorf("git reset --hard origin/%s: %v", branch, err)
+ if len(branch) > 0 {
+ if err := git.ResetHEAD(localPath, true, "origin/"+branch); err != nil {
+ return fmt.Errorf("git reset --hard origin/%s: %v", branch, err)
+ }
}
}
return nil
@@ -989,6 +992,7 @@ func MigrateRepository(doer, u *User, opts MigrateRepoOptions) (*Repository, err
if err = SyncReleasesWithTags(repo, gitRepo); err != nil {
log.Error(4, "Failed to synchronize tags to releases for repository: %v", err)
}
+ UpdateRepoIndexer(repo)
}
if err = repo.UpdateSize(); err != nil {
@@ -1883,6 +1887,7 @@ func DeleteRepository(doer *User, uid, repoID int64) error {
go HookQueue.Add(repo.ID)
}
+ DeleteRepoFromIndexer(repo)
return nil
}
diff --git a/models/repo_editor.go b/models/repo_editor.go
index 692fe8c51b..1adaa2c955 100644
--- a/models/repo_editor.go
+++ b/models/repo_editor.go
@@ -178,6 +178,8 @@ func (repo *Repository) UpdateRepoFile(doer *User, opts UpdateRepoFileOptions) (
if err != nil {
return fmt.Errorf("PushUpdate: %v", err)
}
+ UpdateRepoIndexer(repo)
+
return nil
}
diff --git a/models/repo_indexer.go b/models/repo_indexer.go
new file mode 100644
index 0000000000..a6b049e081
--- /dev/null
+++ b/models/repo_indexer.go
@@ -0,0 +1,302 @@
+// Copyright 2017 The Gitea Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package models
+
+import (
+ "io/ioutil"
+ "os"
+ "path"
+ "strconv"
+ "strings"
+
+ "code.gitea.io/git"
+ "code.gitea.io/gitea/modules/base"
+ "code.gitea.io/gitea/modules/indexer"
+ "code.gitea.io/gitea/modules/log"
+ "code.gitea.io/gitea/modules/setting"
+
+ "github.com/Unknwon/com"
+)
+
+// RepoIndexerStatus records the status of a repository's entry in the repo
+// indexer. For now, it implicitly refers to the default branch.
+// NOTE(review): the migration in models/migrations/v48.go declares RepoID as
+// "INDEX NOT NULL" while this model uses "INDEX" — confirm which is intended.
+type RepoIndexerStatus struct {
+ ID int64 `xorm:"pk autoincr"`
+ // RepoID is the ID of the repository this status belongs to.
+ RepoID int64 `xorm:"INDEX"`
+ // CommitSha is the commit the indexer was last synced to; it is empty
+ // when no sync has been recorded for the repository yet.
+ CommitSha string `xorm:"VARCHAR(40)"`
+}
+
+// getIndexerStatus loads the repository's indexer status into
+// repo.IndexerStatus unless it is already set. When no stored status is
+// found, a status with an empty CommitSha is used instead.
+func (repo *Repository) getIndexerStatus() error {
+	if repo.IndexerStatus == nil {
+		loaded := &RepoIndexerStatus{RepoID: repo.ID}
+		found, err := x.Get(loaded)
+		if err != nil {
+			return err
+		}
+		if !found {
+			loaded.CommitSha = ""
+		}
+		repo.IndexerStatus = loaded
+	}
+	return nil
+}
+
+// updateIndexerStatus stores sha as the commit the indexer was last synced
+// to. A status whose CommitSha was empty is inserted as a new row; otherwise
+// only the commit_sha column of the existing row is updated.
+func (repo *Repository) updateIndexerStatus(sha string) error {
+	if err := repo.getIndexerStatus(); err != nil {
+		return err
+	}
+
+	status := repo.IndexerStatus
+	// decide before overwriting: an empty sha marks a status not yet stored
+	firstSync := len(status.CommitSha) == 0
+	status.CommitSha = sha
+
+	if firstSync {
+		_, err := x.Insert(status)
+		return err
+	}
+	_, err := x.ID(status.ID).Cols("commit_sha").Update(status)
+	return err
+}
+
+// repoIndexerOperation is one unit of work for the repo indexer queue:
+// removal of the repository's entries when deleted is set, an update of
+// them otherwise.
+type repoIndexerOperation struct {
+ repo *Repository
+ deleted bool
+}
+
+// repoIndexerOperationQueue holds pending indexer operations. It is created
+// by InitRepoIndexer and consumed by processRepoIndexerOperationQueue.
+var repoIndexerOperationQueue chan repoIndexerOperation
+
+// InitRepoIndexer initializes the repo indexer when it is enabled in the
+// settings: the underlying indexer is set up with populateRepoIndexer, the
+// operation queue is created and its processing goroutine is started.
+func InitRepoIndexer() {
+	if setting.Indexer.RepoIndexerEnabled {
+		indexer.InitRepoIndexer(populateRepoIndexer)
+		queueLen := setting.Indexer.UpdateQueueLength
+		repoIndexerOperationQueue = make(chan repoIndexerOperation, queueLen)
+		go processRepoIndexerOperationQueue()
+	}
+}
+
+// populateRepoIndexer walks all repositories (private ones included), ten
+// per page ordered by ID, and updates the indexer for each of them. It stops
+// at the first empty page and only returns an error when the search fails.
+func populateRepoIndexer() error {
+	log.Info("Populating repository indexer (this may take a while)")
+	page := 1
+	for {
+		opts := &SearchRepoOptions{
+			Page:     page,
+			PageSize: 10,
+			OrderBy:  SearchOrderByID,
+			Private:  true,
+		}
+		repos, _, err := SearchRepositoryByName(opts)
+		if err != nil {
+			return err
+		}
+		if len(repos) == 0 {
+			return nil
+		}
+		for i := range repos {
+			repo := repos[i]
+			// a failing repository is only logged, since this should not
+			// prevent gitea from starting up
+			if indexErr := updateRepoIndexer(repo); indexErr != nil {
+				log.Error(4, "updateRepoIndexer: repoID=%d, %v", repo.ID, indexErr)
+			}
+		}
+		page++
+	}
+}
+
+// updateBatch holds a list of pending repo indexer updates.
+// NOTE(review): no use of this type is visible in this file — confirm it is
+// still needed.
+type updateBatch struct {
+ updates []indexer.RepoIndexerUpdate
+}
+
+// updateRepoIndexer brings the indexer up to date with repo: it collects the
+// changes since the last sync, batches an update for every updated file and
+// a delete for every removed file, flushes the batch and finally records the
+// synced commit. A nil change set means there is nothing to do.
+func updateRepoIndexer(repo *Repository) error {
+	changes, err := getRepoChanges(repo)
+	if err != nil {
+		return err
+	}
+	if changes == nil {
+		return nil
+	}
+
+	batch := indexer.RepoIndexerBatch()
+	for _, updated := range changes.UpdatedFiles {
+		if err = addUpdate(updated, repo, batch); err != nil {
+			return err
+		}
+	}
+	for _, removed := range changes.RemovedFiles {
+		if err = addDelete(removed, repo, batch); err != nil {
+			return err
+		}
+	}
+
+	if err = batch.Flush(); err != nil {
+		return err
+	}
+	return updateLastIndexSync(repo)
+}
+
+// repoChanges describes the changes (file additions/updates/removals) made
+// to a repo since the indexer was last synced.
+type repoChanges struct {
+ // UpdatedFiles lists the files to (re)index, relative to the repo root.
+ UpdatedFiles []string
+ // RemovedFiles lists the files whose index entries are to be deleted.
+ RemovedFiles []string
+}
+
+// getRepoChanges returns the changes made to repo since the last indexer
+// update. The repository is checked in to repoWorkingPool for the duration
+// of the call. It returns (nil, nil) when the default branch does not exist
+// in the local copy.
+func getRepoChanges(repo *Repository) (*repoChanges, error) {
+	poolKey := com.ToStr(repo.ID)
+	repoWorkingPool.CheckIn(poolKey)
+	defer repoWorkingPool.CheckOut(poolKey)
+
+	if err := repo.UpdateLocalCopyBranch(""); err != nil {
+		return nil, err
+	}
+	if !git.IsBranchExist(repo.LocalCopyPath(), repo.DefaultBranch) {
+		// repo does not have any commits yet, so nothing to update
+		return nil, nil
+	}
+	if err := repo.UpdateLocalCopyBranch(repo.DefaultBranch); err != nil {
+		return nil, err
+	}
+	if err := repo.getIndexerStatus(); err != nil {
+		return nil, err
+	}
+
+	// a recorded commit allows an incremental diff; otherwise index everything
+	if len(repo.IndexerStatus.CommitSha) > 0 {
+		return nonGenesisChanges(repo)
+	}
+	return genesisChanges(repo)
+}
+
+// addUpdate adds an index update for filename (a path relative to the root
+// of repo's local copy) to batch. The file is silently skipped, returning
+// nil, when it is not a regular file, when it is larger than
+// setting.Indexer.MaxIndexerFileSize, or when its contents are not text.
+// Errors from stat-ing or reading the file are returned.
+func addUpdate(filename string, repo *Repository, batch *indexer.Batch) error {
+	localPath := path.Join(repo.LocalCopyPath(), filename)
+	stat, err := os.Stat(localPath)
+	if err != nil {
+		return err
+	}
+	if !stat.Mode().IsRegular() {
+		// Directories (e.g. a submodule path) and other special files have
+		// no contents to index; reading them would fail and abort the update
+		// of the whole repository.
+		return nil
+	}
+	if stat.Size() > setting.Indexer.MaxIndexerFileSize {
+		return nil
+	}
+
+	fileContents, err := ioutil.ReadFile(localPath)
+	if err != nil {
+		return err
+	}
+	if !base.IsTextFile(fileContents) {
+		return nil
+	}
+	return batch.Add(indexer.RepoIndexerUpdate{
+		Filepath: filename,
+		Op:       indexer.RepoIndexerOpUpdate,
+		Data: &indexer.RepoIndexerData{
+			RepoID:  repo.ID,
+			Content: string(fileContents),
+		},
+	})
+}
+
+// addDelete adds to batch an operation deleting the index entry of filename
+// in repo.
+func addDelete(filename string, repo *Repository, batch *indexer.Batch) error {
+	removal := indexer.RepoIndexerUpdate{
+		Filepath: filename,
+		Op:       indexer.RepoIndexerOpDelete,
+		Data:     &indexer.RepoIndexerData{RepoID: repo.ID},
+	}
+	return batch.Add(removal)
+}
+
+// genesisChanges returns the changes needed to add repo to the indexer for
+// the first time: every path printed by "git ls-files" in the local copy is
+// reported as an updated file. Quoted paths are unquoted first.
+func genesisChanges(repo *Repository) (*repoChanges, error) {
+	stdout, err := git.NewCommand("ls-files").RunInDir(repo.LocalCopyPath())
+	if err != nil {
+		return nil, err
+	}
+
+	var changes repoChanges
+	for _, entry := range strings.Split(stdout, "\n") {
+		name := strings.TrimSpace(entry)
+		if len(name) == 0 {
+			continue
+		}
+		if strings.HasPrefix(name, `"`) {
+			unquoted, unquoteErr := strconv.Unquote(name)
+			if unquoteErr != nil {
+				return nil, unquoteErr
+			}
+			name = unquoted
+		}
+		changes.UpdatedFiles = append(changes.UpdatedFiles, name)
+	}
+	return &changes, nil
+}
+
+// nonGenesisChanges returns the changes between the commit recorded in
+// repo.IndexerStatus and HEAD of the local copy, as reported by
+// "git diff --name-status". Modified and added files become updated files,
+// deleted files become removed files; any other status is logged and
+// skipped. If the diff command fails, the repository is removed from the
+// indexer and the changes are rebuilt from scratch via genesisChanges.
+func nonGenesisChanges(repo *Repository) (*repoChanges, error) {
+	// --no-renames makes git report a rename as a deletion of the old path
+	// plus an addition of the new one. With rename detection active the
+	// output contains "R<score>\t<old>\t<new>" lines, which the parser below
+	// cannot interpret, leaving the old path indexed and the new one missing.
+	diffCmd := git.NewCommand("diff", "--no-renames", "--name-status",
+		repo.IndexerStatus.CommitSha, "HEAD")
+	stdout, err := diffCmd.RunInDir(repo.LocalCopyPath())
+	if err != nil {
+		// previous commit sha may have been removed by a force push, so
+		// try rebuilding from scratch
+		if err = indexer.DeleteRepoFromIndexer(repo.ID); err != nil {
+			return nil, err
+		}
+		return genesisChanges(repo)
+	}
+
+	var changes repoChanges
+	for _, line := range strings.Split(stdout, "\n") {
+		line = strings.TrimSpace(line)
+		if len(line) == 0 {
+			continue
+		}
+		// each line is a one-letter status followed by the path
+		filename := strings.TrimSpace(line[1:])
+		if len(filename) == 0 {
+			continue
+		} else if filename[0] == '"' {
+			filename, err = strconv.Unquote(filename)
+			if err != nil {
+				return nil, err
+			}
+		}
+
+		switch status := line[0]; status {
+		case 'M', 'A':
+			changes.UpdatedFiles = append(changes.UpdatedFiles, filename)
+		case 'D':
+			changes.RemovedFiles = append(changes.RemovedFiles, filename)
+		default:
+			log.Warn("Unrecognized status: %c (line=%s)", status, line)
+		}
+	}
+	return &changes, nil
+}
+
+// updateLastIndexSync records the commit HEAD of repo's local copy points to
+// (as printed by "git rev-parse HEAD") as the last commit synced to the
+// indexer.
+func updateLastIndexSync(repo *Repository) error {
+	revParse := git.NewCommand("rev-parse", "HEAD")
+	stdout, err := revParse.RunInDir(repo.LocalCopyPath())
+	if err != nil {
+		return err
+	}
+	return repo.updateIndexerStatus(strings.TrimSpace(stdout))
+}
+
+// processRepoIndexerOperationQueue receives operations from
+// repoIndexerOperationQueue forever, applying each as either a deletion or
+// an update. Failures are logged and do not stop the loop.
+func processRepoIndexerOperationQueue() {
+	for {
+		op := <-repoIndexerOperationQueue
+		if !op.deleted {
+			if err := updateRepoIndexer(op.repo); err != nil {
+				log.Error(4, "updateRepoIndexer: %v", err)
+			}
+			continue
+		}
+		if err := indexer.DeleteRepoFromIndexer(op.repo.ID); err != nil {
+			log.Error(4, "DeleteRepoFromIndexer: %v", err)
+		}
+	}
+}
+
+// DeleteRepoFromIndexer queues the removal of all of a repository's entries
+// from the indexer.
+func DeleteRepoFromIndexer(repo *Repository) {
+	removal := repoIndexerOperation{deleted: true, repo: repo}
+	addOperationToQueue(removal)
+}
+
+// UpdateRepoIndexer queues an update of a repository's entries in the
+// indexer.
+func UpdateRepoIndexer(repo *Repository) {
+	update := repoIndexerOperation{deleted: false, repo: repo}
+	addOperationToQueue(update)
+}
+
+// addOperationToQueue hands op to the indexer queue without blocking the
+// caller. It does nothing when the repo indexer is disabled.
+func addOperationToQueue(op repoIndexerOperation) {
+	if setting.Indexer.RepoIndexerEnabled {
+		select {
+		case repoIndexerOperationQueue <- op:
+			// enqueued immediately
+		default:
+			// the send cannot proceed right now; finish it from a separate
+			// goroutine so the caller can carry on
+			go func() {
+				repoIndexerOperationQueue <- op
+			}()
+		}
+	}
+}
diff --git a/models/update.go b/models/update.go
index f3bd6cce13..f91559d9ec 100644
--- a/models/update.go
+++ b/models/update.go
@@ -263,6 +263,10 @@ func pushUpdate(opts PushUpdateOptions) (repo *Repository, err error) {
commits = ListToPushCommits(l)
}
+ if opts.RefFullName == git.BranchPrefix+repo.DefaultBranch {
+ UpdateRepoIndexer(repo)
+ }
+
if err := CommitRepoAction(CommitRepoActionOptions{
PusherName: opts.PusherName,
RepoOwnerID: owner.ID,