diff options
author | Ethan Koenig <ethantkoenig@gmail.com> | 2018-02-05 10:29:17 -0800 |
---|---|---|
committer | Lauris BH <lauris@nix.lv> | 2018-02-05 20:29:17 +0200 |
commit | a89592d4abfef01e68e3c53a3cdb3846b03abd2b (patch) | |
tree | 4d72baa635595eb9088c0a89977996d07dddeb9d /modules/indexer | |
parent | 283e87d8145ac5dd61f86f61e347ffa684ac5684 (diff) | |
download | gitea-a89592d4abfef01e68e3c53a3cdb3846b03abd2b.tar.gz gitea-a89592d4abfef01e68e3c53a3cdb3846b03abd2b.zip |
Reduce repo indexer disk usage (#3452)
Diffstat (limited to 'modules/indexer')
-rw-r--r-- | modules/indexer/indexer.go | 59 | ||||
-rw-r--r-- | modules/indexer/issue.go | 59 | ||||
-rw-r--r-- | modules/indexer/repo.go | 76 |
3 files changed, 105 insertions, 89 deletions
diff --git a/modules/indexer/indexer.go b/modules/indexer/indexer.go index d5bdd51f9c..9e12a7f501 100644 --- a/modules/indexer/indexer.go +++ b/modules/indexer/indexer.go @@ -6,12 +6,17 @@ package indexer import ( "fmt" + "os" "strconv" + "code.gitea.io/gitea/modules/setting" + "github.com/blevesearch/bleve" "github.com/blevesearch/bleve/analysis/token/unicodenorm" + "github.com/blevesearch/bleve/index/upsidedown" "github.com/blevesearch/bleve/mapping" "github.com/blevesearch/bleve/search/query" + "github.com/ethantkoenig/rupture" ) // indexerID a bleve-compatible unique identifier for an integer id @@ -53,40 +58,36 @@ func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error { }) } -// Update represents an update to an indexer -type Update interface { - addToBatch(batch *bleve.Batch) error -} - const maxBatchSize = 16 -// Batch batch of indexer updates that automatically flushes once it -// reaches a certain size -type Batch struct { - batch *bleve.Batch - index bleve.Index -} - -// Add add update to batch, possibly flushing -func (batch *Batch) Add(update Update) error { - if err := update.addToBatch(batch.batch); err != nil { - return err +// openIndexer open the index at the specified path, checking for metadata +// updates and bleve version updates. If index needs to be created (or +// re-created), returns (nil, nil) +func openIndexer(path string, latestVersion int) (bleve.Index, error) { + _, err := os.Stat(setting.Indexer.IssuePath) + if err != nil && os.IsNotExist(err) { + return nil, nil + } else if err != nil { + return nil, err } - return batch.flushIfFull() -} -func (batch *Batch) flushIfFull() error { - if batch.batch.Size() >= maxBatchSize { - return batch.Flush() + metadata, err := rupture.ReadIndexMetadata(path) + if err != nil { + return nil, err + } + if metadata.Version < latestVersion { + // the indexer is using a previous version, so we should delete it and + // re-populate + return nil, os.RemoveAll(path) } - return nil -} -// Flush manually flush the batch, regardless of its size -func (batch *Batch) Flush() error { - if err := batch.index.Batch(batch.batch); err != nil { - return err + index, err := bleve.Open(path) + if err != nil && err == upsidedown.IncompatibleVersion { + // the indexer was built with a previous version of bleve, so we should + // delete it and re-populate + return nil, os.RemoveAll(path) + } else if err != nil { + return nil, err } - batch.batch.Reset() - return nil + return index, nil } diff --git a/modules/indexer/issue.go b/modules/indexer/issue.go index 62a18e2b3b..b0d231a7cf 100644 --- a/modules/indexer/issue.go +++ b/modules/indexer/issue.go @@ -5,8 +5,6 @@ package indexer import ( - "os" - "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/setting" @@ -14,12 +12,19 @@ import ( "github.com/blevesearch/bleve/analysis/analyzer/custom" "github.com/blevesearch/bleve/analysis/token/lowercase" "github.com/blevesearch/bleve/analysis/tokenizer/unicode" - "github.com/blevesearch/bleve/index/upsidedown" + "github.com/ethantkoenig/rupture" ) // issueIndexer (thread-safe) index for searching issues var issueIndexer bleve.Index +const ( + issueIndexerAnalyzer = "issueIndexer" + issueIndexerDocType = "issueIndexerDocType" + + issueIndexerLatestVersion = 1 +) + // IssueIndexerData data stored in the issue indexer type IssueIndexerData struct { RepoID int64 @@ -28,35 +33,33 @@ type IssueIndexerData struct { Comments []string } +// Type returns the document type, for bleve's mapping.Classifier interface. +func (i *IssueIndexerData) Type() string { + return issueIndexerDocType +} + // IssueIndexerUpdate an update to the issue indexer type IssueIndexerUpdate struct { IssueID int64 Data *IssueIndexerData } -func (update IssueIndexerUpdate) addToBatch(batch *bleve.Batch) error { - return batch.Index(indexerID(update.IssueID), update.Data) +// AddToFlushingBatch adds the update to the given flushing batch. +func (i IssueIndexerUpdate) AddToFlushingBatch(batch rupture.FlushingBatch) error { + return batch.Index(indexerID(i.IssueID), i.Data) } -const issueIndexerAnalyzer = "issueIndexer" - // InitIssueIndexer initialize issue indexer func InitIssueIndexer(populateIndexer func() error) { - _, err := os.Stat(setting.Indexer.IssuePath) - if err != nil && !os.IsNotExist(err) { + var err error + issueIndexer, err = openIndexer(setting.Indexer.IssuePath, issueIndexerLatestVersion) + if err != nil { log.Fatal(4, "InitIssueIndexer: %v", err) - } else if err == nil { - issueIndexer, err = bleve.Open(setting.Indexer.IssuePath) - if err == nil { - return - } else if err != upsidedown.IncompatibleVersion { - log.Fatal(4, "InitIssueIndexer, open index: %v", err) - } - log.Warn("Incompatible bleve version, deleting and recreating issue indexer") - if err = os.RemoveAll(setting.Indexer.IssuePath); err != nil { - log.Fatal(4, "InitIssueIndexer: remove index, %v", err) - } } + if issueIndexer != nil { + return + } + if err = createIssueIndexer(); err != nil { log.Fatal(4, "InitIssuesIndexer: create index, %v", err) } @@ -70,9 +73,13 @@ func createIssueIndexer() error { mapping := bleve.NewIndexMapping() docMapping := bleve.NewDocumentMapping() - docMapping.AddFieldMappingsAt("RepoID", bleve.NewNumericFieldMapping()) + numericFieldMapping := bleve.NewNumericFieldMapping() + numericFieldMapping.IncludeInAll = false + docMapping.AddFieldMappingsAt("RepoID", numericFieldMapping) textFieldMapping := bleve.NewTextFieldMapping() + textFieldMapping.Store = false + textFieldMapping.IncludeInAll = false docMapping.AddFieldMappingsAt("Title", textFieldMapping) docMapping.AddFieldMappingsAt("Content", textFieldMapping) docMapping.AddFieldMappingsAt("Comments", textFieldMapping) @@ -89,7 +96,8 @@ func createIssueIndexer() error { } mapping.DefaultAnalyzer = issueIndexerAnalyzer - mapping.AddDocumentMapping("issues", docMapping) + mapping.AddDocumentMapping(issueIndexerDocType, docMapping) + mapping.AddDocumentMapping("_all", bleve.NewDocumentDisabledMapping()) var err error issueIndexer, err = bleve.New(setting.Indexer.IssuePath, mapping) @@ -97,11 +105,8 @@ func createIssueIndexer() error { } // IssueIndexerBatch batch to add updates to -func IssueIndexerBatch() *Batch { - return &Batch{ - batch: issueIndexer.NewBatch(), - index: issueIndexer, - } +func IssueIndexerBatch() rupture.FlushingBatch { + return rupture.NewFlushingBatch(issueIndexer, maxBatchSize) } // SearchIssuesByKeyword searches for issues by given conditions. diff --git a/modules/indexer/repo.go b/modules/indexer/repo.go index 226e565e3e..ffb1dc1e62 100644 --- a/modules/indexer/repo.go +++ b/modules/indexer/repo.go @@ -5,7 +5,6 @@ package indexer import ( - "os" "strings" "code.gitea.io/gitea/modules/log" @@ -15,10 +14,17 @@ import ( "github.com/blevesearch/bleve/analysis/analyzer/custom" "github.com/blevesearch/bleve/analysis/token/camelcase" "github.com/blevesearch/bleve/analysis/token/lowercase" + "github.com/blevesearch/bleve/analysis/token/unique" "github.com/blevesearch/bleve/analysis/tokenizer/unicode" + "github.com/ethantkoenig/rupture" ) -const repoIndexerAnalyzer = "repoIndexerAnalyzer" +const ( + repoIndexerAnalyzer = "repoIndexerAnalyzer" + repoIndexerDocType = "repoIndexerDocType" + + repoIndexerLatestVersion = 1 +) // repoIndexer (thread-safe) index for repository contents var repoIndexer bleve.Index @@ -40,6 +46,11 @@ type RepoIndexerData struct { Content string } +// Type returns the document type, for bleve's mapping.Classifier interface. +func (d *RepoIndexerData) Type() string { + return repoIndexerDocType +} + // RepoIndexerUpdate an update to the repo indexer type RepoIndexerUpdate struct { Filepath string @@ -47,13 +58,14 @@ type RepoIndexerUpdate struct { Data *RepoIndexerData } -func (update RepoIndexerUpdate) addToBatch(batch *bleve.Batch) error { +// AddToFlushingBatch adds the update to the given flushing batch. +func (update RepoIndexerUpdate) AddToFlushingBatch(batch rupture.FlushingBatch) error { id := filenameIndexerID(update.Data.RepoID, update.Filepath) switch update.Op { case RepoIndexerOpUpdate: return batch.Index(id, update.Data) case RepoIndexerOpDelete: - batch.Delete(id) + return batch.Delete(id) default: log.Error(4, "Unrecognized repo indexer op: %d", update.Op) } @@ -62,48 +74,50 @@ func (update RepoIndexerUpdate) addToBatch(batch *bleve.Batch) error { // InitRepoIndexer initialize repo indexer func InitRepoIndexer(populateIndexer func() error) { - _, err := os.Stat(setting.Indexer.RepoPath) + var err error + repoIndexer, err = openIndexer(setting.Indexer.RepoPath, repoIndexerLatestVersion) if err != nil { - if os.IsNotExist(err) { - if err = createRepoIndexer(); err != nil { - log.Fatal(4, "CreateRepoIndexer: %v", err) - } - if err = populateIndexer(); err != nil { - log.Fatal(4, "PopulateRepoIndex: %v", err) - } - } else { - log.Fatal(4, "InitRepoIndexer: %v", err) - } - } else { - repoIndexer, err = bleve.Open(setting.Indexer.RepoPath) - if err != nil { - log.Fatal(4, "InitRepoIndexer, open index: %v", err) - } + log.Fatal(4, "InitRepoIndexer: %v", err) + } + if repoIndexer != nil { + return + } + + if err = createRepoIndexer(); err != nil { + log.Fatal(4, "CreateRepoIndexer: %v", err) + } + if err = populateIndexer(); err != nil { + log.Fatal(4, "PopulateRepoIndex: %v", err) } } // createRepoIndexer create a repo indexer if one does not already exist func createRepoIndexer() error { + var err error docMapping := bleve.NewDocumentMapping() - docMapping.AddFieldMappingsAt("RepoID", bleve.NewNumericFieldMapping()) + numericFieldMapping := bleve.NewNumericFieldMapping() + numericFieldMapping.IncludeInAll = false + docMapping.AddFieldMappingsAt("RepoID", numericFieldMapping) textFieldMapping := bleve.NewTextFieldMapping() + textFieldMapping.IncludeInAll = false docMapping.AddFieldMappingsAt("Content", textFieldMapping) mapping := bleve.NewIndexMapping() - if err := addUnicodeNormalizeTokenFilter(mapping); err != nil { + if err = addUnicodeNormalizeTokenFilter(mapping); err != nil { return err - } else if err := mapping.AddCustomAnalyzer(repoIndexerAnalyzer, map[string]interface{}{ + } else if err = mapping.AddCustomAnalyzer(repoIndexerAnalyzer, map[string]interface{}{ "type": custom.Name, "char_filters": []string{}, "tokenizer": unicode.Name, - "token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name}, + "token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name, unique.Name}, }); err != nil { return err } mapping.DefaultAnalyzer = repoIndexerAnalyzer - mapping.AddDocumentMapping("repo", docMapping) - var err error + mapping.AddDocumentMapping(repoIndexerDocType, docMapping) + mapping.AddDocumentMapping("_all", bleve.NewDocumentDisabledMapping()) + repoIndexer, err = bleve.New(setting.Indexer.RepoPath, mapping) return err } @@ -121,11 +135,8 @@ func filenameOfIndexerID(indexerID string) string { } // RepoIndexerBatch batch to add updates to -func RepoIndexerBatch() *Batch { - return &Batch{ - batch: repoIndexer.NewBatch(), - index: repoIndexer, - } +func RepoIndexerBatch() rupture.FlushingBatch { + return rupture.NewFlushingBatch(repoIndexer, maxBatchSize) } // DeleteRepoFromIndexer delete all of a repo's files from indexer @@ -138,8 +149,7 @@ func DeleteRepoFromIndexer(repoID int64) error { } batch := RepoIndexerBatch() for _, hit := range result.Hits { - batch.batch.Delete(hit.ID) - if err = batch.flushIfFull(); err != nil { + if err = batch.Delete(hit.ID); err != nil { return err } } |