summaryrefslogtreecommitdiffstats
path: root/modules
diff options
context:
space:
mode:
author Ethan Koenig <ethantkoenig@gmail.com> 2018-02-05 10:29:17 -0800
committer Lauris BH <lauris@nix.lv> 2018-02-05 20:29:17 +0200
commit a89592d4abfef01e68e3c53a3cdb3846b03abd2b (patch)
tree 4d72baa635595eb9088c0a89977996d07dddeb9d /modules
parent 283e87d8145ac5dd61f86f61e347ffa684ac5684 (diff)
download gitea-a89592d4abfef01e68e3c53a3cdb3846b03abd2b.tar.gz
gitea-a89592d4abfef01e68e3c53a3cdb3846b03abd2b.zip
Reduce repo indexer disk usage (#3452)
Diffstat (limited to 'modules')
-rw-r--r-- modules/indexer/indexer.go 59
-rw-r--r-- modules/indexer/issue.go 59
-rw-r--r-- modules/indexer/repo.go 76
3 files changed, 105 insertions, 89 deletions
diff --git a/modules/indexer/indexer.go b/modules/indexer/indexer.go
index d5bdd51f9c..9e12a7f501 100644
--- a/modules/indexer/indexer.go
+++ b/modules/indexer/indexer.go
@@ -6,12 +6,15 @@ package indexer
import (
"fmt"
+ "os"
"strconv"
"github.com/blevesearch/bleve"
"github.com/blevesearch/bleve/analysis/token/unicodenorm"
+ "github.com/blevesearch/bleve/index/upsidedown"
"github.com/blevesearch/bleve/mapping"
"github.com/blevesearch/bleve/search/query"
+ "github.com/ethantkoenig/rupture"
)
// indexerID a bleve-compatible unique identifier for an integer id
@@ -53,40 +56,41 @@ func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error {
})
}
-// Update represents an update to an indexer
-type Update interface {
- addToBatch(batch *bleve.Batch) error
-}
-
const maxBatchSize = 16
-// Batch batch of indexer updates that automatically flushes once it
-// reaches a certain size
-type Batch struct {
- batch *bleve.Batch
- index bleve.Index
-}
-
-// Add add update to batch, possibly flushing
-func (batch *Batch) Add(update Update) error {
- if err := update.addToBatch(batch.batch); err != nil {
- return err
+// openIndexer opens the bleve index stored at path.
+//
+// It returns (nil, nil) when the caller has to create (or re-create) the
+// index: nothing exists at path, the stored metadata version is older than
+// latestVersion, or the index was written by an incompatible bleve version.
+// In the last two cases the stale index is removed from disk first. Any other
+// failure is returned as a non-nil error.
+func openIndexer(path string, latestVersion int) (bleve.Index, error) {
+ // stat the index being opened; path is not necessarily the issue index
+ _, err := os.Stat(path)
+ if os.IsNotExist(err) {
+ return nil, nil
+ } else if err != nil {
+ return nil, err
}
- return batch.flushIfFull()
-}
-func (batch *Batch) flushIfFull() error {
- if batch.batch.Size() >= maxBatchSize {
- return batch.Flush()
+ metadata, err := rupture.ReadIndexMetadata(path)
+ if err != nil {
+ return nil, err
+ }
+ if metadata.Version < latestVersion {
+ // the indexer is using a previous version, so we should delete it and
+ // re-populate
+ return nil, os.RemoveAll(path)
}
- return nil
-}
-// Flush manually flush the batch, regardless of its size
-func (batch *Batch) Flush() error {
- if err := batch.index.Batch(batch.batch); err != nil {
- return err
+ index, err := bleve.Open(path)
+ if err == upsidedown.IncompatibleVersion {
+ // the indexer was built with a previous version of bleve, so we should
+ // delete it and re-populate
+ return nil, os.RemoveAll(path)
+ } else if err != nil {
+ return nil, err
}
- batch.batch.Reset()
- return nil
+ return index, nil
}
diff --git a/modules/indexer/issue.go b/modules/indexer/issue.go
index 62a18e2b3b..b0d231a7cf 100644
--- a/modules/indexer/issue.go
+++ b/modules/indexer/issue.go
@@ -5,8 +5,6 @@
package indexer
import (
- "os"
-
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
@@ -14,12 +12,19 @@ import (
"github.com/blevesearch/bleve/analysis/analyzer/custom"
"github.com/blevesearch/bleve/analysis/token/lowercase"
"github.com/blevesearch/bleve/analysis/tokenizer/unicode"
- "github.com/blevesearch/bleve/index/upsidedown"
+ "github.com/ethantkoenig/rupture"
)
// issueIndexer (thread-safe) index for searching issues
var issueIndexer bleve.Index
+const (
+ // issueIndexerAnalyzer is the name assigned to mapping.DefaultAnalyzer in
+ // createIssueIndexer.
+ issueIndexerAnalyzer = "issueIndexer"
+ // issueIndexerDocType is the document type reported by
+ // IssueIndexerData.Type and the name the issue document mapping is
+ // registered under.
+ issueIndexerDocType = "issueIndexerDocType"
+
+ // issueIndexerLatestVersion is the version InitIssueIndexer passes to
+ // openIndexer; an on-disk index whose metadata version is lower is deleted
+ // so that it gets rebuilt.
+ issueIndexerLatestVersion = 1
+)
+
// IssueIndexerData data stored in the issue indexer
type IssueIndexerData struct {
RepoID int64
@@ -28,35 +33,33 @@ type IssueIndexerData struct {
Comments []string
}
+// Type implements bleve's mapping.Classifier interface. It always returns
+// issueIndexerDocType, the name createIssueIndexer registers the issue
+// document mapping under.
+func (i *IssueIndexerData) Type() string {
+ return issueIndexerDocType
+}
+
// IssueIndexerUpdate an update to the issue indexer
type IssueIndexerUpdate struct {
IssueID int64
Data *IssueIndexerData
}
-func (update IssueIndexerUpdate) addToBatch(batch *bleve.Batch) error {
- return batch.Index(indexerID(update.IssueID), update.Data)
+// AddToFlushingBatch indexes the update's Data in batch, keyed by the
+// bleve-compatible identifier that indexerID derives from IssueID. Any error
+// from batch.Index is returned unchanged.
+func (i IssueIndexerUpdate) AddToFlushingBatch(batch rupture.FlushingBatch) error {
+ return batch.Index(indexerID(i.IssueID), i.Data)
}
-const issueIndexerAnalyzer = "issueIndexer"
-
// InitIssueIndexer initialize issue indexer
func InitIssueIndexer(populateIndexer func() error) {
- _, err := os.Stat(setting.Indexer.IssuePath)
- if err != nil && !os.IsNotExist(err) {
+ var err error
+ issueIndexer, err = openIndexer(setting.Indexer.IssuePath, issueIndexerLatestVersion)
+ if err != nil {
log.Fatal(4, "InitIssueIndexer: %v", err)
- } else if err == nil {
- issueIndexer, err = bleve.Open(setting.Indexer.IssuePath)
- if err == nil {
- return
- } else if err != upsidedown.IncompatibleVersion {
- log.Fatal(4, "InitIssueIndexer, open index: %v", err)
- }
- log.Warn("Incompatible bleve version, deleting and recreating issue indexer")
- if err = os.RemoveAll(setting.Indexer.IssuePath); err != nil {
- log.Fatal(4, "InitIssueIndexer: remove index, %v", err)
- }
}
+ if issueIndexer != nil {
+ return
+ }
+
if err = createIssueIndexer(); err != nil {
log.Fatal(4, "InitIssuesIndexer: create index, %v", err)
}
@@ -70,9 +73,13 @@ func createIssueIndexer() error {
mapping := bleve.NewIndexMapping()
docMapping := bleve.NewDocumentMapping()
- docMapping.AddFieldMappingsAt("RepoID", bleve.NewNumericFieldMapping())
+ numericFieldMapping := bleve.NewNumericFieldMapping()
+ numericFieldMapping.IncludeInAll = false
+ docMapping.AddFieldMappingsAt("RepoID", numericFieldMapping)
textFieldMapping := bleve.NewTextFieldMapping()
+ textFieldMapping.Store = false
+ textFieldMapping.IncludeInAll = false
docMapping.AddFieldMappingsAt("Title", textFieldMapping)
docMapping.AddFieldMappingsAt("Content", textFieldMapping)
docMapping.AddFieldMappingsAt("Comments", textFieldMapping)
@@ -89,7 +96,8 @@ func createIssueIndexer() error {
}
mapping.DefaultAnalyzer = issueIndexerAnalyzer
- mapping.AddDocumentMapping("issues", docMapping)
+ mapping.AddDocumentMapping(issueIndexerDocType, docMapping)
+ mapping.AddDocumentMapping("_all", bleve.NewDocumentDisabledMapping())
var err error
issueIndexer, err = bleve.New(setting.Indexer.IssuePath, mapping)
@@ -97,11 +105,8 @@ func createIssueIndexer() error {
}
// IssueIndexerBatch batch to add updates to
-func IssueIndexerBatch() *Batch {
- return &Batch{
- batch: issueIndexer.NewBatch(),
- index: issueIndexer,
- }
+// The batch is built by rupture.NewFlushingBatch over issueIndexer with
+// maxBatchSize (16) as its size argument - presumably the number of queued
+// updates that triggers an automatic flush; confirm against rupture.
+func IssueIndexerBatch() rupture.FlushingBatch {
+ return rupture.NewFlushingBatch(issueIndexer, maxBatchSize)
}
// SearchIssuesByKeyword searches for issues by given conditions.
diff --git a/modules/indexer/repo.go b/modules/indexer/repo.go
index 226e565e3e..ffb1dc1e62 100644
--- a/modules/indexer/repo.go
+++ b/modules/indexer/repo.go
@@ -5,7 +5,6 @@
package indexer
import (
- "os"
"strings"
"code.gitea.io/gitea/modules/log"
@@ -15,10 +14,17 @@ import (
"github.com/blevesearch/bleve/analysis/analyzer/custom"
"github.com/blevesearch/bleve/analysis/token/camelcase"
"github.com/blevesearch/bleve/analysis/token/lowercase"
+ "github.com/blevesearch/bleve/analysis/token/unique"
"github.com/blevesearch/bleve/analysis/tokenizer/unicode"
+ "github.com/ethantkoenig/rupture"
)
-const repoIndexerAnalyzer = "repoIndexerAnalyzer"
+const (
+ // repoIndexerAnalyzer is the name of the custom analyzer that
+ // createRepoIndexer registers and sets as the mapping's default.
+ repoIndexerAnalyzer = "repoIndexerAnalyzer"
+ // repoIndexerDocType is the document type reported by RepoIndexerData.Type
+ // and the name the repo document mapping is registered under.
+ repoIndexerDocType = "repoIndexerDocType"
+
+ // repoIndexerLatestVersion is the version InitRepoIndexer passes to
+ // openIndexer; an on-disk index whose metadata version is lower is deleted
+ // so that it gets rebuilt.
+ repoIndexerLatestVersion = 1
+)
// repoIndexer (thread-safe) index for repository contents
var repoIndexer bleve.Index
@@ -40,6 +46,11 @@ type RepoIndexerData struct {
Content string
}
+// Type implements bleve's mapping.Classifier interface. It always returns
+// repoIndexerDocType, the name createRepoIndexer registers the repo document
+// mapping under.
+func (d *RepoIndexerData) Type() string {
+ return repoIndexerDocType
+}
+
// RepoIndexerUpdate an update to the repo indexer
type RepoIndexerUpdate struct {
Filepath string
@@ -47,13 +58,14 @@ type RepoIndexerUpdate struct {
Data *RepoIndexerData
}
-func (update RepoIndexerUpdate) addToBatch(batch *bleve.Batch) error {
+// AddToFlushingBatch adds the update to the given flushing batch.
+func (update RepoIndexerUpdate) AddToFlushingBatch(batch rupture.FlushingBatch) error {
id := filenameIndexerID(update.Data.RepoID, update.Filepath)
switch update.Op {
case RepoIndexerOpUpdate:
return batch.Index(id, update.Data)
case RepoIndexerOpDelete:
- batch.Delete(id)
+ return batch.Delete(id)
default:
log.Error(4, "Unrecognized repo indexer op: %d", update.Op)
}
@@ -62,48 +74,50 @@ func (update RepoIndexerUpdate) addToBatch(batch *bleve.Batch) error {
// InitRepoIndexer initialize repo indexer
+//
+// It opens the index at setting.Indexer.RepoPath through openIndexer. When no
+// usable index is returned it creates one with createRepoIndexer and then
+// runs populateIndexer. Every failure is reported through log.Fatal.
func InitRepoIndexer(populateIndexer func() error) {
- _, err := os.Stat(setting.Indexer.RepoPath)
+ var err error
+ repoIndexer, err = openIndexer(setting.Indexer.RepoPath, repoIndexerLatestVersion)
if err != nil {
- if os.IsNotExist(err) {
- if err = createRepoIndexer(); err != nil {
- log.Fatal(4, "CreateRepoIndexer: %v", err)
- }
- if err = populateIndexer(); err != nil {
- log.Fatal(4, "PopulateRepoIndex: %v", err)
- }
- } else {
- log.Fatal(4, "InitRepoIndexer: %v", err)
- }
- } else {
- repoIndexer, err = bleve.Open(setting.Indexer.RepoPath)
- if err != nil {
- log.Fatal(4, "InitRepoIndexer, open index: %v", err)
- }
+ log.Fatal(4, "InitRepoIndexer: %v", err)
+ }
+ // a non-nil index is an existing, current index: nothing left to build
+ if repoIndexer != nil {
+ return
+ }
+
+ // openIndexer reported (nil, nil): build a fresh index and fill it
+ if err = createRepoIndexer(); err != nil {
+ log.Fatal(4, "CreateRepoIndexer: %v", err)
+ }
+ if err = populateIndexer(); err != nil {
+ log.Fatal(4, "PopulateRepoIndex: %v", err)
}
}
// createRepoIndexer create a repo indexer if one does not already exist
+//
+// It builds the index mapping (a numeric RepoID field and a text Content
+// field, analyzed by the custom repoIndexerAnalyzer), creates the bleve index
+// at setting.Indexer.RepoPath and stores it in repoIndexer. On success it
+// also records repoIndexerLatestVersion in the index's rupture metadata, the
+// value openIndexer compares against on the next start.
func createRepoIndexer() error {
+ var err error
docMapping := bleve.NewDocumentMapping()
- docMapping.AddFieldMappingsAt("RepoID", bleve.NewNumericFieldMapping())
+ numericFieldMapping := bleve.NewNumericFieldMapping()
+ numericFieldMapping.IncludeInAll = false
+ docMapping.AddFieldMappingsAt("RepoID", numericFieldMapping)
textFieldMapping := bleve.NewTextFieldMapping()
+ textFieldMapping.IncludeInAll = false
docMapping.AddFieldMappingsAt("Content", textFieldMapping)
mapping := bleve.NewIndexMapping()
- if err := addUnicodeNormalizeTokenFilter(mapping); err != nil {
+ if err = addUnicodeNormalizeTokenFilter(mapping); err != nil {
return err
- } else if err := mapping.AddCustomAnalyzer(repoIndexerAnalyzer, map[string]interface{}{
+ } else if err = mapping.AddCustomAnalyzer(repoIndexerAnalyzer, map[string]interface{}{
"type": custom.Name,
"char_filters": []string{},
"tokenizer": unicode.Name,
- "token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name},
+ "token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name, unique.Name},
}); err != nil {
return err
}
mapping.DefaultAnalyzer = repoIndexerAnalyzer
- mapping.AddDocumentMapping("repo", docMapping)
- var err error
+ mapping.AddDocumentMapping(repoIndexerDocType, docMapping)
+ mapping.AddDocumentMapping("_all", bleve.NewDocumentDisabledMapping())
+
repoIndexer, err = bleve.New(setting.Indexer.RepoPath, mapping)
- return err
+ if err != nil {
+ return err
+ }
+ // openIndexer removes any index whose stored metadata version is below
+ // repoIndexerLatestVersion, so record the version of the index just built
+ return rupture.WriteIndexMetadata(setting.Indexer.RepoPath, &rupture.IndexMetadata{
+ Version: repoIndexerLatestVersion,
+ })
}
@@ -121,11 +135,8 @@ func filenameOfIndexerID(indexerID string) string {
}
// RepoIndexerBatch batch to add updates to
-func RepoIndexerBatch() *Batch {
- return &Batch{
- batch: repoIndexer.NewBatch(),
- index: repoIndexer,
- }
+// The batch is built by rupture.NewFlushingBatch over repoIndexer with
+// maxBatchSize (16) as its size argument - presumably the number of queued
+// updates that triggers an automatic flush; confirm against rupture.
+func RepoIndexerBatch() rupture.FlushingBatch {
+ return rupture.NewFlushingBatch(repoIndexer, maxBatchSize)
}
// DeleteRepoFromIndexer delete all of a repo's files from indexer
@@ -138,8 +149,7 @@ func DeleteRepoFromIndexer(repoID int64) error {
}
batch := RepoIndexerBatch()
for _, hit := range result.Hits {
- batch.batch.Delete(hit.ID)
- if err = batch.flushIfFull(); err != nil {
+ if err = batch.Delete(hit.ID); err != nil {
return err
}
}