summaryrefslogtreecommitdiffstats
path: root/modules/indexer
diff options
context:
space:
mode:
Diffstat (limited to 'modules/indexer')
-rw-r--r--modules/indexer/code/bleve.go114
-rw-r--r--modules/indexer/code/bleve_test.go9
-rw-r--r--modules/indexer/code/indexer.go24
-rw-r--r--modules/indexer/code/search.go21
-rw-r--r--modules/indexer/code/wrapped.go6
5 files changed, 140 insertions, 34 deletions
diff --git a/modules/indexer/code/bleve.go b/modules/indexer/code/bleve.go
index 6052304f83..39171d17a6 100644
--- a/modules/indexer/code/bleve.go
+++ b/modules/indexer/code/bleve.go
@@ -9,16 +9,20 @@ import (
"os"
"strconv"
"strings"
+ "time"
"code.gitea.io/gitea/models"
+ "code.gitea.io/gitea/modules/analyze"
"code.gitea.io/gitea/modules/base"
"code.gitea.io/gitea/modules/charset"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
+ "code.gitea.io/gitea/modules/timeutil"
"github.com/blevesearch/bleve"
- "github.com/blevesearch/bleve/analysis/analyzer/custom"
+ analyzer_custom "github.com/blevesearch/bleve/analysis/analyzer/custom"
+ analyzer_keyword "github.com/blevesearch/bleve/analysis/analyzer/keyword"
"github.com/blevesearch/bleve/analysis/token/lowercase"
"github.com/blevesearch/bleve/analysis/token/unicodenorm"
"github.com/blevesearch/bleve/analysis/tokenizer/unicode"
@@ -26,6 +30,7 @@ import (
"github.com/blevesearch/bleve/mapping"
"github.com/blevesearch/bleve/search/query"
"github.com/ethantkoenig/rupture"
+ "github.com/src-d/enry/v2"
)
const unicodeNormalizeName = "unicodeNormalize"
@@ -86,8 +91,11 @@ func openIndexer(path string, latestVersion int) (bleve.Index, error) {
// RepoIndexerData data stored in the repo indexer
type RepoIndexerData struct {
- RepoID int64
- Content string
+ RepoID int64
+ CommitID string
+ Content string
+ Language string
+ UpdatedAt time.Time
}
// Type returns the document type, for bleve's mapping.Classifier interface.
@@ -95,7 +103,11 @@ func (d *RepoIndexerData) Type() string {
return repoIndexerDocType
}
-func addUpdate(update fileUpdate, repo *models.Repository, batch rupture.FlushingBatch) error {
+func addUpdate(commitSha string, update fileUpdate, repo *models.Repository, batch rupture.FlushingBatch) error {
+ // Ignore vendored files in code search
+ if setting.Indexer.ExcludeVendored && enry.IsVendor(update.Filename) {
+ return nil
+ }
stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha).
RunInDir(repo.RepoPath())
if err != nil {
@@ -118,8 +130,11 @@ func addUpdate(update fileUpdate, repo *models.Repository, batch rupture.Flushin
id := filenameIndexerID(repo.ID, update.Filename)
return batch.Index(id, &RepoIndexerData{
- RepoID: repo.ID,
- Content: string(charset.ToUTF8DropErrors(fileContents)),
+ RepoID: repo.ID,
+ CommitID: commitSha,
+ Content: string(charset.ToUTF8DropErrors(fileContents)),
+ Language: analyze.GetCodeLanguage(update.Filename, fileContents),
+ UpdatedAt: time.Now().UTC(),
})
}
@@ -131,7 +146,7 @@ func addDelete(filename string, repo *models.Repository, batch rupture.FlushingB
const (
repoIndexerAnalyzer = "repoIndexerAnalyzer"
repoIndexerDocType = "repoIndexerDocType"
- repoIndexerLatestVersion = 4
+ repoIndexerLatestVersion = 5
)
// createRepoIndexer creates a repo indexer if one does not already exist
@@ -145,11 +160,21 @@ func createRepoIndexer(path string, latestVersion int) (bleve.Index, error) {
textFieldMapping.IncludeInAll = false
docMapping.AddFieldMappingsAt("Content", textFieldMapping)
+ termFieldMapping := bleve.NewTextFieldMapping()
+ termFieldMapping.IncludeInAll = false
+ termFieldMapping.Analyzer = analyzer_keyword.Name
+ docMapping.AddFieldMappingsAt("Language", termFieldMapping)
+ docMapping.AddFieldMappingsAt("CommitID", termFieldMapping)
+
+ timeFieldMapping := bleve.NewDateTimeFieldMapping()
+ timeFieldMapping.IncludeInAll = false
+ docMapping.AddFieldMappingsAt("UpdatedAt", timeFieldMapping)
+
mapping := bleve.NewIndexMapping()
if err := addUnicodeNormalizeTokenFilter(mapping); err != nil {
return nil, err
} else if err := mapping.AddCustomAnalyzer(repoIndexerAnalyzer, map[string]interface{}{
- "type": custom.Name,
+ "type": analyzer_custom.Name,
"char_filters": []string{},
"tokenizer": unicode.Name,
"token_filters": []string{unicodeNormalizeName, lowercase.Name},
@@ -255,7 +280,7 @@ func (b *BleveIndexer) Index(repoID int64) error {
batch := rupture.NewFlushingBatch(b.indexer, maxBatchSize)
for _, update := range changes.Updates {
- if err := addUpdate(update, repo, batch); err != nil {
+ if err := addUpdate(sha, update, repo, batch); err != nil {
return err
}
}
@@ -289,7 +314,7 @@ func (b *BleveIndexer) Delete(repoID int64) error {
// Search searches for files in the specified repo.
// Returns the matching file-paths
-func (b *BleveIndexer) Search(repoIDs []int64, keyword string, page, pageSize int) (int64, []*SearchResult, error) {
+func (b *BleveIndexer) Search(repoIDs []int64, language, keyword string, page, pageSize int) (int64, []*SearchResult, []*SearchResultLanguages, error) {
phraseQuery := bleve.NewMatchPhraseQuery(keyword)
phraseQuery.FieldVal = "Content"
phraseQuery.Analyzer = repoIndexerAnalyzer
@@ -309,16 +334,35 @@ func (b *BleveIndexer) Search(repoIDs []int64, keyword string, page, pageSize in
indexerQuery = phraseQuery
}
+ // Save for reuse without language filter
+ facetQuery := indexerQuery
+ if len(language) > 0 {
+ languageQuery := bleve.NewMatchQuery(language)
+ languageQuery.FieldVal = "Language"
+ languageQuery.Analyzer = analyzer_keyword.Name
+
+ indexerQuery = bleve.NewConjunctionQuery(
+ indexerQuery,
+ languageQuery,
+ )
+ }
+
from := (page - 1) * pageSize
searchRequest := bleve.NewSearchRequestOptions(indexerQuery, pageSize, from, false)
- searchRequest.Fields = []string{"Content", "RepoID"}
+ searchRequest.Fields = []string{"Content", "RepoID", "Language", "CommitID", "UpdatedAt"}
searchRequest.IncludeLocations = true
+ if len(language) == 0 {
+ searchRequest.AddFacet("languages", bleve.NewFacetRequest("Language", 10))
+ }
+
result, err := b.indexer.Search(searchRequest)
if err != nil {
- return 0, nil, err
+ return 0, nil, nil, err
}
+ total := int64(result.Total)
+
searchResults := make([]*SearchResult, len(result.Hits))
for i, hit := range result.Hits {
var startIndex, endIndex int = -1, -1
@@ -333,13 +377,47 @@ func (b *BleveIndexer) Search(repoIDs []int64, keyword string, page, pageSize in
endIndex = locationEnd
}
}
+ language := hit.Fields["Language"].(string)
+ var updatedUnix timeutil.TimeStamp
+ if t, err := time.Parse(time.RFC3339, hit.Fields["UpdatedAt"].(string)); err == nil {
+ updatedUnix = timeutil.TimeStamp(t.Unix())
+ }
searchResults[i] = &SearchResult{
- RepoID: int64(hit.Fields["RepoID"].(float64)),
- StartIndex: startIndex,
- EndIndex: endIndex,
- Filename: filenameOfIndexerID(hit.ID),
- Content: hit.Fields["Content"].(string),
+ RepoID: int64(hit.Fields["RepoID"].(float64)),
+ StartIndex: startIndex,
+ EndIndex: endIndex,
+ Filename: filenameOfIndexerID(hit.ID),
+ Content: hit.Fields["Content"].(string),
+ CommitID: hit.Fields["CommitID"].(string),
+ UpdatedUnix: updatedUnix,
+ Language: language,
+ Color: enry.GetColor(language),
+ }
+ }
+
+ searchResultLanguages := make([]*SearchResultLanguages, 0, 10)
+ if len(language) > 0 {
+ // Run a separate query without the language filter to get the counts for all languages
+ facetRequest := bleve.NewSearchRequestOptions(facetQuery, 1, 0, false)
+ facetRequest.Fields = []string{"Content", "RepoID", "Language", "CommitID", "UpdatedAt"}
+ facetRequest.IncludeLocations = true
+ facetRequest.AddFacet("languages", bleve.NewFacetRequest("Language", 10))
+
+ if result, err = b.indexer.Search(facetRequest); err != nil {
+ return 0, nil, nil, err
+ }
+
+ }
+ languagesFacet := result.Facets["languages"]
+ for _, term := range languagesFacet.Terms {
+ if len(term.Term) == 0 {
+ continue
}
+ searchResultLanguages = append(searchResultLanguages, &SearchResultLanguages{
+ Language: term.Term,
+ Color: enry.GetColor(term.Term),
+ Count: term.Count,
+ })
}
- return int64(result.Total), searchResults, nil
+ return total, searchResults, searchResultLanguages, nil
}
diff --git a/modules/indexer/code/bleve_test.go b/modules/indexer/code/bleve_test.go
index 695dceb259..89cfceea2d 100644
--- a/modules/indexer/code/bleve_test.go
+++ b/modules/indexer/code/bleve_test.go
@@ -49,27 +49,34 @@ func TestIndexAndSearch(t *testing.T) {
keywords = []struct {
Keyword string
IDs []int64
+ Langs int
}{
{
Keyword: "Description",
IDs: []int64{1},
+ Langs: 1,
},
{
Keyword: "repo1",
IDs: []int64{1},
+ Langs: 1,
},
{
Keyword: "non-exist",
IDs: []int64{},
+ Langs: 0,
},
}
)
for _, kw := range keywords {
- total, res, err := idx.Search(nil, kw.Keyword, 1, 10)
+ total, res, langs, err := idx.Search(nil, "", kw.Keyword, 1, 10)
assert.NoError(t, err)
assert.EqualValues(t, len(kw.IDs), total)
+ assert.NotNil(t, langs)
+ assert.Len(t, langs, kw.Langs)
+
var ids = make([]int64, 0, len(res))
for _, hit := range res {
ids = append(ids, hit.RepoID)
diff --git a/modules/indexer/code/indexer.go b/modules/indexer/code/indexer.go
index 3f9461cd0e..6cbda1491b 100644
--- a/modules/indexer/code/indexer.go
+++ b/modules/indexer/code/indexer.go
@@ -12,22 +12,34 @@ import (
"code.gitea.io/gitea/modules/graceful"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
+ "code.gitea.io/gitea/modules/timeutil"
)
// SearchResult result of performing a search in a repo
type SearchResult struct {
- RepoID int64
- StartIndex int
- EndIndex int
- Filename string
- Content string
+ RepoID int64
+ StartIndex int
+ EndIndex int
+ Filename string
+ Content string
+ CommitID string
+ UpdatedUnix timeutil.TimeStamp
+ Language string
+ Color string
+}
+
+// SearchResultLanguages holds the hit count for one of the top languages in the search results
+type SearchResultLanguages struct {
+ Language string
+ Color string
+ Count int
}
// Indexer defines an interface to index and search code contents
type Indexer interface {
Index(repoID int64) error
Delete(repoID int64) error
- Search(repoIDs []int64, keyword string, page, pageSize int) (int64, []*SearchResult, error)
+ Search(repoIDs []int64, language, keyword string, page, pageSize int) (int64, []*SearchResult, []*SearchResultLanguages, error)
Close()
}
diff --git a/modules/indexer/code/search.go b/modules/indexer/code/search.go
index 18f193a532..ca57b3ff88 100644
--- a/modules/indexer/code/search.go
+++ b/modules/indexer/code/search.go
@@ -11,6 +11,7 @@ import (
"strings"
"code.gitea.io/gitea/modules/highlight"
+ "code.gitea.io/gitea/modules/timeutil"
"code.gitea.io/gitea/modules/util"
)
@@ -18,6 +19,10 @@ import (
type Result struct {
RepoID int64
Filename string
+ CommitID string
+ UpdatedUnix timeutil.TimeStamp
+ Language string
+ Color string
HighlightClass string
LineNumbers []int
FormattedLines gotemplate.HTML
@@ -100,6 +105,10 @@ func searchResult(result *SearchResult, startIndex, endIndex int) (*Result, erro
return &Result{
RepoID: result.RepoID,
Filename: result.Filename,
+ CommitID: result.CommitID,
+ UpdatedUnix: result.UpdatedUnix,
+ Language: result.Language,
+ Color: result.Color,
HighlightClass: highlight.FileNameToHighlightClass(result.Filename),
LineNumbers: lineNumbers,
FormattedLines: gotemplate.HTML(formattedLinesBuffer.String()),
@@ -107,14 +116,14 @@ func searchResult(result *SearchResult, startIndex, endIndex int) (*Result, erro
}
// PerformSearch performs a search on the given repositories
-func PerformSearch(repoIDs []int64, keyword string, page, pageSize int) (int, []*Result, error) {
+func PerformSearch(repoIDs []int64, language, keyword string, page, pageSize int) (int, []*Result, []*SearchResultLanguages, error) {
if len(keyword) == 0 {
- return 0, nil, nil
+ return 0, nil, nil, nil
}
- total, results, err := indexer.Search(repoIDs, keyword, page, pageSize)
+ total, results, resultLanguages, err := indexer.Search(repoIDs, language, keyword, page, pageSize)
if err != nil {
- return 0, nil, err
+ return 0, nil, nil, err
}
displayResults := make([]*Result, len(results))
@@ -123,8 +132,8 @@ func PerformSearch(repoIDs []int64, keyword string, page, pageSize int) (int, []
startIndex, endIndex := indices(result.Content, result.StartIndex, result.EndIndex)
displayResults[i], err = searchResult(result, startIndex, endIndex)
if err != nil {
- return 0, nil, err
+ return 0, nil, nil, err
}
}
- return int(total), displayResults, nil
+ return int(total), displayResults, resultLanguages, nil
}
diff --git a/modules/indexer/code/wrapped.go b/modules/indexer/code/wrapped.go
index 6a20883989..926597a382 100644
--- a/modules/indexer/code/wrapped.go
+++ b/modules/indexer/code/wrapped.go
@@ -71,12 +71,12 @@ func (w *wrappedIndexer) Delete(repoID int64) error {
return indexer.Delete(repoID)
}
-func (w *wrappedIndexer) Search(repoIDs []int64, keyword string, page, pageSize int) (int64, []*SearchResult, error) {
+func (w *wrappedIndexer) Search(repoIDs []int64, language, keyword string, page, pageSize int) (int64, []*SearchResult, []*SearchResultLanguages, error) {
indexer, err := w.get()
if err != nil {
- return 0, nil, err
+ return 0, nil, nil, err
}
- return indexer.Search(repoIDs, keyword, page, pageSize)
+ return indexer.Search(repoIDs, language, keyword, page, pageSize)
}