diff options
Diffstat (limited to 'modules/indexer')
-rw-r--r-- | modules/indexer/code/bleve.go | 114 | ||||
-rw-r--r-- | modules/indexer/code/bleve_test.go | 9 | ||||
-rw-r--r-- | modules/indexer/code/indexer.go | 24 | ||||
-rw-r--r-- | modules/indexer/code/search.go | 21 | ||||
-rw-r--r-- | modules/indexer/code/wrapped.go | 6 |
5 files changed, 140 insertions, 34 deletions
diff --git a/modules/indexer/code/bleve.go b/modules/indexer/code/bleve.go index 6052304f83..39171d17a6 100644 --- a/modules/indexer/code/bleve.go +++ b/modules/indexer/code/bleve.go @@ -9,16 +9,20 @@ import ( "os" "strconv" "strings" + "time" "code.gitea.io/gitea/models" + "code.gitea.io/gitea/modules/analyze" "code.gitea.io/gitea/modules/base" "code.gitea.io/gitea/modules/charset" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/modules/timeutil" "github.com/blevesearch/bleve" - "github.com/blevesearch/bleve/analysis/analyzer/custom" + analyzer_custom "github.com/blevesearch/bleve/analysis/analyzer/custom" + analyzer_keyword "github.com/blevesearch/bleve/analysis/analyzer/keyword" "github.com/blevesearch/bleve/analysis/token/lowercase" "github.com/blevesearch/bleve/analysis/token/unicodenorm" "github.com/blevesearch/bleve/analysis/tokenizer/unicode" @@ -26,6 +30,7 @@ import ( "github.com/blevesearch/bleve/mapping" "github.com/blevesearch/bleve/search/query" "github.com/ethantkoenig/rupture" + "github.com/src-d/enry/v2" ) const unicodeNormalizeName = "unicodeNormalize" @@ -86,8 +91,11 @@ func openIndexer(path string, latestVersion int) (bleve.Index, error) { // RepoIndexerData data stored in the repo indexer type RepoIndexerData struct { - RepoID int64 - Content string + RepoID int64 + CommitID string + Content string + Language string + UpdatedAt time.Time } // Type returns the document type, for bleve's mapping.Classifier interface. @@ -95,7 +103,11 @@ func (d *RepoIndexerData) Type() string { return repoIndexerDocType } -func addUpdate(update fileUpdate, repo *models.Repository, batch rupture.FlushingBatch) error { +func addUpdate(commitSha string, update fileUpdate, repo *models.Repository, batch rupture.FlushingBatch) error { + // Ignore vendored files in code search + if setting.Indexer.ExcludeVendored && enry.IsVendor(update.Filename) { + return nil + } stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha). RunInDir(repo.RepoPath()) if err != nil { @@ -118,8 +130,11 @@ func addUpdate(update fileUpdate, repo *models.Repository, batch rupture.Flushin id := filenameIndexerID(repo.ID, update.Filename) return batch.Index(id, &RepoIndexerData{ - RepoID: repo.ID, - Content: string(charset.ToUTF8DropErrors(fileContents)), + RepoID: repo.ID, + CommitID: commitSha, + Content: string(charset.ToUTF8DropErrors(fileContents)), + Language: analyze.GetCodeLanguage(update.Filename, fileContents), + UpdatedAt: time.Now().UTC(), }) } @@ -131,7 +146,7 @@ func addDelete(filename string, repo *models.Repository, batch rupture.FlushingB const ( repoIndexerAnalyzer = "repoIndexerAnalyzer" repoIndexerDocType = "repoIndexerDocType" - repoIndexerLatestVersion = 4 + repoIndexerLatestVersion = 5 ) // createRepoIndexer create a repo indexer if one does not already exist @@ -145,11 +160,21 @@ func createRepoIndexer(path string, latestVersion int) (bleve.Index, error) { textFieldMapping.IncludeInAll = false docMapping.AddFieldMappingsAt("Content", textFieldMapping) + termFieldMapping := bleve.NewTextFieldMapping() + termFieldMapping.IncludeInAll = false + termFieldMapping.Analyzer = analyzer_keyword.Name + docMapping.AddFieldMappingsAt("Language", termFieldMapping) + docMapping.AddFieldMappingsAt("CommitID", termFieldMapping) + + timeFieldMapping := bleve.NewDateTimeFieldMapping() + timeFieldMapping.IncludeInAll = false + docMapping.AddFieldMappingsAt("UpdatedAt", timeFieldMapping) + mapping := bleve.NewIndexMapping() if err := addUnicodeNormalizeTokenFilter(mapping); err != nil { return nil, err } else if err := mapping.AddCustomAnalyzer(repoIndexerAnalyzer, map[string]interface{}{ - "type": custom.Name, + "type": analyzer_custom.Name, "char_filters": []string{}, "tokenizer": unicode.Name, "token_filters": []string{unicodeNormalizeName, lowercase.Name}, @@ -255,7 +280,7 @@ func (b *BleveIndexer) Index(repoID int64) error { batch := rupture.NewFlushingBatch(b.indexer, maxBatchSize) for _, update := range changes.Updates { - if err := addUpdate(update, repo, batch); err != nil { + if err := addUpdate(sha, update, repo, batch); err != nil { return err } } @@ -289,7 +314,7 @@ func (b *BleveIndexer) Delete(repoID int64) error { // Search searches for files in the specified repo. // Returns the matching file-paths -func (b *BleveIndexer) Search(repoIDs []int64, keyword string, page, pageSize int) (int64, []*SearchResult, error) { +func (b *BleveIndexer) Search(repoIDs []int64, language, keyword string, page, pageSize int) (int64, []*SearchResult, []*SearchResultLanguages, error) { phraseQuery := bleve.NewMatchPhraseQuery(keyword) phraseQuery.FieldVal = "Content" phraseQuery.Analyzer = repoIndexerAnalyzer @@ -309,16 +334,35 @@ func (b *BleveIndexer) Search(repoIDs []int64, keyword string, page, pageSize in indexerQuery = phraseQuery } + // Save for reuse without language filter + facetQuery := indexerQuery + if len(language) > 0 { + languageQuery := bleve.NewMatchQuery(language) + languageQuery.FieldVal = "Language" + languageQuery.Analyzer = analyzer_keyword.Name + + indexerQuery = bleve.NewConjunctionQuery( + indexerQuery, + languageQuery, + ) + } + from := (page - 1) * pageSize searchRequest := bleve.NewSearchRequestOptions(indexerQuery, pageSize, from, false) - searchRequest.Fields = []string{"Content", "RepoID"} + searchRequest.Fields = []string{"Content", "RepoID", "Language", "CommitID", "UpdatedAt"} searchRequest.IncludeLocations = true + if len(language) == 0 { + searchRequest.AddFacet("languages", bleve.NewFacetRequest("Language", 10)) + } + result, err := b.indexer.Search(searchRequest) if err != nil { - return 0, nil, err + return 0, nil, nil, err } + total := int64(result.Total) + searchResults := make([]*SearchResult, len(result.Hits)) for i, hit := range result.Hits { var startIndex, endIndex int = -1, -1 @@ -333,13 +377,47 @@ func (b *BleveIndexer) Search(repoIDs []int64, keyword string, page, pageSize in endIndex = locationEnd } } + language := hit.Fields["Language"].(string) + var updatedUnix timeutil.TimeStamp + if t, err := time.Parse(time.RFC3339, hit.Fields["UpdatedAt"].(string)); err == nil { + updatedUnix = timeutil.TimeStamp(t.Unix()) + } searchResults[i] = &SearchResult{ - RepoID: int64(hit.Fields["RepoID"].(float64)), - StartIndex: startIndex, - EndIndex: endIndex, - Filename: filenameOfIndexerID(hit.ID), - Content: hit.Fields["Content"].(string), + RepoID: int64(hit.Fields["RepoID"].(float64)), + StartIndex: startIndex, + EndIndex: endIndex, + Filename: filenameOfIndexerID(hit.ID), + Content: hit.Fields["Content"].(string), + CommitID: hit.Fields["CommitID"].(string), + UpdatedUnix: updatedUnix, + Language: language, + Color: enry.GetColor(language), + } + } + + searchResultLanguages := make([]*SearchResultLanguages, 0, 10) + if len(language) > 0 { + // Use separate query to go get all language counts + facetRequest := bleve.NewSearchRequestOptions(facetQuery, 1, 0, false) + facetRequest.Fields = []string{"Content", "RepoID", "Language", "CommitID", "UpdatedAt"} + facetRequest.IncludeLocations = true + facetRequest.AddFacet("languages", bleve.NewFacetRequest("Language", 10)) + + if result, err = b.indexer.Search(facetRequest); err != nil { + return 0, nil, nil, err + } + + } + languagesFacet := result.Facets["languages"] + for _, term := range languagesFacet.Terms { + if len(term.Term) == 0 { + continue } + searchResultLanguages = append(searchResultLanguages, &SearchResultLanguages{ + Language: term.Term, + Color: enry.GetColor(term.Term), + Count: term.Count, + }) } - return int64(result.Total), searchResults, nil + return total, searchResults, searchResultLanguages, nil } diff --git a/modules/indexer/code/bleve_test.go b/modules/indexer/code/bleve_test.go index 695dceb259..89cfceea2d 100644 --- a/modules/indexer/code/bleve_test.go +++ b/modules/indexer/code/bleve_test.go @@ -49,27 +49,34 @@ func TestIndexAndSearch(t *testing.T) { keywords = []struct { Keyword string IDs []int64 + Langs int }{ { Keyword: "Description", IDs: []int64{1}, + Langs: 1, }, { Keyword: "repo1", IDs: []int64{1}, + Langs: 1, }, { Keyword: "non-exist", IDs: []int64{}, + Langs: 0, }, } ) for _, kw := range keywords { - total, res, err := idx.Search(nil, kw.Keyword, 1, 10) + total, res, langs, err := idx.Search(nil, "", kw.Keyword, 1, 10) assert.NoError(t, err) assert.EqualValues(t, len(kw.IDs), total) + assert.NotNil(t, langs) + assert.Len(t, langs, kw.Langs) + var ids = make([]int64, 0, len(res)) for _, hit := range res { ids = append(ids, hit.RepoID) diff --git a/modules/indexer/code/indexer.go b/modules/indexer/code/indexer.go index 3f9461cd0e..6cbda1491b 100644 --- a/modules/indexer/code/indexer.go +++ b/modules/indexer/code/indexer.go @@ -12,22 +12,34 @@ import ( "code.gitea.io/gitea/modules/graceful" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/modules/timeutil" ) // SearchResult result of performing a search in a repo type SearchResult struct { - RepoID int64 - StartIndex int - EndIndex int - Filename string - Content string + RepoID int64 + StartIndex int + EndIndex int + Filename string + Content string + CommitID string + UpdatedUnix timeutil.TimeStamp + Language string + Color string +} + +// SearchResultLanguages result of top languages count in search results +type SearchResultLanguages struct { + Language string + Color string + Count int } // Indexer defines an interface to indexer issues contents type Indexer interface { Index(repoID int64) error Delete(repoID int64) error - Search(repoIDs []int64, keyword string, page, pageSize int) (int64, []*SearchResult, error) + Search(repoIDs []int64, language, keyword string, page, pageSize int) (int64, []*SearchResult, []*SearchResultLanguages, error) Close() } diff --git a/modules/indexer/code/search.go b/modules/indexer/code/search.go index 18f193a532..ca57b3ff88 100644 --- a/modules/indexer/code/search.go +++ b/modules/indexer/code/search.go @@ -11,6 +11,7 @@ import ( "strings" "code.gitea.io/gitea/modules/highlight" + "code.gitea.io/gitea/modules/timeutil" "code.gitea.io/gitea/modules/util" ) @@ -18,6 +19,10 @@ import ( type Result struct { RepoID int64 Filename string + CommitID string + UpdatedUnix timeutil.TimeStamp + Language string + Color string HighlightClass string LineNumbers []int FormattedLines gotemplate.HTML @@ -100,6 +105,10 @@ func searchResult(result *SearchResult, startIndex, endIndex int) (*Result, erro return &Result{ RepoID: result.RepoID, Filename: result.Filename, + CommitID: result.CommitID, + UpdatedUnix: result.UpdatedUnix, + Language: result.Language, + Color: result.Color, HighlightClass: highlight.FileNameToHighlightClass(result.Filename), LineNumbers: lineNumbers, FormattedLines: gotemplate.HTML(formattedLinesBuffer.String()), @@ -107,14 +116,14 @@ func searchResult(result *SearchResult, startIndex, endIndex int) (*Result, erro } // PerformSearch perform a search on a repository -func PerformSearch(repoIDs []int64, keyword string, page, pageSize int) (int, []*Result, error) { +func PerformSearch(repoIDs []int64, language, keyword string, page, pageSize int) (int, []*Result, []*SearchResultLanguages, error) { if len(keyword) == 0 { - return 0, nil, nil + return 0, nil, nil, nil } - total, results, err := indexer.Search(repoIDs, keyword, page, pageSize) + total, results, resultLanguages, err := indexer.Search(repoIDs, language, keyword, page, pageSize) if err != nil { - return 0, nil, err + return 0, nil, nil, err } displayResults := make([]*Result, len(results)) @@ -123,8 +132,8 @@ func PerformSearch(repoIDs []int64, keyword string, page, pageSize int) (int, [] startIndex, endIndex := indices(result.Content, result.StartIndex, result.EndIndex) displayResults[i], err = searchResult(result, startIndex, endIndex) if err != nil { - return 0, nil, err + return 0, nil, nil, err } } - return int(total), displayResults, nil + return int(total), displayResults, resultLanguages, nil } diff --git a/modules/indexer/code/wrapped.go b/modules/indexer/code/wrapped.go index 6a20883989..926597a382 100644 --- a/modules/indexer/code/wrapped.go +++ b/modules/indexer/code/wrapped.go @@ -71,12 +71,12 @@ func (w *wrappedIndexer) Delete(repoID int64) error { return indexer.Delete(repoID) } -func (w *wrappedIndexer) Search(repoIDs []int64, keyword string, page, pageSize int) (int64, []*SearchResult, error) { +func (w *wrappedIndexer) Search(repoIDs []int64, language, keyword string, page, pageSize int) (int64, []*SearchResult, []*SearchResultLanguages, error) { indexer, err := w.get() if err != nil { - return 0, nil, err + return 0, nil, nil, err } - return indexer.Search(repoIDs, keyword, page, pageSize) + return indexer.Search(repoIDs, language, keyword, page, pageSize) } |