123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297 |
- // Copyright 2018 The Gitea Authors. All rights reserved.
- // SPDX-License-Identifier: MIT
-
- package bleve
-
- import (
- "context"
-
- indexer_internal "code.gitea.io/gitea/modules/indexer/internal"
- inner_bleve "code.gitea.io/gitea/modules/indexer/internal/bleve"
- "code.gitea.io/gitea/modules/indexer/issues/internal"
-
- "github.com/blevesearch/bleve/v2"
- "github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
- "github.com/blevesearch/bleve/v2/analysis/token/camelcase"
- "github.com/blevesearch/bleve/v2/analysis/token/lowercase"
- "github.com/blevesearch/bleve/v2/analysis/token/unicodenorm"
- "github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
- "github.com/blevesearch/bleve/v2/mapping"
- "github.com/blevesearch/bleve/v2/search/query"
- )
-
- const (
- issueIndexerAnalyzer = "issueIndexer"
- issueIndexerDocType = "issueIndexerDocType"
- issueIndexerLatestVersion = 4
- )
-
- const unicodeNormalizeName = "unicodeNormalize"
-
- func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error {
- return m.AddCustomTokenFilter(unicodeNormalizeName, map[string]any{
- "type": unicodenorm.Name,
- "form": unicodenorm.NFC,
- })
- }
-
- const maxBatchSize = 16
-
- // IndexerData an update to the issue indexer
- type IndexerData internal.IndexerData
-
- // Type returns the document type, for bleve's mapping.Classifier interface.
- func (i *IndexerData) Type() string {
- return issueIndexerDocType
- }
-
- // generateIssueIndexMapping generates the bleve index mapping for issues
- func generateIssueIndexMapping() (mapping.IndexMapping, error) {
- mapping := bleve.NewIndexMapping()
- docMapping := bleve.NewDocumentMapping()
-
- numericFieldMapping := bleve.NewNumericFieldMapping()
- numericFieldMapping.Store = false
- numericFieldMapping.IncludeInAll = false
- docMapping.AddFieldMappingsAt("repo_id", numericFieldMapping)
-
- textFieldMapping := bleve.NewTextFieldMapping()
- textFieldMapping.Store = false
- textFieldMapping.IncludeInAll = false
-
- boolFieldMapping := bleve.NewBooleanFieldMapping()
- boolFieldMapping.Store = false
- boolFieldMapping.IncludeInAll = false
-
- numberFieldMapping := bleve.NewNumericFieldMapping()
- numberFieldMapping.Store = false
- numberFieldMapping.IncludeInAll = false
-
- docMapping.AddFieldMappingsAt("is_public", boolFieldMapping)
-
- docMapping.AddFieldMappingsAt("title", textFieldMapping)
- docMapping.AddFieldMappingsAt("content", textFieldMapping)
- docMapping.AddFieldMappingsAt("comments", textFieldMapping)
-
- docMapping.AddFieldMappingsAt("is_pull", boolFieldMapping)
- docMapping.AddFieldMappingsAt("is_closed", boolFieldMapping)
- docMapping.AddFieldMappingsAt("label_ids", numberFieldMapping)
- docMapping.AddFieldMappingsAt("no_label", boolFieldMapping)
- docMapping.AddFieldMappingsAt("milestone_id", numberFieldMapping)
- docMapping.AddFieldMappingsAt("project_id", numberFieldMapping)
- docMapping.AddFieldMappingsAt("project_board_id", numberFieldMapping)
- docMapping.AddFieldMappingsAt("poster_id", numberFieldMapping)
- docMapping.AddFieldMappingsAt("assignee_id", numberFieldMapping)
- docMapping.AddFieldMappingsAt("mention_ids", numberFieldMapping)
- docMapping.AddFieldMappingsAt("reviewed_ids", numberFieldMapping)
- docMapping.AddFieldMappingsAt("review_requested_ids", numberFieldMapping)
- docMapping.AddFieldMappingsAt("subscriber_ids", numberFieldMapping)
- docMapping.AddFieldMappingsAt("updated_unix", numberFieldMapping)
-
- docMapping.AddFieldMappingsAt("created_unix", numberFieldMapping)
- docMapping.AddFieldMappingsAt("deadline_unix", numberFieldMapping)
- docMapping.AddFieldMappingsAt("comment_count", numberFieldMapping)
-
- if err := addUnicodeNormalizeTokenFilter(mapping); err != nil {
- return nil, err
- } else if err = mapping.AddCustomAnalyzer(issueIndexerAnalyzer, map[string]any{
- "type": custom.Name,
- "char_filters": []string{},
- "tokenizer": unicode.Name,
- "token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name},
- }); err != nil {
- return nil, err
- }
-
- mapping.DefaultAnalyzer = issueIndexerAnalyzer
- mapping.AddDocumentMapping(issueIndexerDocType, docMapping)
- mapping.AddDocumentMapping("_all", bleve.NewDocumentDisabledMapping())
- mapping.DefaultMapping = bleve.NewDocumentDisabledMapping() // disable default mapping, avoid indexing unexpected structs
-
- return mapping, nil
- }
-
- var _ internal.Indexer = &Indexer{}
-
- // Indexer implements Indexer interface
- type Indexer struct {
- inner *inner_bleve.Indexer
- indexer_internal.Indexer // do not composite inner_bleve.Indexer directly to avoid exposing too much
- }
-
- // NewIndexer creates a new bleve local indexer
- func NewIndexer(indexDir string) *Indexer {
- inner := inner_bleve.NewIndexer(indexDir, issueIndexerLatestVersion, generateIssueIndexMapping)
- return &Indexer{
- Indexer: inner,
- inner: inner,
- }
- }
-
- // Index will save the index data
- func (b *Indexer) Index(_ context.Context, issues ...*internal.IndexerData) error {
- batch := inner_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize)
- for _, issue := range issues {
- if err := batch.Index(indexer_internal.Base36(issue.ID), (*IndexerData)(issue)); err != nil {
- return err
- }
- }
- return batch.Flush()
- }
-
- // Delete deletes indexes by ids
- func (b *Indexer) Delete(_ context.Context, ids ...int64) error {
- batch := inner_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize)
- for _, id := range ids {
- if err := batch.Delete(indexer_internal.Base36(id)); err != nil {
- return err
- }
- }
- return batch.Flush()
- }
-
- // Search searches for issues by given conditions.
- // Returns the matching issue IDs
- func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (*internal.SearchResult, error) {
- var queries []query.Query
-
- if options.Keyword != "" {
- if options.IsFuzzyKeyword {
- queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{
- inner_bleve.MatchPhraseQuery(options.Keyword, "title", issueIndexerAnalyzer),
- inner_bleve.MatchPhraseQuery(options.Keyword, "content", issueIndexerAnalyzer),
- inner_bleve.MatchPhraseQuery(options.Keyword, "comments", issueIndexerAnalyzer),
- }...))
- } else {
- queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{
- inner_bleve.PrefixQuery(options.Keyword, "title"),
- inner_bleve.PrefixQuery(options.Keyword, "content"),
- inner_bleve.PrefixQuery(options.Keyword, "comments"),
- }...))
- }
- }
-
- if len(options.RepoIDs) > 0 || options.AllPublic {
- var repoQueries []query.Query
- for _, repoID := range options.RepoIDs {
- repoQueries = append(repoQueries, inner_bleve.NumericEqualityQuery(repoID, "repo_id"))
- }
- if options.AllPublic {
- repoQueries = append(repoQueries, inner_bleve.BoolFieldQuery(true, "is_public"))
- }
- queries = append(queries, bleve.NewDisjunctionQuery(repoQueries...))
- }
-
- if options.IsPull.Has() {
- queries = append(queries, inner_bleve.BoolFieldQuery(options.IsPull.Value(), "is_pull"))
- }
- if options.IsClosed.Has() {
- queries = append(queries, inner_bleve.BoolFieldQuery(options.IsClosed.Value(), "is_closed"))
- }
-
- if options.NoLabelOnly {
- queries = append(queries, inner_bleve.BoolFieldQuery(true, "no_label"))
- } else {
- if len(options.IncludedLabelIDs) > 0 {
- var includeQueries []query.Query
- for _, labelID := range options.IncludedLabelIDs {
- includeQueries = append(includeQueries, inner_bleve.NumericEqualityQuery(labelID, "label_ids"))
- }
- queries = append(queries, bleve.NewConjunctionQuery(includeQueries...))
- } else if len(options.IncludedAnyLabelIDs) > 0 {
- var includeQueries []query.Query
- for _, labelID := range options.IncludedAnyLabelIDs {
- includeQueries = append(includeQueries, inner_bleve.NumericEqualityQuery(labelID, "label_ids"))
- }
- queries = append(queries, bleve.NewDisjunctionQuery(includeQueries...))
- }
- if len(options.ExcludedLabelIDs) > 0 {
- var excludeQueries []query.Query
- for _, labelID := range options.ExcludedLabelIDs {
- q := bleve.NewBooleanQuery()
- q.AddMustNot(inner_bleve.NumericEqualityQuery(labelID, "label_ids"))
- excludeQueries = append(excludeQueries, q)
- }
- queries = append(queries, bleve.NewConjunctionQuery(excludeQueries...))
- }
- }
-
- if len(options.MilestoneIDs) > 0 {
- var milestoneQueries []query.Query
- for _, milestoneID := range options.MilestoneIDs {
- milestoneQueries = append(milestoneQueries, inner_bleve.NumericEqualityQuery(milestoneID, "milestone_id"))
- }
- queries = append(queries, bleve.NewDisjunctionQuery(milestoneQueries...))
- }
-
- if options.ProjectID.Has() {
- queries = append(queries, inner_bleve.NumericEqualityQuery(options.ProjectID.Value(), "project_id"))
- }
- if options.ProjectBoardID.Has() {
- queries = append(queries, inner_bleve.NumericEqualityQuery(options.ProjectBoardID.Value(), "project_board_id"))
- }
-
- if options.PosterID.Has() {
- queries = append(queries, inner_bleve.NumericEqualityQuery(options.PosterID.Value(), "poster_id"))
- }
-
- if options.AssigneeID.Has() {
- queries = append(queries, inner_bleve.NumericEqualityQuery(options.AssigneeID.Value(), "assignee_id"))
- }
-
- if options.MentionID.Has() {
- queries = append(queries, inner_bleve.NumericEqualityQuery(options.MentionID.Value(), "mention_ids"))
- }
-
- if options.ReviewedID.Has() {
- queries = append(queries, inner_bleve.NumericEqualityQuery(options.ReviewedID.Value(), "reviewed_ids"))
- }
- if options.ReviewRequestedID.Has() {
- queries = append(queries, inner_bleve.NumericEqualityQuery(options.ReviewRequestedID.Value(), "review_requested_ids"))
- }
-
- if options.SubscriberID.Has() {
- queries = append(queries, inner_bleve.NumericEqualityQuery(options.SubscriberID.Value(), "subscriber_ids"))
- }
-
- if options.UpdatedAfterUnix.Has() || options.UpdatedBeforeUnix.Has() {
- queries = append(queries, inner_bleve.NumericRangeInclusiveQuery(
- options.UpdatedAfterUnix,
- options.UpdatedBeforeUnix,
- "updated_unix"))
- }
-
- var indexerQuery query.Query = bleve.NewConjunctionQuery(queries...)
- if len(queries) == 0 {
- indexerQuery = bleve.NewMatchAllQuery()
- }
-
- skip, limit := indexer_internal.ParsePaginator(options.Paginator)
- search := bleve.NewSearchRequestOptions(indexerQuery, limit, skip, false)
-
- if options.SortBy == "" {
- options.SortBy = internal.SortByCreatedAsc
- }
-
- search.SortBy([]string{string(options.SortBy), "-_id"})
-
- result, err := b.inner.Indexer.SearchInContext(ctx, search)
- if err != nil {
- return nil, err
- }
-
- ret := &internal.SearchResult{
- Total: int64(result.Total),
- Hits: make([]internal.Match, 0, len(result.Hits)),
- }
- for _, hit := range result.Hits {
- id, err := indexer_internal.ParseBase36(hit.ID)
- if err != nil {
- return nil, err
- }
- ret.Hits = append(ret.Hits, internal.Match{
- ID: id,
- })
- }
- return ret, nil
- }
|