]> source.dussan.org Git - gitea.git/commitdiff
Allow code search by filename (#32210)
authorBruno Sofiato <bruno.sofiato@gmail.com>
Fri, 11 Oct 2024 23:35:04 +0000 (20:35 -0300)
committerGitHub <noreply@github.com>
Fri, 11 Oct 2024 23:35:04 +0000 (23:35 +0000)
This is a large and complex PR, so let me explain in detail its changes.

First, I had to create new index mappings for Bleve and Elasticsearch as
the current ones do not support search by filename. This requires Gitea
to recreate the code search indexes (I do not know if this is a breaking
change, but I feel it deserves a heads-up).

I've used [this
approach](https://www.elastic.co/guide/en/elasticsearch/reference/7.17/analysis-pathhierarchy-tokenizer.html)
to model the filename index. It allows us to efficiently search for both
the full path and the name of a file. Bleve, however, does not support
this out-of-box, so I had to code a brand new [token
filter](https://blevesearch.com/docs/Token-Filters/) to generate the
search terms.

I also did an overhaul in the `indexer_test.go` file. It now asserts the
order of the expected results (this is important since matches based on
the name of a file are more relevant than those based on its content).
I've added new test scenarios that deal with searching by filename. They
use a new repo included in the Gitea fixture.

The screenshot below depicts how Gitea shows the search results. It
shows results based on content in the same way as the current version
does. In matches based on the filename, the first seven lines of the
file contents are shown (BTW, this is how GitHub does it).

![image](https://github.com/user-attachments/assets/9d938d86-1a8d-4f89-8644-1921a473e858)

Resolves #32096

---------

Signed-off-by: Bruno Sofiato <bruno.sofiato@gmail.com>
38 files changed:
models/fixtures/repo_unit.yml
models/fixtures/repository.yml
models/fixtures/user.yml
models/repo/repo_list_test.go
models/user/user_test.go
modules/indexer/code/bleve/bleve.go
modules/indexer/code/bleve/token/path/path.go [new file with mode: 0644]
modules/indexer/code/bleve/token/path/path_test.go [new file with mode: 0644]
modules/indexer/code/elasticsearch/elasticsearch.go
modules/indexer/code/elasticsearch/elasticsearch_test.go
modules/indexer/code/indexer_test.go
modules/indexer/code/internal/util.go
modules/indexer/internal/bleve/util.go
modules/indexer/internal/bleve/util_test.go [new file with mode: 0644]
tests/gitea-repositories-meta/org42/search-by-path.git/GIT_COLA_MSG [new file with mode: 0644]
tests/gitea-repositories-meta/org42/search-by-path.git/HEAD [new file with mode: 0644]
tests/gitea-repositories-meta/org42/search-by-path.git/config [new file with mode: 0644]
tests/gitea-repositories-meta/org42/search-by-path.git/description [new file with mode: 0644]
tests/gitea-repositories-meta/org42/search-by-path.git/hooks/post-receive [new file with mode: 0755]
tests/gitea-repositories-meta/org42/search-by-path.git/hooks/post-receive.d/gitea [new file with mode: 0755]
tests/gitea-repositories-meta/org42/search-by-path.git/hooks/pre-receive [new file with mode: 0755]
tests/gitea-repositories-meta/org42/search-by-path.git/hooks/pre-receive.d/gitea [new file with mode: 0755]
tests/gitea-repositories-meta/org42/search-by-path.git/hooks/proc-receive [new file with mode: 0755]
tests/gitea-repositories-meta/org42/search-by-path.git/hooks/proc-receive.d/gitea [new file with mode: 0755]
tests/gitea-repositories-meta/org42/search-by-path.git/hooks/update [new file with mode: 0755]
tests/gitea-repositories-meta/org42/search-by-path.git/hooks/update.d/gitea [new file with mode: 0755]
tests/gitea-repositories-meta/org42/search-by-path.git/info/exclude [new file with mode: 0644]
tests/gitea-repositories-meta/org42/search-by-path.git/info/refs [new file with mode: 0644]
tests/gitea-repositories-meta/org42/search-by-path.git/logs/refs/heads/master [new file with mode: 0644]
tests/gitea-repositories-meta/org42/search-by-path.git/objects/info/commit-graph [new file with mode: 0644]
tests/gitea-repositories-meta/org42/search-by-path.git/objects/info/packs [new file with mode: 0644]
tests/gitea-repositories-meta/org42/search-by-path.git/objects/pack/pack-393dc29256bc27cb2ec73898507df710be7a3cf5.bitmap [new file with mode: 0644]
tests/gitea-repositories-meta/org42/search-by-path.git/objects/pack/pack-393dc29256bc27cb2ec73898507df710be7a3cf5.idx [new file with mode: 0644]
tests/gitea-repositories-meta/org42/search-by-path.git/objects/pack/pack-393dc29256bc27cb2ec73898507df710be7a3cf5.pack [new file with mode: 0644]
tests/gitea-repositories-meta/org42/search-by-path.git/objects/pack/pack-393dc29256bc27cb2ec73898507df710be7a3cf5.rev [new file with mode: 0644]
tests/gitea-repositories-meta/org42/search-by-path.git/packed-refs [new file with mode: 0644]
tests/integration/api_org_test.go
tests/integration/api_repo_test.go

index 8a22db0445c64bd1577bed464771d44da5fafa8b..f6b6252da1f88438642fad5d94e562376eecbff7 100644 (file)
   type: 3
   config: "{\"IgnoreWhitespaceConflicts\":false,\"AllowMerge\":true,\"AllowRebase\":true,\"AllowRebaseMerge\":true,\"AllowSquash\":true}"
   created_unix: 946684810
+
+-
+  id: 108
+  repo_id: 62
+  type: 1
+  config: "{}"
+  created_unix: 946684810
+
+-
+  id: 109
+  repo_id: 62
+  type: 2
+  config: "{\"EnableTimetracker\":true,\"AllowOnlyContributorsToTrackTime\":true}"
+  created_unix: 946684810
+
+-
+  id: 110
+  repo_id: 62
+  type: 3
+  config: "{\"IgnoreWhitespaceConflicts\":false,\"AllowMerge\":true,\"AllowRebase\":true,\"AllowRebaseMerge\":true,\"AllowSquash\":true}"
+  created_unix: 946684810
index e141593f41576af44e9c15909fc7440d48fc6bbf..b7970cb7c82f61d23a32fa5a9af7376cdc94b747 100644 (file)
   size: 0
   is_fsck_enabled: true
   close_issues_via_commit_in_any_branch: false
+
+-
+  id: 62
+  owner_id: 42
+  owner_name: org42
+  lower_name: search-by-path
+  name: search-by-path
+  default_branch: master
+  num_watches: 0
+  num_stars: 0
+  num_forks: 0
+  num_issues: 0
+  num_closed_issues: 0
+  num_pulls: 0
+  num_closed_pulls: 0
+  num_milestones: 0
+  num_closed_milestones: 0
+  num_projects: 0
+  num_closed_projects: 0
+  is_private: false
+  is_empty: false
+  is_archived: false
+  is_mirror: false
+  status: 0
+  is_fork: false
+  fork_id: 0
+  is_template: false
+  template_id: 0
+  size: 0
+  is_fsck_enabled: true
+  close_issues_via_commit_in_any_branch: false
index 8504d88ce59951c3d834a1d5cb8199be7668ab25..c0296deec55bd92fd459fe47d3f96875a7ec6f78 100644 (file)
   repo_admin_change_team_access: false
   theme: ""
   keep_activity_private: false
+
+-
+  id: 42
+  lower_name: org42
+  name: org42
+  full_name: Org42
+  email: org42@example.com
+  keep_email_private: false
+  email_notifications_preference: onmention
+  passwd: ZogKvWdyEx:password
+  passwd_hash_algo: dummy
+  must_change_password: false
+  login_source: 0
+  login_name: org42
+  type: 1
+  salt: ZogKvWdyEx
+  max_repo_creation: -1
+  is_active: false
+  is_admin: false
+  is_restricted: false
+  allow_git_hook: false
+  allow_import_local: false
+  allow_create_organization: true
+  prohibit_login: false
+  avatar: avatar42
+  avatar_email: org42@example.com
+  use_custom_avatar: false
+  num_followers: 0
+  num_following: 0
+  num_stars: 0
+  num_repos: 1
+  num_teams: 0
+  num_members: 0
+  visibility: 0
+  repo_admin_change_team_access: false
+  theme: ""
+  keep_activity_private: false
index 88cfcde6208324a7ae5df953d5e1cd632bb96a2c..ca6007f6c7882c0bc1304997f2f24e676688d1f6 100644 (file)
@@ -138,12 +138,12 @@ func getTestCases() []struct {
                {
                        name:  "AllPublic/PublicRepositoriesOfUserIncludingCollaborative",
                        opts:  &repo_model.SearchRepoOptions{ListOptions: db.ListOptions{Page: 1, PageSize: 10}, OwnerID: 15, AllPublic: true, Template: optional.Some(false)},
-                       count: 33,
+                       count: 34,
                },
                {
                        name:  "AllPublic/PublicAndPrivateRepositoriesOfUserIncludingCollaborative",
                        opts:  &repo_model.SearchRepoOptions{ListOptions: db.ListOptions{Page: 1, PageSize: 10}, OwnerID: 15, Private: true, AllPublic: true, AllLimited: true, Template: optional.Some(false)},
-                       count: 38,
+                       count: 39,
                },
                {
                        name:  "AllPublic/PublicAndPrivateRepositoriesOfUserIncludingCollaborativeByName",
@@ -158,7 +158,7 @@ func getTestCases() []struct {
                {
                        name:  "AllPublic/PublicRepositoriesOfOrganization",
                        opts:  &repo_model.SearchRepoOptions{ListOptions: db.ListOptions{Page: 1, PageSize: 10}, OwnerID: 17, AllPublic: true, Collaborate: optional.Some(false), Template: optional.Some(false)},
-                       count: 33,
+                       count: 34,
                },
                {
                        name:  "AllTemplates",
index 67efb3859fdf35c4f9d8a1333ea7f5e49269dc77..bc1abc64512c7e9929a6a40b1ff5cb75c2c6d2f5 100644 (file)
@@ -92,7 +92,10 @@ func TestSearchUsers(t *testing.T) {
        testOrgSuccess(&user_model.SearchUserOptions{OrderBy: "id ASC", ListOptions: db.ListOptions{Page: 4, PageSize: 2}},
                []int64{26, 41})
 
-       testOrgSuccess(&user_model.SearchUserOptions{ListOptions: db.ListOptions{Page: 5, PageSize: 2}},
+       testOrgSuccess(&user_model.SearchUserOptions{OrderBy: "id ASC", ListOptions: db.ListOptions{Page: 5, PageSize: 2}},
+               []int64{42})
+
+       testOrgSuccess(&user_model.SearchUserOptions{ListOptions: db.ListOptions{Page: 6, PageSize: 2}},
                []int64{})
 
        // test users
index c17f56d3cff5ab4708362f5bacedb60bd118b7a0..90e5e62bcb4aafb3a47e90559e06fdb2b5bc6b8d 100644 (file)
@@ -17,6 +17,7 @@ import (
        "code.gitea.io/gitea/modules/charset"
        "code.gitea.io/gitea/modules/git"
        "code.gitea.io/gitea/modules/gitrepo"
+       path_filter "code.gitea.io/gitea/modules/indexer/code/bleve/token/path"
        "code.gitea.io/gitea/modules/indexer/code/internal"
        indexer_internal "code.gitea.io/gitea/modules/indexer/internal"
        inner_bleve "code.gitea.io/gitea/modules/indexer/internal/bleve"
@@ -53,6 +54,7 @@ type RepoIndexerData struct {
        RepoID    int64
        CommitID  string
        Content   string
+       Filename  string
        Language  string
        UpdatedAt time.Time
 }
@@ -64,8 +66,10 @@ func (d *RepoIndexerData) Type() string {
 
 const (
        repoIndexerAnalyzer      = "repoIndexerAnalyzer"
+       filenameIndexerAnalyzer  = "filenameIndexerAnalyzer"
+       filenameIndexerTokenizer = "filenameIndexerTokenizer"
        repoIndexerDocType       = "repoIndexerDocType"
-       repoIndexerLatestVersion = 6
+       repoIndexerLatestVersion = 7
 )
 
 // generateBleveIndexMapping generates a bleve index mapping for the repo indexer
@@ -79,6 +83,11 @@ func generateBleveIndexMapping() (mapping.IndexMapping, error) {
        textFieldMapping.IncludeInAll = false
        docMapping.AddFieldMappingsAt("Content", textFieldMapping)
 
+       fileNamedMapping := bleve.NewTextFieldMapping()
+       fileNamedMapping.IncludeInAll = false
+       fileNamedMapping.Analyzer = filenameIndexerAnalyzer
+       docMapping.AddFieldMappingsAt("Filename", fileNamedMapping)
+
        termFieldMapping := bleve.NewTextFieldMapping()
        termFieldMapping.IncludeInAll = false
        termFieldMapping.Analyzer = analyzer_keyword.Name
@@ -90,6 +99,7 @@ func generateBleveIndexMapping() (mapping.IndexMapping, error) {
        docMapping.AddFieldMappingsAt("UpdatedAt", timeFieldMapping)
 
        mapping := bleve.NewIndexMapping()
+
        if err := addUnicodeNormalizeTokenFilter(mapping); err != nil {
                return nil, err
        } else if err := mapping.AddCustomAnalyzer(repoIndexerAnalyzer, map[string]any{
@@ -100,6 +110,16 @@ func generateBleveIndexMapping() (mapping.IndexMapping, error) {
        }); err != nil {
                return nil, err
        }
+
+       if err := mapping.AddCustomAnalyzer(filenameIndexerAnalyzer, map[string]any{
+               "type":          analyzer_custom.Name,
+               "char_filters":  []string{},
+               "tokenizer":     unicode.Name,
+               "token_filters": []string{unicodeNormalizeName, path_filter.Name, lowercase.Name},
+       }); err != nil {
+               return nil, err
+       }
+
        mapping.DefaultAnalyzer = repoIndexerAnalyzer
        mapping.AddDocumentMapping(repoIndexerDocType, docMapping)
        mapping.AddDocumentMapping("_all", bleve.NewDocumentDisabledMapping())
@@ -174,6 +194,7 @@ func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserErro
        return batch.Index(id, &RepoIndexerData{
                RepoID:    repo.ID,
                CommitID:  commitSha,
+               Filename:  update.Filename,
                Content:   string(charset.ToUTF8DropErrors(fileContents, charset.ConvertOpts{})),
                Language:  analyze.GetCodeLanguage(update.Filename, fileContents),
                UpdatedAt: time.Now().UTC(),
@@ -240,14 +261,19 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int
                keywordQuery query.Query
        )
 
-       phraseQuery := bleve.NewMatchPhraseQuery(opts.Keyword)
-       phraseQuery.FieldVal = "Content"
-       phraseQuery.Analyzer = repoIndexerAnalyzer
-       keywordQuery = phraseQuery
+       pathQuery := bleve.NewPrefixQuery(strings.ToLower(opts.Keyword))
+       pathQuery.FieldVal = "Filename"
+       pathQuery.SetBoost(10)
+
+       contentQuery := bleve.NewMatchQuery(opts.Keyword)
+       contentQuery.FieldVal = "Content"
+
        if opts.IsKeywordFuzzy {
-               phraseQuery.Fuzziness = inner_bleve.GuessFuzzinessByKeyword(opts.Keyword)
+               contentQuery.Fuzziness = inner_bleve.GuessFuzzinessByKeyword(opts.Keyword)
        }
 
+       keywordQuery = bleve.NewDisjunctionQuery(contentQuery, pathQuery)
+
        if len(opts.RepoIDs) > 0 {
                repoQueries := make([]query.Query, 0, len(opts.RepoIDs))
                for _, repoID := range opts.RepoIDs {
@@ -277,7 +303,7 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int
 
        from, pageSize := opts.GetSkipTake()
        searchRequest := bleve.NewSearchRequestOptions(indexerQuery, pageSize, from, false)
-       searchRequest.Fields = []string{"Content", "RepoID", "Language", "CommitID", "UpdatedAt"}
+       searchRequest.Fields = []string{"Content", "Filename", "RepoID", "Language", "CommitID", "UpdatedAt"}
        searchRequest.IncludeLocations = true
 
        if len(opts.Language) == 0 {
@@ -307,6 +333,10 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int
                                endIndex = locationEnd
                        }
                }
+               if len(hit.Locations["Filename"]) > 0 {
+                       startIndex, endIndex = internal.FilenameMatchIndexPos(hit.Fields["Content"].(string))
+               }
+
                language := hit.Fields["Language"].(string)
                var updatedUnix timeutil.TimeStamp
                if t, err := time.Parse(time.RFC3339, hit.Fields["UpdatedAt"].(string)); err == nil {
diff --git a/modules/indexer/code/bleve/token/path/path.go b/modules/indexer/code/bleve/token/path/path.go
new file mode 100644 (file)
index 0000000..107e0da
--- /dev/null
@@ -0,0 +1,101 @@
+// Copyright 2024 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package path
+
+import (
+       "slices"
+       "strings"
+
+       "github.com/blevesearch/bleve/v2/analysis"
+       "github.com/blevesearch/bleve/v2/registry"
+)
+
+const (
+       Name = "gitea/path"
+)
+
+type TokenFilter struct{}
+
+func NewTokenFilter() *TokenFilter {
+       return &TokenFilter{}
+}
+
+func TokenFilterConstructor(config map[string]any, cache *registry.Cache) (analysis.TokenFilter, error) {
+       return NewTokenFilter(), nil
+}
+
+func (s *TokenFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
+       if len(input) == 1 {
+               // if there is only one token, we don't need to generate the reversed chain
+               return generatePathTokens(input, false)
+       }
+
+       normal := generatePathTokens(input, false)
+       reversed := generatePathTokens(input, true)
+
+       return append(normal, reversed...)
+}
+
+// Generates path tokens from the input tokens.
+// This mimics the behavior of the path hierarchy tokenizer in ES. It takes the input tokens and combines them, generating a term for each component
+// in the tree (e.g., foo/bar/baz.md will generate foo, foo/bar, and foo/bar/baz.md).
+//
+// If the reverse flag is set, the order of the tokens is reversed (the same input will generate baz.md, baz.md/bar, baz.md/bar/foo). This is useful
+// for efficiently searching for filenames without supplying the full path.
+func generatePathTokens(input analysis.TokenStream, reversed bool) analysis.TokenStream {
+       terms := make([]string, 0, len(input))
+       longestTerm := 0
+
+       if reversed {
+               slices.Reverse(input)
+       }
+
+       for i := 0; i < len(input); i++ {
+               var sb strings.Builder
+               sb.WriteString(string(input[0].Term))
+
+               for j := 1; j < i; j++ {
+                       sb.WriteString("/")
+                       sb.WriteString(string(input[j].Term))
+               }
+
+               term := sb.String()
+
+               if longestTerm < len(term) {
+                       longestTerm = len(term)
+               }
+
+               terms = append(terms, term)
+       }
+
+       output := make(analysis.TokenStream, 0, len(terms))
+
+       for _, term := range terms {
+               var start, end int
+
+               if reversed {
+                       start = 0
+                       end = len(term)
+               } else {
+                       start = longestTerm - len(term)
+                       end = longestTerm
+               }
+
+               token := analysis.Token{
+                       Position: 1,
+                       Start:    start,
+                       End:      end,
+                       Type:     analysis.AlphaNumeric,
+                       Term:     []byte(term),
+               }
+
+               output = append(output, &token)
+       }
+
+       return output
+}
+
+func init() {
+       registry.RegisterTokenFilter(Name, TokenFilterConstructor)
+}
diff --git a/modules/indexer/code/bleve/token/path/path_test.go b/modules/indexer/code/bleve/token/path/path_test.go
new file mode 100644 (file)
index 0000000..cc52021
--- /dev/null
@@ -0,0 +1,76 @@
+// Copyright 2024 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package path
+
+import (
+       "fmt"
+       "testing"
+
+       "github.com/blevesearch/bleve/v2/analysis"
+       "github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
+       "github.com/stretchr/testify/assert"
+)
+
+type Scenario struct {
+       Input  string
+       Tokens []string
+}
+
+func TestTokenFilter(t *testing.T) {
+       scenarios := []struct {
+               Input string
+               Terms []string
+       }{
+               {
+                       Input: "Dockerfile",
+                       Terms: []string{"Dockerfile"},
+               },
+               {
+                       Input: "Dockerfile.rootless",
+                       Terms: []string{"Dockerfile.rootless"},
+               },
+               {
+                       Input: "a/b/c/Dockerfile.rootless",
+                       Terms: []string{"a", "a/b", "a/b/c", "a/b/c/Dockerfile.rootless", "Dockerfile.rootless", "Dockerfile.rootless/c", "Dockerfile.rootless/c/b", "Dockerfile.rootless/c/b/a"},
+               },
+               {
+                       Input: "",
+                       Terms: []string{},
+               },
+       }
+
+       for _, scenario := range scenarios {
+               t.Run(fmt.Sprintf("ensure terms of '%s'", scenario.Input), func(t *testing.T) {
+                       terms := extractTerms(scenario.Input)
+
+                       assert.Len(t, terms, len(scenario.Terms))
+
+                       for _, term := range terms {
+                               assert.Contains(t, scenario.Terms, term)
+                       }
+               })
+       }
+}
+
+func extractTerms(input string) []string {
+       tokens := tokenize(input)
+       filteredTokens := filter(tokens)
+       terms := make([]string, 0, len(filteredTokens))
+
+       for _, token := range filteredTokens {
+               terms = append(terms, string(token.Term))
+       }
+
+       return terms
+}
+
+func filter(input analysis.TokenStream) analysis.TokenStream {
+       filter := NewTokenFilter()
+       return filter.Filter(input)
+}
+
+func tokenize(input string) analysis.TokenStream {
+       tokenizer := unicode.NewUnicodeTokenizer()
+       return tokenizer.Tokenize([]byte(input))
+}
index 5c01034450be7987a69dce82160c3e59661a39d7..669a1bafcc90883cfbc8cbd30732e171f354b9d0 100644 (file)
@@ -30,7 +30,7 @@ import (
 )
 
 const (
-       esRepoIndexerLatestVersion = 1
+       esRepoIndexerLatestVersion = 2
        // multi-match-types, currently only 2 types are used
        // Reference: https://www.elastic.co/guide/en/elasticsearch/reference/7.0/query-dsl-multi-match-query.html#multi-match-types
        esMultiMatchTypeBestFields   = "best_fields"
@@ -57,12 +57,50 @@ func NewIndexer(url, indexerName string) *Indexer {
 
 const (
        defaultMapping = `{
+               "settings": {
+               "analysis": {
+                       "analyzer": {
+                               "filename_path_analyzer": {
+                                       "tokenizer": "path_tokenizer"
+                               },
+                               "reversed_filename_path_analyzer": {
+                                       "tokenizer": "reversed_path_tokenizer"
+                               }
+                       },
+                               "tokenizer": {
+                                       "path_tokenizer": {
+                                               "type": "path_hierarchy",
+                                               "delimiter": "/"
+                                       },
+                                       "reversed_path_tokenizer": {
+                                               "type": "path_hierarchy",
+                                               "delimiter": "/",
+                                               "reverse": true
+                                       }
+                               }
+                       }
+               },
                "mappings": {
                        "properties": {
                                "repo_id": {
                                        "type": "long",
                                        "index": true
                                },
+                               "filename": {
+                                       "type": "text",
+                                       "term_vector": "with_positions_offsets",
+                                       "index": true,
+                                       "fields": {
+                                       "path": {
+                                       "type": "text",
+                                       "analyzer": "reversed_filename_path_analyzer"
+                                               },
+                                       "path_reversed": {
+                                       "type": "text",
+                                       "analyzer": "filename_path_analyzer"
+                                       }
+                               }
+                               },
                                "content": {
                                        "type": "text",
                                        "term_vector": "with_positions_offsets",
@@ -136,6 +174,7 @@ func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserErro
                        Id(id).
                        Doc(map[string]any{
                                "repo_id":    repo.ID,
+                               "filename":   update.Filename,
                                "content":    string(charset.ToUTF8DropErrors(fileContents, charset.ConvertOpts{})),
                                "commit_id":  sha,
                                "language":   analyze.GetCodeLanguage(update.Filename, fileContents),
@@ -231,11 +270,11 @@ func (b *Indexer) doDelete(ctx context.Context, repoID int64) error {
        return err
 }
 
-// indexPos find words positions for start and the following end on content. It will
+// contentMatchIndexPos find words positions for start and the following end on content. It will
 // return the beginning position of the first start and the ending position of the
 // first end following the start string.
 // If not found any of the positions, it will return -1, -1.
-func indexPos(content, start, end string) (int, int) {
+func contentMatchIndexPos(content, start, end string) (int, int) {
        startIdx := strings.Index(content, start)
        if startIdx < 0 {
                return -1, -1
@@ -244,22 +283,29 @@ func indexPos(content, start, end string) (int, int) {
        if endIdx < 0 {
                return -1, -1
        }
-       return startIdx, startIdx + len(start) + endIdx + len(end)
+       return startIdx, (startIdx + len(start) + endIdx + len(end)) - 9 // remove the length <em></em> since we give Content the original data
 }
 
 func convertResult(searchResult *elastic.SearchResult, kw string, pageSize int) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) {
        hits := make([]*internal.SearchResult, 0, pageSize)
        for _, hit := range searchResult.Hits.Hits {
+               repoID, fileName := internal.ParseIndexerID(hit.Id)
+               res := make(map[string]any)
+               if err := json.Unmarshal(hit.Source, &res); err != nil {
+                       return 0, nil, nil, err
+               }
+
                // FIXME: There is no way to get the position the keyword on the content currently on the same request.
                // So we get it from content, this may made the query slower. See
                // https://discuss.elastic.co/t/fetching-position-of-keyword-in-matched-document/94291
                var startIndex, endIndex int
-               c, ok := hit.Highlight["content"]
-               if ok && len(c) > 0 {
+               if c, ok := hit.Highlight["filename"]; ok && len(c) > 0 {
+                       startIndex, endIndex = internal.FilenameMatchIndexPos(res["content"].(string))
+               } else if c, ok := hit.Highlight["content"]; ok && len(c) > 0 {
                        // FIXME: Since the highlighting content will include <em> and </em> for the keywords,
                        // now we should find the positions. But how to avoid html content which contains the
                        // <em> and </em> tags? If elastic search has handled that?
-                       startIndex, endIndex = indexPos(c[0], "<em>", "</em>")
+                       startIndex, endIndex = contentMatchIndexPos(c[0], "<em>", "</em>")
                        if startIndex == -1 {
                                panic(fmt.Sprintf("1===%s,,,%#v,,,%s", kw, hit.Highlight, c[0]))
                        }
@@ -267,12 +313,6 @@ func convertResult(searchResult *elastic.SearchResult, kw string, pageSize int)
                        panic(fmt.Sprintf("2===%#v", hit.Highlight))
                }
 
-               repoID, fileName := internal.ParseIndexerID(hit.Id)
-               res := make(map[string]any)
-               if err := json.Unmarshal(hit.Source, &res); err != nil {
-                       return 0, nil, nil, err
-               }
-
                language := res["language"].(string)
 
                hits = append(hits, &internal.SearchResult{
@@ -283,7 +323,7 @@ func convertResult(searchResult *elastic.SearchResult, kw string, pageSize int)
                        UpdatedUnix: timeutil.TimeStamp(res["updated_at"].(float64)),
                        Language:    language,
                        StartIndex:  startIndex,
-                       EndIndex:    endIndex - 9, // remove the length <em></em> since we give Content the original data
+                       EndIndex:    endIndex,
                        Color:       enry.GetColor(language),
                })
        }
@@ -315,7 +355,10 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int
                searchType = esMultiMatchTypeBestFields
        }
 
-       kwQuery := elastic.NewMultiMatchQuery(opts.Keyword, "content").Type(searchType)
+       kwQuery := elastic.NewBoolQuery().Should(
+               elastic.NewMultiMatchQuery(opts.Keyword, "content").Type(searchType),
+               elastic.NewMultiMatchQuery(opts.Keyword, "filename^10").Type(esMultiMatchTypePhrasePrefix),
+       )
        query := elastic.NewBoolQuery()
        query = query.Must(kwQuery)
        if len(opts.RepoIDs) > 0 {
@@ -341,6 +384,7 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int
                        Highlight(
                                elastic.NewHighlight().
                                        Field("content").
+                                       Field("filename").
                                        NumOfFragments(0). // return all highting content on fragments
                                        HighlighterType("fvh"),
                        ).
@@ -373,6 +417,7 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int
                Highlight(
                        elastic.NewHighlight().
                                Field("content").
+                               Field("filename").
                                NumOfFragments(0). // return all highting content on fragments
                                HighlighterType("fvh"),
                ).
index c6ba93e76d4695a1ebd2831afb2dbfacfc4217d1..a6d2af92b2b11b436449dc5103416fb6cf15ec72 100644 (file)
@@ -10,7 +10,7 @@ import (
 )
 
 func TestIndexPos(t *testing.T) {
-       startIdx, endIdx := indexPos("test index start and end", "start", "end")
+       startIdx, endIdx := contentMatchIndexPos("test index start and end", "start", "end")
        assert.EqualValues(t, 11, startIdx)
-       assert.EqualValues(t, 24, endIdx)
+       assert.EqualValues(t, 15, endIdx)
 }
index 8975c5ce4083bfba1d00b8898c6166253ddfea32..5b33528dcde04e3a8b20aa8a684413f563342ff8 100644 (file)
@@ -6,6 +6,7 @@ package code
 import (
        "context"
        "os"
+       "slices"
        "testing"
 
        "code.gitea.io/gitea/models/db"
@@ -20,53 +21,166 @@ import (
        _ "code.gitea.io/gitea/models/activities"
 
        "github.com/stretchr/testify/assert"
+
+       _ "github.com/mattn/go-sqlite3"
 )
 
+type codeSearchResult struct {
+       Filename string
+       Content  string
+}
+
 func TestMain(m *testing.M) {
        unittest.MainTest(m)
 }
 
 func testIndexer(name string, t *testing.T, indexer internal.Indexer) {
        t.Run(name, func(t *testing.T) {
-               var repoID int64 = 1
-               err := index(git.DefaultContext, indexer, repoID)
-               assert.NoError(t, err)
+               assert.NoError(t, setupRepositoryIndexes(git.DefaultContext, indexer))
+
                keywords := []struct {
                        RepoIDs []int64
                        Keyword string
-                       IDs     []int64
                        Langs   int
+                       Results []codeSearchResult
                }{
+                       // Search for an exact match on the contents of a file
+                       // This scenario yields a single result (the file README.md on the repo '1')
                        {
                                RepoIDs: nil,
                                Keyword: "Description",
-                               IDs:     []int64{repoID},
                                Langs:   1,
+                               Results: []codeSearchResult{
+                                       {
+                                               Filename: "README.md",
+                                               Content:  "# repo1\n\nDescription for repo1",
+                                       },
+                               },
                        },
+                       // Search for an exact match on the contents of a file within the repo '2'.
+                       // This scenario yields no results
                        {
                                RepoIDs: []int64{2},
                                Keyword: "Description",
-                               IDs:     []int64{},
                                Langs:   0,
                        },
+                       // Search for an exact match on the contents of a file
+                       // This scenario yields a single result (the file README.md on the repo '1')
                        {
                                RepoIDs: nil,
                                Keyword: "repo1",
-                               IDs:     []int64{repoID},
                                Langs:   1,
+                               Results: []codeSearchResult{
+                                       {
+                                               Filename: "README.md",
+                                               Content:  "# repo1\n\nDescription for repo1",
+                                       },
+                               },
                        },
+                       // Search for an exact match on the contents of a file within the repo '2'.
+                       // This scenario yields no results
                        {
                                RepoIDs: []int64{2},
                                Keyword: "repo1",
-                               IDs:     []int64{},
                                Langs:   0,
                        },
+                       // Search for a non-existing term.
+                       // This scenario yields no results
                        {
                                RepoIDs: nil,
                                Keyword: "non-exist",
-                               IDs:     []int64{},
                                Langs:   0,
                        },
+                       // Search for an exact match on the contents of a file within the repo '62'.
+                       // This scenario yields a single result (the file avocado.md on the repo '62')
+                       {
+                               RepoIDs: []int64{62},
+                               Keyword: "pineaple",
+                               Langs:   1,
+                               Results: []codeSearchResult{
+                                       {
+                                               Filename: "avocado.md",
+                                               Content:  "# repo1\n\npineaple pie of cucumber juice",
+                                       },
+                               },
+                       },
+                       // Search for an exact match on the filename within the repo '62'.
+                       // This scenario yields a single result (the file avocado.md on the repo '62')
+                       {
+                               RepoIDs: []int64{62},
+                               Keyword: "avocado.md",
+                               Langs:   1,
+                               Results: []codeSearchResult{
+                                       {
+                                               Filename: "avocado.md",
+                                               Content:  "# repo1\n\npineaple pie of cucumber juice",
+                                       },
+                               },
+                       },
+                       // Search for a partial match on the filename within the repo '62'.
+                       // This scenario yields a single result (the file avocado.md on the repo '62')
+                       {
+                               RepoIDs: []int64{62},
+                               Keyword: "avo",
+                               Langs:   1,
+                               Results: []codeSearchResult{
+                                       {
+                                               Filename: "avocado.md",
+                                               Content:  "# repo1\n\npineaple pie of cucumber juice",
+                                       },
+                               },
+                       },
+                       // Search for matches on both the contents and the filenames within the repo '62'.
+                       // This scenario yields two results: the first result is based on the filename (cucumber.md) while the second is based on the contents
+                       {
+                               RepoIDs: []int64{62},
+                               Keyword: "cucumber",
+                               Langs:   1,
+                               Results: []codeSearchResult{
+                                       {
+                                               Filename: "cucumber.md",
+                                               Content:  "Salad is good for your health",
+                                       },
+                                       {
+                                               Filename: "avocado.md",
+                                               Content:  "# repo1\n\npineaple pie of cucumber juice",
+                                       },
+                               },
+                       },
+                       // Search for matches on the filenames within the repo '62'.
+                       // This scenario yields two results (both are based on the filename, the first one is an exact match)
+                       {
+                               RepoIDs: []int64{62},
+                               Keyword: "ham",
+                               Langs:   1,
+                               Results: []codeSearchResult{
+                                       {
+                                               Filename: "ham.md",
+                                               Content:  "This is also not cheese",
+                                       },
+                                       {
+                                               Filename: "potato/ham.md",
+                                               Content:  "This is not cheese",
+                                       },
+                               },
+                       },
+                       // Search for matches on the contents of files within the repo '62'.
+                       // This scenario yields two results (both are based on contents, the first one is an exact match whereas the second is a 'fuzzy' one)
+                       {
+                               RepoIDs: []int64{62},
+                               Keyword: "This is not cheese",
+                               Langs:   1,
+                               Results: []codeSearchResult{
+                                       {
+                                               Filename: "potato/ham.md",
+                                               Content:  "This is not cheese",
+                                       },
+                                       {
+                                               Filename: "ham.md",
+                                               Content:  "This is also not cheese",
+                                       },
+                               },
+                       },
                }
 
                for _, kw := range keywords {
@@ -81,19 +195,37 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) {
                                        IsKeywordFuzzy: true,
                                })
                                assert.NoError(t, err)
-                               assert.Len(t, kw.IDs, int(total))
                                assert.Len(t, langs, kw.Langs)
 
-                               ids := make([]int64, 0, len(res))
+                               hits := make([]codeSearchResult, 0, len(res))
+
+                               if total > 0 {
+                                       assert.NotEmpty(t, kw.Results, "The given scenario does not provide any expected results")
+                               }
+
                                for _, hit := range res {
-                                       ids = append(ids, hit.RepoID)
-                                       assert.EqualValues(t, "# repo1\n\nDescription for repo1", hit.Content)
+                                       hits = append(hits, codeSearchResult{
+                                               Filename: hit.Filename,
+                                               Content:  hit.Content,
+                                       })
+                               }
+
+                               lastIndex := -1
+
+                               for _, expected := range kw.Results {
+                                       index := slices.Index(hits, expected)
+                                       if index == -1 {
+                                               assert.Failf(t, "Result not found", "Expected %v in %v", expected, hits)
+                                       } else if lastIndex > index {
+                                               assert.Failf(t, "Result is out of order", "The order of %v within %v is wrong", expected, hits)
+                                       } else {
+                                               lastIndex = index
+                                       }
                                }
-                               assert.EqualValues(t, kw.IDs, ids)
                        })
                }
 
-               assert.NoError(t, indexer.Delete(context.Background(), repoID))
+               assert.NoError(t, tearDownRepositoryIndexes(indexer))
        })
 }
 
@@ -136,3 +268,25 @@ func TestESIndexAndSearch(t *testing.T) {
 
        testIndexer("elastic_search", t, indexer)
 }
+
+func setupRepositoryIndexes(ctx context.Context, indexer internal.Indexer) error {
+       for _, repoID := range repositoriesToSearch() {
+               if err := index(ctx, indexer, repoID); err != nil {
+                       return err
+               }
+       }
+       return nil
+}
+
+func tearDownRepositoryIndexes(indexer internal.Indexer) error {
+       for _, repoID := range repositoriesToSearch() {
+               if err := indexer.Delete(context.Background(), repoID); err != nil {
+                       return err
+               }
+       }
+       return nil
+}
+
+func repositoriesToSearch() []int64 {
+       return []int64{1, 62}
+}
index 689c4f4584b144cb31c7f533512da89a6e395e19..5b95783d9fcfe4beadd53b4dd81add631d7be749 100644 (file)
@@ -10,6 +10,10 @@ import (
        "code.gitea.io/gitea/modules/log"
 )
 
+const (
+       filenameMatchNumberOfLines = 7 // Same number of preview lines as GitHub code search
+)
+
 func FilenameIndexerID(repoID int64, filename string) string {
        return internal.Base36(repoID) + "_" + filename
 }
@@ -30,3 +34,17 @@ func FilenameOfIndexerID(indexerID string) string {
        }
        return indexerID[index+1:]
 }
+
+// FilenameMatchIndexPos returns the start and end offsets of the leading lines of the given file contents (up to filenameMatchNumberOfLines lines).
+func FilenameMatchIndexPos(content string) (int, int) {
+       count := 1
+       for i, c := range content {
+               if c == '\n' {
+                       count++
+                       if count == filenameMatchNumberOfLines {
+                               return 0, i
+                       }
+               }
+       }
+       return 0, len(content)
+}
index a2265f86e6b35dcbe77f836cc83028d110f46845..b426b39bc20dbd351f7455374039f78480603cfb 100644 (file)
@@ -11,10 +11,15 @@ import (
        "code.gitea.io/gitea/modules/util"
 
        "github.com/blevesearch/bleve/v2"
+       "github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
        "github.com/blevesearch/bleve/v2/index/upsidedown"
        "github.com/ethantkoenig/rupture"
 )
 
+const (
+       maxFuzziness = 2
+)
+
 // openIndexer open the index at the specified path, checking for metadata
 // updates and bleve version updates.  If index needs to be created (or
 // re-created), returns (nil, nil)
@@ -48,7 +53,27 @@ func openIndexer(path string, latestVersion int) (bleve.Index, int, error) {
        return index, 0, nil
 }
 
+// GuessFuzzinessByKeyword guesses the fuzziness for the given keyword. Fuzziness is based on the Levenshtein distance and determines how many
+// characters may differ between two strings while they are still considered equivalent.
+// Given a phrase, its shortest word determines its fuzziness. If a phrase uses CJK (eg: `갃갃갃` `啊啊啊`), the fuzziness is zero.
 func GuessFuzzinessByKeyword(s string) int {
+       tokenizer := unicode.NewUnicodeTokenizer()
+       tokens := tokenizer.Tokenize([]byte(s))
+
+       if len(tokens) > 0 {
+               fuzziness := maxFuzziness
+
+               for _, token := range tokens {
+                       fuzziness = min(fuzziness, guessFuzzinessByKeyword(string(token.Term)))
+               }
+
+               return fuzziness
+       }
+
+       return 0
+}
+
+func guessFuzzinessByKeyword(s string) int {
        // according to https://github.com/blevesearch/bleve/issues/1563, the supported max fuzziness is 2
        // magic number 4 was chosen to determine the levenshtein distance per each character of a keyword
        // BUT, when using CJK (eg: `갃갃갃` `啊啊啊`), it mismatches a lot.
@@ -57,5 +82,5 @@ func GuessFuzzinessByKeyword(s string) int {
                        return 0
                }
        }
-       return min(2, len(s)/4)
+       return min(maxFuzziness, len(s)/4)
 }
diff --git a/modules/indexer/internal/bleve/util_test.go b/modules/indexer/internal/bleve/util_test.go
new file mode 100644 (file)
index 0000000..ae0b12c
--- /dev/null
@@ -0,0 +1,45 @@
+// Copyright 2024 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package bleve
+
+import (
+       "fmt"
+       "testing"
+
+       "github.com/stretchr/testify/assert"
+)
+
+func TestBleveGuessFuzzinessByKeyword(t *testing.T) {
+       scenarios := []struct {
+               Input     string
+               Fuzziness int // See util.go for the definition of fuzziness in this particular context
+       }{
+               {
+                       Input:     "",
+                       Fuzziness: 0,
+               },
+               {
+                       Input:     "Avocado",
+                       Fuzziness: 1,
+               },
+               {
+                       Input:     "Geschwindigkeit",
+                       Fuzziness: 2,
+               },
+               {
+                       Input:     "non-exist",
+                       Fuzziness: 0,
+               },
+               {
+                       Input:     "갃갃갃",
+                       Fuzziness: 0,
+               },
+       }
+
+       for _, scenario := range scenarios {
+               t.Run(fmt.Sprintf("ensure fuzziness of '%s' is '%d'", scenario.Input, scenario.Fuzziness), func(t *testing.T) {
+                       assert.Equal(t, scenario.Fuzziness, GuessFuzzinessByKeyword(scenario.Input))
+               })
+       }
+}
diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/GIT_COLA_MSG b/tests/gitea-repositories-meta/org42/search-by-path.git/GIT_COLA_MSG
new file mode 100644 (file)
index 0000000..8b13789
--- /dev/null
@@ -0,0 +1 @@
+
diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/HEAD b/tests/gitea-repositories-meta/org42/search-by-path.git/HEAD
new file mode 100644 (file)
index 0000000..cb089cd
--- /dev/null
@@ -0,0 +1 @@
+ref: refs/heads/master
diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/config b/tests/gitea-repositories-meta/org42/search-by-path.git/config
new file mode 100644 (file)
index 0000000..07d359d
--- /dev/null
@@ -0,0 +1,4 @@
+[core]
+       repositoryformatversion = 0
+       filemode = true
+       bare = true
diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/description b/tests/gitea-repositories-meta/org42/search-by-path.git/description
new file mode 100644 (file)
index 0000000..382e2d7
--- /dev/null
@@ -0,0 +1,8 @@
+This repository will be used to test code search. The snippet below shows its directory structure
+
+.
+├── avocado.md
+├── cucumber.md
+├── ham.md
+└── potato
+    └── ham.md
diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/hooks/post-receive b/tests/gitea-repositories-meta/org42/search-by-path.git/hooks/post-receive
new file mode 100755 (executable)
index 0000000..4b3d452
--- /dev/null
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+ORI_DIR=`pwd`
+SHELL_FOLDER=$(cd "$(dirname "$0")";pwd)
+cd "$ORI_DIR"
+for i in `ls "$SHELL_FOLDER/post-receive.d"`; do
+    sh "$SHELL_FOLDER/post-receive.d/$i"
+done
\ No newline at end of file
diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/hooks/post-receive.d/gitea b/tests/gitea-repositories-meta/org42/search-by-path.git/hooks/post-receive.d/gitea
new file mode 100755 (executable)
index 0000000..43a948d
--- /dev/null
@@ -0,0 +1,2 @@
+#!/usr/bin/env bash
+"$GITEA_ROOT/gitea" hook --config="$GITEA_ROOT/$GITEA_CONF" post-receive
diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/hooks/pre-receive b/tests/gitea-repositories-meta/org42/search-by-path.git/hooks/pre-receive
new file mode 100755 (executable)
index 0000000..4127013
--- /dev/null
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+ORI_DIR=`pwd`
+SHELL_FOLDER=$(cd "$(dirname "$0")";pwd)
+cd "$ORI_DIR"
+for i in `ls "$SHELL_FOLDER/pre-receive.d"`; do
+    sh "$SHELL_FOLDER/pre-receive.d/$i"
+done
\ No newline at end of file
diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/hooks/pre-receive.d/gitea b/tests/gitea-repositories-meta/org42/search-by-path.git/hooks/pre-receive.d/gitea
new file mode 100755 (executable)
index 0000000..49d0940
--- /dev/null
@@ -0,0 +1,2 @@
+#!/usr/bin/env bash
+"$GITEA_ROOT/gitea" hook --config="$GITEA_ROOT/$GITEA_CONF" pre-receive
diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/hooks/proc-receive b/tests/gitea-repositories-meta/org42/search-by-path.git/hooks/proc-receive
new file mode 100755 (executable)
index 0000000..af2808b
--- /dev/null
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+ORI_DIR=`pwd`
+SHELL_FOLDER=$(cd "$(dirname "$0")";pwd)
+cd "$ORI_DIR"
+for i in `ls "$SHELL_FOLDER/proc-receive.d"`; do
+    sh "$SHELL_FOLDER/proc-receive.d/$i"
+done
diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/hooks/proc-receive.d/gitea b/tests/gitea-repositories-meta/org42/search-by-path.git/hooks/proc-receive.d/gitea
new file mode 100755 (executable)
index 0000000..97521c6
--- /dev/null
@@ -0,0 +1,2 @@
+#!/usr/bin/env bash
+"$GITEA_ROOT/gitea" hook --config="$GITEA_ROOT/$GITEA_CONF" proc-receive
diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/hooks/update b/tests/gitea-repositories-meta/org42/search-by-path.git/hooks/update
new file mode 100755 (executable)
index 0000000..c186fe4
--- /dev/null
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+ORI_DIR=`pwd`
+SHELL_FOLDER=$(cd "$(dirname "$0")";pwd)
+cd "$ORI_DIR"
+for i in `ls "$SHELL_FOLDER/update.d"`; do
+    sh "$SHELL_FOLDER/update.d/$i" $1 $2 $3
+done
\ No newline at end of file
diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/hooks/update.d/gitea b/tests/gitea-repositories-meta/org42/search-by-path.git/hooks/update.d/gitea
new file mode 100755 (executable)
index 0000000..38101c2
--- /dev/null
@@ -0,0 +1,2 @@
+#!/usr/bin/env bash
+"$GITEA_ROOT/gitea" hook --config="$GITEA_ROOT/$GITEA_CONF" update $1 $2 $3
diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/info/exclude b/tests/gitea-repositories-meta/org42/search-by-path.git/info/exclude
new file mode 100644 (file)
index 0000000..a5196d1
--- /dev/null
@@ -0,0 +1,6 @@
+# git ls-files --others --exclude-from=.git/info/exclude
+# Lines that start with '#' are comments.
+# For a project mostly in C, the following would be a good set of
+# exclude patterns (uncomment them if you want to use them):
+# *.[oa]
+# *~
diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/info/refs b/tests/gitea-repositories-meta/org42/search-by-path.git/info/refs
new file mode 100644 (file)
index 0000000..6b948c9
--- /dev/null
@@ -0,0 +1,13 @@
+90c1019714259b24fb81711d4416ac0f18667dfa       refs/heads/DefaultBranch
+985f0301dba5e7b34be866819cd15ad3d8f508ee       refs/heads/branch2
+65f1bf27bc3bf70f64657658635e66094edbcb4d       refs/heads/develop
+65f1bf27bc3bf70f64657658635e66094edbcb4d       refs/heads/feature/1
+78fb907e3a3309eae4fe8fef030874cebbf1cd5e       refs/heads/home-md-img-check
+3731fe53b763859aaf83e703ee731f6b9447ff1e       refs/heads/master
+62fb502a7172d4453f0322a2cc85bddffa57f07a       refs/heads/pr-to-update
+4649299398e4d39a5c09eb4f534df6f1e1eb87cc       refs/heads/sub-home-md-img-check
+3fa2f829675543ecfc16b2891aebe8bf0608a8f4       refs/notes/commits
+4a357436d925b5c974181ff12a994538ddc5a269       refs/pull/2/head
+5f22f7d0d95d614d25a5b68592adb345a4b5c7fd       refs/pull/3/head
+62fb502a7172d4453f0322a2cc85bddffa57f07a       refs/pull/5/head
+65f1bf27bc3bf70f64657658635e66094edbcb4d       refs/tags/v1.1
diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/logs/refs/heads/master b/tests/gitea-repositories-meta/org42/search-by-path.git/logs/refs/heads/master
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/objects/info/commit-graph b/tests/gitea-repositories-meta/org42/search-by-path.git/objects/info/commit-graph
new file mode 100644 (file)
index 0000000..b38715b
Binary files /dev/null and b/tests/gitea-repositories-meta/org42/search-by-path.git/objects/info/commit-graph differ
diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/objects/info/packs b/tests/gitea-repositories-meta/org42/search-by-path.git/objects/info/packs
new file mode 100644 (file)
index 0000000..b2af8c8
--- /dev/null
@@ -0,0 +1,2 @@
+P pack-393dc29256bc27cb2ec73898507df710be7a3cf5.pack
+
diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/objects/pack/pack-393dc29256bc27cb2ec73898507df710be7a3cf5.bitmap b/tests/gitea-repositories-meta/org42/search-by-path.git/objects/pack/pack-393dc29256bc27cb2ec73898507df710be7a3cf5.bitmap
new file mode 100644 (file)
index 0000000..1fdef22
Binary files /dev/null and b/tests/gitea-repositories-meta/org42/search-by-path.git/objects/pack/pack-393dc29256bc27cb2ec73898507df710be7a3cf5.bitmap differ
diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/objects/pack/pack-393dc29256bc27cb2ec73898507df710be7a3cf5.idx b/tests/gitea-repositories-meta/org42/search-by-path.git/objects/pack/pack-393dc29256bc27cb2ec73898507df710be7a3cf5.idx
new file mode 100644 (file)
index 0000000..0d930e7
Binary files /dev/null and b/tests/gitea-repositories-meta/org42/search-by-path.git/objects/pack/pack-393dc29256bc27cb2ec73898507df710be7a3cf5.idx differ
diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/objects/pack/pack-393dc29256bc27cb2ec73898507df710be7a3cf5.pack b/tests/gitea-repositories-meta/org42/search-by-path.git/objects/pack/pack-393dc29256bc27cb2ec73898507df710be7a3cf5.pack
new file mode 100644 (file)
index 0000000..f1aac1e
Binary files /dev/null and b/tests/gitea-repositories-meta/org42/search-by-path.git/objects/pack/pack-393dc29256bc27cb2ec73898507df710be7a3cf5.pack differ
diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/objects/pack/pack-393dc29256bc27cb2ec73898507df710be7a3cf5.rev b/tests/gitea-repositories-meta/org42/search-by-path.git/objects/pack/pack-393dc29256bc27cb2ec73898507df710be7a3cf5.rev
new file mode 100644 (file)
index 0000000..869860b
Binary files /dev/null and b/tests/gitea-repositories-meta/org42/search-by-path.git/objects/pack/pack-393dc29256bc27cb2ec73898507df710be7a3cf5.rev differ
diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/packed-refs b/tests/gitea-repositories-meta/org42/search-by-path.git/packed-refs
new file mode 100644 (file)
index 0000000..70e69af
--- /dev/null
@@ -0,0 +1,14 @@
+# pack-refs with: peeled fully-peeled sorted 
+90c1019714259b24fb81711d4416ac0f18667dfa refs/heads/DefaultBranch
+985f0301dba5e7b34be866819cd15ad3d8f508ee refs/heads/branch2
+65f1bf27bc3bf70f64657658635e66094edbcb4d refs/heads/develop
+65f1bf27bc3bf70f64657658635e66094edbcb4d refs/heads/feature/1
+78fb907e3a3309eae4fe8fef030874cebbf1cd5e refs/heads/home-md-img-check
+3731fe53b763859aaf83e703ee731f6b9447ff1e refs/heads/master
+62fb502a7172d4453f0322a2cc85bddffa57f07a refs/heads/pr-to-update
+4649299398e4d39a5c09eb4f534df6f1e1eb87cc refs/heads/sub-home-md-img-check
+3fa2f829675543ecfc16b2891aebe8bf0608a8f4 refs/notes/commits
+4a357436d925b5c974181ff12a994538ddc5a269 refs/pull/2/head
+5f22f7d0d95d614d25a5b68592adb345a4b5c7fd refs/pull/3/head
+62fb502a7172d4453f0322a2cc85bddffa57f07a refs/pull/5/head
+65f1bf27bc3bf70f64657658635e66094edbcb4d refs/tags/v1.1
index 70d3a446f768879b1fd947c865db473322431f02..fff121490c9ca7c68679f311d0f6c0d5bc1540b5 100644 (file)
@@ -177,7 +177,7 @@ func TestAPIGetAll(t *testing.T) {
        var apiOrgList []*api.Organization
 
        DecodeJSON(t, resp, &apiOrgList)
-       assert.Len(t, apiOrgList, 12)
+       assert.Len(t, apiOrgList, 13)
        assert.Equal(t, "Limited Org 36", apiOrgList[1].FullName)
        assert.Equal(t, "limited", apiOrgList[1].Visibility)
 
@@ -186,7 +186,7 @@ func TestAPIGetAll(t *testing.T) {
        resp = MakeRequest(t, req, http.StatusOK)
 
        DecodeJSON(t, resp, &apiOrgList)
-       assert.Len(t, apiOrgList, 8)
+       assert.Len(t, apiOrgList, 9)
        assert.Equal(t, "org 17", apiOrgList[0].FullName)
        assert.Equal(t, "public", apiOrgList[0].Visibility)
 }
index 716da762e542de227796b127dcd3481076ecc3f7..93c9ca0920d496e90aad892e91d49529af462fd2 100644 (file)
@@ -94,9 +94,9 @@ func TestAPISearchRepo(t *testing.T) {
        }{
                {
                        name: "RepositoriesMax50", requestURL: "/api/v1/repos/search?limit=50&private=false", expectedResults: expectedResults{
-                               nil:   {count: 35},
-                               user:  {count: 35},
-                               user2: {count: 35},
+                               nil:   {count: 36},
+                               user:  {count: 36},
+                               user2: {count: 36},
                        },
                },
                {