]> source.dussan.org Git - gitea.git/commitdiff
Allow code search by filename (#32210)
authorBruno Sofiato <bruno.sofiato@gmail.com>
Fri, 11 Oct 2024 23:35:04 +0000 (20:35 -0300)
committerGitHub <noreply@github.com>
Fri, 11 Oct 2024 23:35:04 +0000 (23:35 +0000)
This is a large and complex PR, so let me explain in detail its changes.

First, I had to create new index mappings for Bleve and Elasticsearch as
the current ones do not support search by filename. This requires Gitea
to recreate the code search indexes (I do not know if this is a breaking
change, but I feel it deserves a heads-up).

I've used [this
approach](https://www.elastic.co/guide/en/elasticsearch/reference/7.17/analysis-pathhierarchy-tokenizer.html)
to model the filename index. It allows us to efficiently search for both
the full path and the name of a file. Bleve, however, does not support
this out-of-box, so I had to code a brand new [token
filter](https://blevesearch.com/docs/Token-Filters/) to generate the
search terms.

I also did an overhaul in the `indexer_test.go` file. It now asserts the
order of the expected results (this is important since matches based on
the name of a file are more relevant than those based on its content).
I've added new test scenarios that deal with searching by filename. They
use a new repo included in the Gitea fixture.

The screenshot below depicts how Gitea shows the search results. It
shows results based on content in the same way as the current version
does. In matches based on the filename, the first seven lines of the
file contents are shown (BTW, this is how GitHub does it).

![image](https://github.com/user-attachments/assets/9d938d86-1a8d-4f89-8644-1921a473e858)

Resolves #32096

---------

Signed-off-by: Bruno Sofiato <bruno.sofiato@gmail.com>
38 files changed:
models/fixtures/repo_unit.yml
models/fixtures/repository.yml
models/fixtures/user.yml
models/repo/repo_list_test.go
models/user/user_test.go
modules/indexer/code/bleve/bleve.go
modules/indexer/code/bleve/token/path/path.go [new file with mode: 0644]
modules/indexer/code/bleve/token/path/path_test.go [new file with mode: 0644]
modules/indexer/code/elasticsearch/elasticsearch.go
modules/indexer/code/elasticsearch/elasticsearch_test.go
modules/indexer/code/indexer_test.go
modules/indexer/code/internal/util.go
modules/indexer/internal/bleve/util.go
modules/indexer/internal/bleve/util_test.go [new file with mode: 0644]
tests/gitea-repositories-meta/org42/search-by-path.git/GIT_COLA_MSG [new file with mode: 0644]
tests/gitea-repositories-meta/org42/search-by-path.git/HEAD [new file with mode: 0644]
tests/gitea-repositories-meta/org42/search-by-path.git/config [new file with mode: 0644]
tests/gitea-repositories-meta/org42/search-by-path.git/description [new file with mode: 0644]
tests/gitea-repositories-meta/org42/search-by-path.git/hooks/post-receive [new file with mode: 0755]
tests/gitea-repositories-meta/org42/search-by-path.git/hooks/post-receive.d/gitea [new file with mode: 0755]
tests/gitea-repositories-meta/org42/search-by-path.git/hooks/pre-receive [new file with mode: 0755]
tests/gitea-repositories-meta/org42/search-by-path.git/hooks/pre-receive.d/gitea [new file with mode: 0755]
tests/gitea-repositories-meta/org42/search-by-path.git/hooks/proc-receive [new file with mode: 0755]
tests/gitea-repositories-meta/org42/search-by-path.git/hooks/proc-receive.d/gitea [new file with mode: 0755]
tests/gitea-repositories-meta/org42/search-by-path.git/hooks/update [new file with mode: 0755]
tests/gitea-repositories-meta/org42/search-by-path.git/hooks/update.d/gitea [new file with mode: 0755]
tests/gitea-repositories-meta/org42/search-by-path.git/info/exclude [new file with mode: 0644]
tests/gitea-repositories-meta/org42/search-by-path.git/info/refs [new file with mode: 0644]
tests/gitea-repositories-meta/org42/search-by-path.git/logs/refs/heads/master [new file with mode: 0644]
tests/gitea-repositories-meta/org42/search-by-path.git/objects/info/commit-graph [new file with mode: 0644]
tests/gitea-repositories-meta/org42/search-by-path.git/objects/info/packs [new file with mode: 0644]
tests/gitea-repositories-meta/org42/search-by-path.git/objects/pack/pack-393dc29256bc27cb2ec73898507df710be7a3cf5.bitmap [new file with mode: 0644]
tests/gitea-repositories-meta/org42/search-by-path.git/objects/pack/pack-393dc29256bc27cb2ec73898507df710be7a3cf5.idx [new file with mode: 0644]
tests/gitea-repositories-meta/org42/search-by-path.git/objects/pack/pack-393dc29256bc27cb2ec73898507df710be7a3cf5.pack [new file with mode: 0644]
tests/gitea-repositories-meta/org42/search-by-path.git/objects/pack/pack-393dc29256bc27cb2ec73898507df710be7a3cf5.rev [new file with mode: 0644]
tests/gitea-repositories-meta/org42/search-by-path.git/packed-refs [new file with mode: 0644]
tests/integration/api_org_test.go
tests/integration/api_repo_test.go

index 8a22db0445c64bd1577bed464771d44da5fafa8b..f6b6252da1f88438642fad5d94e562376eecbff7 100644 (file)
   type: 3
   config: "{\"IgnoreWhitespaceConflicts\":false,\"AllowMerge\":true,\"AllowRebase\":true,\"AllowRebaseMerge\":true,\"AllowSquash\":true}"
   created_unix: 946684810
+
+-
+  id: 108
+  repo_id: 62
+  type: 1
+  config: "{}"
+  created_unix: 946684810
+
+-
+  id: 109
+  repo_id: 62
+  type: 2
+  config: "{\"EnableTimetracker\":true,\"AllowOnlyContributorsToTrackTime\":true}"
+  created_unix: 946684810
+
+-
+  id: 110
+  repo_id: 62
+  type: 3
+  config: "{\"IgnoreWhitespaceConflicts\":false,\"AllowMerge\":true,\"AllowRebase\":true,\"AllowRebaseMerge\":true,\"AllowSquash\":true}"
+  created_unix: 946684810
index e141593f41576af44e9c15909fc7440d48fc6bbf..b7970cb7c82f61d23a32fa5a9af7376cdc94b747 100644 (file)
   size: 0
   is_fsck_enabled: true
   close_issues_via_commit_in_any_branch: false
+
+-
+  id: 62
+  owner_id: 42
+  owner_name: org42
+  lower_name: search-by-path
+  name: search-by-path
+  default_branch: master
+  num_watches: 0
+  num_stars: 0
+  num_forks: 0
+  num_issues: 0
+  num_closed_issues: 0
+  num_pulls: 0
+  num_closed_pulls: 0
+  num_milestones: 0
+  num_closed_milestones: 0
+  num_projects: 0
+  num_closed_projects: 0
+  is_private: false
+  is_empty: false
+  is_archived: false
+  is_mirror: false
+  status: 0
+  is_fork: false
+  fork_id: 0
+  is_template: false
+  template_id: 0
+  size: 0
+  is_fsck_enabled: true
+  close_issues_via_commit_in_any_branch: false
index 8504d88ce59951c3d834a1d5cb8199be7668ab25..c0296deec55bd92fd459fe47d3f96875a7ec6f78 100644 (file)
   repo_admin_change_team_access: false
   theme: ""
   keep_activity_private: false
+
+-
+  id: 42
+  lower_name: org42
+  name: org42
+  full_name: Org42
+  email: org42@example.com
+  keep_email_private: false
+  email_notifications_preference: onmention
+  passwd: ZogKvWdyEx:password
+  passwd_hash_algo: dummy
+  must_change_password: false
+  login_source: 0
+  login_name: org42
+  type: 1
+  salt: ZogKvWdyEx
+  max_repo_creation: -1
+  is_active: false
+  is_admin: false
+  is_restricted: false
+  allow_git_hook: false
+  allow_import_local: false
+  allow_create_organization: true
+  prohibit_login: false
+  avatar: avatar42
+  avatar_email: org42@example.com
+  use_custom_avatar: false
+  num_followers: 0
+  num_following: 0
+  num_stars: 0
+  num_repos: 1
+  num_teams: 0
+  num_members: 0
+  visibility: 0
+  repo_admin_change_team_access: false
+  theme: ""
+  keep_activity_private: false
index 88cfcde6208324a7ae5df953d5e1cd632bb96a2c..ca6007f6c7882c0bc1304997f2f24e676688d1f6 100644 (file)
@@ -138,12 +138,12 @@ func getTestCases() []struct {
                {
                        name:  "AllPublic/PublicRepositoriesOfUserIncludingCollaborative",
                        opts:  &repo_model.SearchRepoOptions{ListOptions: db.ListOptions{Page: 1, PageSize: 10}, OwnerID: 15, AllPublic: true, Template: optional.Some(false)},
-                       count: 33,
+                       count: 34,
                },
                {
                        name:  "AllPublic/PublicAndPrivateRepositoriesOfUserIncludingCollaborative",
                        opts:  &repo_model.SearchRepoOptions{ListOptions: db.ListOptions{Page: 1, PageSize: 10}, OwnerID: 15, Private: true, AllPublic: true, AllLimited: true, Template: optional.Some(false)},
-                       count: 38,
+                       count: 39,
                },
                {
                        name:  "AllPublic/PublicAndPrivateRepositoriesOfUserIncludingCollaborativeByName",
@@ -158,7 +158,7 @@ func getTestCases() []struct {
                {
                        name:  "AllPublic/PublicRepositoriesOfOrganization",
                        opts:  &repo_model.SearchRepoOptions{ListOptions: db.ListOptions{Page: 1, PageSize: 10}, OwnerID: 17, AllPublic: true, Collaborate: optional.Some(false), Template: optional.Some(false)},
-                       count: 33,
+                       count: 34,
                },
                {
                        name:  "AllTemplates",
index 67efb3859fdf35c4f9d8a1333ea7f5e49269dc77..bc1abc64512c7e9929a6a40b1ff5cb75c2c6d2f5 100644 (file)
@@ -92,7 +92,10 @@ func TestSearchUsers(t *testing.T) {
        testOrgSuccess(&user_model.SearchUserOptions{OrderBy: "id ASC", ListOptions: db.ListOptions{Page: 4, PageSize: 2}},
                []int64{26, 41})
 
-       testOrgSuccess(&user_model.SearchUserOptions{ListOptions: db.ListOptions{Page: 5, PageSize: 2}},
+       testOrgSuccess(&user_model.SearchUserOptions{OrderBy: "id ASC", ListOptions: db.ListOptions{Page: 5, PageSize: 2}},
+               []int64{42})
+
+       testOrgSuccess(&user_model.SearchUserOptions{ListOptions: db.ListOptions{Page: 6, PageSize: 2}},
                []int64{})
 
        // test users
index c17f56d3cff5ab4708362f5bacedb60bd118b7a0..90e5e62bcb4aafb3a47e90559e06fdb2b5bc6b8d 100644 (file)
@@ -17,6 +17,7 @@ import (
        "code.gitea.io/gitea/modules/charset"
        "code.gitea.io/gitea/modules/git"
        "code.gitea.io/gitea/modules/gitrepo"
+       path_filter "code.gitea.io/gitea/modules/indexer/code/bleve/token/path"
        "code.gitea.io/gitea/modules/indexer/code/internal"
        indexer_internal "code.gitea.io/gitea/modules/indexer/internal"
        inner_bleve "code.gitea.io/gitea/modules/indexer/internal/bleve"
@@ -53,6 +54,7 @@ type RepoIndexerData struct {
        RepoID    int64
        CommitID  string
        Content   string
+       Filename  string
        Language  string
        UpdatedAt time.Time
 }
@@ -64,8 +66,10 @@ func (d *RepoIndexerData) Type() string {
 
 const (
        repoIndexerAnalyzer      = "repoIndexerAnalyzer"
+       filenameIndexerAnalyzer  = "filenameIndexerAnalyzer"
+       filenameIndexerTokenizer = "filenameIndexerTokenizer"
        repoIndexerDocType       = "repoIndexerDocType"
-       repoIndexerLatestVersion = 6
+       repoIndexerLatestVersion = 7
 )
 
 // generateBleveIndexMapping generates a bleve index mapping for the repo indexer
@@ -79,6 +83,11 @@ func generateBleveIndexMapping() (mapping.IndexMapping, error) {
        textFieldMapping.IncludeInAll = false
        docMapping.AddFieldMappingsAt("Content", textFieldMapping)
 
+       fileNamedMapping := bleve.NewTextFieldMapping()
+       fileNamedMapping.IncludeInAll = false
+       fileNamedMapping.Analyzer = filenameIndexerAnalyzer
+       docMapping.AddFieldMappingsAt("Filename", fileNamedMapping)
+
        termFieldMapping := bleve.NewTextFieldMapping()
        termFieldMapping.IncludeInAll = false
        termFieldMapping.Analyzer = analyzer_keyword.Name
@@ -90,6 +99,7 @@ func generateBleveIndexMapping() (mapping.IndexMapping, error) {
        docMapping.AddFieldMappingsAt("UpdatedAt", timeFieldMapping)
 
        mapping := bleve.NewIndexMapping()
+
        if err := addUnicodeNormalizeTokenFilter(mapping); err != nil {
                return nil, err
        } else if err := mapping.AddCustomAnalyzer(repoIndexerAnalyzer, map[string]any{
@@ -100,6 +110,16 @@ func generateBleveIndexMapping() (mapping.IndexMapping, error) {
        }); err != nil {
                return nil, err
        }
+
+       if err := mapping.AddCustomAnalyzer(filenameIndexerAnalyzer, map[string]any{
+               "type":          analyzer_custom.Name,
+               "char_filters":  []string{},
+               "tokenizer":     unicode.Name,
+               "token_filters": []string{unicodeNormalizeName, path_filter.Name, lowercase.Name},
+       }); err != nil {
+               return nil, err
+       }
+
        mapping.DefaultAnalyzer = repoIndexerAnalyzer
        mapping.AddDocumentMapping(repoIndexerDocType, docMapping)
        mapping.AddDocumentMapping("_all", bleve.NewDocumentDisabledMapping())
@@ -174,6 +194,7 @@ func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserErro
        return batch.Index(id, &RepoIndexerData{
                RepoID:    repo.ID,
                CommitID:  commitSha,
+               Filename:  update.Filename,
                Content:   string(charset.ToUTF8DropErrors(fileContents, charset.ConvertOpts{})),
                Language:  analyze.GetCodeLanguage(update.Filename, fileContents),
                UpdatedAt: time.Now().UTC(),
@@ -240,14 +261,19 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int
                keywordQuery query.Query
        )
 
-       phraseQuery := bleve.NewMatchPhraseQuery(opts.Keyword)
-       phraseQuery.FieldVal = "Content"
-       phraseQuery.Analyzer = repoIndexerAnalyzer
-       keywordQuery = phraseQuery
+       pathQuery := bleve.NewPrefixQuery(strings.ToLower(opts.Keyword))
+       pathQuery.FieldVal = "Filename"
+       pathQuery.SetBoost(10)
+
+       contentQuery := bleve.NewMatchQuery(opts.Keyword)
+       contentQuery.FieldVal = "Content"
+
        if opts.IsKeywordFuzzy {
-               phraseQuery.Fuzziness = inner_bleve.GuessFuzzinessByKeyword(opts.Keyword)
+               contentQuery.Fuzziness = inner_bleve.GuessFuzzinessByKeyword(opts.Keyword)
        }
 
+       keywordQuery = bleve.NewDisjunctionQuery(contentQuery, pathQuery)
+
        if len(opts.RepoIDs) > 0 {
                repoQueries := make([]query.Query, 0, len(opts.RepoIDs))
                for _, repoID := range opts.RepoIDs {
@@ -277,7 +303,7 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int
 
        from, pageSize := opts.GetSkipTake()
        searchRequest := bleve.NewSearchRequestOptions(indexerQuery, pageSize, from, false)
-       searchRequest.Fields = []string{"Content", "RepoID", "Language", "CommitID", "UpdatedAt"}
+       searchRequest.Fields = []string{"Content", "Filename", "RepoID", "Language", "CommitID", "UpdatedAt"}
        searchRequest.IncludeLocations = true
 
        if len(opts.Language) == 0 {
@@ -307,6 +333,10 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int
                                endIndex = locationEnd
                        }
                }
+               if len(hit.Locations["Filename"]) > 0 {
+                       startIndex, endIndex = internal.FilenameMatchIndexPos(hit.Fields["Content"].(string))
+               }
+
                language := hit.Fields["Language"].(string)
                var updatedUnix timeutil.TimeStamp
                if t, err := time.Parse(time.RFC3339, hit.Fields["UpdatedAt"].(string)); err == nil {
diff --git a/modules/indexer/code/bleve/token/path/path.go b/modules/indexer/code/bleve/token/path/path.go
new file mode 100644 (file)
index 0000000..107e0da
--- /dev/null
@@ -0,0 +1,101 @@
+// Copyright 2024 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package path
+
+import (
+       "slices"
+       "strings"
+
+       "github.com/blevesearch/bleve/v2/analysis"
+       "github.com/blevesearch/bleve/v2/registry"
+)
+
+const (
+       Name = "gitea/path"
+)
+
+type TokenFilter struct{}
+
+func NewTokenFilter() *TokenFilter {
+       return &TokenFilter{}
+}
+
+func TokenFilterConstructor(config map[string]any, cache *registry.Cache) (analysis.TokenFilter, error) {
+       return NewTokenFilter(), nil
+}
+
+func (s *TokenFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
+       if len(input) == 1 {
+               // if there is only one token, we don't need to generate the reversed chain
+               return generatePathTokens(input, false)
+       }
+
+       normal := generatePathTokens(input, false)
+       reversed := generatePathTokens(input, true)
+
+       return append(normal, reversed...)
+}
+
+// Generates path tokens from the input tokens.
+// This mimics the behavior of the path hierarchy tokenizer in ES. It takes the input tokens and combines them, generating a term for each component
+// in the tree (e.g., foo/bar/baz.md will generate foo, foo/bar, and foo/bar/baz.md).
+//
+// If the reverse flag is set, the order of the tokens is reversed (the same input will generate baz.md, baz.md/bar, baz.md/bar/foo). This is useful
+// for efficiently searching for filenames without supplying the full path.
+func generatePathTokens(input analysis.TokenStream, reversed bool) analysis.TokenStream {
+       terms := make([]string, 0, len(input))
+       longestTerm := 0
+
+       if reversed {
+               slices.Reverse(input)
+       }
+
+       for i := 0; i < len(input); i++ {
+               var sb strings.Builder
+               sb.WriteString(string(input[0].Term))
+
+               for j := 1; j < i; j++ {
+                       sb.WriteString("/")
+                       sb.WriteString(string(input[j].Term))
+               }
+
+               term := sb.String()
+
+               if longestTerm < len(term) {
+                       longestTerm = len(term)
+               }
+
+               terms = append(terms, term)
+       }
+
+       output := make(analysis.TokenStream, 0, len(terms))
+
+       for _, term := range terms {
+               var start, end int
+
+               if reversed {
+                       start = 0
+                       end = len(term)
+               } else {
+                       start = longestTerm - len(term)
+                       end = longestTerm
+               }
+
+               token := analysis.Token{
+                       Position: 1,
+                       Start:    start,
+                       End:      end,
+                       Type:     analysis.AlphaNumeric,
+                       Term:     []byte(term),
+               }
+
+               output = append(output, &token)
+       }
+
+       return output
+}
+
+func init() {
+       registry.RegisterTokenFilter(Name, TokenFilterConstructor)
+}
diff --git a/modules/indexer/code/bleve/token/path/path_test.go b/modules/indexer/code/bleve/token/path/path_test.go
new file mode 100644 (file)
index 0000000..cc52021
--- /dev/null
@@ -0,0 +1,76 @@
+// Copyright 2024 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package path
+
+import (
+       "fmt"
+       "testing"
+
+       "github.com/blevesearch/bleve/v2/analysis"
+       "github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
+       "github.com/stretchr/testify/assert"
+)
+
+type Scenario struct {
+       Input  string
+       Tokens []string
+}
+
+func TestTokenFilter(t *testing.T) {
+       scenarios := []struct {
+               Input string
+               Terms []string
+       }{
+               {
+                       Input: "Dockerfile",
+                       Terms: []string{"Dockerfile"},
+               },
+               {
+                       Input: "Dockerfile.rootless",
+                       Terms: []string{"Dockerfile.rootless"},
+               },
+               {
+                       Input: "a/b/c/Dockerfile.rootless",
+                       Terms: []string{"a", "a/b", "a/b/c", "a/b/c/Dockerfile.rootless", "Dockerfile.rootless", "Dockerfile.rootless/c", "Dockerfile.rootless/c/b", "Dockerfile.rootless/c/b/a"},
+               },
+               {
+                       Input: "",
+                       Terms: []string{},
+               },
+       }
+
+       for _, scenario := range scenarios {
+               t.Run(fmt.Sprintf("ensure terms of '%s'", scenario.Input), func(t *testing.T) {
+                       terms := extractTerms(scenario.Input)
+
+                       assert.Len(t, terms, len(scenario.Terms))
+
+                       for _, term := range terms {
+                               assert.Contains(t, scenario.Terms, term)
+                       }
+               })
+       }
+}
+
+func extractTerms(input string) []string {
+       tokens := tokenize(input)
+       filteredTokens := filter(tokens)
+       terms := make([]string, 0, len(filteredTokens))
+
+       for _, token := range filteredTokens {
+               terms = append(terms, string(token.Term))
+       }
+
+       return terms
+}
+
+func filter(input analysis.TokenStream) analysis.TokenStream {
+       filter := NewTokenFilter()
+       return filter.Filter(input)
+}
+
+func tokenize(input string) analysis.TokenStream {
+       tokenizer := unicode.NewUnicodeTokenizer()
+       return tokenizer.Tokenize([]byte(input))
+}
index 5c01034450be7987a69dce82160c3e59661a39d7..669a1bafcc90883cfbc8cbd30732e171f354b9d0 100644 (file)
@@ -30,7 +30,7 @@ import (
 )
 
 const (
-       esRepoIndexerLatestVersion = 1
+       esRepoIndexerLatestVersion = 2
        // multi-match-types, currently only 2 types are used
        // Reference: https://www.elastic.co/guide/en/elasticsearch/reference/7.0/query-dsl-multi-match-query.html#multi-match-types
        esMultiMatchTypeBestFields   = "best_fields"
@@ -57,12 +57,50 @@ func NewIndexer(url, indexerName string) *Indexer {
 
 const (
        defaultMapping = `{
+               "settings": {
+               "analysis": {
+                       "analyzer": {
+                               "filename_path_analyzer": {
+                                       "tokenizer": "path_tokenizer"
+                               },
+                               "reversed_filename_path_analyzer": {
+                                       "tokenizer": "reversed_path_tokenizer"
+                               }
+                       },
+                               "tokenizer": {
+                                       "path_tokenizer": {
+                                               "type": "path_hierarchy",
+                                               "delimiter": "/"
+                                       },
+                                       "reversed_path_tokenizer": {
+                                               "type": "path_hierarchy",
+                                               "delimiter": "/",
+                                               "reverse": true
+                                       }
+                               }
+                       }
+               },
                "mappings": {
                        "properties": {
                                "repo_id": {
                                        "type": "long",
                                        "index": true
                                },
+                               "filename": {
+                                       "type": "text",
+                                       "term_vector": "with_positions_offsets",
+                                       "index": true,
+                                       "fields": {
+                                       "path": {
+                                       "type": "text",
+                                       "analyzer": "reversed_filename_path_analyzer"
+                                               },
+                                       "path_reversed": {
+                                       "type": "text",
+                                       "analyzer": "filename_path_analyzer"
+                                       }
+                               }
+                               },
                                "content": {
                                        "type": "text",
                                        "term_vector": "with_positions_offsets",
@@ -136,6 +174,7 @@ func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserErro
                        Id(id).
                        Doc(map[string]any{
                                "repo_id":    repo.ID,
+                               "filename":   update.Filename,
                                "content":    string(charset.ToUTF8DropErrors(fileContents, charset.ConvertOpts{})),
                                "commit_id":  sha,
                                "language":   analyze.GetCodeLanguage(update.Filename, fileContents),
@@ -231,11 +270,11 @@ func (b *Indexer) doDelete(ctx context.Context, repoID int64) error {
        return err
 }
 
-// indexPos find words positions for start and the following end on content. It will
+// contentMatchIndexPos find words positions for start and the following end on content. It will
 // return the beginning position of the first start and the ending position of the
 // first end following the start string.
 // If not found any of the positions, it will return -1, -1.
-func indexPos(content, start, end string) (int, int) {
+func contentMatchIndexPos(content, start, end string) (int, int) {
        startIdx := strings.Index(content, start)
        if startIdx < 0 {
                return -1, -1
@@ -244,22 +283,29 @@ func indexPos(content, start, end string) (int, int) {
        if endIdx < 0 {
                return -1, -1
        }
-       return startIdx, startIdx + len(start) + endIdx + len(end)
+       return startIdx, (startIdx + len(start) + endIdx + len(end)) - 9 // remove the length <em></em> since we give Content the original data
 }
 
 func convertResult(searchResult *elastic.SearchResult, kw string, pageSize int) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) {
        hits := make([]*internal.SearchResult, 0, pageSize)
        for _, hit := range searchResult.Hits.Hits {
+               repoID, fileName := internal.ParseIndexerID(hit.Id)
+               res := make(map[string]any)
+               if err := json.Unmarshal(hit.Source, &res); err != nil {
+                       return 0, nil, nil, err
+               }
+
                // FIXME: There is no way to get the position the keyword on the content currently on the same request.
                // So we get it from content, this may made the query slower. See
                // https://discuss.elastic.co/t/fetching-position-of-keyword-in-matched-document/94291
                var startIndex, endIndex int
-               c, ok := hit.Highlight["content"]
-               if ok && len(c) > 0 {
+               if c, ok := hit.Highlight["filename"]; ok && len(c) > 0 {
+                       startIndex, endIndex = internal.FilenameMatchIndexPos(res["content"].(string))
+               } else if c, ok := hit.Highlight["content"]; ok && len(c) > 0 {
                        // FIXME: Since the highlighting content will include <em> and </em> for the keywords,
                        // now we should find the positions. But how to avoid html content which contains the
                        // <em> and </em> tags? If elastic search has handled that?
-                       startIndex, endIndex = indexPos(c[0], "<em>", "</em>")
+                       startIndex, endIndex = contentMatchIndexPos(c[0], "<em>", "</em>")
                        if startIndex == -1 {
                                panic(fmt.Sprintf("1===%s,,,%#v,,,%s", kw, hit.Highlight, c[0]))
                        }
@@ -267,12 +313,6 @@ func convertResult(searchResult *elastic.SearchResult, kw string, pageSize int)
                        panic(fmt.Sprintf("2===%#v", hit.Highlight))
                }
 
-               repoID, fileName := internal.ParseIndexerID(hit.Id)
-               res := make(map[string]any)
-               if err := json.Unmarshal(hit.Source, &res); err != nil {
-                       return 0, nil, nil, err
-               }
-
                language := res["language"].(string)
 
                hits = append(hits, &internal.SearchResult{
@@ -283,7 +323,7 @@ func convertResult(searchResult *elastic.SearchResult, kw string, pageSize int)
                        UpdatedUnix: timeutil.TimeStamp(res["updated_at"].(float64)),
                        Language:    language,
                        StartIndex:  startIndex,
-                       EndIndex:    endIndex - 9, // remove the length <em></em> since we give Content the original data
+                       EndIndex:    endIndex,
                        Color:       enry.GetColor(language),
                })
        }
@@ -315,7 +355,10 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int
                searchType = esMultiMatchTypeBestFields
        }
 
-       kwQuery := elastic.NewMultiMatchQuery(opts.Keyword, "content").Type(searchType)
+       kwQuery := elastic.NewBoolQuery().Should(
+               elastic.NewMultiMatchQuery(opts.Keyword, "content").Type(searchType),
+               elastic.NewMultiMatchQuery(opts.Keyword, "filename^10").Type(esMultiMatchTypePhrasePrefix),
+       )
        query := elastic.NewBoolQuery()
        query = query.Must(kwQuery)
        if len(opts.RepoIDs) > 0 {
@@ -341,6 +384,7 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int
                        Highlight(
                                elastic.NewHighlight().
                                        Field("content").
+                                       Field("filename").
                                        NumOfFragments(0). // return all highting content on fragments
                                        HighlighterType("fvh"),
                        ).
@@ -373,6 +417,7 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int
                Highlight(
                        elastic.NewHighlight().
                                Field("content").
+                               Field("filename").
                                NumOfFragments(0). // return all highting content on fragments
                                HighlighterType("fvh"),
                ).
index c6ba93e76d4695a1ebd2831afb2dbfacfc4217d1..a6d2af92b2b11b436449dc5103416fb6cf15ec72 100644 (file)
@@ -10,7 +10,7 @@ import (
 )
 
 func TestIndexPos(t *testing.T) {
-       startIdx, endIdx := indexPos("test index start and end", "start", "end")
+       startIdx, endIdx := contentMatchIndexPos("test index start and end", "start", "end")
        assert.EqualValues(t, 11, startIdx)
-       assert.EqualValues(t, 24, endIdx)
+       assert.EqualValues(t, 15, endIdx)
 }
index 8975c5ce4083bfba1d00b8898c6166253ddfea32..5b33528dcde04e3a8b20aa8a684413f563342ff8 100644 (file)
@@ -6,6 +6,7 @@ package code
 import (
        "context"
        "os"
+       "slices"
        "testing"
 
        "code.gitea.io/gitea/models/db"
@@ -20,53 +21,166 @@ import (
        _ "code.gitea.io/gitea/models/activities"
 
        "github.com/stretchr/testify/assert"
+
+       _ "github.com/mattn/go-sqlite3"
 )
 
+type codeSearchResult struct {
+       Filename string
+       Content  string
+}
+
 func TestMain(m *testing.M) {
        unittest.MainTest(m)
 }
 
 func testIndexer(name string, t *testing.T, indexer internal.Indexer) {
        t.Run(name, func(t *testing.T) {
-               var repoID int64 = 1
-               err := index(git.DefaultContext, indexer, repoID)
-               assert.NoError(t, err)
+               assert.NoError(t, setupRepositoryIndexes(git.DefaultContext, indexer))
+
                keywords := []struct {
                        RepoIDs []int64
                        Keyword string
-                       IDs     []int64
                        Langs   int
+                       Results []codeSearchResult
                }{
+                       // Search for an exact match on the contents of a file
+                       // This scenario yields a single result (the file README.md on the repo '1')
                        {
                                RepoIDs: nil,
                                Keyword: "Description",
-                               IDs:     []int64{repoID},
                                Langs:   1,
+                               Results: []codeSearchResult{
+                                       {
+                                               Filename: "README.md",
+                                               Content:  "# repo1\n\nDescription for repo1",
+                                       },
+                               },
                        },
+                       // Search for an exact match on the contents of a file within the repo '2'.
+                       // This scenario yields no results
                        {
                                RepoIDs: []int64{2},
                                Keyword: "Description",
-                               IDs:     []int64{},
                                Langs:   0,
                        },
+                       // Search for an exact match on the contents of a file
+                       // This scenario yields a single result (the file README.md on the repo '1')
                        {
                                RepoIDs: nil,
                                Keyword: "repo1",
-                               IDs:     []int64{repoID},
                                Langs:   1,
+                               Results: []codeSearchResult{
+                                       {
+                                               Filename: "README.md",
+                                               Content:  "# repo1\n\nDescription for repo1",
+                                       },
+                               },
                        },
+                       // Search for an exact match on the contents of a file within the repo '2'.
+                       // This scenario yields no results
                        {
                                RepoIDs: []int64{2},
                                Keyword: "repo1",
-                               IDs:     []int64{},
                                Langs:   0,
                        },
+                       // Search for a non-existing term.
+                       // This scenario yields no results
                        {
                                RepoIDs: nil,
                                Keyword: "non-exist",
-                               IDs:     []int64{},
                                Langs:   0,
                        },
+                       // Search for an exact match on the contents of a file within the repo '62'.
+                       // This scenario yields a single result (the file avocado.md on the repo '62')
+                       {
+                               RepoIDs: []int64{62},
+                               Keyword: "pineaple",
+                               Langs:   1,
+                               Results: []codeSearchResult{
+                                       {
+                                               Filename: "avocado.md",
+                                               Content:  "# repo1\n\npineaple pie of cucumber juice",
+                                       },
+                               },
+                       },
+                       // Search for an exact match on the filename within the repo '62'.
+                       // This scenario yields a single result (the file avocado.md on the repo '62')
+                       {
+                               RepoIDs: []int64{62},
+                               Keyword: "avocado.md",
+                               Langs:   1,
+                               Results: []codeSearchResult{
+                                       {
+                                               Filename: "avocado.md",
+                                               Content:  "# repo1\n\npineaple pie of cucumber juice",
+                                       },
+                               },
+                       },
+                       // Search for a partial match on the filename within the repo '62'.
+                       // This scenario yields a single result (the file avocado.md on the repo '62')
+                       {
+                               RepoIDs: []int64{62},
+                               Keyword: "avo",
+                               Langs:   1,
+                               Results: []codeSearchResult{
+                                       {
+                                               Filename: "avocado.md",
+                                               Content:  "# repo1\n\npineaple pie of cucumber juice",
+                                       },
+                               },
+                       },
+                       // Search for matches on both the contents and the filenames within the repo '62'.
+                       // This scenario yields two results: the first result is based on the filename (cucumber.md) while the second is based on the contents
+                       {
+                               RepoIDs: []int64{62},
+                               Keyword: "cucumber",
+                               Langs:   1,
+                               Results: []codeSearchResult{
+                                       {
+                                               Filename: "cucumber.md",
+                                               Content:  "Salad is good for your health",
+                                       },
+                                       {
+                                               Filename: "avocado.md",
+                                               Content:  "# repo1\n\npineaple pie of cucumber juice",
+                                       },
+                               },
+                       },
+                       // Search for matches on the filenames within the repo '62'.
+                       // This scenario yields two results (both are based on the filename, the first one is an exact match)
+                       {
+                               RepoIDs: []int64{62},
+                               Keyword: "ham",
+                               Langs:   1,
+                               Results: []codeSearchResult{
+                                       {
+                                               Filename: "ham.md",
+                                               Content:  "This is also not cheese",
+                                       },
+                                       {
+                                               Filename: "potato/ham.md",
+                                               Content:  "This is not cheese",
+                                       },
+                               },
+                       },
+                       // Search for matches on the contents of files within the repo '62'.
+                       // This scenario yields two results (both are based on contents, the first one is an exact match whereas the second is a 'fuzzy' one)
+                       {
+                               RepoIDs: []int64{62},
+                               Keyword: "This is not cheese",
+                               Langs:   1,
+                               Results: []codeSearchResult{
+                                       {
+                                               Filename: "potato/ham.md",
+                                               Content:  "This is not cheese",
+                                       },
+                                       {
+                                               Filename: "ham.md",
+                                               Content:  "This is also not cheese",
+                                       },
+                               },
+                       },
                }
 
                for _, kw := range keywords {
@@ -81,19 +195,37 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) {
                                        IsKeywordFuzzy: true,
                                })
                                assert.NoError(t, err)
-                               assert.Len(t, kw.IDs, int(total))
                                assert.Len(t, langs, kw.Langs)
 
-                               ids := make([]int64, 0, len(res))
+                               hits := make([]codeSearchResult, 0, len(res))
+
+                               if total > 0 {
+                                       assert.NotEmpty(t, kw.Results, "The given scenario does not provide any expected results")
+                               }
+
                                for _, hit := range res {
-                                       ids = append(ids, hit.RepoID)
-                                       assert.EqualValues(t, "# repo1\n\nDescription for repo1", hit.Content)
+                                       hits = append(hits, codeSearchResult{
+                                               Filename: hit.Filename,
+                                               Content:  hit.Content,
+                                       })
+                               }
+
+                               lastIndex := -1
+
+                               for _, expected := range kw.Results {
+                                       index := slices.Index(hits, expected)
+                                       if index == -1 {
+                                               assert.Failf(t, "Result not found", "Expected %v in %v", expected, hits)
+                                       } else if lastIndex > index {
+                                               assert.Failf(t, "Result is out of order", "The order of %v within %v is wrong", expected, hits)
+                                       } else {
+                                               lastIndex = index
+                                       }
                                }
-                               assert.EqualValues(t, kw.IDs, ids)
                        })
                }
 
-               assert.NoError(t, indexer.Delete(context.Background(), repoID))
+               assert.NoError(t, tearDownRepositoryIndexes(indexer))
        })
 }
 
@@ -136,3 +268,25 @@ func TestESIndexAndSearch(t *testing.T) {
 
        testIndexer("elastic_search", t, indexer)
 }
+
+func setupRepositoryIndexes(ctx context.Context, indexer internal.Indexer) error {
+       for _, repoID := range repositoriesToSearch() {
+               if err := index(ctx, indexer, repoID); err != nil {
+                       return err
+               }
+       }
+       return nil
+}
+
+func tearDownRepositoryIndexes(indexer internal.Indexer) error {
+       for _, repoID := range repositoriesToSearch() {
+               if err := indexer.Delete(context.Background(), repoID); err != nil {
+                       return err
+               }
+       }
+       return nil
+}
+
+func repositoriesToSearch() []int64 {
+       return []int64{1, 62}
+}
index 689c4f4584b144cb31c7f533512da89a6e395e19..5b95783d9fcfe4beadd53b4dd81add631d7be749 100644 (file)
@@ -10,6 +10,10 @@ import (
        "code.gitea.io/gitea/modules/log"
 )
 
+const (
+       filenameMatchNumberOfLines = 7 // Same number of preview lines as GitHub code search
+)
+
 func FilenameIndexerID(repoID int64, filename string) string {
        return internal.Base36(repoID) + "_" + filename
 }
@@ -30,3 +34,17 @@ func FilenameOfIndexerID(indexerID string) string {
        }
        return indexerID[index+1:]
 }
+
+// FilenameMatchIndexPos returns the start and end offsets of the leading lines of the given file contents (up to filenameMatchNumberOfLines lines).
+func FilenameMatchIndexPos(content string) (int, int) {
+       count := 1
+       for i, c := range content {
+               if c == '\n' {
+                       count++
+                       if count == filenameMatchNumberOfLines {
+                               return 0, i
+                       }
+               }
+       }
+       return 0, len(content)
+}
index a2265f86e6b35dcbe77f836cc83028d110f46845..b426b39bc20dbd351f7455374039f78480603cfb 100644 (file)
@@ -11,10 +11,15 @@ import (
        "code.gitea.io/gitea/modules/util"
 
        "github.com/blevesearch/bleve/v2"
+       "github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
        "github.com/blevesearch/bleve/v2/index/upsidedown"
        "github.com/ethantkoenig/rupture"
 )
 
+const (
+       maxFuzziness = 2
+)
+
 // openIndexer open the index at the specified path, checking for metadata
 // updates and bleve version updates.  If index needs to be created (or
 // re-created), returns (nil, nil)
@@ -48,7 +53,27 @@ func openIndexer(path string, latestVersion int) (bleve.Index, int, error) {
        return index, 0, nil
 }
 
+// GuessFuzzinessByKeyword guesses the fuzziness for the given keyword. Fuzziness is based on the Levenshtein distance and determines how many
+// characters may differ between two strings while they are still considered equivalent.
+// Given a phrase, its shortest word determines its fuzziness. If a phrase uses CJK (eg: `갃갃갃` `啊啊啊`), the fuzziness is zero.
 func GuessFuzzinessByKeyword(s string) int {
+       tokenizer := unicode.NewUnicodeTokenizer()
+       tokens := tokenizer.Tokenize([]byte(s))
+
+       if len(tokens) > 0 {
+               fuzziness := maxFuzziness
+
+               for _, token := range tokens {
+                       fuzziness = min(fuzziness, guessFuzzinessByKeyword(string(token.Term)))
+               }
+
+               return fuzziness
+       }
+
+       return 0
+}
+
+func guessFuzzinessByKeyword(s string) int {
        // according to https://github.com/blevesearch/bleve/issues/1563, the supported max fuzziness is 2
        // magic number 4 was chosen to determine the levenshtein distance per each character of a keyword
        // BUT, when using CJK (eg: `갃갃갃` `啊啊啊`), it mismatches a lot.
@@ -57,5 +82,5 @@ func GuessFuzzinessByKeyword(s string) int {
                        return 0
                }
        }
-       return min(2, len(s)/4)
+       return min(maxFuzziness, len(s)/4)
 }
diff --git a/modules/indexer/internal/bleve/util_test.go b/modules/indexer/internal/bleve/util_test.go
new file mode 100644 (file)
index 0000000..ae0b12c
--- /dev/null
@@ -0,0 +1,45 @@
+// Copyright 2024 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package bleve
+
+import (
+       "fmt"
+       "testing"
+
+       "github.com/stretchr/testify/assert"
+)
+
+func TestBleveGuessFuzzinessByKeyword(t *testing.T) {
+       scenarios := []struct {
+               Input     string
+               Fuzziness int // See util.go for the definition of fuzziness in this particular context
+       }{
+               {
+                       Input:     "",
+                       Fuzziness: 0,
+               },
+               {
+                       Input:     "Avocado",
+                       Fuzziness: 1,
+               },
+               {
+                       Input:     "Geschwindigkeit",
+                       Fuzziness: 2,
+               },
+               {
+                       Input:     "non-exist",
+                       Fuzziness: 0,
+               },
+               {
+                       Input:     "갃갃갃",
+                       Fuzziness: 0,
+               },
+       }
+
+       for _, scenario := range scenarios {
+               t.Run(fmt.Sprintf("ensure fuzziness of '%s' is '%d'", scenario.Input, scenario.Fuzziness), func(t *testing.T) {
+                       assert.Equal(t, scenario.Fuzziness, GuessFuzzinessByKeyword(scenario.Input))
+               })
+       }
+}
diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/GIT_COLA_MSG b/tests/gitea-repositories-meta/org42/search-by-path.git/GIT_COLA_MSG
new file mode 100644 (file)
index 0000000..8b13789
--- /dev/null
@@ -0,0 +1 @@
+
diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/HEAD b/tests/gitea-repositories-meta/org42/search-by-path.git/HEAD
new file mode 100644 (file)
index 0000000..cb089cd
--- /dev/null
@@ -0,0 +1 @@
+ref: refs/heads/master
diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/config b/tests/gitea-repositories-meta/org42/search-by-path.git/config
new file mode 100644 (file)
index 0000000..07d359d
--- /dev/null
@@ -0,0 +1,4 @@
+[core]
+       repositoryformatversion = 0
+       filemode = true
+       bare = true
diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/description b/tests/gitea-repositories-meta/org42/search-by-path.git/description
new file mode 100644 (file)
index 0000000..382e2d7
--- /dev/null
@@ -0,0 +1,8 @@
+This repository will be used to test code search. The snippet below shows its directory structure
+
+.
+├── avocado.md
+├── cucumber.md
+├── ham.md
+└── potato
+    └── ham.md
diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/hooks/post-receive b/tests/gitea-repositories-meta/org42/search-by-path.git/hooks/post-receive
new file mode 100755 (executable)
index 0000000..4b3d452
--- /dev/null
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+ORI_DIR=`pwd`
+SHELL_FOLDER=$(cd "$(dirname "$0")";pwd)
+cd "$ORI_DIR"
+for i in `ls "$SHELL_FOLDER/post-receive.d"`; do
+    sh "$SHELL_FOLDER/post-receive.d/$i"
+done
\ No newline at end of file
diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/hooks/post-receive.d/gitea b/tests/gitea-repositories-meta/org42/search-by-path.git/hooks/post-receive.d/gitea
new file mode 100755 (executable)
index 0000000..43a948d
--- /dev/null
@@ -0,0 +1,2 @@
+#!/usr/bin/env bash
+"$GITEA_ROOT/gitea" hook --config="$GITEA_ROOT/$GITEA_CONF" post-receive
diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/hooks/pre-receive b/tests/gitea-repositories-meta/org42/search-by-path.git/hooks/pre-receive
new file mode 100755 (executable)
index 0000000..4127013
--- /dev/null
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+ORI_DIR=`pwd`
+SHELL_FOLDER=$(cd "$(dirname "$0")";pwd)
+cd "$ORI_DIR"
+for i in `ls "$SHELL_FOLDER/pre-receive.d"`; do
+    sh "$SHELL_FOLDER/pre-receive.d/$i"
+done
\ No newline at end of file
diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/hooks/pre-receive.d/gitea b/tests/gitea-repositories-meta/org42/search-by-path.git/hooks/pre-receive.d/gitea
new file mode 100755 (executable)
index 0000000..49d0940
--- /dev/null
@@ -0,0 +1,2 @@
+#!/usr/bin/env bash
+"$GITEA_ROOT/gitea" hook --config="$GITEA_ROOT/$GITEA_CONF" pre-receive
diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/hooks/proc-receive b/tests/gitea-repositories-meta/org42/search-by-path.git/hooks/proc-receive
new file mode 100755 (executable)
index 0000000..af2808b
--- /dev/null
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+ORI_DIR=`pwd`
+SHELL_FOLDER=$(cd "$(dirname "$0")";pwd)
+cd "$ORI_DIR"
+for i in `ls "$SHELL_FOLDER/proc-receive.d"`; do
+    sh "$SHELL_FOLDER/proc-receive.d/$i"
+done
diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/hooks/proc-receive.d/gitea b/tests/gitea-repositories-meta/org42/search-by-path.git/hooks/proc-receive.d/gitea
new file mode 100755 (executable)
index 0000000..97521c6
--- /dev/null
@@ -0,0 +1,2 @@
+#!/usr/bin/env bash
+"$GITEA_ROOT/gitea" hook --config="$GITEA_ROOT/$GITEA_CONF" proc-receive
diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/hooks/update b/tests/gitea-repositories-meta/org42/search-by-path.git/hooks/update
new file mode 100755 (executable)
index 0000000..c186fe4
--- /dev/null
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+ORI_DIR=`pwd`
+SHELL_FOLDER=$(cd "$(dirname "$0")";pwd)
+cd "$ORI_DIR"
+for i in `ls "$SHELL_FOLDER/update.d"`; do
+    sh "$SHELL_FOLDER/update.d/$i" $1 $2 $3
+done
\ No newline at end of file
diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/hooks/update.d/gitea b/tests/gitea-repositories-meta/org42/search-by-path.git/hooks/update.d/gitea
new file mode 100755 (executable)
index 0000000..38101c2
--- /dev/null
@@ -0,0 +1,2 @@
+#!/usr/bin/env bash
+"$GITEA_ROOT/gitea" hook --config="$GITEA_ROOT/$GITEA_CONF" update $1 $2 $3
diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/info/exclude b/tests/gitea-repositories-meta/org42/search-by-path.git/info/exclude
new file mode 100644 (file)
index 0000000..a5196d1
--- /dev/null
@@ -0,0 +1,6 @@
+# git ls-files --others --exclude-from=.git/info/exclude
+# Lines that start with '#' are comments.
+# For a project mostly in C, the following would be a good set of
+# exclude patterns (uncomment them if you want to use them):
+# *.[oa]
+# *~
diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/info/refs b/tests/gitea-repositories-meta/org42/search-by-path.git/info/refs
new file mode 100644 (file)
index 0000000..6b948c9
--- /dev/null
@@ -0,0 +1,13 @@
+90c1019714259b24fb81711d4416ac0f18667dfa       refs/heads/DefaultBranch
+985f0301dba5e7b34be866819cd15ad3d8f508ee       refs/heads/branch2
+65f1bf27bc3bf70f64657658635e66094edbcb4d       refs/heads/develop
+65f1bf27bc3bf70f64657658635e66094edbcb4d       refs/heads/feature/1
+78fb907e3a3309eae4fe8fef030874cebbf1cd5e       refs/heads/home-md-img-check
+3731fe53b763859aaf83e703ee731f6b9447ff1e       refs/heads/master
+62fb502a7172d4453f0322a2cc85bddffa57f07a       refs/heads/pr-to-update
+4649299398e4d39a5c09eb4f534df6f1e1eb87cc       refs/heads/sub-home-md-img-check
+3fa2f829675543ecfc16b2891aebe8bf0608a8f4       refs/notes/commits
+4a357436d925b5c974181ff12a994538ddc5a269       refs/pull/2/head
+5f22f7d0d95d614d25a5b68592adb345a4b5c7fd       refs/pull/3/head
+62fb502a7172d4453f0322a2cc85bddffa57f07a       refs/pull/5/head
+65f1bf27bc3bf70f64657658635e66094edbcb4d       refs/tags/v1.1
diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/logs/refs/heads/master b/tests/gitea-repositories-meta/org42/search-by-path.git/logs/refs/heads/master
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/objects/info/commit-graph b/tests/gitea-repositories-meta/org42/search-by-path.git/objects/info/commit-graph
new file mode 100644 (file)
index 0000000..b38715b
Binary files /dev/null and b/tests/gitea-repositories-meta/org42/search-by-path.git/objects/info/commit-graph differ
diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/objects/info/packs b/tests/gitea-repositories-meta/org42/search-by-path.git/objects/info/packs
new file mode 100644 (file)
index 0000000..b2af8c8
--- /dev/null
@@ -0,0 +1,2 @@
+P pack-393dc29256bc27cb2ec73898507df710be7a3cf5.pack
+
diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/objects/pack/pack-393dc29256bc27cb2ec73898507df710be7a3cf5.bitmap b/tests/gitea-repositories-meta/org42/search-by-path.git/objects/pack/pack-393dc29256bc27cb2ec73898507df710be7a3cf5.bitmap
new file mode 100644 (file)
index 0000000..1fdef22
Binary files /dev/null and b/tests/gitea-repositories-meta/org42/search-by-path.git/objects/pack/pack-393dc29256bc27cb2ec73898507df710be7a3cf5.bitmap differ
diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/objects/pack/pack-393dc29256bc27cb2ec73898507df710be7a3cf5.idx b/tests/gitea-repositories-meta/org42/search-by-path.git/objects/pack/pack-393dc29256bc27cb2ec73898507df710be7a3cf5.idx
new file mode 100644 (file)
index 0000000..0d930e7
Binary files /dev/null and b/tests/gitea-repositories-meta/org42/search-by-path.git/objects/pack/pack-393dc29256bc27cb2ec73898507df710be7a3cf5.idx differ
diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/objects/pack/pack-393dc29256bc27cb2ec73898507df710be7a3cf5.pack b/tests/gitea-repositories-meta/org42/search-by-path.git/objects/pack/pack-393dc29256bc27cb2ec73898507df710be7a3cf5.pack
new file mode 100644 (file)
index 0000000..f1aac1e
Binary files /dev/null and b/tests/gitea-repositories-meta/org42/search-by-path.git/objects/pack/pack-393dc29256bc27cb2ec73898507df710be7a3cf5.pack differ
diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/objects/pack/pack-393dc29256bc27cb2ec73898507df710be7a3cf5.rev b/tests/gitea-repositories-meta/org42/search-by-path.git/objects/pack/pack-393dc29256bc27cb2ec73898507df710be7a3cf5.rev
new file mode 100644 (file)
index 0000000..869860b
Binary files /dev/null and b/tests/gitea-repositories-meta/org42/search-by-path.git/objects/pack/pack-393dc29256bc27cb2ec73898507df710be7a3cf5.rev differ
diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/packed-refs b/tests/gitea-repositories-meta/org42/search-by-path.git/packed-refs
new file mode 100644 (file)
index 0000000..70e69af
--- /dev/null
@@ -0,0 +1,14 @@
+# pack-refs with: peeled fully-peeled sorted 
+90c1019714259b24fb81711d4416ac0f18667dfa refs/heads/DefaultBranch
+985f0301dba5e7b34be866819cd15ad3d8f508ee refs/heads/branch2
+65f1bf27bc3bf70f64657658635e66094edbcb4d refs/heads/develop
+65f1bf27bc3bf70f64657658635e66094edbcb4d refs/heads/feature/1
+78fb907e3a3309eae4fe8fef030874cebbf1cd5e refs/heads/home-md-img-check
+3731fe53b763859aaf83e703ee731f6b9447ff1e refs/heads/master
+62fb502a7172d4453f0322a2cc85bddffa57f07a refs/heads/pr-to-update
+4649299398e4d39a5c09eb4f534df6f1e1eb87cc refs/heads/sub-home-md-img-check
+3fa2f829675543ecfc16b2891aebe8bf0608a8f4 refs/notes/commits
+4a357436d925b5c974181ff12a994538ddc5a269 refs/pull/2/head
+5f22f7d0d95d614d25a5b68592adb345a4b5c7fd refs/pull/3/head
+62fb502a7172d4453f0322a2cc85bddffa57f07a refs/pull/5/head
+65f1bf27bc3bf70f64657658635e66094edbcb4d refs/tags/v1.1
index 70d3a446f768879b1fd947c865db473322431f02..fff121490c9ca7c68679f311d0f6c0d5bc1540b5 100644 (file)
@@ -177,7 +177,7 @@ func TestAPIGetAll(t *testing.T) {
        var apiOrgList []*api.Organization
 
        DecodeJSON(t, resp, &apiOrgList)
-       assert.Len(t, apiOrgList, 12)
+       assert.Len(t, apiOrgList, 13)
        assert.Equal(t, "Limited Org 36", apiOrgList[1].FullName)
        assert.Equal(t, "limited", apiOrgList[1].Visibility)
 
@@ -186,7 +186,7 @@ func TestAPIGetAll(t *testing.T) {
        resp = MakeRequest(t, req, http.StatusOK)
 
        DecodeJSON(t, resp, &apiOrgList)
-       assert.Len(t, apiOrgList, 8)
+       assert.Len(t, apiOrgList, 9)
        assert.Equal(t, "org 17", apiOrgList[0].FullName)
        assert.Equal(t, "public", apiOrgList[0].Visibility)
 }
index 716da762e542de227796b127dcd3481076ecc3f7..93c9ca0920d496e90aad892e91d49529af462fd2 100644 (file)
@@ -94,9 +94,9 @@ func TestAPISearchRepo(t *testing.T) {
        }{
                {
                        name: "RepositoriesMax50", requestURL: "/api/v1/repos/search?limit=50&private=false", expectedResults: expectedResults{
-                               nil:   {count: 35},
-                               user:  {count: 35},
-                               user2: {count: 35},
+                               nil:   {count: 36},
+                               user:  {count: 36},
+                               user2: {count: 36},
                        },
                },
                {