diff options
author | guillep2k <18600385+guillep2k@users.noreply.github.com> | 2019-09-11 14:26:28 -0300 |
---|---|---|
committer | Lauris BH <lauris@nix.lv> | 2019-09-11 20:26:28 +0300 |
commit | 72f6d5c882dc1adfd249e85576aaf6384ee39251 (patch) | |
tree | 63c5c5bffb02eba06a10307f1c8f290e9be64386 /modules/setting | |
parent | 3fd0eec900126d392ff7a45c510cfe64639c198e (diff) | |
download | gitea-72f6d5c882dc1adfd249e85576aaf6384ee39251.tar.gz gitea-72f6d5c882dc1adfd249e85576aaf6384ee39251.zip |
Restrict repository indexing by glob match (#7767)
* Restrict repository indexing by file extension
* Use REPO_EXTENSIONS_LIST_INCLUDE instead of REPO_EXTENSIONS_LIST_EXCLUDE and have a more flexible extension pattern
* Corrected to pass lint gosimple
* Add wildcard support to REPO_INDEXER_EXTENSIONS
* This reverts commit 72a650c8e42f4abf59d5df7cd5dc27b451494cc6.
* Add wildcard support to REPO_INDEXER_EXTENSIONS (no make vendor)
* Simplify isIndexable() for better clarity
* Add gobwas/glob to vendors
* manually set appengine new release
* Implement better REPO_INDEXER_INCLUDE and REPO_INDEXER_EXCLUDE
* Add unit and integration tests
* Update app.ini.sample and reword config-cheat-sheet
* Add doc page and correct app.ini.sample
* Some polish on the doc
* Simplify code as suggested by @lafriks
Diffstat (limited to 'modules/setting')
-rw-r--r-- | modules/setting/indexer.go | 26 | ||||
-rw-r--r-- | modules/setting/indexer_test.go | 73 |
2 files changed, 99 insertions, 0 deletions
diff --git a/modules/setting/indexer.go b/modules/setting/indexer.go
index 36fd4a020b..30c670d407 100644
--- a/modules/setting/indexer.go
+++ b/modules/setting/indexer.go
@@ -7,6 +7,11 @@ package setting
 import (
 	"path"
 	"path/filepath"
+	"strings"
+
+	"code.gitea.io/gitea/modules/log"
+
+	"github.com/gobwas/glob"
 )
 
 // enumerates all the indexer queue types
@@ -29,6 +34,8 @@ var (
 		IssueQueueDir         string
 		IssueQueueConnStr     string
 		IssueQueueBatchNumber int
+		IncludePatterns       []glob.Glob
+		ExcludePatterns       []glob.Glob
 	}{
 		IssueType:             "bleve",
 		IssuePath:             "indexers/issues.bleve",
@@ -51,6 +58,9 @@ func newIndexerService() {
 	if !filepath.IsAbs(Indexer.RepoPath) {
 		Indexer.RepoPath = path.Join(AppWorkPath, Indexer.RepoPath)
 	}
+	Indexer.IncludePatterns = IndexerGlobFromString(sec.Key("REPO_INDEXER_INCLUDE").MustString(""))
+	Indexer.ExcludePatterns = IndexerGlobFromString(sec.Key("REPO_INDEXER_EXCLUDE").MustString(""))
+
 	Indexer.UpdateQueueLength = sec.Key("UPDATE_BUFFER_LEN").MustInt(20)
 	Indexer.MaxIndexerFileSize = sec.Key("MAX_FILE_SIZE").MustInt64(1024 * 1024)
 	Indexer.IssueQueueType = sec.Key("ISSUE_INDEXER_QUEUE_TYPE").MustString(LevelQueueType)
@@ -58,3 +68,19 @@ func newIndexerService() {
 	Indexer.IssueQueueConnStr = sec.Key("ISSUE_INDEXER_QUEUE_CONN_STR").MustString(path.Join(AppDataPath, ""))
 	Indexer.IssueQueueBatchNumber = sec.Key("ISSUE_INDEXER_QUEUE_BATCH_NUMBER").MustInt(20)
 }
+
+// IndexerGlobFromString parses a comma separated list of patterns and returns a glob.Glob slice suited for repo indexing
+func IndexerGlobFromString(globstr string) []glob.Glob {
+	extarr := make([]glob.Glob, 0, 10)
+	for _, expr := range strings.Split(strings.ToLower(globstr), ",") {
+		expr = strings.TrimSpace(expr)
+		if expr != "" {
+			if g, err := glob.Compile(expr, '.', '/'); err != nil {
+				log.Info("Invalid glob expresion '%s' (skipped): %v", expr, err)
+			} else {
+				extarr = append(extarr, g)
+			}
+		}
+	}
+	return extarr
+}
diff --git a/modules/setting/indexer_test.go b/modules/setting/indexer_test.go
new file mode 100644
index 0000000000..ed631747dc
--- /dev/null
+++ b/modules/setting/indexer_test.go
@@ -0,0 +1,73 @@
+// Copyright 2019 The Gitea Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package setting
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+type indexerMatchList struct {
+	value    string
+	position int
+}
+
+func Test_newIndexerGlobSettings(t *testing.T) {
+
+	checkGlobMatch(t, "", []indexerMatchList{})
+	checkGlobMatch(t, " ", []indexerMatchList{})
+	checkGlobMatch(t, "data, */data, */data/*, **/data/*, **/data/**", []indexerMatchList{
+		{"", -1},
+		{"don't", -1},
+		{"data", 0},
+		{"/data", 1},
+		{"x/data", 1},
+		{"x/data/y", 2},
+		{"a/b/c/data/z", 3},
+		{"a/b/c/data/x/y/z", 4},
+	})
+	checkGlobMatch(t, "*.txt, txt, **.txt, **txt, **txt*", []indexerMatchList{
+		{"my.txt", 0},
+		{"don't", -1},
+		{"mytxt", 3},
+		{"/data/my.txt", 2},
+		{"data/my.txt", 2},
+		{"data/txt", 3},
+		{"data/thistxtfile", 4},
+		{"/data/thistxtfile", 4},
+	})
+	checkGlobMatch(t, "data/**/*.txt, data/**.txt", []indexerMatchList{
+		{"data/a/b/c/d.txt", 0},
+		{"data/a.txt", 1},
+	})
+	checkGlobMatch(t, "**/*.txt, data/**.txt", []indexerMatchList{
+		{"data/a/b/c/d.txt", 0},
+		{"data/a.txt", 0},
+		{"a.txt", -1},
+	})
+}
+
+func checkGlobMatch(t *testing.T, globstr string, list []indexerMatchList) {
+	glist := IndexerGlobFromString(globstr)
+	if len(list) == 0 {
+		assert.Empty(t, glist)
+		return
+	}
+	assert.NotEmpty(t, glist)
+	for _, m := range list {
+		found := false
+		for pos, g := range glist {
+			if g.Match(m.value) {
+				assert.Equal(t, m.position, pos, "Test string `%s` doesn't match `%s`@%d, but matches @%d", m.value, globstr, m.position, pos)
+				found = true
+				break
+			}
+		}
+		if !found {
+			assert.Equal(t, m.position, -1, "Test string `%s` doesn't match `%s` anywhere; expected @%d", m.value, globstr, m.position)
+		}
+	}
+}