summary refs log tree commit diff stats
path: root/modules/setting
diff options
context:
space:
mode:
author guillep2k <18600385+guillep2k@users.noreply.github.com> 2019-09-11 14:26:28 -0300
committer Lauris BH <lauris@nix.lv> 2019-09-11 20:26:28 +0300
commit 72f6d5c882dc1adfd249e85576aaf6384ee39251 (patch)
tree 63c5c5bffb02eba06a10307f1c8f290e9be64386 /modules/setting
parent 3fd0eec900126d392ff7a45c510cfe64639c198e (diff)
download gitea-72f6d5c882dc1adfd249e85576aaf6384ee39251.tar.gz
gitea-72f6d5c882dc1adfd249e85576aaf6384ee39251.zip
Restrict repository indexing by glob match (#7767)
* Restrict repository indexing by file extension
* Use REPO_EXTENSIONS_LIST_INCLUDE instead of REPO_EXTENSIONS_LIST_EXCLUDE and have a more flexible extension pattern
* Corrected to pass lint gosimple
* Add wildcard support to REPO_INDEXER_EXTENSIONS
* This reverts commit 72a650c8e42f4abf59d5df7cd5dc27b451494cc6.
* Add wildcard support to REPO_INDEXER_EXTENSIONS (no make vendor)
* Simplify isIndexable() for better clarity
* Add gobwas/glob to vendors
* manually set appengine new release
* Implement better REPO_INDEXER_INCLUDE and REPO_INDEXER_EXCLUDE
* Add unit and integration tests
* Update app.ini.sample and reword config-cheat-sheet
* Add doc page and correct app.ini.sample
* Some polish on the doc
* Simplify code as suggested by @lafriks
Diffstat (limited to 'modules/setting')
-rw-r--r-- modules/setting/indexer.go 26
-rw-r--r-- modules/setting/indexer_test.go 73
2 files changed, 99 insertions, 0 deletions
diff --git a/modules/setting/indexer.go b/modules/setting/indexer.go
index 36fd4a020b..30c670d407 100644
--- a/modules/setting/indexer.go
+++ b/modules/setting/indexer.go
@@ -7,6 +7,11 @@ package setting
import (
"path"
"path/filepath"
+ "strings"
+
+ "code.gitea.io/gitea/modules/log"
+
+ "github.com/gobwas/glob"
)
// enumerates all the indexer queue types
@@ -29,6 +34,8 @@ var (
IssueQueueDir string
IssueQueueConnStr string
IssueQueueBatchNumber int
+ IncludePatterns []glob.Glob
+ ExcludePatterns []glob.Glob
}{
IssueType: "bleve",
IssuePath: "indexers/issues.bleve",
@@ -51,6 +58,9 @@ func newIndexerService() {
if !filepath.IsAbs(Indexer.RepoPath) {
Indexer.RepoPath = path.Join(AppWorkPath, Indexer.RepoPath)
}
+ Indexer.IncludePatterns = IndexerGlobFromString(sec.Key("REPO_INDEXER_INCLUDE").MustString(""))
+ Indexer.ExcludePatterns = IndexerGlobFromString(sec.Key("REPO_INDEXER_EXCLUDE").MustString(""))
+
Indexer.UpdateQueueLength = sec.Key("UPDATE_BUFFER_LEN").MustInt(20)
Indexer.MaxIndexerFileSize = sec.Key("MAX_FILE_SIZE").MustInt64(1024 * 1024)
Indexer.IssueQueueType = sec.Key("ISSUE_INDEXER_QUEUE_TYPE").MustString(LevelQueueType)
@@ -58,3 +68,19 @@ func newIndexerService() {
Indexer.IssueQueueConnStr = sec.Key("ISSUE_INDEXER_QUEUE_CONN_STR").MustString(path.Join(AppDataPath, ""))
Indexer.IssueQueueBatchNumber = sec.Key("ISSUE_INDEXER_QUEUE_BATCH_NUMBER").MustInt(20)
}
+
+// IndexerGlobFromString parses a comma separated list of patterns and returns a glob.Glob slice suited for repo indexing. The input is lowercased first, each entry is whitespace-trimmed, empty entries are ignored, and entries that fail to compile are logged and skipped.
+func IndexerGlobFromString(globstr string) []glob.Glob {
+ extarr := make([]glob.Glob, 0, 10) // non-nil, zero-length result; 10 is only an initial capacity
+ for _, expr := range strings.Split(strings.ToLower(globstr), ",") { // lowercase the whole list, then split on commas
+ expr = strings.TrimSpace(expr) // tolerate spaces around each comma-separated entry
+ if expr != "" { // skip empty entries (empty input, doubled or trailing commas)
+ if g, err := glob.Compile(expr, '.', '/'); err != nil { // '.' and '/' are handed to glob.Compile as separator runes
+ log.Info("Invalid glob expresion '%s' (skipped): %v", expr, err) // NOTE(review): "expresion" is misspelled in the log text; a bad pattern is only logged, never returned as an error
+ } else {
+ extarr = append(extarr, g) // compiled globs keep the order in which they appear in globstr
+ }
+ }
+ }
+ return extarr
+}
diff --git a/modules/setting/indexer_test.go b/modules/setting/indexer_test.go
new file mode 100644
index 0000000000..ed631747dc
--- /dev/null
+++ b/modules/setting/indexer_test.go
@@ -0,0 +1,73 @@
+// Copyright 2019 The Gitea Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package setting
+
+import (
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+)
+
+type indexerMatchList struct { // one expectation consumed by checkGlobMatch
+ value string // candidate string tested against each compiled glob
+ position int // expected index of the first glob that matches value, or -1 when none should match
+}
+
+func Test_newIndexerGlobSettings(t *testing.T) { // exercises IndexerGlobFromString through checkGlobMatch
+
+ checkGlobMatch(t, "", []indexerMatchList{}) // empty input: no globs expected
+ checkGlobMatch(t, " ", []indexerMatchList{}) // whitespace-only input: no globs expected
+ checkGlobMatch(t, "data, */data, */data/*, **/data/*, **/data/**", []indexerMatchList{ // each entry: {string, index of first pattern expected to match, -1 = none}
+ {"", -1},
+ {"don't", -1},
+ {"data", 0},
+ {"/data", 1},
+ {"x/data", 1},
+ {"x/data/y", 2},
+ {"a/b/c/data/z", 3},
+ {"a/b/c/data/x/y/z", 4},
+ })
+ checkGlobMatch(t, "*.txt, txt, **.txt, **txt, **txt*", []indexerMatchList{ // single-star vs double-star patterns around "txt"
+ {"my.txt", 0},
+ {"don't", -1},
+ {"mytxt", 3},
+ {"/data/my.txt", 2},
+ {"data/my.txt", 2},
+ {"data/txt", 3},
+ {"data/thistxtfile", 4},
+ {"/data/thistxtfile", 4},
+ })
+ checkGlobMatch(t, "data/**/*.txt, data/**.txt", []indexerMatchList{ // patterns anchored under "data/"
+ {"data/a/b/c/d.txt", 0},
+ {"data/a.txt", 1},
+ })
+ checkGlobMatch(t, "**/*.txt, data/**.txt", []indexerMatchList{ // the first pattern is expected to win whenever it matches
+ {"data/a/b/c/d.txt", 0},
+ {"data/a.txt", 0},
+ {"a.txt", -1}, // expected to match neither pattern
+ })
+}
+
+func checkGlobMatch(t *testing.T, globstr string, list []indexerMatchList) { // compiles globstr and asserts, per entry, which glob index matches first
+ glist := IndexerGlobFromString(globstr)
+ if len(list) == 0 { // an empty expectation list means globstr must yield no globs at all
+ assert.Empty(t, glist)
+ return
+ }
+ assert.NotEmpty(t, glist)
+ for _, m := range list {
+ found := false
+ for pos, g := range glist { // globs are tried in the order IndexerGlobFromString returned them
+ if g.Match(m.value) {
+ assert.Equal(t, m.position, pos, "Test string `%s` doesn't match `%s`@%d, but matches @%d", m.value, globstr, m.position, pos)
+ found = true
+ break // only the first matching glob is compared with the expected position
+ }
+ }
+ if !found { // nothing matched, so the entry must have expected -1
+ assert.Equal(t, m.position, -1, "Test string `%s` doesn't match `%s` anywhere; expected @%d", m.value, globstr, m.position)
+ }
+ }
+}