aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorzeripath <art27@cantab.net>2021-04-01 18:41:09 +0100
committerGitHub <noreply@github.com>2021-04-01 19:41:09 +0200
commitff460ca74d37b1eadac63b8858d0daa1690e0e2f (patch)
tree3a04e464fc5e4e6630b31fe4444dd007eb52f799
parent43fb4921e3fb67be9f2f3a6d631a21a3322f492b (diff)
downloadgitea-ff460ca74d37b1eadac63b8858d0daa1690e0e2f.tar.gz
gitea-ff460ca74d37b1eadac63b8858d0daa1690e0e2f.zip
Speed up `enry.IsVendor` (#15213)
`enry.IsVendor` is kinda slow as it simply iterates across all regexps. This PR adjusts the regexps to combine them to make this process a little quicker. Related #15143 Signed-off-by: Andrew Thornton <art27@cantab.net>
-rw-r--r--modules/analyze/vendor.go70
-rw-r--r--modules/analyze/vendor_test.go42
-rw-r--r--modules/git/repo_language_stats_gogit.go2
-rw-r--r--modules/git/repo_language_stats_nogogit.go2
-rw-r--r--modules/indexer/code/bleve.go2
-rw-r--r--modules/indexer/code/elastic_search.go2
6 files changed, 116 insertions, 4 deletions
diff --git a/modules/analyze/vendor.go b/modules/analyze/vendor.go
new file mode 100644
index 0000000000..12ae8dbd80
--- /dev/null
+++ b/modules/analyze/vendor.go
@@ -0,0 +1,70 @@
+// Copyright 2021 The Gitea Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package analyze
+
+import (
+ "regexp"
+ "sort"
+ "strings"
+
+ "github.com/go-enry/go-enry/v2/data"
+)
+
+// isVendorRegExp is the single expression that init builds by combining every
+// pattern in data.VendorMatchers; IsVendor matches paths against it.
+var isVendorRegExp *regexp.Regexp
+
+// init folds the vendor patterns from data.VendorMatchers into one regular
+// expression so that a path is tested with a single match instead of one
+// match per pattern.
+//
+// Patterns are bucketed by their leading anchor ("^", "(^|/)" or none) so the
+// anchor is written once per bucket. Alternation has the lowest precedence in
+// regexp syntax, so each bucket's alternatives are wrapped in their own group
+// before the anchor is prepended; otherwise the anchor would apply only to
+// the first alternative and the remaining patterns would match anywhere in
+// the path.
+func init() {
+	const sharedPrefix = "(^|/)"
+
+	matchers := data.VendorMatchers
+
+	caretStrings := make([]string, 0, 10)
+	caretShareStrings := make([]string, 0, 10)
+	matcherStrings := make([]string, 0, len(matchers))
+
+	for _, matcher := range matchers {
+		str := matcher.String()
+		// HasPrefix instead of indexing/slicing: an empty pattern or one
+		// shorter than the prefix falls through to the default bucket rather
+		// than panicking.
+		switch {
+		case strings.HasPrefix(str, "^"):
+			caretStrings = append(caretStrings, str[1:])
+		case strings.HasPrefix(str, sharedPrefix):
+			caretShareStrings = append(caretShareStrings, str[len(sharedPrefix):])
+		default:
+			matcherStrings = append(matcherStrings, str)
+		}
+	}
+
+	// Sorting keeps the combined expression deterministic regardless of the
+	// order of the source table.
+	sort.Strings(caretShareStrings)
+	sort.Strings(caretStrings)
+	sort.Strings(matcherStrings)
+
+	// Empty buckets are skipped so they neither panic nor contribute an empty
+	// alternative (which would match every path).
+	groups := make([]string, 0, 3)
+	if len(caretStrings) > 0 {
+		groups = append(groups, "^"+vendorAlternation(caretStrings))
+	}
+	if len(caretShareStrings) > 0 {
+		groups = append(groups, "(?:^|/)"+vendorAlternation(caretShareStrings))
+	}
+	if len(matcherStrings) > 0 {
+		groups = append(groups, vendorAlternation(matcherStrings))
+	}
+	if len(groups) == 0 {
+		// With no patterns nothing is vendored: `\z.` demands a character
+		// after the end of the text and therefore never matches.
+		groups = append(groups, `\z.`)
+	}
+
+	isVendorRegExp = regexp.MustCompile(strings.Join(groups, "|"))
+}
+
+// vendorAlternation wraps every pattern in its own non-capturing group and
+// joins them with "|" inside one enclosing non-capturing group, so a prefix
+// placed in front of the result applies to all alternatives. patterns must be
+// non-empty.
+func vendorAlternation(patterns []string) string {
+	return "(?:(?:" + strings.Join(patterns, ")|(?:") + "))"
+}
+
+// IsVendor reports whether path matches the combined vendor expression held
+// in isVendorRegExp.
+func IsVendor(path string) bool {
+	matched := isVendorRegExp.MatchString(path)
+	return matched
+}
diff --git a/modules/analyze/vendor_test.go b/modules/analyze/vendor_test.go
new file mode 100644
index 0000000000..2784e49d34
--- /dev/null
+++ b/modules/analyze/vendor_test.go
@@ -0,0 +1,42 @@
+// Copyright 2021 The Gitea Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package analyze
+
+import "testing"
+
+func TestIsVendor(t *testing.T) {
+ tests := []struct {
+ path string
+ want bool
+ }{
+ {"cache/", true},
+ {"random/cache/", true},
+ {"cache", false},
+ {"dependencies/", true},
+ {"Dependencies/", true},
+ {"dependency/", false},
+ {"dist/", true},
+ {"dist", false},
+ {"random/dist/", true},
+ {"random/dist", false},
+ {"deps/", true},
+ {"configure", true},
+ {"a/configure", true},
+ {"config.guess", true},
+ {"config.guess/", false},
+ {".vscode/", true},
+ {"doc/_build/", true},
+ {"a/docs/_build/", true},
+ {"a/dasdocs/_build-vsdoc.js", true},
+ {"a/dasdocs/_build-vsdoc.j", false},
+ }
+ for _, tt := range tests {
+ t.Run(tt.path, func(t *testing.T) {
+ if got := IsVendor(tt.path); got != tt.want {
+ t.Errorf("IsVendor() = %v, want %v", got, tt.want)
+ }
+ })
+ }
+}
diff --git a/modules/git/repo_language_stats_gogit.go b/modules/git/repo_language_stats_gogit.go
index b5a235921c..20a7b061f2 100644
--- a/modules/git/repo_language_stats_gogit.go
+++ b/modules/git/repo_language_stats_gogit.go
@@ -43,7 +43,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
sizes := make(map[string]int64)
err = tree.Files().ForEach(func(f *object.File) error {
- if f.Size == 0 || enry.IsVendor(f.Name) || enry.IsDotFile(f.Name) ||
+ if f.Size == 0 || analyze.IsVendor(f.Name) || enry.IsDotFile(f.Name) ||
enry.IsDocumentation(f.Name) || enry.IsConfiguration(f.Name) {
return nil
}
diff --git a/modules/git/repo_language_stats_nogogit.go b/modules/git/repo_language_stats_nogogit.go
index a929d7953b..3f197f8d74 100644
--- a/modules/git/repo_language_stats_nogogit.go
+++ b/modules/git/repo_language_stats_nogogit.go
@@ -67,7 +67,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
for _, f := range entries {
contentBuf.Reset()
content = contentBuf.Bytes()
- if f.Size() == 0 || enry.IsVendor(f.Name()) || enry.IsDotFile(f.Name()) ||
+ if f.Size() == 0 || analyze.IsVendor(f.Name()) || enry.IsDotFile(f.Name()) ||
enry.IsDocumentation(f.Name()) || enry.IsConfiguration(f.Name()) {
continue
}
diff --git a/modules/indexer/code/bleve.go b/modules/indexer/code/bleve.go
index 573ea8b88c..416adeea74 100644
--- a/modules/indexer/code/bleve.go
+++ b/modules/indexer/code/bleve.go
@@ -178,7 +178,7 @@ func NewBleveIndexer(indexDir string) (*BleveIndexer, bool, error) {
func (b *BleveIndexer) addUpdate(batchWriter *io.PipeWriter, batchReader *bufio.Reader, commitSha string, update fileUpdate, repo *models.Repository, batch rupture.FlushingBatch) error {
// Ignore vendored files in code search
- if setting.Indexer.ExcludeVendored && enry.IsVendor(update.Filename) {
+ if setting.Indexer.ExcludeVendored && analyze.IsVendor(update.Filename) {
return nil
}
diff --git a/modules/indexer/code/elastic_search.go b/modules/indexer/code/elastic_search.go
index 5327eb1e51..ebb7910fdc 100644
--- a/modules/indexer/code/elastic_search.go
+++ b/modules/indexer/code/elastic_search.go
@@ -177,7 +177,7 @@ func (b *ElasticSearchIndexer) init() (bool, error) {
func (b *ElasticSearchIndexer) addUpdate(batchWriter *io.PipeWriter, batchReader *bufio.Reader, sha string, update fileUpdate, repo *models.Repository) ([]elastic.BulkableRequest, error) {
// Ignore vendored files in code search
- if setting.Indexer.ExcludeVendored && enry.IsVendor(update.Filename) {
+ if setting.Indexer.ExcludeVendored && analyze.IsVendor(update.Filename) {
return nil, nil
}