diff options
author | zeripath <art27@cantab.net> | 2021-04-01 18:41:09 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-04-01 19:41:09 +0200 |
commit | ff460ca74d37b1eadac63b8858d0daa1690e0e2f (patch) | |
tree | 3a04e464fc5e4e6630b31fe4444dd007eb52f799 /modules/analyze | |
parent | 43fb4921e3fb67be9f2f3a6d631a21a3322f492b (diff) | |
download | gitea-ff460ca74d37b1eadac63b8858d0daa1690e0e2f.tar.gz gitea-ff460ca74d37b1eadac63b8858d0daa1690e0e2f.zip |
Speed up `enry.IsVendor` (#15213)
`enry.IsVendor` is kinda slow as it simply iterates across all regexps.
This PR ajdusts the regexps to combine them to make this process a
little quicker.
Related #15143
Signed-off-by: Andrew Thornton <art27@cantab.net>
Diffstat (limited to 'modules/analyze')
-rw-r--r-- | modules/analyze/vendor.go | 70 | ||||
-rw-r--r-- | modules/analyze/vendor_test.go | 42 |
2 files changed, 112 insertions, 0 deletions
diff --git a/modules/analyze/vendor.go b/modules/analyze/vendor.go new file mode 100644 index 0000000000..12ae8dbd80 --- /dev/null +++ b/modules/analyze/vendor.go @@ -0,0 +1,70 @@ +// Copyright 2021 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package analyze + +import ( + "regexp" + "sort" + "strings" + + "github.com/go-enry/go-enry/v2/data" +) + +var isVendorRegExp *regexp.Regexp + +func init() { + matchers := data.VendorMatchers + + caretStrings := make([]string, 0, 10) + caretShareStrings := make([]string, 0, 10) + + matcherStrings := make([]string, 0, len(matchers)) + for _, matcher := range matchers { + str := matcher.String() + if str[0] == '^' { + caretStrings = append(caretStrings, str[1:]) + } else if str[0:5] == "(^|/)" { + caretShareStrings = append(caretShareStrings, str[5:]) + } else { + matcherStrings = append(matcherStrings, str) + } + } + + sort.Strings(caretShareStrings) + sort.Strings(caretStrings) + sort.Strings(matcherStrings) + + sb := &strings.Builder{} + sb.WriteString("(?:^(?:") + sb.WriteString(caretStrings[0]) + for _, matcher := range caretStrings[1:] { + sb.WriteString(")|(?:") + sb.WriteString(matcher) + } + sb.WriteString("))") + sb.WriteString("|") + sb.WriteString("(?:(?:^|/)(?:") + sb.WriteString(caretShareStrings[0]) + for _, matcher := range caretShareStrings[1:] { + sb.WriteString(")|(?:") + sb.WriteString(matcher) + } + sb.WriteString("))") + sb.WriteString("|") + sb.WriteString("(?:") + sb.WriteString(matcherStrings[0]) + for _, matcher := range matcherStrings[1:] { + sb.WriteString(")|(?:") + sb.WriteString(matcher) + } + sb.WriteString(")") + combined := sb.String() + isVendorRegExp = regexp.MustCompile(combined) +} + +// IsVendor returns whether or not path is a vendor path. +func IsVendor(path string) bool { + return isVendorRegExp.MatchString(path) +} diff --git a/modules/analyze/vendor_test.go b/modules/analyze/vendor_test.go new file mode 100644 index 0000000000..2784e49d34 --- /dev/null +++ b/modules/analyze/vendor_test.go @@ -0,0 +1,42 @@ +// Copyright 2021 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package analyze + +import "testing" + +func TestIsVendor(t *testing.T) { + tests := []struct { + path string + want bool + }{ + {"cache/", true}, + {"random/cache/", true}, + {"cache", false}, + {"dependencies/", true}, + {"Dependencies/", true}, + {"dependency/", false}, + {"dist/", true}, + {"dist", false}, + {"random/dist/", true}, + {"random/dist", false}, + {"deps/", true}, + {"configure", true}, + {"a/configure", true}, + {"config.guess", true}, + {"config.guess/", false}, + {".vscode/", true}, + {"doc/_build/", true}, + {"a/docs/_build/", true}, + {"a/dasdocs/_build-vsdoc.js", true}, + {"a/dasdocs/_build-vsdoc.j", false}, + } + for _, tt := range tests { + t.Run(tt.path, func(t *testing.T) { + if got := IsVendor(tt.path); got != tt.want { + t.Errorf("IsVendor() = %v, want %v", got, tt.want) + } + }) + } +} |