Backport #15213. `enry.IsVendor` is rather slow because it simply iterates across all of its regexps, testing each one in turn. This PR adjusts the regexps and combines them into a single expression to make this process a little quicker. Related #15143. Signed-off-by: Andrew Thornton <art27@cantab.net> tags/v1.14.0
@@ -0,0 +1,70 @@ | |||
// Copyright 2021 The Gitea Authors. All rights reserved. | |||
// Use of this source code is governed by a MIT-style | |||
// license that can be found in the LICENSE file. | |||
package analyze | |||
import ( | |||
"regexp" | |||
"sort" | |||
"strings" | |||
"github.com/go-enry/go-enry/v2/data" | |||
) | |||
var isVendorRegExp *regexp.Regexp | |||
func init() { | |||
matchers := data.VendorMatchers | |||
caretStrings := make([]string, 0, 10) | |||
caretShareStrings := make([]string, 0, 10) | |||
matcherStrings := make([]string, 0, len(matchers)) | |||
for _, matcher := range matchers { | |||
str := matcher.String() | |||
if str[0] == '^' { | |||
caretStrings = append(caretStrings, str[1:]) | |||
} else if str[0:5] == "(^|/)" { | |||
caretShareStrings = append(caretShareStrings, str[5:]) | |||
} else { | |||
matcherStrings = append(matcherStrings, str) | |||
} | |||
} | |||
sort.Strings(caretShareStrings) | |||
sort.Strings(caretStrings) | |||
sort.Strings(matcherStrings) | |||
sb := &strings.Builder{} | |||
sb.WriteString("(?:^(?:") | |||
sb.WriteString(caretStrings[0]) | |||
for _, matcher := range caretStrings[1:] { | |||
sb.WriteString(")|(?:") | |||
sb.WriteString(matcher) | |||
} | |||
sb.WriteString("))") | |||
sb.WriteString("|") | |||
sb.WriteString("(?:(?:^|/)(?:") | |||
sb.WriteString(caretShareStrings[0]) | |||
for _, matcher := range caretShareStrings[1:] { | |||
sb.WriteString(")|(?:") | |||
sb.WriteString(matcher) | |||
} | |||
sb.WriteString("))") | |||
sb.WriteString("|") | |||
sb.WriteString("(?:") | |||
sb.WriteString(matcherStrings[0]) | |||
for _, matcher := range matcherStrings[1:] { | |||
sb.WriteString(")|(?:") | |||
sb.WriteString(matcher) | |||
} | |||
sb.WriteString(")") | |||
combined := sb.String() | |||
isVendorRegExp = regexp.MustCompile(combined) | |||
} | |||
// IsVendor returns whether or not path is a vendor path. | |||
func IsVendor(path string) bool { | |||
return isVendorRegExp.MatchString(path) | |||
} |
@@ -0,0 +1,42 @@ | |||
// Copyright 2021 The Gitea Authors. All rights reserved. | |||
// Use of this source code is governed by a MIT-style | |||
// license that can be found in the LICENSE file. | |||
package analyze | |||
import "testing" | |||
func TestIsVendor(t *testing.T) { | |||
tests := []struct { | |||
path string | |||
want bool | |||
}{ | |||
{"cache/", true}, | |||
{"random/cache/", true}, | |||
{"cache", false}, | |||
{"dependencies/", true}, | |||
{"Dependencies/", true}, | |||
{"dependency/", false}, | |||
{"dist/", true}, | |||
{"dist", false}, | |||
{"random/dist/", true}, | |||
{"random/dist", false}, | |||
{"deps/", true}, | |||
{"configure", true}, | |||
{"a/configure", true}, | |||
{"config.guess", true}, | |||
{"config.guess/", false}, | |||
{".vscode/", true}, | |||
{"doc/_build/", true}, | |||
{"a/docs/_build/", true}, | |||
{"a/dasdocs/_build-vsdoc.js", true}, | |||
{"a/dasdocs/_build-vsdoc.j", false}, | |||
} | |||
for _, tt := range tests { | |||
t.Run(tt.path, func(t *testing.T) { | |||
if got := IsVendor(tt.path); got != tt.want { | |||
t.Errorf("IsVendor() = %v, want %v", got, tt.want) | |||
} | |||
}) | |||
} | |||
} |
@@ -43,7 +43,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err | |||
sizes := make(map[string]int64) | |||
err = tree.Files().ForEach(func(f *object.File) error { | |||
if f.Size == 0 || enry.IsVendor(f.Name) || enry.IsDotFile(f.Name) || | |||
if f.Size == 0 || analyze.IsVendor(f.Name) || enry.IsDotFile(f.Name) || | |||
enry.IsDocumentation(f.Name) || enry.IsConfiguration(f.Name) { | |||
return nil | |||
} |
@@ -67,7 +67,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err | |||
for _, f := range entries { | |||
contentBuf.Reset() | |||
content = contentBuf.Bytes() | |||
if f.Size() == 0 || enry.IsVendor(f.Name()) || enry.IsDotFile(f.Name()) || | |||
if f.Size() == 0 || analyze.IsVendor(f.Name()) || enry.IsDotFile(f.Name()) || | |||
enry.IsDocumentation(f.Name()) || enry.IsConfiguration(f.Name()) { | |||
continue | |||
} |
@@ -178,7 +178,7 @@ func NewBleveIndexer(indexDir string) (*BleveIndexer, bool, error) { | |||
func (b *BleveIndexer) addUpdate(batchWriter *io.PipeWriter, batchReader *bufio.Reader, commitSha string, update fileUpdate, repo *models.Repository, batch rupture.FlushingBatch) error { | |||
// Ignore vendored files in code search | |||
if setting.Indexer.ExcludeVendored && enry.IsVendor(update.Filename) { | |||
if setting.Indexer.ExcludeVendored && analyze.IsVendor(update.Filename) { | |||
return nil | |||
} | |||
@@ -177,7 +177,7 @@ func (b *ElasticSearchIndexer) init() (bool, error) { | |||
func (b *ElasticSearchIndexer) addUpdate(batchWriter *io.PipeWriter, batchReader *bufio.Reader, sha string, update fileUpdate, repo *models.Repository) ([]elastic.BulkableRequest, error) { | |||
// Ignore vendored files in code search | |||
if setting.Indexer.ExcludeVendored && enry.IsVendor(update.Filename) { | |||
if setting.Indexer.ExcludeVendored && analyze.IsVendor(update.Filename) { | |||
return nil, nil | |||
} | |||