Backport #15213. `enry.IsVendor` is kinda slow as it simply iterates across all regexps. This PR adjusts the regexps to combine them to make this process a little quicker. Related #15143. Signed-off-by: Andrew Thornton <art27@cantab.net> tags/v1.14.0
// Copyright 2021 The Gitea Authors. All rights reserved. | |||||
// Use of this source code is governed by a MIT-style | |||||
// license that can be found in the LICENSE file. | |||||
package analyze | |||||
import ( | |||||
"regexp" | |||||
"sort" | |||||
"strings" | |||||
"github.com/go-enry/go-enry/v2/data" | |||||
) | |||||
var isVendorRegExp *regexp.Regexp | |||||
func init() { | |||||
matchers := data.VendorMatchers | |||||
caretStrings := make([]string, 0, 10) | |||||
caretShareStrings := make([]string, 0, 10) | |||||
matcherStrings := make([]string, 0, len(matchers)) | |||||
for _, matcher := range matchers { | |||||
str := matcher.String() | |||||
if str[0] == '^' { | |||||
caretStrings = append(caretStrings, str[1:]) | |||||
} else if str[0:5] == "(^|/)" { | |||||
caretShareStrings = append(caretShareStrings, str[5:]) | |||||
} else { | |||||
matcherStrings = append(matcherStrings, str) | |||||
} | |||||
} | |||||
sort.Strings(caretShareStrings) | |||||
sort.Strings(caretStrings) | |||||
sort.Strings(matcherStrings) | |||||
sb := &strings.Builder{} | |||||
sb.WriteString("(?:^(?:") | |||||
sb.WriteString(caretStrings[0]) | |||||
for _, matcher := range caretStrings[1:] { | |||||
sb.WriteString(")|(?:") | |||||
sb.WriteString(matcher) | |||||
} | |||||
sb.WriteString("))") | |||||
sb.WriteString("|") | |||||
sb.WriteString("(?:(?:^|/)(?:") | |||||
sb.WriteString(caretShareStrings[0]) | |||||
for _, matcher := range caretShareStrings[1:] { | |||||
sb.WriteString(")|(?:") | |||||
sb.WriteString(matcher) | |||||
} | |||||
sb.WriteString("))") | |||||
sb.WriteString("|") | |||||
sb.WriteString("(?:") | |||||
sb.WriteString(matcherStrings[0]) | |||||
for _, matcher := range matcherStrings[1:] { | |||||
sb.WriteString(")|(?:") | |||||
sb.WriteString(matcher) | |||||
} | |||||
sb.WriteString(")") | |||||
combined := sb.String() | |||||
isVendorRegExp = regexp.MustCompile(combined) | |||||
} | |||||
// IsVendor returns whether or not path is a vendor path. | |||||
func IsVendor(path string) bool { | |||||
return isVendorRegExp.MatchString(path) | |||||
} |
// Copyright 2021 The Gitea Authors. All rights reserved. | |||||
// Use of this source code is governed by a MIT-style | |||||
// license that can be found in the LICENSE file. | |||||
package analyze | |||||
import "testing" | |||||
func TestIsVendor(t *testing.T) { | |||||
tests := []struct { | |||||
path string | |||||
want bool | |||||
}{ | |||||
{"cache/", true}, | |||||
{"random/cache/", true}, | |||||
{"cache", false}, | |||||
{"dependencies/", true}, | |||||
{"Dependencies/", true}, | |||||
{"dependency/", false}, | |||||
{"dist/", true}, | |||||
{"dist", false}, | |||||
{"random/dist/", true}, | |||||
{"random/dist", false}, | |||||
{"deps/", true}, | |||||
{"configure", true}, | |||||
{"a/configure", true}, | |||||
{"config.guess", true}, | |||||
{"config.guess/", false}, | |||||
{".vscode/", true}, | |||||
{"doc/_build/", true}, | |||||
{"a/docs/_build/", true}, | |||||
{"a/dasdocs/_build-vsdoc.js", true}, | |||||
{"a/dasdocs/_build-vsdoc.j", false}, | |||||
} | |||||
for _, tt := range tests { | |||||
t.Run(tt.path, func(t *testing.T) { | |||||
if got := IsVendor(tt.path); got != tt.want { | |||||
t.Errorf("IsVendor() = %v, want %v", got, tt.want) | |||||
} | |||||
}) | |||||
} | |||||
} |
sizes := make(map[string]int64) | sizes := make(map[string]int64) | ||||
err = tree.Files().ForEach(func(f *object.File) error { | err = tree.Files().ForEach(func(f *object.File) error { | ||||
if f.Size == 0 || enry.IsVendor(f.Name) || enry.IsDotFile(f.Name) || | |||||
if f.Size == 0 || analyze.IsVendor(f.Name) || enry.IsDotFile(f.Name) || | |||||
enry.IsDocumentation(f.Name) || enry.IsConfiguration(f.Name) { | enry.IsDocumentation(f.Name) || enry.IsConfiguration(f.Name) { | ||||
return nil | return nil | ||||
} | } |
for _, f := range entries { | for _, f := range entries { | ||||
contentBuf.Reset() | contentBuf.Reset() | ||||
content = contentBuf.Bytes() | content = contentBuf.Bytes() | ||||
if f.Size() == 0 || enry.IsVendor(f.Name()) || enry.IsDotFile(f.Name()) || | |||||
if f.Size() == 0 || analyze.IsVendor(f.Name()) || enry.IsDotFile(f.Name()) || | |||||
enry.IsDocumentation(f.Name()) || enry.IsConfiguration(f.Name()) { | enry.IsDocumentation(f.Name()) || enry.IsConfiguration(f.Name()) { | ||||
continue | continue | ||||
} | } |
func (b *BleveIndexer) addUpdate(batchWriter *io.PipeWriter, batchReader *bufio.Reader, commitSha string, update fileUpdate, repo *models.Repository, batch rupture.FlushingBatch) error { | func (b *BleveIndexer) addUpdate(batchWriter *io.PipeWriter, batchReader *bufio.Reader, commitSha string, update fileUpdate, repo *models.Repository, batch rupture.FlushingBatch) error { | ||||
// Ignore vendored files in code search | // Ignore vendored files in code search | ||||
if setting.Indexer.ExcludeVendored && enry.IsVendor(update.Filename) { | |||||
if setting.Indexer.ExcludeVendored && analyze.IsVendor(update.Filename) { | |||||
return nil | return nil | ||||
} | } | ||||
func (b *ElasticSearchIndexer) addUpdate(batchWriter *io.PipeWriter, batchReader *bufio.Reader, sha string, update fileUpdate, repo *models.Repository) ([]elastic.BulkableRequest, error) { | func (b *ElasticSearchIndexer) addUpdate(batchWriter *io.PipeWriter, batchReader *bufio.Reader, sha string, update fileUpdate, repo *models.Repository) ([]elastic.BulkableRequest, error) { | ||||
// Ignore vendored files in code search | // Ignore vendored files in code search | ||||
if setting.Indexer.ExcludeVendored && enry.IsVendor(update.Filename) { | |||||
if setting.Indexer.ExcludeVendored && analyze.IsVendor(update.Filename) { | |||||
return nil, nil | return nil, nil | ||||
} | } | ||||