summaryrefslogtreecommitdiffstats
path: root/modules
diff options
context:
space:
mode:
Diffstat (limited to 'modules')
-rw-r--r--modules/analyze/generated.go28
-rw-r--r--modules/git/repo_attribute.go285
-rw-r--r--modules/git/repo_attribute_test.go159
-rw-r--r--modules/git/repo_index.go39
-rw-r--r--modules/git/repo_language_stats_gogit.go70
-rw-r--r--modules/git/repo_language_stats_nogogit.go71
6 files changed, 640 insertions, 12 deletions
diff --git a/modules/analyze/generated.go b/modules/analyze/generated.go
new file mode 100644
index 0000000000..0f14d28545
--- /dev/null
+++ b/modules/analyze/generated.go
@@ -0,0 +1,28 @@
+// Copyright 2021 The Gitea Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package analyze
+
+import (
+ "path/filepath"
+ "strings"
+
+ "github.com/go-enry/go-enry/v2/data"
+)
+
+// IsGenerated reports whether path names a generated file, judging by the
+// path alone: the lower-cased extension is looked up in
+// data.GeneratedCodeExtensions first, then every matcher in
+// data.GeneratedCodeNameMatchers is tried against the full path.
+func IsGenerated(path string) bool {
+	lowerExt := strings.ToLower(filepath.Ext(path))
+	_, knownExt := data.GeneratedCodeExtensions[lowerExt]
+	if knownExt {
+		return true
+	}
+
+	matchers := data.GeneratedCodeNameMatchers
+	for i := range matchers {
+		if matchers[i](path) {
+			return true
+		}
+	}
+	return false
+}
diff --git a/modules/git/repo_attribute.go b/modules/git/repo_attribute.go
index aa5e4c10e7..0bd7d7e49c 100644
--- a/modules/git/repo_attribute.go
+++ b/modules/git/repo_attribute.go
@@ -6,7 +6,12 @@ package git
import (
"bytes"
+ "context"
"fmt"
+ "io"
+ "os"
+ "strconv"
+ "strings"
)
// CheckAttributeOpts represents the possible options to CheckAttribute
@@ -21,7 +26,7 @@ type CheckAttributeOpts struct {
func (repo *Repository) CheckAttribute(opts CheckAttributeOpts) (map[string]map[string]string, error) {
err := LoadGitVersion()
if err != nil {
- return nil, fmt.Errorf("Git version missing: %v", err)
+ return nil, fmt.Errorf("git version missing: %v", err)
}
stdOut := new(bytes.Buffer)
@@ -55,13 +60,14 @@ func (repo *Repository) CheckAttribute(opts CheckAttributeOpts) (map[string]map[
cmd := NewCommand(cmdArgs...)
if err := cmd.RunInDirPipeline(repo.Path, stdOut, stdErr); err != nil {
- return nil, fmt.Errorf("Failed to run check-attr: %v\n%s\n%s", err, stdOut.String(), stdErr.String())
+ return nil, fmt.Errorf("failed to run check-attr: %v\n%s\n%s", err, stdOut.String(), stdErr.String())
}
+ // FIXME: This is incorrect on versions < 1.8.5
fields := bytes.Split(stdOut.Bytes(), []byte{'\000'})
if len(fields)%3 != 1 {
- return nil, fmt.Errorf("Wrong number of fields in return from check-attr")
+ return nil, fmt.Errorf("wrong number of fields in return from check-attr")
}
var name2attribute2info = make(map[string]map[string]string)
@@ -80,3 +86,276 @@ func (repo *Repository) CheckAttribute(opts CheckAttributeOpts) (map[string]map[
return name2attribute2info, nil
}
+
+// CheckAttributeReader provides a reader for check-attribute content that can be long running
+//
+// Usage, as implemented by its methods: set the exported fields, call Init,
+// start Run (it blocks while the command runs), query with CheckPath and
+// finish with Close.
+type CheckAttributeReader struct {
+ // params
+ // Attributes are the attribute names passed to check-attr; Init fails when empty.
+ Attributes []string
+ // Repo supplies the directory (Repo.Path) the command is run in.
+ Repo *Repository
+ // IndexFile, when non-empty, makes Init add --cached and set GIT_INDEX_FILE
+ // from it (only on git >= 1.7.8).
+ IndexFile string
+ // WorkTree, when non-empty, makes Init set GIT_WORK_TREE from it (only on
+ // git >= 1.7.8).
+ WorkTree string
+
+ // stdinReader and stdinWriter are the two ends of the os.Pipe created in
+ // Init; CheckPath writes NUL-terminated paths to stdinWriter.
+ stdinReader io.ReadCloser
+ stdinWriter *os.File
+ // stdOut is handed to the command runner in Run and turns what is written
+ // to it into attributeTriples.
+ stdOut attributeWriter
+ cmd *Command
+ env []string
+ // ctx and cancel are derived in Init from the context passed to it.
+ ctx context.Context
+ cancel context.CancelFunc
+ // running is closed by the callback Run hands to the command runner
+ // (presumably once the process has started), or by Close.
+ running chan struct{}
+}
+
+// Init initializes the cmd
+//
+// It builds `git check-attr --stdin -z [--cached] <Attributes...> --` bound to
+// a cancellable child of ctx, creates the pipe that CheckPath writes paths to
+// and selects the output parser: NUL-separated on git >= 1.8.5, line-separated
+// otherwise.
+//
+// IndexFile and WorkTree (honoured only on git >= 1.7.8) are exported as
+// GIT_INDEX_FILE and GIT_WORK_TREE; setting both keeps both.
+//
+// An error is returned when Attributes is empty or when the pipe cannot be
+// created.
+func (c *CheckAttributeReader) Init(ctx context.Context) error {
+	c.running = make(chan struct{})
+	cmdArgs := []string{"check-attr", "--stdin", "-z"}
+
+	// Collect the environment entries together so that setting WorkTree does
+	// not discard the GIT_INDEX_FILE entry added for IndexFile.
+	var env []string
+	if len(c.IndexFile) > 0 && CheckGitVersionAtLeast("1.7.8") == nil {
+		cmdArgs = append(cmdArgs, "--cached")
+		env = append(env, "GIT_INDEX_FILE="+c.IndexFile)
+	}
+	if len(c.WorkTree) > 0 && CheckGitVersionAtLeast("1.7.8") == nil {
+		env = append(env, "GIT_WORK_TREE="+c.WorkTree)
+	}
+	if len(env) > 0 {
+		c.env = env
+	}
+
+	if len(c.Attributes) == 0 {
+		// Leave behind an already-closed writer so stdOut is never nil.
+		lw := new(nulSeparatedAttributeWriter)
+		lw.attributes = make(chan attributeTriple)
+
+		c.stdOut = lw
+		_ = c.stdOut.Close()
+		return fmt.Errorf("no provided Attributes to check")
+	}
+	cmdArgs = append(cmdArgs, c.Attributes...)
+	cmdArgs = append(cmdArgs, "--")
+
+	c.ctx, c.cancel = context.WithCancel(ctx)
+	c.cmd = NewCommandContext(c.ctx, cmdArgs...)
+
+	var err error
+	c.stdinReader, c.stdinWriter, err = os.Pipe()
+	if err != nil {
+		// Release the derived context; nothing will ever run on it.
+		c.cancel()
+		return err
+	}
+
+	if CheckGitVersionAtLeast("1.8.5") == nil {
+		lw := new(nulSeparatedAttributeWriter)
+		lw.attributes = make(chan attributeTriple, 5)
+
+		c.stdOut = lw
+	} else {
+		lw := new(lineSeparatedAttributeWriter)
+		lw.attributes = make(chan attributeTriple, 5)
+
+		c.stdOut = lw
+	}
+	return nil
+}
+
+// Run runs the command prepared by Init in c.Repo.Path and blocks until it
+// returns. The callback handed to the command runner closes c.running, which
+// is what unblocks CheckPath. When the command returns, the output writer is
+// closed and the reader's context is cancelled.
+//
+// A failure of the command is returned unless it is merely the consequence of
+// the reader being shut down: the context's own error and "signal: killed"
+// are not reported.
+func (c *CheckAttributeReader) Run() error {
+	defer c.cancel()
+
+	stdErr := new(bytes.Buffer)
+	err := c.cmd.RunInDirTimeoutEnvFullPipelineFunc(c.env, -1, c.Repo.Path, c.stdOut, stdErr, c.stdinReader, func(_ context.Context, _ context.CancelFunc) error {
+		close(c.running)
+		return nil
+	})
+	_ = c.stdOut.Close()
+
+	// c.ctx.Err() is nil while the context is still live, so a genuine command
+	// failure is reported instead of being silently dropped.
+	if err != nil && err != c.ctx.Err() && err.Error() != "signal: killed" {
+		return fmt.Errorf("failed to run attr-check. Error: %w\nStderr: %s", err, stdErr.String())
+	}
+
+	return nil
+}
+
+// CheckPath checks the configured Attributes for the given path.
+//
+// It waits until the command is running (or the context is done), writes the
+// NUL-terminated path to the command's stdin and then reads one result per
+// entry in c.Attributes. The returned map is keyed by attribute name.
+//
+// An error is returned when the context is done, when the path cannot be
+// written (the reader is cancelled in that case), or when the output channel
+// is closed before all attributes were received.
+func (c *CheckAttributeReader) CheckPath(path string) (map[string]string, error) {
+	select {
+	case <-c.ctx.Done():
+		return nil, c.ctx.Err()
+	case <-c.running:
+	}
+
+	// os.File writes are unbuffered, so the path reaches the pipe as soon as
+	// Write returns. No Sync is issued: fsync on a pipe is rejected (EINVAL on
+	// Linux), which would make every lookup fail.
+	if _, err := c.stdinWriter.Write([]byte(path + "\x00")); err != nil {
+		c.cancel()
+		return nil, err
+	}
+
+	rs := make(map[string]string, len(c.Attributes))
+	for range c.Attributes {
+		select {
+		case attr, ok := <-c.stdOut.ReadAttribute():
+			if !ok {
+				// The writer was closed: without this check the loop would
+				// silently collect zero-value triples.
+				if err := c.ctx.Err(); err != nil {
+					return nil, err
+				}
+				return nil, fmt.Errorf("check-attr output closed before all attributes of %q were read", path)
+			}
+			rs[attr.Attribute] = attr.Value
+		case <-c.ctx.Done():
+			return nil, c.ctx.Err()
+		}
+	}
+	return rs, nil
+}
+
+// Close closes the stdin pipe after use and cancels the reader's context.
+//
+// It also closes c.running if that has not happened yet, so that a CheckPath
+// waiting for the command to start is released. Close does not panic when
+// Init was never called or returned early with an error; in that case there
+// is no pipe to close and nil is returned.
+func (c *CheckAttributeReader) Close() error {
+	if c.cancel != nil {
+		defer c.cancel()
+	}
+
+	if c.running != nil {
+		select {
+		case <-c.running:
+		default:
+			close(c.running)
+		}
+	}
+
+	if c.stdinWriter == nil {
+		return nil
+	}
+	return c.stdinWriter.Close()
+}
+
+// attributeWriter is an io.WriteCloser that parses the check-attr output
+// written to it and exposes the parsed results on a channel.
+type attributeWriter interface {
+ io.WriteCloser
+ // ReadAttribute returns the channel parsed attributeTriples are delivered on.
+ ReadAttribute() <-chan attributeTriple
+}
+
+// attributeTriple is one parsed check-attr result: the value of a single
+// attribute for a single file.
+type attributeTriple struct {
+ Filename string
+ Attribute string
+ Value string
+}
+
+// nulSeparatedAttributeWriter parses output in which filename, attribute and
+// value are each terminated by a NUL byte.
+type nulSeparatedAttributeWriter struct {
+ // tmp holds the bytes of the field currently being read; a field may span
+ // several Write calls.
+ tmp []byte
+ // attributes receives every completed triple.
+ attributes chan attributeTriple
+ // working is the triple being assembled.
+ working attributeTriple
+ // pos is the index of the field being read: 0 filename, 1 attribute, 2 value.
+ pos int
+}
+
+// Write consumes NUL-separated check-attr output. Every NUL ends one field;
+// fields arrive in the order filename, attribute, value, and each completed
+// triple is sent on wr.attributes (the send blocks while the channel is
+// full). Bytes after the last NUL are kept in wr.tmp until a later Write
+// completes the field.
+//
+// It always reports len(p) bytes written and a nil error, as io.Writer
+// requires for a write that consumed all of its input.
+func (wr *nulSeparatedAttributeWriter) Write(p []byte) (n int, err error) {
+	// Remember the full length up front: p is re-sliced while parsing.
+	total := len(p)
+
+	for {
+		nulIdx := bytes.IndexByte(p, '\x00')
+		if nulIdx < 0 {
+			break
+		}
+
+		wr.tmp = append(wr.tmp, p[:nulIdx]...)
+		switch wr.pos {
+		case 0:
+			wr.working = attributeTriple{
+				Filename: string(wr.tmp),
+			}
+		case 1:
+			wr.working.Attribute = string(wr.tmp)
+		case 2:
+			wr.working.Value = string(wr.tmp)
+		}
+		wr.tmp = wr.tmp[:0]
+
+		wr.pos++
+		if wr.pos > 2 {
+			wr.attributes <- wr.working
+			wr.pos = 0
+		}
+		p = p[nulIdx+1:]
+	}
+
+	// Whatever is left is the start of a field that is not terminated yet.
+	wr.tmp = append(wr.tmp, p...)
+	return total, nil
+}
+
+// ReadAttribute returns the receive side of the channel that Write delivers
+// completed triples on.
+func (wr *nulSeparatedAttributeWriter) ReadAttribute() <-chan attributeTriple {
+ return wr.attributes
+}
+
+// Close closes the attributes channel and always returns nil. It must be
+// called at most once: closing an already-closed channel panics.
+func (wr *nulSeparatedAttributeWriter) Close() error {
+ close(wr.attributes)
+ return nil
+}
+
+// lineSeparatedAttributeWriter parses output made of newline-terminated
+// "filename: attribute: value" lines.
+type lineSeparatedAttributeWriter struct {
+ // tmp holds the bytes of the line currently being read; a line may span
+ // several Write calls.
+ tmp []byte
+ // attributes receives every parsed triple.
+ attributes chan attributeTriple
+}
+
+// Write consumes line-separated check-attr output. Each complete line of the
+// form `filename: attribute: value` is parsed into an attributeTriple and sent
+// on wr.attributes; data after the last newline is kept in wr.tmp until a
+// later Write completes the line.
+//
+// A filename starting with a double quote is treated as a quoted string and
+// decoded with strconv.UnquoteChar up to the closing `": `.
+//
+// It always reports len(p) as written, also when it returns an error for a
+// line it cannot parse.
+func (wr *lineSeparatedAttributeWriter) Write(p []byte) (n int, err error) {
+ l := len(p)
+
+ nlIdx := bytes.IndexByte(p, '\n')
+ for nlIdx >= 0 {
+ // Complete the pending line with everything up to the newline.
+ wr.tmp = append(wr.tmp, p[:nlIdx]...)
+
+ if len(wr.tmp) == 0 {
+ // This should not happen
+ // (an empty line): skip it and carry on with the rest of p, if any.
+ if len(p) > nlIdx+1 {
+ wr.tmp = wr.tmp[:0]
+ p = p[nlIdx+1:]
+ nlIdx = bytes.IndexByte(p, '\n')
+ continue
+ } else {
+ return l, nil
+ }
+ }
+
+ working := attributeTriple{}
+ if wr.tmp[0] == '"' {
+ // Quoted filename: decode one (possibly escaped) character at a time.
+ sb := new(strings.Builder)
+ remaining := string(wr.tmp[1:])
+ for len(remaining) > 0 {
+ rn, _, tail, err := strconv.UnquoteChar(remaining, '"')
+ if err != nil {
+ // UnquoteChar rejects a bare quote, so the closing `": ` shows up
+ // here; the text after it becomes the new content of wr.tmp.
+ if len(remaining) > 2 && remaining[0] == '"' && remaining[1] == ':' && remaining[2] == ' ' {
+ working.Filename = sb.String()
+ wr.tmp = []byte(remaining[3:])
+ break
+ }
+ return l, fmt.Errorf("unexpected tail %s", string(remaining))
+ }
+ _, _ = sb.WriteRune(rn)
+ remaining = tail
+ }
+ } else {
+ // Unquoted filename: everything before the first ':'.
+ // NOTE(review): a filename that itself contains ':' would be split at
+ // its first colon - confirm such names are always quoted in the input.
+ idx := bytes.IndexByte(wr.tmp, ':')
+ if idx < 0 {
+ return l, fmt.Errorf("unexpected input %s", string(wr.tmp))
+ }
+ working.Filename = string(wr.tmp[:idx])
+ if len(wr.tmp) < idx+2 {
+ return l, fmt.Errorf("unexpected input %s", string(wr.tmp))
+ }
+ // Skip the ':' and the character following it.
+ wr.tmp = wr.tmp[idx+2:]
+ }
+
+ // wr.tmp now starts at the attribute name; the value follows the next ':'.
+ idx := bytes.IndexByte(wr.tmp, ':')
+ if idx < 0 {
+ return l, fmt.Errorf("unexpected input %s", string(wr.tmp))
+ }
+
+ working.Attribute = string(wr.tmp[:idx])
+ if len(wr.tmp) < idx+2 {
+ return l, fmt.Errorf("unexpected input %s", string(wr.tmp))
+ }
+
+ working.Value = string(wr.tmp[idx+2:])
+
+ wr.attributes <- working
+ wr.tmp = wr.tmp[:0]
+ if len(p) > nlIdx+1 {
+ p = p[nlIdx+1:]
+ nlIdx = bytes.IndexByte(p, '\n')
+ continue
+ } else {
+ return l, nil
+ }
+ }
+
+ // No newline left: buffer the partial line for the next call.
+ wr.tmp = append(wr.tmp, p...)
+ return l, nil
+}
+
+// ReadAttribute returns the receive side of the channel that Write delivers
+// parsed triples on.
+func (wr *lineSeparatedAttributeWriter) ReadAttribute() <-chan attributeTriple {
+ return wr.attributes
+}
+
+// Close closes the attributes channel and always returns nil. It must be
+// called at most once: closing an already-closed channel panics.
+func (wr *lineSeparatedAttributeWriter) Close() error {
+ close(wr.attributes)
+ return nil
+}
diff --git a/modules/git/repo_attribute_test.go b/modules/git/repo_attribute_test.go
new file mode 100644
index 0000000000..92d1a78fa4
--- /dev/null
+++ b/modules/git/repo_attribute_test.go
@@ -0,0 +1,159 @@
+// Copyright 2021 The Gitea Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package git
+
+import (
+ "testing"
+ "time"
+
+ "github.com/stretchr/testify/assert"
+)
+
+// Test_nulSeparatedAttributeWriter_ReadAttribute feeds NUL-separated
+// check-attr output to the writer - whole triples, a triple split over several
+// writes, and several triples in one write - and checks what comes out of
+// ReadAttribute.
+func Test_nulSeparatedAttributeWriter_ReadAttribute(t *testing.T) {
+	wr := &nulSeparatedAttributeWriter{
+		attributes: make(chan attributeTriple, 5),
+	}
+
+	// write feeds s to the writer and checks it is reported as fully consumed.
+	write := func(s string) {
+		t.Helper()
+		n, err := wr.Write([]byte(s))
+		assert.NoError(t, err)
+		assert.Equal(t, len(s), n)
+	}
+	// expect reads the next triple, failing instead of hanging if none arrives.
+	expect := func(want attributeTriple) {
+		t.Helper()
+		select {
+		case got := <-wr.ReadAttribute():
+			assert.Equal(t, want, got)
+		case <-time.After(100 * time.Millisecond):
+			assert.Fail(t, "took too long to read an attribute from the list")
+		}
+	}
+	// expectNone checks that no triple becomes available.
+	expectNone := func() {
+		t.Helper()
+		select {
+		case <-wr.ReadAttribute():
+			assert.Fail(t, "There should not be an attribute ready to read")
+		case <-time.After(100 * time.Millisecond):
+		}
+	}
+
+	// A filename containing a quote and a newline must pass through verbatim.
+	testStr := ".gitignore\"\n\x00linguist-vendored\x00unspecified\x00"
+	gitignore := attributeTriple{
+		Filename:  ".gitignore\"\n",
+		Attribute: "linguist-vendored",
+		Value:     "unspecified",
+	}
+
+	write(testStr)
+	expect(gitignore)
+
+	// Write a second attribute again
+	write(testStr)
+	expect(gitignore)
+
+	// Write a partial attribute
+	write("incomplete-file")
+	write("name\x00")
+	expectNone()
+
+	write("attribute\x00")
+	expectNone()
+
+	write("value\x00")
+	expect(attributeTriple{
+		Filename:  "incomplete-filename",
+		Attribute: "attribute",
+		Value:     "value",
+	})
+
+	// Several triples in a single write come out one by one, in order.
+	write("shouldbe.vendor\x00linguist-vendored\x00set\x00shouldbe.vendor\x00linguist-generated\x00unspecified\x00shouldbe.vendor\x00linguist-language\x00unspecified\x00")
+	expect(attributeTriple{
+		Filename:  "shouldbe.vendor",
+		Attribute: "linguist-vendored",
+		Value:     "set",
+	})
+	expect(attributeTriple{
+		Filename:  "shouldbe.vendor",
+		Attribute: "linguist-generated",
+		Value:     "unspecified",
+	})
+	expect(attributeTriple{
+		Filename:  "shouldbe.vendor",
+		Attribute: "linguist-language",
+		Value:     "unspecified",
+	})
+}
+
+// Test_lineSeparatedAttributeWriter_ReadAttribute feeds line-separated
+// check-attr output to the writer - a quoted filename and a line split over
+// several writes - and checks what comes out of ReadAttribute.
+func Test_lineSeparatedAttributeWriter_ReadAttribute(t *testing.T) {
+	wr := &lineSeparatedAttributeWriter{
+		attributes: make(chan attributeTriple, 5),
+	}
+
+	// write feeds s to the writer and checks it is reported as fully consumed.
+	write := func(s string) {
+		t.Helper()
+		n, err := wr.Write([]byte(s))
+		assert.NoError(t, err)
+		assert.Equal(t, len(s), n)
+	}
+	// expect reads the next triple, failing instead of hanging if none arrives.
+	expect := func(want attributeTriple) {
+		t.Helper()
+		select {
+		case got := <-wr.ReadAttribute():
+			assert.Equal(t, want, got)
+		case <-time.After(100 * time.Millisecond):
+			assert.Fail(t, "took too long to read an attribute from the list")
+		}
+	}
+	// expectNone checks that no triple becomes available.
+	expectNone := func() {
+		t.Helper()
+		select {
+		case <-wr.ReadAttribute():
+			assert.Fail(t, "There should not be an attribute ready to read")
+		case <-time.After(100 * time.Millisecond):
+		}
+	}
+
+	// The quoted, escaped filename must be decoded back to its raw form.
+	testStr := `".gitignore\"\n": linguist-vendored: unspecified
+`
+	gitignore := attributeTriple{
+		Filename:  ".gitignore\"\n",
+		Attribute: "linguist-vendored",
+		Value:     "unspecified",
+	}
+
+	write(testStr)
+	expect(gitignore)
+
+	// Write a second attribute again
+	write(testStr)
+	expect(gitignore)
+
+	// Write a partial attribute
+	write("incomplete-file")
+	write("name: ")
+	expectNone()
+
+	write("attribute: ")
+	expectNone()
+
+	write("value\n")
+	expect(attributeTriple{
+		Filename:  "incomplete-filename",
+		Attribute: "attribute",
+		Value:     "value",
+	})
+}
diff --git a/modules/git/repo_index.go b/modules/git/repo_index.go
index 2c351e209f..b301ff2437 100644
--- a/modules/git/repo_index.go
+++ b/modules/git/repo_index.go
@@ -6,11 +6,17 @@ package git
import (
"bytes"
+ "context"
+ "io/ioutil"
+ "os"
"strings"
+
+ "code.gitea.io/gitea/modules/log"
+ "code.gitea.io/gitea/modules/util"
)
// ReadTreeToIndex reads a treeish to the index
-func (repo *Repository) ReadTreeToIndex(treeish string) error {
+func (repo *Repository) ReadTreeToIndex(treeish string, indexFilename ...string) error {
if len(treeish) != 40 {
res, err := NewCommand("rev-parse", "--verify", treeish).RunInDir(repo.Path)
if err != nil {
@@ -24,17 +30,42 @@ func (repo *Repository) ReadTreeToIndex(treeish string) error {
if err != nil {
return err
}
- return repo.readTreeToIndex(id)
+ return repo.readTreeToIndex(id, indexFilename...)
}
-func (repo *Repository) readTreeToIndex(id SHA1) error {
- _, err := NewCommand("read-tree", id.String()).RunInDir(repo.Path)
+func (repo *Repository) readTreeToIndex(id SHA1, indexFilename ...string) error {
+ var env []string
+ if len(indexFilename) > 0 {
+ env = append(os.Environ(), "GIT_INDEX_FILE="+indexFilename[0])
+ }
+ _, err := NewCommand("read-tree", id.String()).RunInDirWithEnv(repo.Path, env)
if err != nil {
return err
}
return nil
}
+// ReadTreeToTemporaryIndex reads a treeish to a temporary index file
+//
+// On success it returns the name of the index file together with a cancel
+// function that removes the file (a removal failure is only logged); the
+// caller is expected to call cancel when done with the index.
+//
+// On failure the temporary file, if it was created, is removed again and the
+// returned cancel is a no-op, so it is always safe to call.
+func (repo *Repository) ReadTreeToTemporaryIndex(treeish string) (filename string, cancel context.CancelFunc, err error) {
+	tmpIndex, err := ioutil.TempFile("", "index")
+	if err != nil {
+		return "", func() {}, err
+	}
+
+	name := tmpIndex.Name()
+	remove := func() {
+		if removeErr := util.Remove(name); removeErr != nil {
+			log.Error("failed to remove tmp index file: %v", removeErr)
+		}
+	}
+
+	// Only the file name is needed from here on: release the descriptor
+	// instead of leaking it for the lifetime of the process.
+	if err = tmpIndex.Close(); err != nil {
+		remove()
+		return "", func() {}, err
+	}
+
+	if err = repo.ReadTreeToIndex(treeish, name); err != nil {
+		remove()
+		return "", func() {}, err
+	}
+	return name, remove, nil
+}
+
// EmptyIndex empties the index
func (repo *Repository) EmptyIndex() error {
_, err := NewCommand("read-tree", "--empty").RunInDir(repo.Path)
diff --git a/modules/git/repo_language_stats_gogit.go b/modules/git/repo_language_stats_gogit.go
index 0a4cfbbc7b..3abce1f077 100644
--- a/modules/git/repo_language_stats_gogit.go
+++ b/modules/git/repo_language_stats_gogit.go
@@ -9,10 +9,12 @@ package git
import (
"bytes"
+ "context"
"io"
"io/ioutil"
"code.gitea.io/gitea/modules/analyze"
+ "code.gitea.io/gitea/modules/log"
"github.com/go-enry/go-enry/v2"
"github.com/go-git/go-git/v5"
@@ -42,9 +44,73 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
return nil, err
}
+ var checker *CheckAttributeReader
+
+ if CheckGitVersionAtLeast("1.7.8") == nil {
+ indexFilename, deleteTemporaryFile, err := repo.ReadTreeToTemporaryIndex(commitID)
+ if err == nil {
+ defer deleteTemporaryFile()
+
+ checker = &CheckAttributeReader{
+ Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language"},
+ Repo: repo,
+ IndexFile: indexFilename,
+ }
+ ctx, cancel := context.WithCancel(DefaultContext)
+ if err := checker.Init(ctx); err != nil {
+ log.Error("Unable to open checker for %s. Error: %v", commitID, err)
+ } else {
+ go func() {
+ err = checker.Run()
+ if err != nil {
+ log.Error("Unable to open checker for %s. Error: %v", commitID, err)
+ cancel()
+ }
+ }()
+ }
+ defer cancel()
+ }
+ }
+
sizes := make(map[string]int64)
err = tree.Files().ForEach(func(f *object.File) error {
- if f.Size == 0 || analyze.IsVendor(f.Name) || enry.IsDotFile(f.Name) ||
+ if f.Size == 0 {
+ return nil
+ }
+
+ notVendored := false
+ notGenerated := false
+
+ if checker != nil {
+ attrs, err := checker.CheckPath(f.Name)
+ if err == nil {
+ if vendored, has := attrs["linguist-vendored"]; has {
+ if vendored == "set" || vendored == "true" {
+ return nil
+ }
+ notVendored = vendored == "false"
+ }
+ if generated, has := attrs["linguist-generated"]; has {
+ if generated == "set" || generated == "true" {
+ return nil
+ }
+ notGenerated = generated == "false"
+ }
+ if language, has := attrs["linguist-language"]; has && language != "unspecified" && language != "" {
+ // group languages, such as Pug -> HTML; SCSS -> CSS
+ group := enry.GetLanguageGroup(language)
+ if len(group) == 0 {
+ language = group
+ }
+
+ sizes[language] += f.Size
+
+ return nil
+ }
+ }
+ }
+
+ if (!notVendored && analyze.IsVendor(f.Name)) || enry.IsDotFile(f.Name) ||
enry.IsDocumentation(f.Name) || enry.IsConfiguration(f.Name) {
return nil
}
@@ -54,7 +120,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
if f.Size <= bigFileSize {
content, _ = readFile(f, fileSizeLimit)
}
- if enry.IsGenerated(f.Name, content) {
+ if !notGenerated && enry.IsGenerated(f.Name, content) {
return nil
}
diff --git a/modules/git/repo_language_stats_nogogit.go b/modules/git/repo_language_stats_nogogit.go
index 7425e2dbb1..c3b96ea841 100644
--- a/modules/git/repo_language_stats_nogogit.go
+++ b/modules/git/repo_language_stats_nogogit.go
@@ -10,6 +10,7 @@ package git
import (
"bufio"
"bytes"
+ "context"
"io"
"math"
@@ -62,13 +63,78 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
return nil, err
}
+ var checker *CheckAttributeReader
+
+ if CheckGitVersionAtLeast("1.7.8") == nil {
+ indexFilename, deleteTemporaryFile, err := repo.ReadTreeToTemporaryIndex(commitID)
+ if err == nil {
+ defer deleteTemporaryFile()
+
+ checker = &CheckAttributeReader{
+ Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language"},
+ Repo: repo,
+ IndexFile: indexFilename,
+ }
+ ctx, cancel := context.WithCancel(DefaultContext)
+ if err := checker.Init(ctx); err != nil {
+ log.Error("Unable to open checker for %s. Error: %v", commitID, err)
+ } else {
+ go func() {
+ err = checker.Run()
+ if err != nil {
+ log.Error("Unable to open checker for %s. Error: %v", commitID, err)
+ cancel()
+ }
+ }()
+ }
+ defer cancel()
+ }
+ }
+
contentBuf := bytes.Buffer{}
var content []byte
sizes := make(map[string]int64)
for _, f := range entries {
contentBuf.Reset()
content = contentBuf.Bytes()
- if f.Size() == 0 || analyze.IsVendor(f.Name()) || enry.IsDotFile(f.Name()) ||
+
+ if f.Size() == 0 {
+ continue
+ }
+
+ notVendored := false
+ notGenerated := false
+
+ if checker != nil {
+ attrs, err := checker.CheckPath(f.Name())
+ if err == nil {
+ if vendored, has := attrs["linguist-vendored"]; has {
+ if vendored == "set" || vendored == "true" {
+ continue
+ }
+ notVendored = vendored == "false"
+ }
+ if generated, has := attrs["linguist-generated"]; has {
+ if generated == "set" || generated == "true" {
+ continue
+ }
+ notGenerated = generated == "false"
+ }
+ if language, has := attrs["linguist-language"]; has && language != "unspecified" && language != "" {
+ // group languages, such as Pug -> HTML; SCSS -> CSS
+ group := enry.GetLanguageGroup(language)
+ if len(group) == 0 {
+ language = group
+ }
+
+ sizes[language] += f.Size()
+
+ continue
+ }
+ }
+ }
+
+ if (!notVendored && analyze.IsVendor(f.Name())) || enry.IsDotFile(f.Name()) ||
enry.IsDocumentation(f.Name()) || enry.IsConfiguration(f.Name()) {
continue
}
@@ -102,11 +168,10 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
return nil, err
}
}
- if enry.IsGenerated(f.Name(), content) {
+ if !notGenerated && enry.IsGenerated(f.Name(), content) {
continue
}
- // TODO: Use .gitattributes file for linguist overrides
// FIXME: Why can't we split this and the IsGenerated tests to avoid reading the blob unless absolutely necessary?
// - eg. do the all the detection tests using filename first before reading content.
language := analyze.GetCodeLanguage(f.Name(), content)