summaryrefslogtreecommitdiffstats
path: root/modules/git
diff options
context:
space:
mode:
authorzeripath <art27@cantab.net>2021-09-09 21:13:36 +0100
committerGitHub <noreply@github.com>2021-09-09 21:13:36 +0100
commit248b96d8a38b2d52a73d7091a82f688f4688295e (patch)
tree43dfd0341cfbd86d576c1de073f8ad060f84b60b /modules/git
parentb83b4fbef9df7bb4beef5684b18fe2ef210c42a2 (diff)
downloadgitea-248b96d8a38b2d52a73d7091a82f688f4688295e.tar.gz
gitea-248b96d8a38b2d52a73d7091a82f688f4688295e.zip
Use git attributes to determine generated and vendored status for language stats and diffs (#16773)
Replaces #16262 Replaces #16250 Replaces #14833 This PR first implements a `git check-attr` pipe reader - using `git check-attr --stdin -z --cached` - taking account of the change in the output format in git 1.8.5 and creates a helper function to read a tree into a temporary index file for that pipe reader. It then wires this in to the language stats helper and into the git diff generation. Files which are marked generated will be folded by default. Fixes #14786 Fixes #12653
Diffstat (limited to 'modules/git')
-rw-r--r--modules/git/repo_attribute.go285
-rw-r--r--modules/git/repo_attribute_test.go159
-rw-r--r--modules/git/repo_index.go39
-rw-r--r--modules/git/repo_language_stats_gogit.go70
-rw-r--r--modules/git/repo_language_stats_nogogit.go71
5 files changed, 612 insertions, 12 deletions
diff --git a/modules/git/repo_attribute.go b/modules/git/repo_attribute.go
index aa5e4c10e7..0bd7d7e49c 100644
--- a/modules/git/repo_attribute.go
+++ b/modules/git/repo_attribute.go
@@ -6,7 +6,12 @@ package git
import (
"bytes"
+ "context"
"fmt"
+ "io"
+ "os"
+ "strconv"
+ "strings"
)
// CheckAttributeOpts represents the possible options to CheckAttribute
@@ -21,7 +26,7 @@ type CheckAttributeOpts struct {
func (repo *Repository) CheckAttribute(opts CheckAttributeOpts) (map[string]map[string]string, error) {
err := LoadGitVersion()
if err != nil {
- return nil, fmt.Errorf("Git version missing: %v", err)
+ return nil, fmt.Errorf("git version missing: %v", err)
}
stdOut := new(bytes.Buffer)
@@ -55,13 +60,14 @@ func (repo *Repository) CheckAttribute(opts CheckAttributeOpts) (map[string]map[
cmd := NewCommand(cmdArgs...)
if err := cmd.RunInDirPipeline(repo.Path, stdOut, stdErr); err != nil {
- return nil, fmt.Errorf("Failed to run check-attr: %v\n%s\n%s", err, stdOut.String(), stdErr.String())
+ return nil, fmt.Errorf("failed to run check-attr: %v\n%s\n%s", err, stdOut.String(), stdErr.String())
}
+ // FIXME: This is incorrect on versions < 1.8.5
fields := bytes.Split(stdOut.Bytes(), []byte{'\000'})
if len(fields)%3 != 1 {
- return nil, fmt.Errorf("Wrong number of fields in return from check-attr")
+ return nil, fmt.Errorf("wrong number of fields in return from check-attr")
}
var name2attribute2info = make(map[string]map[string]string)
@@ -80,3 +86,276 @@ func (repo *Repository) CheckAttribute(opts CheckAttributeOpts) (map[string]map[
return name2attribute2info, nil
}
+
+// CheckAttributeReader provides a reader for check-attribute content that can be long running
+//
+// It drives a single `git check-attr --stdin -z` process: paths are written to
+// the process' stdin by CheckPath and the parsed answers are read back from stdOut.
+type CheckAttributeReader struct {
+ // params
+ // Attributes are the attribute names passed to check-attr; Init fails when it is empty.
+ Attributes []string
+ // Repo is the repository whose Path the command is run in.
+ Repo *Repository
+ // IndexFile, when non-empty and git >= 1.7.8, adds --cached and is passed as GIT_INDEX_FILE.
+ IndexFile string
+ // WorkTree, when non-empty and git >= 1.7.8, is passed as GIT_WORK_TREE.
+ WorkTree string
+
+ // stdinReader and stdinWriter are the two ends of the os.Pipe created by Init;
+ // the reader is handed to the command as stdin, the writer is used by CheckPath.
+ stdinReader io.ReadCloser
+ stdinWriter *os.File
+ // stdOut receives the command output and turns it into attributeTriples.
+ stdOut attributeWriter
+ // cmd is the prepared check-attr command and env its extra environment.
+ cmd *Command
+ env []string
+ // ctx and cancel are derived in Init from the context passed to it.
+ ctx context.Context
+ cancel context.CancelFunc
+ // running is closed once the command has been started (or by Close).
+ running chan struct{}
+}
+
+// Init prepares the `git check-attr --stdin -z` command, its environment, the
+// stdin pipe and the output parser. It must be called before Run, CheckPath or Close.
+//
+// The reader's context is derived from ctx. On every failure path that context
+// is cancelled before returning, so a later CheckPath returns an error instead
+// of blocking or dereferencing a nil context.
+//
+// It returns an error when no Attributes were provided or when the stdin pipe
+// cannot be created.
+func (c *CheckAttributeReader) Init(ctx context.Context) error {
+	c.running = make(chan struct{})
+	c.ctx, c.cancel = context.WithCancel(ctx)
+	c.env = nil
+
+	cmdArgs := []string{"check-attr", "--stdin", "-z"}
+
+	if len(c.IndexFile) > 0 && CheckGitVersionAtLeast("1.7.8") == nil {
+		cmdArgs = append(cmdArgs, "--cached")
+		c.env = append(c.env, "GIT_INDEX_FILE="+c.IndexFile)
+	}
+
+	if len(c.WorkTree) > 0 && CheckGitVersionAtLeast("1.7.8") == nil {
+		// Append rather than assign: assigning here would silently drop the
+		// GIT_INDEX_FILE entry added above when both options are set.
+		c.env = append(c.env, "GIT_WORK_TREE="+c.WorkTree)
+	}
+
+	if len(c.Attributes) == 0 {
+		// Leave an already-closed output behind so readers never block on it.
+		lw := new(nulSeparatedAttributeWriter)
+		lw.attributes = make(chan attributeTriple)
+
+		c.stdOut = lw
+		c.stdOut.Close()
+		c.cancel()
+		return fmt.Errorf("no provided Attributes to check")
+	}
+	cmdArgs = append(cmdArgs, c.Attributes...)
+	cmdArgs = append(cmdArgs, "--")
+
+	c.cmd = NewCommandContext(c.ctx, cmdArgs...)
+
+	var err error
+	c.stdinReader, c.stdinWriter, err = os.Pipe()
+	if err != nil {
+		c.cancel()
+		return err
+	}
+
+	// git >= 1.8.5 honours -z on output (NUL separated fields); older versions
+	// still emit one "path: attribute: value" line per result.
+	if CheckGitVersionAtLeast("1.8.5") == nil {
+		lw := new(nulSeparatedAttributeWriter)
+		lw.attributes = make(chan attributeTriple, 5)
+
+		c.stdOut = lw
+	} else {
+		lw := new(lineSeparatedAttributeWriter)
+		lw.attributes = make(chan attributeTriple, 5)
+
+		c.stdOut = lw
+	}
+	return nil
+}
+
+// Run starts the check-attr command prepared by Init and blocks until it exits.
+//
+// Once the process has been started the running channel is closed, which
+// unblocks CheckPath. When the command returns, the read end of the stdin pipe
+// and the output writer are closed and the reader's context is cancelled.
+//
+// An error is returned only for genuine failures: errors that are the result of
+// the context being cancelled, or of the process being killed, are expected
+// during shutdown and are not reported.
+func (c *CheckAttributeReader) Run() error {
+	defer c.cancel()
+
+	stdErr := new(bytes.Buffer)
+	err := c.cmd.RunInDirTimeoutEnvFullPipelineFunc(c.env, -1, c.Repo.Path, c.stdOut, stdErr, c.stdinReader, func(_ context.Context, _ context.CancelFunc) error {
+		// Close may already have closed the channel; closing it twice would panic.
+		select {
+		case <-c.running:
+		default:
+			close(c.running)
+		}
+		return nil
+	})
+
+	// The child owned its own copy of the read end; release ours so the
+	// descriptor is not leaked and late writers see a broken pipe.
+	_ = c.stdinReader.Close()
+	_ = c.stdOut.Close()
+
+	if err != nil && c.ctx.Err() == nil && err.Error() != "signal: killed" {
+		return fmt.Errorf("failed to run attr-check. Error: %w\nStderr: %s", err, stdErr.String())
+	}
+
+	return nil
+}
+
+// CheckPath returns the values of the configured Attributes for the given path.
+//
+// It waits until the command has been started (or the context is done), writes
+// the NUL terminated path to the command's stdin and then reads one result per
+// requested attribute. The returned map is keyed by attribute name.
+//
+// An error is returned when the context is done, when writing to the command
+// fails, or when the command output is closed before all results arrived.
+func (c *CheckAttributeReader) CheckPath(path string) (map[string]string, error) {
+	select {
+	case <-c.ctx.Done():
+		return nil, c.ctx.Err()
+	case <-c.running:
+	}
+
+	// Writes to an os.Pipe go straight to the kernel, so no flush is required.
+	// (fsync is not supported on pipes and fails with EINVAL on Linux, so
+	// calling Sync here would make every lookup fail.)
+	if _, err := c.stdinWriter.Write([]byte(path + "\x00")); err != nil {
+		c.cancel()
+		return nil, err
+	}
+
+	rs := make(map[string]string, len(c.Attributes))
+	for range c.Attributes {
+		select {
+		case attr, ok := <-c.stdOut.ReadAttribute():
+			if !ok {
+				// The command has exited; do not hand back zero-value results.
+				if err := c.ctx.Err(); err != nil {
+					return nil, err
+				}
+				return nil, fmt.Errorf("check-attr output closed before all attributes were read for %s", path)
+			}
+			rs[attr.Attribute] = attr.Value
+		case <-c.ctx.Done():
+			return nil, c.ctx.Err()
+		}
+	}
+	return rs, nil
+}
+
+// Close closes the stdin pipe after use and cancels the reader's context.
+//
+// It also closes the running channel if the command never got started, so that
+// nothing stays blocked waiting for it. Close is safe to call on a reader whose
+// Init failed or was never called; in that case the error from closing the
+// missing pipe is returned.
+func (c *CheckAttributeReader) Close() error {
+	if c.running != nil {
+		select {
+		case <-c.running:
+		default:
+			close(c.running)
+		}
+	}
+
+	err := c.stdinWriter.Close()
+	if c.cancel != nil {
+		c.cancel()
+	}
+	return err
+}
+
+// attributeWriter is the sink given to the check-attr command as its output:
+// raw output is pushed in through Write and the parsed results are
+// made available on the channel returned by ReadAttribute.
+type attributeWriter interface {
+ io.WriteCloser
+ ReadAttribute() <-chan attributeTriple
+}
+
+// attributeTriple is a single check-attr result: the value of one attribute
+// for one file.
+type attributeTriple struct {
+	Filename  string
+	Attribute string
+	Value     string
+}
+
+// nulSeparatedAttributeWriter parses NUL separated check-attr output
+// (filename NUL attribute NUL value NUL ...) written to it in arbitrary
+// chunks and emits one attributeTriple per completed record.
+type nulSeparatedAttributeWriter struct {
+	tmp        []byte               // bytes of the field currently being assembled
+	attributes chan attributeTriple // completed records
+	working    attributeTriple      // record currently being assembled
+	pos        int                  // index (0..2) of the field being assembled
+}
+
+// Write consumes p, which may contain any number of complete or partial
+// fields. Partial fields are buffered until their terminating NUL arrives.
+// Each completed record is sent on the attributes channel, so Write blocks
+// while that channel is full.
+//
+// It always reports all of p as written, as required of an io.Writer that
+// returns a nil error.
+func (wr *nulSeparatedAttributeWriter) Write(p []byte) (n int, err error) {
+	n = len(p)
+
+	for {
+		nulIdx := bytes.IndexByte(p, '\x00')
+		if nulIdx < 0 {
+			break
+		}
+
+		wr.tmp = append(wr.tmp, p[:nulIdx]...)
+		p = p[nulIdx+1:]
+
+		switch wr.pos {
+		case 0:
+			wr.working = attributeTriple{
+				Filename: string(wr.tmp),
+			}
+		case 1:
+			wr.working.Attribute = string(wr.tmp)
+		case 2:
+			wr.working.Value = string(wr.tmp)
+		}
+		wr.tmp = wr.tmp[:0]
+
+		wr.pos++
+		if wr.pos > 2 {
+			wr.attributes <- wr.working
+			wr.pos = 0
+		}
+	}
+
+	// Whatever follows the last NUL is the start of the next field.
+	wr.tmp = append(wr.tmp, p...)
+	return n, nil
+}
+
+// ReadAttribute returns the channel on which completed records are delivered.
+func (wr *nulSeparatedAttributeWriter) ReadAttribute() <-chan attributeTriple {
+	return wr.attributes
+}
+
+// Close closes the record channel; it must only be called once and no Write
+// may follow it.
+func (wr *nulSeparatedAttributeWriter) Close() error {
+	close(wr.attributes)
+	return nil
+}
+
+// lineSeparatedAttributeWriter parses check-attr output that consists of one
+// `<filename>: <attribute>: <value>` line per result, where the filename may be
+// a double-quoted string with escapes. Init selects it when git is older than 1.8.5.
+type lineSeparatedAttributeWriter struct {
+ tmp []byte // bytes of the line currently being assembled
+ attributes chan attributeTriple // parsed results
+}
+
+// Write consumes p line by line. Bytes after the last newline are buffered in
+// tmp until the rest of the line arrives. Every parsed line is sent on the
+// attributes channel, so Write blocks while that channel is full.
+//
+// The returned count is always the full length of p; a non-nil error is
+// returned when a line cannot be parsed.
+func (wr *lineSeparatedAttributeWriter) Write(p []byte) (n int, err error) {
+ l := len(p)
+
+ nlIdx := bytes.IndexByte(p, '\n')
+ for nlIdx >= 0 {
+ wr.tmp = append(wr.tmp, p[:nlIdx]...)
+
+ if len(wr.tmp) == 0 {
+ // This should not happen
+ // (an empty line: skip it and carry on with whatever follows)
+ if len(p) > nlIdx+1 {
+ wr.tmp = wr.tmp[:0]
+ p = p[nlIdx+1:]
+ nlIdx = bytes.IndexByte(p, '\n')
+ continue
+ } else {
+ return l, nil
+ }
+ }
+
+ working := attributeTriple{}
+ if wr.tmp[0] == '"' {
+ // Quoted filename: decode it one (possibly escaped) character at a time.
+ sb := new(strings.Builder)
+ remaining := string(wr.tmp[1:])
+ for len(remaining) > 0 {
+ rn, _, tail, err := strconv.UnquoteChar(remaining, '"')
+ if err != nil {
+ // UnquoteChar rejects an unescaped quote; that is the end of the
+ // filename provided it is followed by the ": " separator.
+ if len(remaining) > 2 && remaining[0] == '"' && remaining[1] == ':' && remaining[2] == ' ' {
+ working.Filename = sb.String()
+ wr.tmp = []byte(remaining[3:])
+ break
+ }
+ return l, fmt.Errorf("unexpected tail %s", string(remaining))
+ }
+ _, _ = sb.WriteRune(rn)
+ remaining = tail
+ }
+ } else {
+ // Unquoted filename: everything up to the first ':'.
+ idx := bytes.IndexByte(wr.tmp, ':')
+ if idx < 0 {
+ return l, fmt.Errorf("unexpected input %s", string(wr.tmp))
+ }
+ working.Filename = string(wr.tmp[:idx])
+ if len(wr.tmp) < idx+2 {
+ return l, fmt.Errorf("unexpected input %s", string(wr.tmp))
+ }
+ // Skip the ':' and the character after it.
+ wr.tmp = wr.tmp[idx+2:]
+ }
+
+ // What is left in tmp is "<attribute>: <value>".
+ idx := bytes.IndexByte(wr.tmp, ':')
+ if idx < 0 {
+ return l, fmt.Errorf("unexpected input %s", string(wr.tmp))
+ }
+
+ working.Attribute = string(wr.tmp[:idx])
+ if len(wr.tmp) < idx+2 {
+ return l, fmt.Errorf("unexpected input %s", string(wr.tmp))
+ }
+
+ working.Value = string(wr.tmp[idx+2:])
+
+ wr.attributes <- working
+ wr.tmp = wr.tmp[:0]
+ if len(p) > nlIdx+1 {
+ p = p[nlIdx+1:]
+ nlIdx = bytes.IndexByte(p, '\n')
+ continue
+ } else {
+ return l, nil
+ }
+ }
+
+ // No newline left: keep the partial line for the next Write.
+ wr.tmp = append(wr.tmp, p...)
+ return l, nil
+}
+
+// ReadAttribute returns the channel on which parsed results are delivered.
+func (wr *lineSeparatedAttributeWriter) ReadAttribute() <-chan attributeTriple {
+ return wr.attributes
+}
+
+// Close closes the results channel.
+func (wr *lineSeparatedAttributeWriter) Close() error {
+ close(wr.attributes)
+ return nil
+}
diff --git a/modules/git/repo_attribute_test.go b/modules/git/repo_attribute_test.go
new file mode 100644
index 0000000000..92d1a78fa4
--- /dev/null
+++ b/modules/git/repo_attribute_test.go
@@ -0,0 +1,159 @@
+// Copyright 2021 The Gitea Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package git
+
+import (
+ "testing"
+ "time"
+
+ "github.com/stretchr/testify/assert"
+)
+
+// Test_nulSeparatedAttributeWriter_ReadAttribute checks that NUL separated
+// check-attr output is parsed into triples, both when a record arrives in a
+// single Write and when it is split across several Writes.
+func Test_nulSeparatedAttributeWriter_ReadAttribute(t *testing.T) {
+	wr := &nulSeparatedAttributeWriter{
+		attributes: make(chan attributeTriple, 5),
+	}
+
+	// With -z the filename is not quoted, so quotes and newlines pass through verbatim.
+	testStr := ".gitignore\"\n\x00linguist-vendored\x00unspecified\x00"
+
+	n, err := wr.Write([]byte(testStr))
+
+	assert.Equal(t, len(testStr), n)
+	assert.NoError(t, err)
+	select {
+	case attr := <-wr.ReadAttribute():
+		assert.Equal(t, ".gitignore\"\n", attr.Filename)
+		assert.Equal(t, "linguist-vendored", attr.Attribute)
+		assert.Equal(t, "unspecified", attr.Value)
+	case <-time.After(100 * time.Millisecond):
+		assert.Fail(t, "took too long to read an attribute from the list")
+	}
+	// Write a second attribute again
+	n, err = wr.Write([]byte(testStr))
+
+	assert.Equal(t, len(testStr), n)
+	assert.NoError(t, err)
+
+	select {
+	case attr := <-wr.ReadAttribute():
+		assert.Equal(t, ".gitignore\"\n", attr.Filename)
+		assert.Equal(t, "linguist-vendored", attr.Attribute)
+		assert.Equal(t, "unspecified", attr.Value)
+	case <-time.After(100 * time.Millisecond):
+		assert.Fail(t, "took too long to read an attribute from the list")
+	}
+
+	// Write a partial attribute: nothing may be emitted until all three
+	// fields of the record have been terminated.
+	_, err = wr.Write([]byte("incomplete-file"))
+	assert.NoError(t, err)
+	_, err = wr.Write([]byte("name\x00"))
+	assert.NoError(t, err)
+
+	select {
+	case <-wr.ReadAttribute():
+		assert.Fail(t, "There should not be an attribute ready to read")
+	case <-time.After(100 * time.Millisecond):
+	}
+	_, err = wr.Write([]byte("attribute\x00"))
+	assert.NoError(t, err)
+	select {
+	case <-wr.ReadAttribute():
+		assert.Fail(t, "There should not be an attribute ready to read")
+	case <-time.After(100 * time.Millisecond):
+	}
+
+	_, err = wr.Write([]byte("value\x00"))
+	assert.NoError(t, err)
+
+	attr := <-wr.ReadAttribute()
+	assert.Equal(t, "incomplete-filename", attr.Filename)
+	assert.Equal(t, "attribute", attr.Attribute)
+	assert.Equal(t, "value", attr.Value)
+
+	// Several records in one Write are emitted in order.
+	_, err = wr.Write([]byte("shouldbe.vendor\x00linguist-vendored\x00set\x00shouldbe.vendor\x00linguist-generated\x00unspecified\x00shouldbe.vendor\x00linguist-language\x00unspecified\x00"))
+	assert.NoError(t, err)
+	attr = <-wr.ReadAttribute()
+	assert.EqualValues(t, attributeTriple{
+		Filename:  "shouldbe.vendor",
+		Attribute: "linguist-vendored",
+		Value:     "set",
+	}, attr)
+	attr = <-wr.ReadAttribute()
+	assert.EqualValues(t, attributeTriple{
+		Filename:  "shouldbe.vendor",
+		Attribute: "linguist-generated",
+		Value:     "unspecified",
+	}, attr)
+	attr = <-wr.ReadAttribute()
+	assert.EqualValues(t, attributeTriple{
+		Filename:  "shouldbe.vendor",
+		Attribute: "linguist-language",
+		Value:     "unspecified",
+	}, attr)
+}
+
+// Test_lineSeparatedAttributeWriter_ReadAttribute checks that line separated
+// check-attr output, including a quoted filename with escapes, is parsed into
+// triples, and that a line split across several Writes is only emitted once
+// its newline arrives.
+func Test_lineSeparatedAttributeWriter_ReadAttribute(t *testing.T) {
+	wr := &lineSeparatedAttributeWriter{
+		attributes: make(chan attributeTriple, 5),
+	}
+
+	testStr := `".gitignore\"\n": linguist-vendored: unspecified
+`
+	n, err := wr.Write([]byte(testStr))
+
+	assert.Equal(t, len(testStr), n)
+	assert.NoError(t, err)
+
+	select {
+	case attr := <-wr.ReadAttribute():
+		assert.Equal(t, ".gitignore\"\n", attr.Filename)
+		assert.Equal(t, "linguist-vendored", attr.Attribute)
+		assert.Equal(t, "unspecified", attr.Value)
+	case <-time.After(100 * time.Millisecond):
+		assert.Fail(t, "took too long to read an attribute from the list")
+	}
+
+	// Write a second attribute again
+	n, err = wr.Write([]byte(testStr))
+
+	assert.Equal(t, len(testStr), n)
+	assert.NoError(t, err)
+
+	select {
+	case attr := <-wr.ReadAttribute():
+		assert.Equal(t, ".gitignore\"\n", attr.Filename)
+		assert.Equal(t, "linguist-vendored", attr.Attribute)
+		assert.Equal(t, "unspecified", attr.Value)
+	case <-time.After(100 * time.Millisecond):
+		assert.Fail(t, "took too long to read an attribute from the list")
+	}
+
+	// Write a partial attribute: nothing may be emitted before the newline.
+	_, err = wr.Write([]byte("incomplete-file"))
+	assert.NoError(t, err)
+	_, err = wr.Write([]byte("name: "))
+	assert.NoError(t, err)
+	select {
+	case <-wr.ReadAttribute():
+		assert.Fail(t, "There should not be an attribute ready to read")
+	case <-time.After(100 * time.Millisecond):
+	}
+	_, err = wr.Write([]byte("attribute: "))
+	assert.NoError(t, err)
+	select {
+	case <-wr.ReadAttribute():
+		assert.Fail(t, "There should not be an attribute ready to read")
+	case <-time.After(100 * time.Millisecond):
+	}
+	_, err = wr.Write([]byte("value\n"))
+	assert.NoError(t, err)
+	attr := <-wr.ReadAttribute()
+	assert.Equal(t, "incomplete-filename", attr.Filename)
+	assert.Equal(t, "attribute", attr.Attribute)
+	assert.Equal(t, "value", attr.Value)
+}
diff --git a/modules/git/repo_index.go b/modules/git/repo_index.go
index 2c351e209f..b301ff2437 100644
--- a/modules/git/repo_index.go
+++ b/modules/git/repo_index.go
@@ -6,11 +6,17 @@ package git
import (
"bytes"
+ "context"
+ "io/ioutil"
+ "os"
"strings"
+
+ "code.gitea.io/gitea/modules/log"
+ "code.gitea.io/gitea/modules/util"
)
// ReadTreeToIndex reads a treeish to the index
-func (repo *Repository) ReadTreeToIndex(treeish string) error {
+func (repo *Repository) ReadTreeToIndex(treeish string, indexFilename ...string) error {
if len(treeish) != 40 {
res, err := NewCommand("rev-parse", "--verify", treeish).RunInDir(repo.Path)
if err != nil {
@@ -24,17 +30,42 @@ func (repo *Repository) ReadTreeToIndex(treeish string) error {
if err != nil {
return err
}
- return repo.readTreeToIndex(id)
+ return repo.readTreeToIndex(id, indexFilename...)
}
-func (repo *Repository) readTreeToIndex(id SHA1) error {
- _, err := NewCommand("read-tree", id.String()).RunInDir(repo.Path)
+// readTreeToIndex runs `git read-tree <id>` in the repository directory. When an
+// index filename is given, the first one is passed to git as GIT_INDEX_FILE on
+// top of the current process environment; any further names are ignored.
+func (repo *Repository) readTreeToIndex(id SHA1, indexFilename ...string) error {
+ var env []string
+ if len(indexFilename) > 0 {
+ env = append(os.Environ(), "GIT_INDEX_FILE="+indexFilename[0])
+ }
+ _, err := NewCommand("read-tree", id.String()).RunInDirWithEnv(repo.Path, env)
if err != nil {
return err
}
return nil
}
+// ReadTreeToTemporaryIndex reads a treeish to a temporary index file
+//
+// It returns the name of the temporary index file and a cancel function that
+// removes that file; the caller must call cancel once the index is no longer
+// needed. On error the returned filename is empty, the temporary file has
+// already been removed and cancel is a no-op that is safe to call.
+func (repo *Repository) ReadTreeToTemporaryIndex(treeish string) (filename string, cancel context.CancelFunc, err error) {
+	tmpIndex, err := ioutil.TempFile("", "index")
+	if err != nil {
+		return "", func() {}, err
+	}
+	filename = tmpIndex.Name()
+	cancel = func() {
+		err := util.Remove(filename)
+		if err != nil {
+			log.Error("failed to remove tmp index file: %v", err)
+		}
+	}
+
+	// Only the path is needed from here on: git writes the index itself, so
+	// release our descriptor instead of leaking it for the process lifetime.
+	if err = tmpIndex.Close(); err != nil {
+		cancel()
+		return "", func() {}, err
+	}
+
+	if err = repo.ReadTreeToIndex(treeish, filename); err != nil {
+		cancel()
+		return "", func() {}, err
+	}
+	return filename, cancel, nil
+}
+
// EmptyIndex empties the index
func (repo *Repository) EmptyIndex() error {
_, err := NewCommand("read-tree", "--empty").RunInDir(repo.Path)
diff --git a/modules/git/repo_language_stats_gogit.go b/modules/git/repo_language_stats_gogit.go
index 0a4cfbbc7b..3abce1f077 100644
--- a/modules/git/repo_language_stats_gogit.go
+++ b/modules/git/repo_language_stats_gogit.go
@@ -9,10 +9,12 @@ package git
import (
"bytes"
+ "context"
"io"
"io/ioutil"
"code.gitea.io/gitea/modules/analyze"
+ "code.gitea.io/gitea/modules/log"
"github.com/go-enry/go-enry/v2"
"github.com/go-git/go-git/v5"
@@ -42,9 +44,73 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
return nil, err
}
+ var checker *CheckAttributeReader
+
+ if CheckGitVersionAtLeast("1.7.8") == nil {
+ indexFilename, deleteTemporaryFile, err := repo.ReadTreeToTemporaryIndex(commitID)
+ if err == nil {
+ defer deleteTemporaryFile()
+
+ checker = &CheckAttributeReader{
+ Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language"},
+ Repo: repo,
+ IndexFile: indexFilename,
+ }
+ ctx, cancel := context.WithCancel(DefaultContext)
+ if err := checker.Init(ctx); err != nil {
+ log.Error("Unable to open checker for %s. Error: %v", commitID, err)
+ } else {
+ go func() {
+ err = checker.Run()
+ if err != nil {
+ log.Error("Unable to open checker for %s. Error: %v", commitID, err)
+ cancel()
+ }
+ }()
+ }
+ defer cancel()
+ }
+ }
+
sizes := make(map[string]int64)
err = tree.Files().ForEach(func(f *object.File) error {
- if f.Size == 0 || analyze.IsVendor(f.Name) || enry.IsDotFile(f.Name) ||
+ if f.Size == 0 {
+ return nil
+ }
+
+ notVendored := false
+ notGenerated := false
+
+ if checker != nil {
+ attrs, err := checker.CheckPath(f.Name)
+ if err == nil {
+ if vendored, has := attrs["linguist-vendored"]; has {
+ if vendored == "set" || vendored == "true" {
+ return nil
+ }
+ notVendored = vendored == "false"
+ }
+ if generated, has := attrs["linguist-generated"]; has {
+ if generated == "set" || generated == "true" {
+ return nil
+ }
+ notGenerated = generated == "false"
+ }
+				// An explicit linguist-language attribute overrides detection entirely.
+				if language, has := attrs["linguist-language"]; has && language != "unspecified" && language != "" {
+					// group languages, such as Pug -> HTML; SCSS -> CSS
+					// GetLanguageGroup returns "" when the language has no group, in
+					// which case the language itself must be kept.
+					group := enry.GetLanguageGroup(language)
+					if len(group) != 0 {
+						language = group
+					}
+
+					sizes[language] += f.Size
+
+					return nil
+				}
+ }
+ }
+
+ if (!notVendored && analyze.IsVendor(f.Name)) || enry.IsDotFile(f.Name) ||
enry.IsDocumentation(f.Name) || enry.IsConfiguration(f.Name) {
return nil
}
@@ -54,7 +120,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
if f.Size <= bigFileSize {
content, _ = readFile(f, fileSizeLimit)
}
- if enry.IsGenerated(f.Name, content) {
+ if !notGenerated && enry.IsGenerated(f.Name, content) {
return nil
}
diff --git a/modules/git/repo_language_stats_nogogit.go b/modules/git/repo_language_stats_nogogit.go
index 7425e2dbb1..c3b96ea841 100644
--- a/modules/git/repo_language_stats_nogogit.go
+++ b/modules/git/repo_language_stats_nogogit.go
@@ -10,6 +10,7 @@ package git
import (
"bufio"
"bytes"
+ "context"
"io"
"math"
@@ -62,13 +63,78 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
return nil, err
}
+ var checker *CheckAttributeReader
+
+ if CheckGitVersionAtLeast("1.7.8") == nil {
+ indexFilename, deleteTemporaryFile, err := repo.ReadTreeToTemporaryIndex(commitID)
+ if err == nil {
+ defer deleteTemporaryFile()
+
+ checker = &CheckAttributeReader{
+ Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language"},
+ Repo: repo,
+ IndexFile: indexFilename,
+ }
+ ctx, cancel := context.WithCancel(DefaultContext)
+ if err := checker.Init(ctx); err != nil {
+ log.Error("Unable to open checker for %s. Error: %v", commitID, err)
+ } else {
+ go func() {
+ err = checker.Run()
+ if err != nil {
+ log.Error("Unable to open checker for %s. Error: %v", commitID, err)
+ cancel()
+ }
+ }()
+ }
+ defer cancel()
+ }
+ }
+
contentBuf := bytes.Buffer{}
var content []byte
sizes := make(map[string]int64)
for _, f := range entries {
contentBuf.Reset()
content = contentBuf.Bytes()
- if f.Size() == 0 || analyze.IsVendor(f.Name()) || enry.IsDotFile(f.Name()) ||
+
+ if f.Size() == 0 {
+ continue
+ }
+
+ notVendored := false
+ notGenerated := false
+
+ if checker != nil {
+ attrs, err := checker.CheckPath(f.Name())
+ if err == nil {
+ if vendored, has := attrs["linguist-vendored"]; has {
+ if vendored == "set" || vendored == "true" {
+ continue
+ }
+ notVendored = vendored == "false"
+ }
+ if generated, has := attrs["linguist-generated"]; has {
+ if generated == "set" || generated == "true" {
+ continue
+ }
+ notGenerated = generated == "false"
+ }
+				// An explicit linguist-language attribute overrides detection entirely.
+				if language, has := attrs["linguist-language"]; has && language != "unspecified" && language != "" {
+					// group languages, such as Pug -> HTML; SCSS -> CSS
+					// GetLanguageGroup returns "" when the language has no group, in
+					// which case the language itself must be kept.
+					group := enry.GetLanguageGroup(language)
+					if len(group) != 0 {
+						language = group
+					}
+
+					sizes[language] += f.Size()
+
+					continue
+				}
+ }
+ }
+
+ if (!notVendored && analyze.IsVendor(f.Name())) || enry.IsDotFile(f.Name()) ||
enry.IsDocumentation(f.Name()) || enry.IsConfiguration(f.Name()) {
continue
}
@@ -102,11 +168,10 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
return nil, err
}
}
- if enry.IsGenerated(f.Name(), content) {
+ if !notGenerated && enry.IsGenerated(f.Name(), content) {
continue
}
- // TODO: Use .gitattributes file for linguist overrides
// FIXME: Why can't we split this and the IsGenerated tests to avoid reading the blob unless absolutely necessary?
// - e.g. do all the detection tests using the filename first, before reading content.
language := analyze.GetCodeLanguage(f.Name(), content)