summaryrefslogtreecommitdiffstats
path: root/modules/git/repo_language_stats_nogogit.go
diff options
context:
space:
mode:
Diffstat (limited to 'modules/git/repo_language_stats_nogogit.go')
-rw-r--r--modules/git/repo_language_stats_nogogit.go71
1 files changed, 68 insertions, 3 deletions
diff --git a/modules/git/repo_language_stats_nogogit.go b/modules/git/repo_language_stats_nogogit.go
index 7425e2dbb1..c3b96ea841 100644
--- a/modules/git/repo_language_stats_nogogit.go
+++ b/modules/git/repo_language_stats_nogogit.go
@@ -10,6 +10,7 @@ package git
import (
"bufio"
"bytes"
+ "context"
"io"
"math"
@@ -62,13 +63,78 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
return nil, err
}
+ var checker *CheckAttributeReader
+
+ if CheckGitVersionAtLeast("1.7.8") == nil {
+ indexFilename, deleteTemporaryFile, err := repo.ReadTreeToTemporaryIndex(commitID)
+ if err == nil {
+ defer deleteTemporaryFile()
+
+ checker = &CheckAttributeReader{
+ Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language"},
+ Repo: repo,
+ IndexFile: indexFilename,
+ }
+ ctx, cancel := context.WithCancel(DefaultContext)
+ if err := checker.Init(ctx); err != nil {
+ log.Error("Unable to open checker for %s. Error: %v", commitID, err)
+ } else {
+ go func() {
+ err = checker.Run()
+ if err != nil {
+ log.Error("Unable to open checker for %s. Error: %v", commitID, err)
+ cancel()
+ }
+ }()
+ }
+ defer cancel()
+ }
+ }
+
contentBuf := bytes.Buffer{}
var content []byte
sizes := make(map[string]int64)
for _, f := range entries {
contentBuf.Reset()
content = contentBuf.Bytes()
- if f.Size() == 0 || analyze.IsVendor(f.Name()) || enry.IsDotFile(f.Name()) ||
+
+ if f.Size() == 0 {
+ continue
+ }
+
+ notVendored := false
+ notGenerated := false
+
+ if checker != nil {
+ attrs, err := checker.CheckPath(f.Name())
+ if err == nil {
+ if vendored, has := attrs["linguist-vendored"]; has {
+ if vendored == "set" || vendored == "true" {
+ continue
+ }
+ notVendored = vendored == "false"
+ }
+ if generated, has := attrs["linguist-generated"]; has {
+ if generated == "set" || generated == "true" {
+ continue
+ }
+ notGenerated = generated == "false"
+ }
+ if language, has := attrs["linguist-language"]; has && language != "unspecified" && language != "" {
+ // group languages, such as Pug -> HTML; SCSS -> CSS
+ group := enry.GetLanguageGroup(language)
+ if len(group) == 0 {
+ language = group
+ }
+
+ sizes[language] += f.Size()
+
+ continue
+ }
+ }
+ }
+
+ if (!notVendored && analyze.IsVendor(f.Name())) || enry.IsDotFile(f.Name()) ||
enry.IsDocumentation(f.Name()) || enry.IsConfiguration(f.Name()) {
continue
}
@@ -102,11 +168,10 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
return nil, err
}
}
- if enry.IsGenerated(f.Name(), content) {
+ if !notGenerated && enry.IsGenerated(f.Name(), content) {
continue
}
- // TODO: Use .gitattributes file for linguist overrides
// FIXME: Why can't we split this and the IsGenerated tests to avoid reading the blob unless absolutely necessary?
// - eg. do the all the detection tests using filename first before reading content.
language := analyze.GetCodeLanguage(f.Name(), content)