diff options
Diffstat (limited to 'modules/git/repo_language_stats_nogogit.go')
-rw-r--r-- | modules/git/repo_language_stats_nogogit.go | 71 |
1 files changed, 68 insertions, 3 deletions
diff --git a/modules/git/repo_language_stats_nogogit.go b/modules/git/repo_language_stats_nogogit.go index 7425e2dbb1..c3b96ea841 100644 --- a/modules/git/repo_language_stats_nogogit.go +++ b/modules/git/repo_language_stats_nogogit.go @@ -10,6 +10,7 @@ package git import ( "bufio" "bytes" + "context" "io" "math" @@ -62,13 +63,78 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err return nil, err } + var checker *CheckAttributeReader + + if CheckGitVersionAtLeast("1.7.8") == nil { + indexFilename, deleteTemporaryFile, err := repo.ReadTreeToTemporaryIndex(commitID) + if err == nil { + defer deleteTemporaryFile() + + checker = &CheckAttributeReader{ + Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language"}, + Repo: repo, + IndexFile: indexFilename, + } + ctx, cancel := context.WithCancel(DefaultContext) + if err := checker.Init(ctx); err != nil { + log.Error("Unable to open checker for %s. Error: %v", commitID, err) + } else { + go func() { + err = checker.Run() + if err != nil { + log.Error("Unable to open checker for %s. Error: %v", commitID, err) + cancel() + } + }() + } + defer cancel() + } + } + contentBuf := bytes.Buffer{} var content []byte sizes := make(map[string]int64) for _, f := range entries { contentBuf.Reset() content = contentBuf.Bytes() - if f.Size() == 0 || analyze.IsVendor(f.Name()) || enry.IsDotFile(f.Name()) || + + if f.Size() == 0 { + continue + } + + notVendored := false + notGenerated := false + + if checker != nil { + attrs, err := checker.CheckPath(f.Name()) + if err == nil { + if vendored, has := attrs["linguist-vendored"]; has { + if vendored == "set" || vendored == "true" { + continue + } + notVendored = vendored == "false" + } + if generated, has := attrs["linguist-generated"]; has { + if generated == "set" || generated == "true" { + continue + } + notGenerated = generated == "false" + } + if language, has := attrs["linguist-language"]; has && language != "unspecified" && language != "" { + // group languages, such as Pug -> HTML; SCSS -> CSS + group := enry.GetLanguageGroup(language) + if len(group) == 0 { + language = group + } + + sizes[language] += f.Size() + + continue + } + } + } + + if (!notVendored && analyze.IsVendor(f.Name())) || enry.IsDotFile(f.Name()) || enry.IsDocumentation(f.Name()) || enry.IsConfiguration(f.Name()) { continue } @@ -102,11 +168,10 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err return nil, err } } - if enry.IsGenerated(f.Name(), content) { + if !notGenerated && enry.IsGenerated(f.Name(), content) { continue } - // TODO: Use .gitattributes file for linguist overrides // FIXME: Why can't we split this and the IsGenerated tests to avoid reading the blob unless absolutely necessary? // - eg. do the all the detection tests using filename first before reading content. language := analyze.GetCodeLanguage(f.Name(), content) |