You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

repo_language_stats_gogit.go 4.6KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181
  1. // Copyright 2020 The Gitea Authors. All rights reserved.
  2. // SPDX-License-Identifier: MIT
  3. //go:build gogit
  4. package git
  5. import (
  6. "bytes"
  7. "io"
  8. "strings"
  9. "code.gitea.io/gitea/modules/analyze"
  10. "github.com/go-enry/go-enry/v2"
  11. "github.com/go-git/go-git/v5"
  12. "github.com/go-git/go-git/v5/plumbing"
  13. "github.com/go-git/go-git/v5/plumbing/object"
  14. )
  15. // GetLanguageStats calculates language stats for git repository at specified commit
  16. func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, error) {
  17. r, err := git.PlainOpen(repo.Path)
  18. if err != nil {
  19. return nil, err
  20. }
  21. rev, err := r.ResolveRevision(plumbing.Revision(commitID))
  22. if err != nil {
  23. return nil, err
  24. }
  25. commit, err := r.CommitObject(*rev)
  26. if err != nil {
  27. return nil, err
  28. }
  29. tree, err := commit.Tree()
  30. if err != nil {
  31. return nil, err
  32. }
  33. checker, deferable := repo.CheckAttributeReader(commitID)
  34. defer deferable()
  35. // sizes contains the current calculated size of all files by language
  36. sizes := make(map[string]int64)
  37. // by default we will only count the sizes of programming languages or markup languages
  38. // unless they are explicitly set using linguist-language
  39. includedLanguage := map[string]bool{}
  40. // or if there's only one language in the repository
  41. firstExcludedLanguage := ""
  42. firstExcludedLanguageSize := int64(0)
  43. err = tree.Files().ForEach(func(f *object.File) error {
  44. if f.Size == 0 {
  45. return nil
  46. }
  47. notVendored := false
  48. notGenerated := false
  49. if checker != nil {
  50. attrs, err := checker.CheckPath(f.Name)
  51. if err == nil {
  52. if vendored, has := attrs["linguist-vendored"]; has {
  53. if vendored == "set" || vendored == "true" {
  54. return nil
  55. }
  56. notVendored = vendored == "false"
  57. }
  58. if generated, has := attrs["linguist-generated"]; has {
  59. if generated == "set" || generated == "true" {
  60. return nil
  61. }
  62. notGenerated = generated == "false"
  63. }
  64. if language, has := attrs["linguist-language"]; has && language != "unspecified" && language != "" {
  65. // group languages, such as Pug -> HTML; SCSS -> CSS
  66. group := enry.GetLanguageGroup(language)
  67. if len(group) != 0 {
  68. language = group
  69. }
  70. // this language will always be added to the size
  71. sizes[language] += f.Size
  72. return nil
  73. } else if language, has := attrs["gitlab-language"]; has && language != "unspecified" && language != "" {
  74. // strip off a ? if present
  75. if idx := strings.IndexByte(language, '?'); idx >= 0 {
  76. language = language[:idx]
  77. }
  78. if len(language) != 0 {
  79. // group languages, such as Pug -> HTML; SCSS -> CSS
  80. group := enry.GetLanguageGroup(language)
  81. if len(group) != 0 {
  82. language = group
  83. }
  84. // this language will always be added to the size
  85. sizes[language] += f.Size
  86. return nil
  87. }
  88. }
  89. }
  90. }
  91. if (!notVendored && analyze.IsVendor(f.Name)) || enry.IsDotFile(f.Name) ||
  92. enry.IsDocumentation(f.Name) || enry.IsConfiguration(f.Name) {
  93. return nil
  94. }
  95. // If content can not be read or file is too big just do detection by filename
  96. var content []byte
  97. if f.Size <= bigFileSize {
  98. content, _ = readFile(f, fileSizeLimit)
  99. }
  100. if !notGenerated && enry.IsGenerated(f.Name, content) {
  101. return nil
  102. }
  103. // TODO: Use .gitattributes file for linguist overrides
  104. language := analyze.GetCodeLanguage(f.Name, content)
  105. if language == enry.OtherLanguage || language == "" {
  106. return nil
  107. }
  108. // group languages, such as Pug -> HTML; SCSS -> CSS
  109. group := enry.GetLanguageGroup(language)
  110. if group != "" {
  111. language = group
  112. }
  113. included, checked := includedLanguage[language]
  114. if !checked {
  115. langtype := enry.GetLanguageType(language)
  116. included = langtype == enry.Programming || langtype == enry.Markup
  117. includedLanguage[language] = included
  118. }
  119. if included {
  120. sizes[language] += f.Size
  121. } else if len(sizes) == 0 && (firstExcludedLanguage == "" || firstExcludedLanguage == language) {
  122. firstExcludedLanguage = language
  123. firstExcludedLanguageSize += f.Size
  124. }
  125. return nil
  126. })
  127. if err != nil {
  128. return nil, err
  129. }
  130. // If there are no included languages add the first excluded language
  131. if len(sizes) == 0 && firstExcludedLanguage != "" {
  132. sizes[firstExcludedLanguage] = firstExcludedLanguageSize
  133. }
  134. return mergeLanguageStats(sizes), nil
  135. }
  136. func readFile(f *object.File, limit int64) ([]byte, error) {
  137. r, err := f.Reader()
  138. if err != nil {
  139. return nil, err
  140. }
  141. defer r.Close()
  142. if limit <= 0 {
  143. return io.ReadAll(r)
  144. }
  145. size := f.Size
  146. if limit > 0 && size > limit {
  147. size = limit
  148. }
  149. buf := bytes.NewBuffer(nil)
  150. buf.Grow(int(size))
  151. _, err = io.Copy(buf, io.LimitReader(r, limit))
  152. return buf.Bytes(), err
  153. }