You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

repo_language_stats_gogit.go 4.6KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194
  1. // Copyright 2020 The Gitea Authors. All rights reserved.
  2. // Use of this source code is governed by a MIT-style
  3. // license that can be found in the LICENSE file.
  4. //go:build gogit
  5. package git
  6. import (
  7. "bytes"
  8. "context"
  9. "io"
  10. "strings"
  11. "code.gitea.io/gitea/modules/analyze"
  12. "code.gitea.io/gitea/modules/log"
  13. "github.com/go-enry/go-enry/v2"
  14. "github.com/go-git/go-git/v5"
  15. "github.com/go-git/go-git/v5/plumbing"
  16. "github.com/go-git/go-git/v5/plumbing/object"
  17. )
  18. // GetLanguageStats calculates language stats for git repository at specified commit
  19. func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, error) {
  20. r, err := git.PlainOpen(repo.Path)
  21. if err != nil {
  22. return nil, err
  23. }
  24. rev, err := r.ResolveRevision(plumbing.Revision(commitID))
  25. if err != nil {
  26. return nil, err
  27. }
  28. commit, err := r.CommitObject(*rev)
  29. if err != nil {
  30. return nil, err
  31. }
  32. tree, err := commit.Tree()
  33. if err != nil {
  34. return nil, err
  35. }
  36. var checker *CheckAttributeReader
  37. if CheckGitVersionAtLeast("1.7.8") == nil {
  38. indexFilename, workTree, deleteTemporaryFile, err := repo.ReadTreeToTemporaryIndex(commitID)
  39. if err == nil {
  40. defer deleteTemporaryFile()
  41. checker = &CheckAttributeReader{
  42. Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language", "gitlab-language"},
  43. Repo: repo,
  44. IndexFile: indexFilename,
  45. WorkTree: workTree,
  46. }
  47. ctx, cancel := context.WithCancel(DefaultContext)
  48. if err := checker.Init(ctx); err != nil {
  49. log.Error("Unable to open checker for %s. Error: %v", commitID, err)
  50. } else {
  51. go func() {
  52. err = checker.Run()
  53. if err != nil {
  54. log.Error("Unable to open checker for %s. Error: %v", commitID, err)
  55. cancel()
  56. }
  57. }()
  58. }
  59. defer cancel()
  60. }
  61. }
  62. sizes := make(map[string]int64)
  63. err = tree.Files().ForEach(func(f *object.File) error {
  64. if f.Size == 0 {
  65. return nil
  66. }
  67. notVendored := false
  68. notGenerated := false
  69. if checker != nil {
  70. attrs, err := checker.CheckPath(f.Name)
  71. if err == nil {
  72. if vendored, has := attrs["linguist-vendored"]; has {
  73. if vendored == "set" || vendored == "true" {
  74. return nil
  75. }
  76. notVendored = vendored == "false"
  77. }
  78. if generated, has := attrs["linguist-generated"]; has {
  79. if generated == "set" || generated == "true" {
  80. return nil
  81. }
  82. notGenerated = generated == "false"
  83. }
  84. if language, has := attrs["linguist-language"]; has && language != "unspecified" && language != "" {
  85. // group languages, such as Pug -> HTML; SCSS -> CSS
  86. group := enry.GetLanguageGroup(language)
  87. if len(group) != 0 {
  88. language = group
  89. }
  90. sizes[language] += f.Size
  91. return nil
  92. } else if language, has := attrs["gitlab-language"]; has && language != "unspecified" && language != "" {
  93. // strip off a ? if present
  94. if idx := strings.IndexByte(language, '?'); idx >= 0 {
  95. language = language[:idx]
  96. }
  97. if len(language) != 0 {
  98. // group languages, such as Pug -> HTML; SCSS -> CSS
  99. group := enry.GetLanguageGroup(language)
  100. if len(group) != 0 {
  101. language = group
  102. }
  103. sizes[language] += f.Size
  104. return nil
  105. }
  106. }
  107. }
  108. }
  109. if (!notVendored && analyze.IsVendor(f.Name)) || enry.IsDotFile(f.Name) ||
  110. enry.IsDocumentation(f.Name) || enry.IsConfiguration(f.Name) {
  111. return nil
  112. }
  113. // If content can not be read or file is too big just do detection by filename
  114. var content []byte
  115. if f.Size <= bigFileSize {
  116. content, _ = readFile(f, fileSizeLimit)
  117. }
  118. if !notGenerated && enry.IsGenerated(f.Name, content) {
  119. return nil
  120. }
  121. // TODO: Use .gitattributes file for linguist overrides
  122. language := analyze.GetCodeLanguage(f.Name, content)
  123. if language == enry.OtherLanguage || language == "" {
  124. return nil
  125. }
  126. // group languages, such as Pug -> HTML; SCSS -> CSS
  127. group := enry.GetLanguageGroup(language)
  128. if group != "" {
  129. language = group
  130. }
  131. sizes[language] += f.Size
  132. return nil
  133. })
  134. if err != nil {
  135. return nil, err
  136. }
  137. // filter special languages unless they are the only language
  138. if len(sizes) > 1 {
  139. for language := range sizes {
  140. langtype := enry.GetLanguageType(language)
  141. if langtype != enry.Programming && langtype != enry.Markup {
  142. delete(sizes, language)
  143. }
  144. }
  145. }
  146. return sizes, nil
  147. }
  148. func readFile(f *object.File, limit int64) ([]byte, error) {
  149. r, err := f.Reader()
  150. if err != nil {
  151. return nil, err
  152. }
  153. defer r.Close()
  154. if limit <= 0 {
  155. return io.ReadAll(r)
  156. }
  157. size := f.Size
  158. if limit > 0 && size > limit {
  159. size = limit
  160. }
  161. buf := bytes.NewBuffer(nil)
  162. buf.Grow(int(size))
  163. _, err = io.Copy(buf, io.LimitReader(r, limit))
  164. return buf.Bytes(), err
  165. }