Você não pode selecionar mais de 25 tópicos Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.

repo_language_stats.go 2.8KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125
  1. // Copyright 2020 The Gitea Authors. All rights reserved.
  2. // Use of this source code is governed by a MIT-style
  3. // license that can be found in the LICENSE file.
  4. package git
  5. import (
  6. "bytes"
  7. "io"
  8. "io/ioutil"
  9. "code.gitea.io/gitea/modules/analyze"
  10. "github.com/go-enry/go-enry/v2"
  11. "github.com/go-git/go-git/v5"
  12. "github.com/go-git/go-git/v5/plumbing"
  13. "github.com/go-git/go-git/v5/plumbing/object"
  14. )
  15. const fileSizeLimit int64 = 16 * 1024 // 16 KiB
  16. const bigFileSize int64 = 1024 * 1024 // 1 MiB
  17. // specialLanguages defines list of languages that are excluded from the calculation
  18. // unless they are the only language present in repository. Only languages which under
  19. // normal circumstances are not considered to be code should be listed here.
  20. var specialLanguages = []string{
  21. "XML",
  22. "JSON",
  23. "TOML",
  24. "YAML",
  25. "INI",
  26. "SVG",
  27. "Text",
  28. "Markdown",
  29. }
  30. // GetLanguageStats calculates language stats for git repository at specified commit
  31. func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, error) {
  32. r, err := git.PlainOpen(repo.Path)
  33. if err != nil {
  34. return nil, err
  35. }
  36. rev, err := r.ResolveRevision(plumbing.Revision(commitID))
  37. if err != nil {
  38. return nil, err
  39. }
  40. commit, err := r.CommitObject(*rev)
  41. if err != nil {
  42. return nil, err
  43. }
  44. tree, err := commit.Tree()
  45. if err != nil {
  46. return nil, err
  47. }
  48. sizes := make(map[string]int64)
  49. err = tree.Files().ForEach(func(f *object.File) error {
  50. if f.Size == 0 || enry.IsVendor(f.Name) || enry.IsDotFile(f.Name) ||
  51. enry.IsDocumentation(f.Name) || enry.IsConfiguration(f.Name) {
  52. return nil
  53. }
  54. // If content can not be read or file is too big just do detection by filename
  55. var content []byte
  56. if f.Size <= bigFileSize {
  57. content, _ = readFile(f, fileSizeLimit)
  58. }
  59. if enry.IsGenerated(f.Name, content) {
  60. return nil
  61. }
  62. // TODO: Use .gitattributes file for linguist overrides
  63. language := analyze.GetCodeLanguage(f.Name, content)
  64. if language == enry.OtherLanguage || language == "" {
  65. return nil
  66. }
  67. // group languages, such as Pug -> HTML; SCSS -> CSS
  68. group := enry.GetLanguageGroup(language)
  69. if group != "" {
  70. language = group
  71. }
  72. sizes[language] += f.Size
  73. return nil
  74. })
  75. if err != nil {
  76. return nil, err
  77. }
  78. // filter special languages unless they are the only language
  79. if len(sizes) > 1 {
  80. for _, language := range specialLanguages {
  81. delete(sizes, language)
  82. }
  83. }
  84. return sizes, nil
  85. }
  86. func readFile(f *object.File, limit int64) ([]byte, error) {
  87. r, err := f.Reader()
  88. if err != nil {
  89. return nil, err
  90. }
  91. defer r.Close()
  92. if limit <= 0 {
  93. return ioutil.ReadAll(r)
  94. }
  95. size := f.Size
  96. if limit > 0 && size > limit {
  97. size = limit
  98. }
  99. buf := bytes.NewBuffer(nil)
  100. buf.Grow(int(size))
  101. _, err = io.Copy(buf, io.LimitReader(r, limit))
  102. return buf.Bytes(), err
  103. }