You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

repo_language_stats.go 2.6KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121
  1. // Copyright 2020 The Gitea Authors. All rights reserved.
  2. // Use of this source code is governed by a MIT-style
  3. // license that can be found in the LICENSE file.
  4. package git
  5. import (
  6. "bytes"
  7. "io"
  8. "io/ioutil"
  9. "code.gitea.io/gitea/modules/analyze"
  10. "github.com/go-enry/go-enry/v2"
  11. "github.com/go-git/go-git/v5"
  12. "github.com/go-git/go-git/v5/plumbing"
  13. "github.com/go-git/go-git/v5/plumbing/object"
  14. )
  15. const fileSizeLimit int64 = 16 * 1024 * 1024
  16. // specialLanguages defines list of languages that are excluded from the calculation
  17. // unless they are the only language present in repository. Only languages which under
  18. // normal circumstances are not considered to be code should be listed here.
  19. var specialLanguages = []string{
  20. "XML",
  21. "JSON",
  22. "TOML",
  23. "YAML",
  24. "INI",
  25. "SVG",
  26. "Text",
  27. "Markdown",
  28. }
  29. // GetLanguageStats calculates language stats for git repository at specified commit
  30. func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, error) {
  31. r, err := git.PlainOpen(repo.Path)
  32. if err != nil {
  33. return nil, err
  34. }
  35. rev, err := r.ResolveRevision(plumbing.Revision(commitID))
  36. if err != nil {
  37. return nil, err
  38. }
  39. commit, err := r.CommitObject(*rev)
  40. if err != nil {
  41. return nil, err
  42. }
  43. tree, err := commit.Tree()
  44. if err != nil {
  45. return nil, err
  46. }
  47. sizes := make(map[string]int64)
  48. err = tree.Files().ForEach(func(f *object.File) error {
  49. if f.Size == 0 || enry.IsVendor(f.Name) || enry.IsDotFile(f.Name) ||
  50. enry.IsDocumentation(f.Name) || enry.IsConfiguration(f.Name) {
  51. return nil
  52. }
  53. // If content can not be read just do detection by filename
  54. content, _ := readFile(f, fileSizeLimit)
  55. if enry.IsGenerated(f.Name, content) {
  56. return nil
  57. }
  58. // TODO: Use .gitattributes file for linguist overrides
  59. language := analyze.GetCodeLanguage(f.Name, content)
  60. if language == enry.OtherLanguage || language == "" {
  61. return nil
  62. }
  63. // group languages, such as Pug -> HTML; SCSS -> CSS
  64. group := enry.GetLanguageGroup(language)
  65. if group != "" {
  66. language = group
  67. }
  68. sizes[language] += f.Size
  69. return nil
  70. })
  71. if err != nil {
  72. return nil, err
  73. }
  74. // filter special languages unless they are the only language
  75. if len(sizes) > 1 {
  76. for _, language := range specialLanguages {
  77. delete(sizes, language)
  78. }
  79. }
  80. return sizes, nil
  81. }
  82. func readFile(f *object.File, limit int64) ([]byte, error) {
  83. r, err := f.Reader()
  84. if err != nil {
  85. return nil, err
  86. }
  87. defer r.Close()
  88. if limit <= 0 {
  89. return ioutil.ReadAll(r)
  90. }
  91. size := f.Size
  92. if limit > 0 && size > limit {
  93. size = limit
  94. }
  95. buf := bytes.NewBuffer(nil)
  96. buf.Grow(int(size))
  97. _, err = io.Copy(buf, io.LimitReader(r, limit))
  98. return buf.Bytes(), err
  99. }