You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

repo_language_stats_gogit.go 2.4KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114
  1. // Copyright 2020 The Gitea Authors. All rights reserved.
  2. // Use of this source code is governed by a MIT-style
  3. // license that can be found in the LICENSE file.
  4. //go:build gogit
  5. // +build gogit
  6. package git
  7. import (
  8. "bytes"
  9. "io"
  10. "io/ioutil"
  11. "code.gitea.io/gitea/modules/analyze"
  12. "github.com/go-enry/go-enry/v2"
  13. "github.com/go-git/go-git/v5"
  14. "github.com/go-git/go-git/v5/plumbing"
  15. "github.com/go-git/go-git/v5/plumbing/object"
  16. )
  17. // GetLanguageStats calculates language stats for git repository at specified commit
  18. func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, error) {
  19. r, err := git.PlainOpen(repo.Path)
  20. if err != nil {
  21. return nil, err
  22. }
  23. rev, err := r.ResolveRevision(plumbing.Revision(commitID))
  24. if err != nil {
  25. return nil, err
  26. }
  27. commit, err := r.CommitObject(*rev)
  28. if err != nil {
  29. return nil, err
  30. }
  31. tree, err := commit.Tree()
  32. if err != nil {
  33. return nil, err
  34. }
  35. sizes := make(map[string]int64)
  36. err = tree.Files().ForEach(func(f *object.File) error {
  37. if f.Size == 0 || analyze.IsVendor(f.Name) || enry.IsDotFile(f.Name) ||
  38. enry.IsDocumentation(f.Name) || enry.IsConfiguration(f.Name) {
  39. return nil
  40. }
  41. // If content can not be read or file is too big just do detection by filename
  42. var content []byte
  43. if f.Size <= bigFileSize {
  44. content, _ = readFile(f, fileSizeLimit)
  45. }
  46. if enry.IsGenerated(f.Name, content) {
  47. return nil
  48. }
  49. // TODO: Use .gitattributes file for linguist overrides
  50. language := analyze.GetCodeLanguage(f.Name, content)
  51. if language == enry.OtherLanguage || language == "" {
  52. return nil
  53. }
  54. // group languages, such as Pug -> HTML; SCSS -> CSS
  55. group := enry.GetLanguageGroup(language)
  56. if group != "" {
  57. language = group
  58. }
  59. sizes[language] += f.Size
  60. return nil
  61. })
  62. if err != nil {
  63. return nil, err
  64. }
  65. // filter special languages unless they are the only language
  66. if len(sizes) > 1 {
  67. for language := range sizes {
  68. langtype := enry.GetLanguageType(language)
  69. if langtype != enry.Programming && langtype != enry.Markup {
  70. delete(sizes, language)
  71. }
  72. }
  73. }
  74. return sizes, nil
  75. }
  76. func readFile(f *object.File, limit int64) ([]byte, error) {
  77. r, err := f.Reader()
  78. if err != nil {
  79. return nil, err
  80. }
  81. defer r.Close()
  82. if limit <= 0 {
  83. return ioutil.ReadAll(r)
  84. }
  85. size := f.Size
  86. if limit > 0 && size > limit {
  87. size = limit
  88. }
  89. buf := bytes.NewBuffer(nil)
  90. buf.Grow(int(size))
  91. _, err = io.Copy(buf, io.LimitReader(r, limit))
  92. return buf.Bytes(), err
  93. }