summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Cirno the Strongest <1447794+CirnoT@users.noreply.github.com> 2020-05-31 00:58:55 +0200
committer GitHub <noreply@github.com> 2020-05-31 01:58:55 +0300
commit 9d652002c63d03d44083c4410881a457a9390e2f (patch)
tree 6e3d7c37add64a2d4e51f2b3a930ee16ec0c5533
parent ea4c139cd2f7e5174627a40aa8a9973fabf508ff (diff)
download gitea-9d652002c63d03d44083c4410881a457a9390e2f.tar.gz
download gitea-9d652002c63d03d44083c4410881a457a9390e2f.zip
Fix language stat calculation (#11692)
* Fix language stat calculation
* Group languages and ignore 0 size files
* Remove unneeded code
-rw-r--r-- models/repo_language_stats.go 35
-rw-r--r-- modules/git/repo_language_stats.go 31
-rw-r--r-- modules/indexer/stats/indexer_test.go 4
3 files changed, 29 insertions, 41 deletions
diff --git a/models/repo_language_stats.go b/models/repo_language_stats.go
index d08782eaf8..a15063e25a 100644
--- a/models/repo_language_stats.go
+++ b/models/repo_language_stats.go
@@ -26,22 +26,6 @@ type LanguageStat struct {
CreatedUnix timeutil.TimeStamp `xorm:"INDEX CREATED"`
}
-// specialLanguages defines list of languages that are excluded from the calculation
-// unless they are the only language present in repository. Only languages which under
-// normal circumstances are not considered to be code should be listed here.
-var specialLanguages = map[string]struct{}{
- "XML": {},
- "JSON": {},
- "TOML": {},
- "YAML": {},
- "INI": {},
- "SQL": {},
- "SVG": {},
- "Text": {},
- "Markdown": {},
- "other": {},
-}
-
// LanguageStatList defines a list of language statistics
type LanguageStatList []*LanguageStat
@@ -55,27 +39,12 @@ func (stats LanguageStatList) getLanguagePercentages() map[string]float32 {
langPerc := make(map[string]float32)
var otherPerc float32 = 100
var total int64
- // Check that repository has at least one non-special language
- var skipSpecial bool
- for _, stat := range stats {
- if _, ok := specialLanguages[stat.Language]; !ok {
- skipSpecial = true
- break
- }
- }
+
for _, stat := range stats {
- // Exclude specific languages from percentage calculation
- if _, ok := specialLanguages[stat.Language]; ok && skipSpecial {
- continue
- }
total += stat.Size
}
if total > 0 {
for _, stat := range stats {
- // Exclude specific languages from percentage calculation
- if _, ok := specialLanguages[stat.Language]; ok && skipSpecial {
- continue
- }
perc := float32(math.Round(float64(stat.Size)/float64(total)*1000) / 10)
if perc <= 0.1 {
continue
@@ -84,8 +53,6 @@ func (stats LanguageStatList) getLanguagePercentages() map[string]float32 {
langPerc[stat.Language] = perc
}
otherPerc = float32(math.Round(float64(otherPerc)*10) / 10)
- } else {
- otherPerc = 100
}
if otherPerc > 0 {
langPerc["other"] = otherPerc
diff --git a/modules/git/repo_language_stats.go b/modules/git/repo_language_stats.go
index d623d6f57d..06d7d6aba0 100644
--- a/modules/git/repo_language_stats.go
+++ b/modules/git/repo_language_stats.go
@@ -19,6 +19,20 @@ import (
const fileSizeLimit int64 = 16 * 1024 * 1024
+// specialLanguages defines list of languages that are excluded from the calculation
+// unless they are the only language present in repository. Only languages which under
+// normal circumstances are not considered to be code should be listed here.
+var specialLanguages = []string{
+ "XML",
+ "JSON",
+ "TOML",
+ "YAML",
+ "INI",
+ "SVG",
+ "Text",
+ "Markdown",
+}
+
// GetLanguageStats calculates language stats for git repository at specified commit
func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, error) {
r, err := git.PlainOpen(repo.Path)
@@ -43,7 +57,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
sizes := make(map[string]int64)
err = tree.Files().ForEach(func(f *object.File) error {
- if enry.IsVendor(f.Name) || enry.IsDotFile(f.Name) ||
+ if f.Size == 0 || enry.IsVendor(f.Name) || enry.IsDotFile(f.Name) ||
enry.IsDocumentation(f.Name) || enry.IsConfiguration(f.Name) {
return nil
}
@@ -58,7 +72,13 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
language := analyze.GetCodeLanguage(f.Name, content)
if language == enry.OtherLanguage || language == "" {
- language = "other"
+ return nil
+ }
+
+ // group languages, such as Pug -> HTML; SCSS -> CSS
+ group := enry.GetLanguageGroup(language)
+ if group != "" {
+ language = group
}
sizes[language] += f.Size
@@ -69,8 +89,11 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
return nil, err
}
- if len(sizes) == 0 {
- sizes["other"] = 0
+ // filter special languages unless they are the only language
+ if len(sizes) > 1 {
+ for _, language := range specialLanguages {
+ delete(sizes, language)
+ }
}
return sizes, nil
diff --git a/modules/indexer/stats/indexer_test.go b/modules/indexer/stats/indexer_test.go
index b60c6d9bb4..4bcbaa9423 100644
--- a/modules/indexer/stats/indexer_test.go
+++ b/modules/indexer/stats/indexer_test.go
@@ -39,7 +39,5 @@ func TestRepoStatsIndex(t *testing.T) {
assert.Equal(t, "65f1bf27bc3bf70f64657658635e66094edbcb4d", status.CommitSha)
langs, err := repo.GetTopLanguageStats(5)
assert.NoError(t, err)
- assert.Len(t, langs, 1)
- assert.Equal(t, "other", langs[0].Language)
- assert.Equal(t, float32(100), langs[0].Percentage)
+ assert.Empty(t, langs)
}