* Change language statistics to save size instead of percentage (#11681) * Change language statistics to save size instead of percentage in database Co-Authored-By: Cirno the Strongest <1447794+CirnoT@users.noreply.github.com> * Do not exclude if only language * Fix edge cases with special languages Co-authored-by: Cirno the Strongest <1447794+CirnoT@users.noreply.github.com> * Fix language stat calculation (#11692) * Fix language stat calculation * Group languages and ignore 0 size files * remove unneeded code Co-authored-by: Cirno the Strongest <1447794+CirnoT@users.noreply.github.com> tags/v1.12.0-rc2
@@ -212,6 +212,8 @@ var migrations = []Migration{ | |||
NewMigration("Add ResolveDoerID to Comment table", addResolveDoerIDCommentColumn), | |||
// v139 -> v140 | |||
NewMigration("prepend refs/heads/ to issue refs", prependRefsHeadsToIssueRefs), | |||
// v140 -> v141 | |||
NewMigration("Save detected language file size to database instead of percent", fixLanguageStatsToSaveSize), | |||
} | |||
// GetCurrentDBVersion returns the current db version |
@@ -0,0 +1,56 @@ | |||
// Copyright 2020 The Gitea Authors. All rights reserved. | |||
// Use of this source code is governed by a MIT-style | |||
// license that can be found in the LICENSE file. | |||
package migrations | |||
import ( | |||
"fmt" | |||
"code.gitea.io/gitea/modules/setting" | |||
"xorm.io/xorm" | |||
) | |||
func fixLanguageStatsToSaveSize(x *xorm.Engine) error { | |||
// LanguageStat see models/repo_language_stats.go | |||
type LanguageStat struct { | |||
Size int64 `xorm:"NOT NULL DEFAULT 0"` | |||
} | |||
// RepoIndexerType specifies the repository indexer type | |||
type RepoIndexerType int | |||
const ( | |||
// RepoIndexerTypeCode code indexer | |||
RepoIndexerTypeCode RepoIndexerType = iota // 0 | |||
// RepoIndexerTypeStats repository stats indexer | |||
RepoIndexerTypeStats // 1 | |||
) | |||
// RepoIndexerStatus see models/repo_indexer.go | |||
type RepoIndexerStatus struct { | |||
IndexerType RepoIndexerType `xorm:"INDEX(s) NOT NULL DEFAULT 0"` | |||
} | |||
if err := x.Sync2(new(LanguageStat)); err != nil { | |||
return fmt.Errorf("Sync2: %v", err) | |||
} | |||
x.Delete(&RepoIndexerStatus{IndexerType: RepoIndexerTypeStats}) | |||
// Delete language stat statuses | |||
truncExpr := "TRUNCATE TABLE" | |||
if setting.Database.UseSQLite3 { | |||
truncExpr = "DELETE FROM" | |||
} | |||
// Delete language stats | |||
if _, err := x.Exec(fmt.Sprintf("%s language_stat", truncExpr)); err != nil { | |||
return err | |||
} | |||
sess := x.NewSession() | |||
defer sess.Close() | |||
return dropTableColumns(sess, "language_stat", "percentage") | |||
} |
@@ -20,7 +20,8 @@ type LanguageStat struct { | |||
CommitID string | |||
IsPrimary bool | |||
Language string `xorm:"VARCHAR(30) UNIQUE(s) INDEX NOT NULL"` | |||
Percentage float32 `xorm:"NUMERIC(5,2) NOT NULL DEFAULT 0"` | |||
Percentage float32 `xorm:"-"` | |||
Size int64 `xorm:"NOT NULL DEFAULT 0"` | |||
Color string `xorm:"-"` | |||
CreatedUnix timeutil.TimeStamp `xorm:"INDEX CREATED"` | |||
} | |||
@@ -34,12 +35,36 @@ func (stats LanguageStatList) loadAttributes() { | |||
} | |||
} | |||
func (stats LanguageStatList) getLanguagePercentages() map[string]float32 { | |||
langPerc := make(map[string]float32) | |||
var otherPerc float32 = 100 | |||
var total int64 | |||
for _, stat := range stats { | |||
total += stat.Size | |||
} | |||
if total > 0 { | |||
for _, stat := range stats { | |||
perc := float32(math.Round(float64(stat.Size)/float64(total)*1000) / 10) | |||
if perc <= 0.1 { | |||
continue | |||
} | |||
otherPerc -= perc | |||
langPerc[stat.Language] = perc | |||
} | |||
otherPerc = float32(math.Round(float64(otherPerc)*10) / 10) | |||
} | |||
if otherPerc > 0 { | |||
langPerc["other"] = otherPerc | |||
} | |||
return langPerc | |||
} | |||
func (repo *Repository) getLanguageStats(e Engine) (LanguageStatList, error) { | |||
stats := make(LanguageStatList, 0, 6) | |||
if err := e.Where("`repo_id` = ?", repo.ID).Desc("`percentage`").Find(&stats); err != nil { | |||
if err := e.Where("`repo_id` = ?", repo.ID).Desc("`size`").Find(&stats); err != nil { | |||
return nil, err | |||
} | |||
stats.loadAttributes() | |||
return stats, nil | |||
} | |||
@@ -54,13 +79,18 @@ func (repo *Repository) GetTopLanguageStats(limit int) (LanguageStatList, error) | |||
if err != nil { | |||
return nil, err | |||
} | |||
perc := stats.getLanguagePercentages() | |||
topstats := make(LanguageStatList, 0, limit) | |||
var other float32 | |||
for i := range stats { | |||
if _, ok := perc[stats[i].Language]; !ok { | |||
continue | |||
} | |||
if stats[i].Language == "other" || len(topstats) >= limit { | |||
other += stats[i].Percentage | |||
other += perc[stats[i].Language] | |||
continue | |||
} | |||
stats[i].Percentage = perc[stats[i].Language] | |||
topstats = append(topstats, stats[i]) | |||
} | |||
if other > 0 { | |||
@@ -71,11 +101,12 @@ func (repo *Repository) GetTopLanguageStats(limit int) (LanguageStatList, error) | |||
Percentage: float32(math.Round(float64(other)*10) / 10), | |||
}) | |||
} | |||
topstats.loadAttributes() | |||
return topstats, nil | |||
} | |||
// UpdateLanguageStats updates the language statistics for repository | |||
func (repo *Repository) UpdateLanguageStats(commitID string, stats map[string]float32) error { | |||
func (repo *Repository) UpdateLanguageStats(commitID string, stats map[string]int64) error { | |||
sess := x.NewSession() | |||
if err := sess.Begin(); err != nil { | |||
return err | |||
@@ -87,15 +118,15 @@ func (repo *Repository) UpdateLanguageStats(commitID string, stats map[string]fl | |||
return err | |||
} | |||
var topLang string | |||
var p float32 | |||
for lang, perc := range stats { | |||
if perc > p { | |||
p = perc | |||
var s int64 | |||
for lang, size := range stats { | |||
if size > s { | |||
s = size | |||
topLang = strings.ToLower(lang) | |||
} | |||
} | |||
for lang, perc := range stats { | |||
for lang, size := range stats { | |||
upd := false | |||
llang := strings.ToLower(lang) | |||
for _, s := range oldstats { | |||
@@ -103,8 +134,8 @@ func (repo *Repository) UpdateLanguageStats(commitID string, stats map[string]fl | |||
if strings.ToLower(s.Language) == llang { | |||
s.CommitID = commitID | |||
s.IsPrimary = llang == topLang | |||
s.Percentage = perc | |||
if _, err := sess.ID(s.ID).Cols("`commit_id`", "`percentage`", "`is_primary`").Update(s); err != nil { | |||
s.Size = size | |||
if _, err := sess.ID(s.ID).Cols("`commit_id`", "`size`", "`is_primary`").Update(s); err != nil { | |||
return err | |||
} | |||
upd = true | |||
@@ -114,11 +145,11 @@ func (repo *Repository) UpdateLanguageStats(commitID string, stats map[string]fl | |||
// Insert new language | |||
if !upd { | |||
if _, err := sess.Insert(&LanguageStat{ | |||
RepoID: repo.ID, | |||
CommitID: commitID, | |||
IsPrimary: llang == topLang, | |||
Language: lang, | |||
Percentage: perc, | |||
RepoID: repo.ID, | |||
CommitID: commitID, | |||
IsPrimary: llang == topLang, | |||
Language: lang, | |||
Size: size, | |||
}); err != nil { | |||
return err | |||
} | |||
@@ -153,7 +184,7 @@ func CopyLanguageStat(originalRepo, destRepo *Repository) error { | |||
return err | |||
} | |||
RepoLang := make(LanguageStatList, 0, 6) | |||
if err := sess.Where("`repo_id` = ?", originalRepo.ID).Desc("`percentage`").Find(&RepoLang); err != nil { | |||
if err := sess.Where("`repo_id` = ?", originalRepo.ID).Desc("`size`").Find(&RepoLang); err != nil { | |||
return err | |||
} | |||
if len(RepoLang) > 0 { |
@@ -8,7 +8,6 @@ import ( | |||
"bytes" | |||
"io" | |||
"io/ioutil" | |||
"math" | |||
"code.gitea.io/gitea/modules/analyze" | |||
@@ -20,8 +19,22 @@ import ( | |||
const fileSizeLimit int64 = 16 * 1024 * 1024 | |||
// specialLanguages lists the languages that are left out of the language
// statistics unless the repository contains nothing else. Only add languages
// here that are normally not regarded as code (markup, data and plain text
// formats).
var specialLanguages = []string{
	"XML", "JSON", "TOML", "YAML",
	"INI", "SVG", "Text", "Markdown",
}
// GetLanguageStats calculates language stats for git repository at specified commit | |||
func (repo *Repository) GetLanguageStats(commitID string) (map[string]float32, error) { | |||
func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, error) { | |||
r, err := git.PlainOpen(repo.Path) | |||
if err != nil { | |||
return nil, err | |||
@@ -43,9 +56,8 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]float32, e | |||
} | |||
sizes := make(map[string]int64) | |||
var total int64 | |||
err = tree.Files().ForEach(func(f *object.File) error { | |||
if enry.IsVendor(f.Name) || enry.IsDotFile(f.Name) || | |||
if f.Size == 0 || enry.IsVendor(f.Name) || enry.IsDotFile(f.Name) || | |||
enry.IsDocumentation(f.Name) || enry.IsConfiguration(f.Name) { | |||
return nil | |||
} | |||
@@ -63,8 +75,13 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]float32, e | |||
return nil | |||
} | |||
// group languages, such as Pug -> HTML; SCSS -> CSS | |||
group := enry.GetLanguageGroup(language) | |||
if group != "" { | |||
language = group | |||
} | |||
sizes[language] += f.Size | |||
total += f.Size | |||
return nil | |||
}) | |||
@@ -72,21 +89,14 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]float32, e | |||
return nil, err | |||
} | |||
stats := make(map[string]float32) | |||
var otherPerc float32 = 100 | |||
for language, size := range sizes { | |||
perc := float32(math.Round(float64(size)/float64(total)*1000) / 10) | |||
if perc <= 0.1 { | |||
continue | |||
// filter special languages unless they are the only language | |||
if len(sizes) > 1 { | |||
for _, language := range specialLanguages { | |||
delete(sizes, language) | |||
} | |||
otherPerc -= perc | |||
stats[language] = perc | |||
} | |||
otherPerc = float32(math.Round(float64(otherPerc)*10) / 10) | |||
if otherPerc > 0 { | |||
stats["other"] = otherPerc | |||
} | |||
return stats, nil | |||
return sizes, nil | |||
} | |||
func readFile(f *object.File, limit int64) ([]byte, error) { |
@@ -34,9 +34,10 @@ func TestRepoStatsIndex(t *testing.T) { | |||
repo, err := models.GetRepositoryByID(1) | |||
assert.NoError(t, err) | |||
status, err := repo.GetIndexerStatus(models.RepoIndexerTypeStats) | |||
assert.NoError(t, err) | |||
assert.Equal(t, "65f1bf27bc3bf70f64657658635e66094edbcb4d", status.CommitSha) | |||
langs, err := repo.GetTopLanguageStats(5) | |||
assert.NoError(t, err) | |||
assert.Len(t, langs, 1) | |||
assert.Equal(t, "other", langs[0].Language) | |||
assert.Equal(t, float32(100), langs[0].Percentage) | |||
assert.Empty(t, langs) | |||
} |