aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Lauris BH <lauris@nix.lv> 2020-05-30 10:46:15 +0300
committer GitHub <noreply@github.com> 2020-05-30 10:46:15 +0300
commit ea4c139cd2f7e5174627a40aa8a9973fabf508ff (patch)
tree bcdcfc9cb15c56799af03bacbae3d25790539a02
parent 4395c607ed79985602a99dda251f090fbd2f5cf9 (diff)
download gitea-ea4c139cd2f7e5174627a40aa8a9973fabf508ff.tar.gz
gitea-ea4c139cd2f7e5174627a40aa8a9973fabf508ff.zip
Change language statistics to save size instead of percentage (#11681)
* Change language statistics to save size instead of percentage in database Co-Authored-By: Cirno the Strongest <1447794+CirnoT@users.noreply.github.com> * Do not exclude if only language * Fix edge cases with special languages Co-authored-by: Cirno the Strongest <1447794+CirnoT@users.noreply.github.com>
-rw-r--r-- models/migrations/migrations.go 2
-rw-r--r-- models/migrations/v140.go 56
-rw-r--r-- models/repo_language_stats.go 100
-rw-r--r-- modules/git/repo_language_stats.go 25
-rw-r--r-- modules/indexer/stats/indexer_test.go 3
5 files changed, 149 insertions, 37 deletions
diff --git a/models/migrations/migrations.go b/models/migrations/migrations.go
index 00d84da2e8..869661aee4 100644
--- a/models/migrations/migrations.go
+++ b/models/migrations/migrations.go
@@ -212,6 +212,8 @@ var migrations = []Migration{
NewMigration("Add ResolveDoerID to Comment table", addResolveDoerIDCommentColumn),
// v139 -> v140
NewMigration("prepend refs/heads/ to issue refs", prependRefsHeadsToIssueRefs),
+ // v140 -> v141
+ NewMigration("Save detected language file size to database instead of percent", fixLanguageStatsToSaveSize),
}
// GetCurrentDBVersion returns the current db version
diff --git a/models/migrations/v140.go b/models/migrations/v140.go
new file mode 100644
index 0000000000..871d14b84e
--- /dev/null
+++ b/models/migrations/v140.go
@@ -0,0 +1,56 @@
+// Copyright 2020 The Gitea Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package migrations
+
+import (
+ "fmt"
+
+ "code.gitea.io/gitea/modules/setting"
+
+ "xorm.io/xorm"
+)
+
+func fixLanguageStatsToSaveSize(x *xorm.Engine) error {
+ // Migration steps: sync the new Size column, reset the stored language stats, then drop the percentage column. LanguageStat lists only the new column; see models/repo_language_stats.go
+ type LanguageStat struct {
+ Size int64 `xorm:"NOT NULL DEFAULT 0"`
+ }
+
+ // RepoIndexerType specifies the repository indexer type
+ type RepoIndexerType int
+
+ const (
+ // RepoIndexerTypeCode code indexer
+ RepoIndexerTypeCode RepoIndexerType = iota // 0
+ // RepoIndexerTypeStats repository stats indexer
+ RepoIndexerTypeStats // 1
+ )
+
+ // RepoIndexerStatus lists only the field this migration sets; see models/repo_indexer.go
+ type RepoIndexerStatus struct {
+ IndexerType RepoIndexerType `xorm:"INDEX(s) NOT NULL DEFAULT 0"`
+ }
+
+ if err := x.Sync2(new(LanguageStat)); err != nil { // sync the schema with the local LanguageStat struct (the Size column)
+ return fmt.Errorf("Sync2: %v", err)
+ }
+
+ x.Delete(&RepoIndexerStatus{IndexerType: RepoIndexerTypeStats}) // delete the stats-indexer status rows; NOTE(review): the returned count and error are discarded - confirm this is intentional
+
+ // Choose the statement used to empty the table: TRUNCATE TABLE by default, DELETE FROM when the database is SQLite3
+ truncExpr := "TRUNCATE TABLE"
+ if setting.Database.UseSQLite3 {
+ truncExpr = "DELETE FROM"
+ }
+
+ // Remove every row from language_stat
+ if _, err := x.Exec(fmt.Sprintf("%s language_stat", truncExpr)); err != nil {
+ return err
+ }
+
+ sess := x.NewSession()
+ defer sess.Close()
+ return dropTableColumns(sess, "language_stat", "percentage") // drop the old percentage column; dropTableColumns is a helper not shown in this diff
+}
diff --git a/models/repo_language_stats.go b/models/repo_language_stats.go
index 5f1aed1f30..d08782eaf8 100644
--- a/models/repo_language_stats.go
+++ b/models/repo_language_stats.go
@@ -20,11 +20,28 @@ type LanguageStat struct {
CommitID string
IsPrimary bool
Language string `xorm:"VARCHAR(30) UNIQUE(s) INDEX NOT NULL"`
- Percentage float32 `xorm:"NUMERIC(5,2) NOT NULL DEFAULT 0"`
+ Percentage float32 `xorm:"-"`
+ Size int64 `xorm:"NOT NULL DEFAULT 0"`
Color string `xorm:"-"`
CreatedUnix timeutil.TimeStamp `xorm:"INDEX CREATED"`
}
+// specialLanguages is the set of languages that are excluded from the percentage
+// calculation unless the repository contains nothing but such languages. Only languages
+// that are not normally considered to be code should be listed here.
+var specialLanguages = map[string]struct{}{
+ "XML": {},
+ "JSON": {},
+ "TOML": {},
+ "YAML": {},
+ "INI": {},
+ "SQL": {},
+ "SVG": {},
+ "Text": {},
+ "Markdown": {},
+ "other": {},
+}
+
// LanguageStatList defines a list of language statistics
type LanguageStatList []*LanguageStat
@@ -34,12 +51,53 @@ func (stats LanguageStatList) loadAttributes() {
}
}
+func (stats LanguageStatList) getLanguagePercentages() map[string]float32 {
+ langPerc := make(map[string]float32) // result: language name -> percentage with one decimal place
+ var otherPerc float32 = 100 // what is left of 100 after subtracting every listed language
+ var total int64 // sum of Size over the languages that take part in the calculation
+ // Special languages are skipped only when the list holds at least one non-special language
+ var skipSpecial bool
+ for _, stat := range stats {
+ if _, ok := specialLanguages[stat.Language]; !ok {
+ skipSpecial = true
+ break
+ }
+ }
+ for _, stat := range stats {
+ // First pass: add up the sizes, leaving out special languages when skipSpecial is set
+ if _, ok := specialLanguages[stat.Language]; ok && skipSpecial {
+ continue
+ }
+ total += stat.Size
+ }
+ if total > 0 {
+ for _, stat := range stats {
+ // Second pass: apply the same exclusion as the first pass, then convert sizes to percentages
+ if _, ok := specialLanguages[stat.Language]; ok && skipSpecial {
+ continue
+ }
+ perc := float32(math.Round(float64(stat.Size)/float64(total)*1000) / 10) // share of total in percent, rounded to one decimal place
+ if perc <= 0.1 { // shares of 0.1 or less are not listed; they remain part of otherPerc
+ continue
+ }
+ otherPerc -= perc
+ langPerc[stat.Language] = perc
+ }
+ otherPerc = float32(math.Round(float64(otherPerc)*10) / 10) // round the remainder to one decimal place
+ } else {
+ otherPerc = 100 // nothing was counted: the whole 100 is reported as "other"
+ }
+ if otherPerc > 0 {
+ langPerc["other"] = otherPerc // NOTE(review): this overwrites an "other" entry stored by the loop above, which can happen when only special languages are present - confirm intended
+ }
+ return langPerc
+}
+}
+
func (repo *Repository) getLanguageStats(e Engine) (LanguageStatList, error) {
stats := make(LanguageStatList, 0, 6)
- if err := e.Where("`repo_id` = ?", repo.ID).Desc("`percentage`").Find(&stats); err != nil {
+ if err := e.Where("`repo_id` = ?", repo.ID).Desc("`size`").Find(&stats); err != nil {
return nil, err
}
- stats.loadAttributes()
return stats, nil
}
@@ -54,13 +112,18 @@ func (repo *Repository) GetTopLanguageStats(limit int) (LanguageStatList, error)
if err != nil {
return nil, err
}
+ perc := stats.getLanguagePercentages()
topstats := make(LanguageStatList, 0, limit)
var other float32
for i := range stats {
+ if _, ok := perc[stats[i].Language]; !ok {
+ continue
+ }
if stats[i].Language == "other" || len(topstats) >= limit {
- other += stats[i].Percentage
+ other += perc[stats[i].Language]
continue
}
+ stats[i].Percentage = perc[stats[i].Language]
topstats = append(topstats, stats[i])
}
if other > 0 {
@@ -71,11 +134,12 @@ func (repo *Repository) GetTopLanguageStats(limit int) (LanguageStatList, error)
Percentage: float32(math.Round(float64(other)*10) / 10),
})
}
+ topstats.loadAttributes()
return topstats, nil
}
// UpdateLanguageStats updates the language statistics for repository
-func (repo *Repository) UpdateLanguageStats(commitID string, stats map[string]float32) error {
+func (repo *Repository) UpdateLanguageStats(commitID string, stats map[string]int64) error {
sess := x.NewSession()
if err := sess.Begin(); err != nil {
return err
@@ -87,15 +151,15 @@ func (repo *Repository) UpdateLanguageStats(commitID string, stats map[string]fl
return err
}
var topLang string
- var p float32
- for lang, perc := range stats {
- if perc > p {
- p = perc
+ var s int64
+ for lang, size := range stats {
+ if size > s {
+ s = size
topLang = strings.ToLower(lang)
}
}
- for lang, perc := range stats {
+ for lang, size := range stats {
upd := false
llang := strings.ToLower(lang)
for _, s := range oldstats {
@@ -103,8 +167,8 @@ func (repo *Repository) UpdateLanguageStats(commitID string, stats map[string]fl
if strings.ToLower(s.Language) == llang {
s.CommitID = commitID
s.IsPrimary = llang == topLang
- s.Percentage = perc
- if _, err := sess.ID(s.ID).Cols("`commit_id`", "`percentage`", "`is_primary`").Update(s); err != nil {
+ s.Size = size
+ if _, err := sess.ID(s.ID).Cols("`commit_id`", "`size`", "`is_primary`").Update(s); err != nil {
return err
}
upd = true
@@ -114,11 +178,11 @@ func (repo *Repository) UpdateLanguageStats(commitID string, stats map[string]fl
// Insert new language
if !upd {
if _, err := sess.Insert(&LanguageStat{
- RepoID: repo.ID,
- CommitID: commitID,
- IsPrimary: llang == topLang,
- Language: lang,
- Percentage: perc,
+ RepoID: repo.ID,
+ CommitID: commitID,
+ IsPrimary: llang == topLang,
+ Language: lang,
+ Size: size,
}); err != nil {
return err
}
@@ -153,7 +217,7 @@ func CopyLanguageStat(originalRepo, destRepo *Repository) error {
return err
}
RepoLang := make(LanguageStatList, 0, 6)
- if err := sess.Where("`repo_id` = ?", originalRepo.ID).Desc("`percentage`").Find(&RepoLang); err != nil {
+ if err := sess.Where("`repo_id` = ?", originalRepo.ID).Desc("`size`").Find(&RepoLang); err != nil {
return err
}
if len(RepoLang) > 0 {
diff --git a/modules/git/repo_language_stats.go b/modules/git/repo_language_stats.go
index 8ff8fa20c1..d623d6f57d 100644
--- a/modules/git/repo_language_stats.go
+++ b/modules/git/repo_language_stats.go
@@ -8,7 +8,6 @@ import (
"bytes"
"io"
"io/ioutil"
- "math"
"code.gitea.io/gitea/modules/analyze"
@@ -21,7 +20,7 @@ import (
const fileSizeLimit int64 = 16 * 1024 * 1024
// GetLanguageStats calculates language stats for git repository at specified commit
-func (repo *Repository) GetLanguageStats(commitID string) (map[string]float32, error) {
+func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, error) {
r, err := git.PlainOpen(repo.Path)
if err != nil {
return nil, err
@@ -43,7 +42,6 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]float32, e
}
sizes := make(map[string]int64)
- var total int64
err = tree.Files().ForEach(func(f *object.File) error {
if enry.IsVendor(f.Name) || enry.IsDotFile(f.Name) ||
enry.IsDocumentation(f.Name) || enry.IsConfiguration(f.Name) {
@@ -60,11 +58,10 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]float32, e
language := analyze.GetCodeLanguage(f.Name, content)
if language == enry.OtherLanguage || language == "" {
- return nil
+ language = "other"
}
sizes[language] += f.Size
- total += f.Size
return nil
})
@@ -72,21 +69,11 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]float32, e
return nil, err
}
- stats := make(map[string]float32)
- var otherPerc float32 = 100
- for language, size := range sizes {
- perc := float32(math.Round(float64(size)/float64(total)*1000) / 10)
- if perc <= 0.1 {
- continue
- }
- otherPerc -= perc
- stats[language] = perc
+ if len(sizes) == 0 {
+ sizes["other"] = 0
}
- otherPerc = float32(math.Round(float64(otherPerc)*10) / 10)
- if otherPerc > 0 {
- stats["other"] = otherPerc
- }
- return stats, nil
+
+ return sizes, nil
}
func readFile(f *object.File, limit int64) ([]byte, error) {
diff --git a/modules/indexer/stats/indexer_test.go b/modules/indexer/stats/indexer_test.go
index 29d0f6dbe4..b60c6d9bb4 100644
--- a/modules/indexer/stats/indexer_test.go
+++ b/modules/indexer/stats/indexer_test.go
@@ -34,6 +34,9 @@ func TestRepoStatsIndex(t *testing.T) {
repo, err := models.GetRepositoryByID(1)
assert.NoError(t, err)
+ status, err := repo.GetIndexerStatus(models.RepoIndexerTypeStats)
+ assert.NoError(t, err)
+ assert.Equal(t, "65f1bf27bc3bf70f64657658635e66094edbcb4d", status.CommitSha)
langs, err := repo.GetTopLanguageStats(5)
assert.NoError(t, err)
assert.Len(t, langs, 1)