diff options
author | Lauris BH <lauris@nix.lv> | 2020-02-11 11:34:17 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-02-11 11:34:17 +0200 |
commit | ad2642a8aac9facb217a8471df1d3e00f1214e92 (patch) | |
tree | ea198b2b3130d22bb60886b6ba0a1df352f160ff /modules | |
parent | 37892be63580e40ced80e041ff2e7dabb2e80866 (diff) | |
download | gitea-ad2642a8aac9facb217a8471df1d3e00f1214e92.tar.gz gitea-ad2642a8aac9facb217a8471df1d3e00f1214e92.zip |
Language statistics bar for repositories (#8037)
* Implementation for calculating language statistics
Implement saving code language statistics to database
Implement rendering language stats
Add primary language to show in repository list
Implement repository stats indexer queue
Add indexer test
Refactor to use queue module
* Do not timeout for queues
Diffstat (limited to 'modules')
-rw-r--r-- | modules/git/repo_language_stats.go | 116 | ||||
-rw-r--r-- | modules/indexer/code/bleve.go | 2 | ||||
-rw-r--r-- | modules/indexer/code/git.go | 7 | ||||
-rw-r--r-- | modules/indexer/code/queue.go | 2 | ||||
-rw-r--r-- | modules/indexer/stats/db.go | 54 | ||||
-rw-r--r-- | modules/indexer/stats/indexer.go | 85 | ||||
-rw-r--r-- | modules/indexer/stats/indexer_test.go | 42 | ||||
-rw-r--r-- | modules/indexer/stats/queue.go | 43 | ||||
-rw-r--r-- | modules/notification/indexer/indexer.go | 7 |
9 files changed, 353 insertions, 5 deletions
diff --git a/modules/git/repo_language_stats.go b/modules/git/repo_language_stats.go new file mode 100644 index 0000000000..ffe6dd0848 --- /dev/null +++ b/modules/git/repo_language_stats.go @@ -0,0 +1,116 @@ +// Copyright 2020 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package git + +import ( + "bytes" + "io" + "io/ioutil" + "math" + "path/filepath" + + "github.com/src-d/enry/v2" + "gopkg.in/src-d/go-git.v4" + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/object" +) + +const fileSizeLimit int64 = 16 * 1024 * 1024 + +// GetLanguageStats calculates language stats for git repository at specified commit +func (repo *Repository) GetLanguageStats(commitID string) (map[string]float32, error) { + r, err := git.PlainOpen(repo.Path) + if err != nil { + return nil, err + } + + rev, err := r.ResolveRevision(plumbing.Revision(commitID)) + if err != nil { + return nil, err + } + + commit, err := r.CommitObject(*rev) + if err != nil { + return nil, err + } + + tree, err := commit.Tree() + if err != nil { + return nil, err + } + + sizes := make(map[string]int64) + var total int64 + err = tree.Files().ForEach(func(f *object.File) error { + if enry.IsVendor(f.Name) || enry.IsDotFile(f.Name) || + enry.IsDocumentation(f.Name) || enry.IsConfiguration(f.Name) { + return nil + } + + // TODO: Use .gitattributes file for linguist overrides + + language, ok := enry.GetLanguageByExtension(f.Name) + if !ok { + if language, ok = enry.GetLanguageByFilename(f.Name); !ok { + content, err := readFile(f, fileSizeLimit) + if err != nil { + return nil + } + + language = enry.GetLanguage(filepath.Base(f.Name), content) + if language == enry.OtherLanguage { + return nil + } + } + } + + if language != "" { + sizes[language] += f.Size + total += f.Size + } + + return nil + }) + if err != nil { + return nil, err + } + + stats := make(map[string]float32) + var otherPerc 
float32 = 100 + for language, size := range sizes { + perc := float32(math.Round(float64(size)/float64(total)*1000) / 10) + if perc <= 0.1 { + continue + } + otherPerc -= perc + stats[language] = perc + } + otherPerc = float32(math.Round(float64(otherPerc)*10) / 10) + if otherPerc > 0 { + stats["other"] = otherPerc + } + return stats, nil +} + +func readFile(f *object.File, limit int64) ([]byte, error) { + r, err := f.Reader() + if err != nil { + return nil, err + } + defer r.Close() + + if limit <= 0 { + return ioutil.ReadAll(r) + } + + size := f.Size + if limit > 0 && size > limit { + size = limit + } + buf := bytes.NewBuffer(nil) + buf.Grow(int(size)) + _, err = io.Copy(buf, io.LimitReader(r, limit)) + return buf.Bytes(), err +} diff --git a/modules/indexer/code/bleve.go b/modules/indexer/code/bleve.go index 339dca74a1..6052304f83 100644 --- a/modules/indexer/code/bleve.go +++ b/modules/indexer/code/bleve.go @@ -267,7 +267,7 @@ func (b *BleveIndexer) Index(repoID int64) error { if err = batch.Flush(); err != nil { return err } - return repo.UpdateIndexerStatus(sha) + return repo.UpdateIndexerStatus(models.RepoIndexerTypeCode, sha) } // Delete deletes indexes by ids diff --git a/modules/indexer/code/git.go b/modules/indexer/code/git.go index 114d5a9e6d..37ab5ac3d3 100644 --- a/modules/indexer/code/git.go +++ b/modules/indexer/code/git.go @@ -35,11 +35,12 @@ func getDefaultBranchSha(repo *models.Repository) (string, error) { // getRepoChanges returns changes to repo since last indexer update func getRepoChanges(repo *models.Repository, revision string) (*repoChanges, error) { - if err := repo.GetIndexerStatus(); err != nil { + status, err := repo.GetIndexerStatus(models.RepoIndexerTypeCode) + if err != nil { return nil, err } - if len(repo.IndexerStatus.CommitSha) == 0 { + if len(status.CommitSha) == 0 { return genesisChanges(repo, revision) } return nonGenesisChanges(repo, revision) @@ -98,7 +99,7 @@ func genesisChanges(repo *models.Repository, revision string) 
(*repoChanges, err // nonGenesisChanges get changes since the previous indexer update func nonGenesisChanges(repo *models.Repository, revision string) (*repoChanges, error) { diffCmd := git.NewCommand("diff", "--name-status", - repo.IndexerStatus.CommitSha, revision) + repo.CodeIndexerStatus.CommitSha, revision) stdout, err := diffCmd.RunInDir(repo.RepoPath()) if err != nil { // previous commit sha may have been removed by a force push, so diff --git a/modules/indexer/code/queue.go b/modules/indexer/code/queue.go index 4eeb6ac7d4..94675559ea 100644 --- a/modules/indexer/code/queue.go +++ b/modules/indexer/code/queue.go @@ -109,7 +109,7 @@ func populateRepoIndexer() { return default: } - ids, err := models.GetUnindexedRepos(maxRepoID, 0, 50) + ids, err := models.GetUnindexedRepos(models.RepoIndexerTypeCode, maxRepoID, 0, 50) if err != nil { log.Error("populateRepoIndexer: %v", err) return diff --git a/modules/indexer/stats/db.go b/modules/indexer/stats/db.go new file mode 100644 index 0000000000..fe219b443f --- /dev/null +++ b/modules/indexer/stats/db.go @@ -0,0 +1,54 @@ +// Copyright 2020 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. 
+ +package stats + +import ( + "code.gitea.io/gitea/models" + "code.gitea.io/gitea/modules/git" +) + +// DBIndexer implements Indexer interface to use database's like search +type DBIndexer struct { +} + +// Index repository status function +func (db *DBIndexer) Index(id int64) error { + repo, err := models.GetRepositoryByID(id) + if err != nil { + return err + } + status, err := repo.GetIndexerStatus(models.RepoIndexerTypeStats) + if err != nil { + return err + } + + gitRepo, err := git.OpenRepository(repo.RepoPath()) + if err != nil { + return err + } + defer gitRepo.Close() + + // Get latest commit for default branch + commitID, err := gitRepo.GetBranchCommitID(repo.DefaultBranch) + if err != nil { + return err + } + + // Do not recalculate stats if already calculated for this commit + if status.CommitSha == commitID { + return nil + } + + // Calculate and save language statistics to database + stats, err := gitRepo.GetLanguageStats(commitID) + if err != nil { + return err + } + return repo.UpdateLanguageStats(commitID, stats) +} + +// Close dummy function +func (db *DBIndexer) Close() { +} diff --git a/modules/indexer/stats/indexer.go b/modules/indexer/stats/indexer.go new file mode 100644 index 0000000000..4d8a174ff9 --- /dev/null +++ b/modules/indexer/stats/indexer.go @@ -0,0 +1,85 @@ +// Copyright 2020 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. 
+ +package stats + +import ( + "code.gitea.io/gitea/models" + "code.gitea.io/gitea/modules/graceful" + "code.gitea.io/gitea/modules/log" +) + +// Indexer defines an interface to index repository stats +type Indexer interface { + Index(id int64) error + Close() +} + +// indexer represents a indexer instance +var indexer Indexer + +// Init initialize the repo indexer +func Init() error { + indexer = &DBIndexer{} + + if err := initStatsQueue(); err != nil { + return err + } + + go populateRepoIndexer() + + return nil +} + +// populateRepoIndexer populate the repo indexer with pre-existing data. This +// should only be run when the indexer is created for the first time. +func populateRepoIndexer() { + log.Info("Populating the repo stats indexer with existing repositories") + + isShutdown := graceful.GetManager().IsShutdown() + + exist, err := models.IsTableNotEmpty("repository") + if err != nil { + log.Fatal("System error: %v", err) + } else if !exist { + return + } + + var maxRepoID int64 + if maxRepoID, err = models.GetMaxID("repository"); err != nil { + log.Fatal("System error: %v", err) + } + + // start with the maximum existing repo ID and work backwards, so that we + // don't include repos that are created after gitea starts; such repos will + // already be added to the indexer, and we don't need to add them again. 
+ for maxRepoID > 0 { + select { + case <-isShutdown: + log.Info("Repository Stats Indexer population shutdown before completion") + return + default: + } + ids, err := models.GetUnindexedRepos(models.RepoIndexerTypeStats, maxRepoID, 0, 50) + if err != nil { + log.Error("populateRepoIndexer: %v", err) + return + } else if len(ids) == 0 { + break + } + for _, id := range ids { + select { + case <-isShutdown: + log.Info("Repository Stats Indexer population shutdown before completion") + return + default: + } + if err := statsQueue.Push(id); err != nil { + log.Error("statsQueue.Push: %v", err) + } + maxRepoID = id - 1 + } + } + log.Info("Done (re)populating the repo stats indexer with existing repositories") +} diff --git a/modules/indexer/stats/indexer_test.go b/modules/indexer/stats/indexer_test.go new file mode 100644 index 0000000000..29d0f6dbe4 --- /dev/null +++ b/modules/indexer/stats/indexer_test.go @@ -0,0 +1,42 @@ +// Copyright 2020 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. 
+ +package stats + +import ( + "path/filepath" + "testing" + "time" + + "code.gitea.io/gitea/models" + "code.gitea.io/gitea/modules/setting" + + "gopkg.in/ini.v1" + + "github.com/stretchr/testify/assert" +) + +func TestMain(m *testing.M) { + models.MainTest(m, filepath.Join("..", "..", "..")) +} + +func TestRepoStatsIndex(t *testing.T) { + assert.NoError(t, models.PrepareTestDatabase()) + setting.Cfg = ini.Empty() + + setting.NewQueueService() + + err := Init() + assert.NoError(t, err) + + time.Sleep(5 * time.Second) + + repo, err := models.GetRepositoryByID(1) + assert.NoError(t, err) + langs, err := repo.GetTopLanguageStats(5) + assert.NoError(t, err) + assert.Len(t, langs, 1) + assert.Equal(t, "other", langs[0].Language) + assert.Equal(t, float32(100), langs[0].Percentage) +} diff --git a/modules/indexer/stats/queue.go b/modules/indexer/stats/queue.go new file mode 100644 index 0000000000..43a4de5ac9 --- /dev/null +++ b/modules/indexer/stats/queue.go @@ -0,0 +1,43 @@ +// Copyright 2020 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. 
+ +package stats + +import ( + "fmt" + + "code.gitea.io/gitea/models" + "code.gitea.io/gitea/modules/graceful" + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/queue" +) + +// statsQueue represents a queue to handle repository stats updates +var statsQueue queue.Queue + +// handle passed PR IDs and test the PRs +func handle(data ...queue.Data) { + for _, datum := range data { + opts := datum.(int64) + if err := indexer.Index(opts); err != nil { + log.Error("stats queue idexer.Index(%d) failed: %v", opts, err) + } + } +} + +func initStatsQueue() error { + statsQueue = queue.CreateQueue("repo_stats_update", handle, int64(0)).(queue.Queue) + if statsQueue == nil { + return fmt.Errorf("Unable to create repo_stats_update Queue") + } + + go graceful.GetManager().RunWithShutdownFns(statsQueue.Run) + + return nil +} + +// UpdateRepoIndexer update a repository's entries in the indexer +func UpdateRepoIndexer(repo *models.Repository) error { + return statsQueue.Push(repo.ID) +} diff --git a/modules/notification/indexer/indexer.go b/modules/notification/indexer/indexer.go index 4bce990739..6caae6fa65 100644 --- a/modules/notification/indexer/indexer.go +++ b/modules/notification/indexer/indexer.go @@ -9,6 +9,7 @@ import ( "code.gitea.io/gitea/modules/git" code_indexer "code.gitea.io/gitea/modules/indexer/code" issue_indexer "code.gitea.io/gitea/modules/indexer/issues" + stats_indexer "code.gitea.io/gitea/modules/indexer/stats" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/notification/base" "code.gitea.io/gitea/modules/repository" @@ -117,12 +118,18 @@ func (r *indexerNotifier) NotifyMigrateRepository(doer *models.User, u *models.U if setting.Indexer.RepoIndexerEnabled && !repo.IsEmpty { code_indexer.UpdateRepoIndexer(repo) } + if err := stats_indexer.UpdateRepoIndexer(repo); err != nil { + log.Error("stats_indexer.UpdateRepoIndexer(%d) failed: %v", repo.ID, err) + } } func (r *indexerNotifier) NotifyPushCommits(pusher *models.User, repo 
*models.Repository, refName, oldCommitID, newCommitID string, commits *repository.PushCommits) { if setting.Indexer.RepoIndexerEnabled && refName == git.BranchPrefix+repo.DefaultBranch { code_indexer.UpdateRepoIndexer(repo) } + if err := stats_indexer.UpdateRepoIndexer(repo); err != nil { + log.Error("stats_indexer.UpdateRepoIndexer(%d) failed: %v", repo.ID, err) + } } func (r *indexerNotifier) NotifyIssueChangeContent(doer *models.User, issue *models.Issue, oldContent string) { |