summaryrefslogtreecommitdiffstats
path: root/modules
diff options
context:
space:
mode:
Diffstat (limited to 'modules')
-rw-r--r--modules/git/repo_language_stats.go116
-rw-r--r--modules/indexer/code/bleve.go2
-rw-r--r--modules/indexer/code/git.go7
-rw-r--r--modules/indexer/code/queue.go2
-rw-r--r--modules/indexer/stats/db.go54
-rw-r--r--modules/indexer/stats/indexer.go85
-rw-r--r--modules/indexer/stats/indexer_test.go42
-rw-r--r--modules/indexer/stats/queue.go43
-rw-r--r--modules/notification/indexer/indexer.go7
9 files changed, 353 insertions, 5 deletions
diff --git a/modules/git/repo_language_stats.go b/modules/git/repo_language_stats.go
new file mode 100644
index 0000000000..ffe6dd0848
--- /dev/null
+++ b/modules/git/repo_language_stats.go
@@ -0,0 +1,116 @@
+// Copyright 2020 The Gitea Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package git
+
+import (
+ "bytes"
+ "io"
+ "io/ioutil"
+ "math"
+ "path/filepath"
+
+ "github.com/src-d/enry/v2"
+ "gopkg.in/src-d/go-git.v4"
+ "gopkg.in/src-d/go-git.v4/plumbing"
+ "gopkg.in/src-d/go-git.v4/plumbing/object"
+)
+
+const fileSizeLimit int64 = 16 * 1024 * 1024
+
+// GetLanguageStats calculates language stats for git repository at specified commit.
+//
+// commitID is resolved as a revision in the repository at repo.Path and every
+// file in that commit's tree is visited. Vendored files, dot files,
+// documentation files and configuration files are ignored, as are empty
+// files and files for which no language can be detected.
+//
+// The returned map goes from language name to that language's share of the
+// counted bytes, as a percentage rounded to one decimal place. Shares of 0.1%
+// or less are not listed individually; whatever is left of 100% after the
+// listed languages is reported under the "other" key.
+func (repo *Repository) GetLanguageStats(commitID string) (map[string]float32, error) {
+	r, err := git.PlainOpen(repo.Path)
+	if err != nil {
+		return nil, err
+	}
+
+	rev, err := r.ResolveRevision(plumbing.Revision(commitID))
+	if err != nil {
+		return nil, err
+	}
+
+	commit, err := r.CommitObject(*rev)
+	if err != nil {
+		return nil, err
+	}
+
+	tree, err := commit.Tree()
+	if err != nil {
+		return nil, err
+	}
+
+	sizes := make(map[string]int64)
+	var total int64
+	err = tree.Files().ForEach(func(f *object.File) error {
+		// Empty files carry no weight. Skipping them guarantees that total is
+		// non-zero whenever sizes has an entry; otherwise a tree whose only
+		// recognised files are empty would yield 0/0 = NaN percentages below.
+		if f.Size == 0 {
+			return nil
+		}
+
+		if enry.IsVendor(f.Name) || enry.IsDotFile(f.Name) ||
+			enry.IsDocumentation(f.Name) || enry.IsConfiguration(f.Name) {
+			return nil
+		}
+
+		// TODO: Use .gitattributes file for linguist overrides
+
+		// Try the cheap name-based detectors first and only read the file
+		// content when neither of them recognises the file.
+		language, ok := enry.GetLanguageByExtension(f.Name)
+		if !ok {
+			if language, ok = enry.GetLanguageByFilename(f.Name); !ok {
+				content, err := readFile(f, fileSizeLimit)
+				if err != nil {
+					// Best effort: an unreadable file is left out of the
+					// statistics instead of failing the whole calculation.
+					return nil
+				}
+
+				language = enry.GetLanguage(filepath.Base(f.Name), content)
+				if language == enry.OtherLanguage {
+					return nil
+				}
+			}
+		}
+
+		if language != "" {
+			sizes[language] += f.Size
+			total += f.Size
+		}
+
+		return nil
+	})
+	if err != nil {
+		return nil, err
+	}
+
+	stats := make(map[string]float32)
+	var otherPerc float32 = 100
+	for language, size := range sizes {
+		// Percentage with one decimal place.
+		perc := float32(math.Round(float64(size)/float64(total)*1000) / 10)
+		if perc <= 0.1 {
+			continue
+		}
+		otherPerc -= perc
+		stats[language] = perc
+	}
+	// Round again to drop the float32 error accumulated by the subtractions.
+	otherPerc = float32(math.Round(float64(otherPerc)*10) / 10)
+	if otherPerc > 0 {
+		stats["other"] = otherPerc
+	}
+	return stats, nil
+}
+
+func readFile(f *object.File, limit int64) ([]byte, error) {
+ r, err := f.Reader()
+ if err != nil {
+ return nil, err
+ }
+ defer r.Close()
+
+ if limit <= 0 {
+ return ioutil.ReadAll(r)
+ }
+
+ size := f.Size
+ if limit > 0 && size > limit {
+ size = limit
+ }
+ buf := bytes.NewBuffer(nil)
+ buf.Grow(int(size))
+ _, err = io.Copy(buf, io.LimitReader(r, limit))
+ return buf.Bytes(), err
+}
diff --git a/modules/indexer/code/bleve.go b/modules/indexer/code/bleve.go
index 339dca74a1..6052304f83 100644
--- a/modules/indexer/code/bleve.go
+++ b/modules/indexer/code/bleve.go
@@ -267,7 +267,7 @@ func (b *BleveIndexer) Index(repoID int64) error {
if err = batch.Flush(); err != nil {
return err
}
- return repo.UpdateIndexerStatus(sha)
+ return repo.UpdateIndexerStatus(models.RepoIndexerTypeCode, sha)
}
// Delete deletes indexes by ids
diff --git a/modules/indexer/code/git.go b/modules/indexer/code/git.go
index 114d5a9e6d..37ab5ac3d3 100644
--- a/modules/indexer/code/git.go
+++ b/modules/indexer/code/git.go
@@ -35,11 +35,12 @@ func getDefaultBranchSha(repo *models.Repository) (string, error) {
// getRepoChanges returns changes to repo since last indexer update
func getRepoChanges(repo *models.Repository, revision string) (*repoChanges, error) {
- if err := repo.GetIndexerStatus(); err != nil {
+ status, err := repo.GetIndexerStatus(models.RepoIndexerTypeCode)
+ if err != nil {
return nil, err
}
- if len(repo.IndexerStatus.CommitSha) == 0 {
+ if len(status.CommitSha) == 0 {
return genesisChanges(repo, revision)
}
return nonGenesisChanges(repo, revision)
@@ -98,7 +99,7 @@ func genesisChanges(repo *models.Repository, revision string) (*repoChanges, err
// nonGenesisChanges get changes since the previous indexer update
func nonGenesisChanges(repo *models.Repository, revision string) (*repoChanges, error) {
diffCmd := git.NewCommand("diff", "--name-status",
- repo.IndexerStatus.CommitSha, revision)
+ repo.CodeIndexerStatus.CommitSha, revision)
stdout, err := diffCmd.RunInDir(repo.RepoPath())
if err != nil {
// previous commit sha may have been removed by a force push, so
diff --git a/modules/indexer/code/queue.go b/modules/indexer/code/queue.go
index 4eeb6ac7d4..94675559ea 100644
--- a/modules/indexer/code/queue.go
+++ b/modules/indexer/code/queue.go
@@ -109,7 +109,7 @@ func populateRepoIndexer() {
return
default:
}
- ids, err := models.GetUnindexedRepos(maxRepoID, 0, 50)
+ ids, err := models.GetUnindexedRepos(models.RepoIndexerTypeCode, maxRepoID, 0, 50)
if err != nil {
log.Error("populateRepoIndexer: %v", err)
return
diff --git a/modules/indexer/stats/db.go b/modules/indexer/stats/db.go
new file mode 100644
index 0000000000..fe219b443f
--- /dev/null
+++ b/modules/indexer/stats/db.go
@@ -0,0 +1,54 @@
+// Copyright 2020 The Gitea Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package stats
+
+import (
+ "code.gitea.io/gitea/models"
+ "code.gitea.io/gitea/modules/git"
+)
+
+// DBIndexer implements the Indexer interface by saving repository language statistics to the database
+type DBIndexer struct {
+}
+
+// Index refreshes the language statistics of the repository with the given
+// ID. The statistics are computed for the latest commit of the repository's
+// default branch and saved through repo.UpdateLanguageStats; nothing is done
+// when the stats indexer status already records that commit.
+func (db *DBIndexer) Index(id int64) error {
+	repo, err := models.GetRepositoryByID(id)
+	if err != nil {
+		return err
+	}
+	indexerStatus, err := repo.GetIndexerStatus(models.RepoIndexerTypeStats)
+	if err != nil {
+		return err
+	}
+
+	gitRepo, err := git.OpenRepository(repo.RepoPath())
+	if err != nil {
+		return err
+	}
+	defer gitRepo.Close()
+
+	// Head of the default branch is the commit the stats are computed for.
+	headID, err := gitRepo.GetBranchCommitID(repo.DefaultBranch)
+	if err != nil {
+		return err
+	}
+
+	// Stats for this commit were already stored, so there is nothing to do.
+	if headID == indexerStatus.CommitSha {
+		return nil
+	}
+
+	langStats, err := gitRepo.GetLanguageStats(headID)
+	if err != nil {
+		return err
+	}
+	return repo.UpdateLanguageStats(headID, langStats)
+}
+
+// Close is a no-op; DBIndexer has no fields and therefore nothing to release
+func (db *DBIndexer) Close() {
+}
diff --git a/modules/indexer/stats/indexer.go b/modules/indexer/stats/indexer.go
new file mode 100644
index 0000000000..4d8a174ff9
--- /dev/null
+++ b/modules/indexer/stats/indexer.go
@@ -0,0 +1,85 @@
+// Copyright 2020 The Gitea Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package stats
+
+import (
+ "code.gitea.io/gitea/models"
+ "code.gitea.io/gitea/modules/graceful"
+ "code.gitea.io/gitea/modules/log"
+)
+
+// Indexer defines an interface to index repository stats
+type Indexer interface {
+	// Index updates the stats of the repository with the given ID
+	Index(id int64) error
+	// Close shuts the indexer down
+	Close()
+}
+
+// indexer is the package-level Indexer instance, assigned by Init
+var indexer Indexer
+
+// Init installs a DBIndexer as the package indexer, creates the stats queue
+// and starts a background goroutine that queues the existing repositories
+// reported as unindexed.
+func Init() error {
+	indexer = &DBIndexer{}
+
+	err := initStatsQueue()
+	if err != nil {
+		return err
+	}
+
+	go populateRepoIndexer()
+	return nil
+}
+
+// populateRepoIndexer populate the repo indexer with pre-existing data. This
+// should only be run when the indexer is created for the first time.
+func populateRepoIndexer() {
+ log.Info("Populating the repo stats indexer with existing repositories")
+
+ isShutdown := graceful.GetManager().IsShutdown()
+
+ exist, err := models.IsTableNotEmpty("repository")
+ if err != nil {
+ log.Fatal("System error: %v", err)
+ } else if !exist {
+ return
+ }
+
+ var maxRepoID int64
+ if maxRepoID, err = models.GetMaxID("repository"); err != nil {
+ log.Fatal("System error: %v", err)
+ }
+
+ // start with the maximum existing repo ID and work backwards, so that we
+ // don't include repos that are created after gitea starts; such repos will
+ // already be added to the indexer, and we don't need to add them again.
+ for maxRepoID > 0 {
+ select {
+ case <-isShutdown:
+ log.Info("Repository Stats Indexer population shutdown before completion")
+ return
+ default:
+ }
+ ids, err := models.GetUnindexedRepos(models.RepoIndexerTypeStats, maxRepoID, 0, 50)
+ if err != nil {
+ log.Error("populateRepoIndexer: %v", err)
+ return
+ } else if len(ids) == 0 {
+ break
+ }
+ for _, id := range ids {
+ select {
+ case <-isShutdown:
+ log.Info("Repository Stats Indexer population shutdown before completion")
+ return
+ default:
+ }
+ if err := statsQueue.Push(id); err != nil {
+ log.Error("statsQueue.Push: %v", err)
+ }
+ maxRepoID = id - 1
+ }
+ }
+ log.Info("Done (re)populating the repo stats indexer with existing repositories")
+}
diff --git a/modules/indexer/stats/indexer_test.go b/modules/indexer/stats/indexer_test.go
new file mode 100644
index 0000000000..29d0f6dbe4
--- /dev/null
+++ b/modules/indexer/stats/indexer_test.go
@@ -0,0 +1,42 @@
+// Copyright 2020 The Gitea Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package stats
+
+import (
+ "path/filepath"
+ "testing"
+ "time"
+
+ "code.gitea.io/gitea/models"
+ "code.gitea.io/gitea/modules/setting"
+
+ "gopkg.in/ini.v1"
+
+ "github.com/stretchr/testify/assert"
+)
+
+// TestMain hands the package's tests to models.MainTest, passing the path
+// three directories above this package.
+func TestMain(m *testing.M) {
+	rootDir := filepath.Join("..", "..", "..")
+	models.MainTest(m, rootDir)
+}
+
+// TestRepoStatsIndex initialises the stats indexer against the test database
+// and checks that repository 1 ends up with a single language entry, "other"
+// at 100%.
+func TestRepoStatsIndex(t *testing.T) {
+	assert.NoError(t, models.PrepareTestDatabase())
+	setting.Cfg = ini.Empty()
+
+	setting.NewQueueService()
+
+	err := Init()
+	assert.NoError(t, err)
+
+	// Indexing runs asynchronously on the stats queue; give it time to finish.
+	time.Sleep(5 * time.Second)
+
+	// The assert helpers report a failure but do not stop the test, so bail
+	// out explicitly before using a value that a failed step left unusable.
+	repo, err := models.GetRepositoryByID(1)
+	if !assert.NoError(t, err) {
+		return
+	}
+	langs, err := repo.GetTopLanguageStats(5)
+	assert.NoError(t, err)
+	if !assert.Len(t, langs, 1) {
+		// Indexing langs[0] on an empty result would panic and hide the
+		// assertion message above.
+		return
+	}
+	assert.Equal(t, "other", langs[0].Language)
+	assert.Equal(t, float32(100), langs[0].Percentage)
+}
diff --git a/modules/indexer/stats/queue.go b/modules/indexer/stats/queue.go
new file mode 100644
index 0000000000..43a4de5ac9
--- /dev/null
+++ b/modules/indexer/stats/queue.go
@@ -0,0 +1,43 @@
+// Copyright 2020 The Gitea Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package stats
+
+import (
+ "fmt"
+
+ "code.gitea.io/gitea/models"
+ "code.gitea.io/gitea/modules/graceful"
+ "code.gitea.io/gitea/modules/log"
+ "code.gitea.io/gitea/modules/queue"
+)
+
+// statsQueue is the queue of repository IDs waiting for a stats update
+var statsQueue queue.Queue
+
+// handle is the stats queue handler: every datum is expected to be a
+// repository ID (int64) and is passed to indexer.Index. Failures are logged
+// and do not stop the remaining items from being processed.
+func handle(data ...queue.Data) {
+	for _, datum := range data {
+		repoID, ok := datum.(int64)
+		if !ok {
+			// An unchecked assertion would panic the queue worker on a
+			// malformed item; log it and carry on instead.
+			log.Error("stats queue: unexpected data type %T, expected int64", datum)
+			continue
+		}
+		if err := indexer.Index(repoID); err != nil {
+			log.Error("stats queue indexer.Index(%d) failed: %v", repoID, err)
+		}
+	}
+}
+
+// initStatsQueue creates the "repo_stats_update" queue, stores it in
+// statsQueue and starts running it under the graceful manager. It returns an
+// error when the queue could not be created.
+func initStatsQueue() error {
+	// Use the two-value type assertion: the single-value form panics on a nil
+	// result, which would make the failure branch below unreachable.
+	q, ok := queue.CreateQueue("repo_stats_update", handle, int64(0)).(queue.Queue)
+	if !ok || q == nil {
+		return fmt.Errorf("Unable to create repo_stats_update Queue")
+	}
+	statsQueue = q
+
+	go graceful.GetManager().RunWithShutdownFns(statsQueue.Run)
+
+	return nil
+}
+
+// UpdateRepoIndexer queues the given repository for a stats update by
+// pushing its ID onto the stats queue.
+func UpdateRepoIndexer(repo *models.Repository) error {
+	repoID := repo.ID
+	return statsQueue.Push(repoID)
+}
diff --git a/modules/notification/indexer/indexer.go b/modules/notification/indexer/indexer.go
index 4bce990739..6caae6fa65 100644
--- a/modules/notification/indexer/indexer.go
+++ b/modules/notification/indexer/indexer.go
@@ -9,6 +9,7 @@ import (
"code.gitea.io/gitea/modules/git"
code_indexer "code.gitea.io/gitea/modules/indexer/code"
issue_indexer "code.gitea.io/gitea/modules/indexer/issues"
+ stats_indexer "code.gitea.io/gitea/modules/indexer/stats"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/notification/base"
"code.gitea.io/gitea/modules/repository"
@@ -117,12 +118,18 @@ func (r *indexerNotifier) NotifyMigrateRepository(doer *models.User, u *models.U
if setting.Indexer.RepoIndexerEnabled && !repo.IsEmpty {
code_indexer.UpdateRepoIndexer(repo)
}
+ if err := stats_indexer.UpdateRepoIndexer(repo); err != nil {
+ log.Error("stats_indexer.UpdateRepoIndexer(%d) failed: %v", repo.ID, err)
+ }
}
func (r *indexerNotifier) NotifyPushCommits(pusher *models.User, repo *models.Repository, refName, oldCommitID, newCommitID string, commits *repository.PushCommits) {
if setting.Indexer.RepoIndexerEnabled && refName == git.BranchPrefix+repo.DefaultBranch {
code_indexer.UpdateRepoIndexer(repo)
}
+ if err := stats_indexer.UpdateRepoIndexer(repo); err != nil {
+ log.Error("stats_indexer.UpdateRepoIndexer(%d) failed: %v", repo.ID, err)
+ }
}
func (r *indexerNotifier) NotifyIssueChangeContent(doer *models.User, issue *models.Issue, oldContent string) {