summaryrefslogtreecommitdiffstats
path: root/models
diff options
context:
space:
mode:
authorAlexey Terentyev <axifnx@gmail.com>2018-06-21 12:09:46 +0300
committerLunny Xiao <xiaolunwen@gmail.com>2018-06-21 17:09:46 +0800
commit46d19c4676efe5201c5de790bcb963bfc93a95c7 (patch)
treeb99878b6b1b52bc628254e74e9c966a806f61efe /models
parent9ae7664df7caa24825cc4cee4e4121e9f1d73e59 (diff)
downloadgitea-46d19c4676efe5201c5de790bcb963bfc93a95c7.tar.gz
gitea-46d19c4676efe5201c5de790bcb963bfc93a95c7.zip
Fix topics addition (Another solution) (#4031) (#4258)
* Added topics validation, fixed repo topics duplication (#4031) Signed-off-by: Alexey Terentyev <axifnx@gmail.com> * Added tests Signed-off-by: Alexey Terentyev <axifnx@gmail.com> * Fixed fmt Signed-off-by: Alexey Terentyev <axifnx@gmail.com> * Added comments to exported functions Signed-off-by: Alexey Terentyev <axifnx@gmail.com> * Deleted RemoveDuplicateTopics function Signed-off-by: Alexey Terentyev <axifnx@gmail.com> * Fixed messages Signed-off-by: Alexey Terentyev <axifnx@gmail.com> * Added migration Signed-off-by: Alexey Terentyev <axifnx@gmail.com> * fmt migration file Signed-off-by: Alexey Terentyev <axifnx@gmail.com> * fixed lint Signed-off-by: Alexey Terentyev <axifnx@gmail.com> * Added Copyright Signed-off-by: Alexey Terentyev <axifnx@gmail.com> * Added query solution for duplicates Signed-off-by: Alexey Terentyev <axifnx@gmail.com> * Fixed migration query Signed-off-by: Alexey Terentyev <axifnx@gmail.com> * Changed RegExp. Fixed migration Signed-off-by: Alexey Terentyev <axifnx@gmail.com> * fmt migration file Signed-off-by: Alexey Terentyev <axifnx@gmail.com> * Fixed test for changed regexp Signed-off-by: Alexey Terentyev <axifnx@gmail.com> * Removed validation log messages Signed-off-by: Alexey Terentyev <axifnx@gmail.com> * Renamed migration file Signed-off-by: Alexey Terentyev <axifnx@gmail.com> * Renamed validate function Signed-off-by: Alexey Terentyev <axifnx@gmail.com>
Diffstat (limited to 'models')
-rw-r--r--models/migrations/migrations.go2
-rw-r--r--models/migrations/v68.go160
-rw-r--r--models/topic.go15
-rw-r--r--models/topic_test.go13
4 files changed, 190 insertions, 0 deletions
diff --git a/models/migrations/migrations.go b/models/migrations/migrations.go
index 2537e5712b..7732e17094 100644
--- a/models/migrations/migrations.go
+++ b/models/migrations/migrations.go
@@ -188,6 +188,8 @@ var migrations = []Migration{
NewMigration("add login source id column for public_key table", addLoginSourceIDToPublicKeyTable),
// v67 -> v68
NewMigration("remove stale watches", removeStaleWatches),
+ // v68 -> V69
+ NewMigration("Reformat and remove incorrect topics", reformatAndRemoveIncorrectTopics),
}
// Migrate database to current version
diff --git a/models/migrations/v68.go b/models/migrations/v68.go
new file mode 100644
index 0000000000..d6a0d04c53
--- /dev/null
+++ b/models/migrations/v68.go
@@ -0,0 +1,160 @@
+// Copyright 2018 The Gitea Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package migrations
+
+import (
+ "strings"
+
+ "code.gitea.io/gitea/models"
+ "code.gitea.io/gitea/modules/log"
+
+ "github.com/go-xorm/xorm"
+)
+
+func reformatAndRemoveIncorrectTopics(x *xorm.Engine) (err error) {
+ log.Info("This migration could take up to minutes, please be patient.")
+ type Topic struct {
+ ID int64
+ Name string `xorm:"unique"`
+ }
+
+ sess := x.NewSession()
+ defer sess.Close()
+
+ const batchSize = 100
+ touchedRepo := make(map[int64]struct{})
+ topics := make([]*Topic, 0, batchSize)
+ delTopicIDs := make([]int64, 0, batchSize)
+ ids := make([]int64, 0, 30)
+
+ if err := sess.Begin(); err != nil {
+ return err
+ }
+ log.Info("Validating existed topics...")
+ for start := 0; ; start += batchSize {
+ topics = topics[:0]
+ if err := sess.Asc("id").Limit(batchSize, start).Find(&topics); err != nil {
+ return err
+ }
+ if len(topics) == 0 {
+ break
+ }
+ for _, topic := range topics {
+ if models.ValidateTopic(topic.Name) {
+ continue
+ }
+ topic.Name = strings.Replace(strings.TrimSpace(strings.ToLower(topic.Name)), " ", "-", -1)
+
+ if err := sess.Table("repo_topic").Cols("repo_id").
+ Where("topic_id = ?", topic.ID).Find(&ids); err != nil {
+ return err
+ }
+ for _, id := range ids {
+ touchedRepo[id] = struct{}{}
+ }
+
+ if models.ValidateTopic(topic.Name) {
+ log.Info("Updating topic: id = %v, name = %v", topic.ID, topic.Name)
+ if _, err := sess.Table("topic").ID(topic.ID).
+ Update(&Topic{Name: topic.Name}); err != nil {
+ return err
+ }
+ } else {
+ delTopicIDs = append(delTopicIDs, topic.ID)
+ }
+ }
+ }
+
+ log.Info("Deleting incorrect topics...")
+ for start := 0; ; start += batchSize {
+ if (start + batchSize) < len(delTopicIDs) {
+ ids = delTopicIDs[start:(start + batchSize)]
+ } else {
+ ids = delTopicIDs[start:]
+ }
+
+ log.Info("Deleting 'repo_topic' rows for topics with ids = %v", ids)
+ if _, err := sess.In("topic_id", ids).Delete(&models.RepoTopic{}); err != nil {
+ return err
+ }
+
+ log.Info("Deleting topics with id = %v", ids)
+ if _, err := sess.In("id", ids).Delete(&Topic{}); err != nil {
+ return err
+ }
+
+ if len(ids) < batchSize {
+ break
+ }
+ }
+
+ repoTopics := make([]*models.RepoTopic, 0, batchSize)
+ delRepoTopics := make([]*models.RepoTopic, 0, batchSize)
+ tmpRepoTopics := make([]*models.RepoTopic, 0, 30)
+
+ log.Info("Checking the number of topics in the repositories...")
+ for start := 0; ; start += batchSize {
+ repoTopics = repoTopics[:0]
+ if err := sess.Cols("repo_id").Asc("repo_id").Limit(batchSize, start).
+ GroupBy("repo_id").Having("COUNT(*) > 25").Find(&repoTopics); err != nil {
+ return err
+ }
+ if len(repoTopics) == 0 {
+ break
+ }
+
+ log.Info("Number of repositories with more than 25 topics: %v", len(repoTopics))
+ for _, repoTopic := range repoTopics {
+ touchedRepo[repoTopic.RepoID] = struct{}{}
+
+ tmpRepoTopics = tmpRepoTopics[:0]
+ if err := sess.Where("repo_id = ?", repoTopic.RepoID).Find(&tmpRepoTopics); err != nil {
+ return err
+ }
+
+ log.Info("Repository with id = %v has %v topics", repoTopic.RepoID, len(tmpRepoTopics))
+
+ for i := len(tmpRepoTopics) - 1; i > 24; i-- {
+ delRepoTopics = append(delRepoTopics, tmpRepoTopics[i])
+ }
+ }
+ }
+
+ log.Info("Deleting superfluous topics for repositories (more than 25 topics)...")
+ for _, repoTopic := range delRepoTopics {
+ log.Info("Deleting 'repo_topic' rows for 'repository' with id = %v. Topic id = %v",
+ repoTopic.RepoID, repoTopic.TopicID)
+
+ if _, err := sess.Where("repo_id = ? AND topic_id = ?", repoTopic.RepoID,
+ repoTopic.TopicID).Delete(&models.RepoTopic{}); err != nil {
+ return err
+ }
+ if _, err := sess.Exec(
+ "UPDATE topic SET repo_count = (SELECT repo_count FROM topic WHERE id = ?) - 1 WHERE id = ?",
+ repoTopic.TopicID, repoTopic.TopicID); err != nil {
+ return err
+ }
+ }
+
+ topicNames := make([]string, 0, 30)
+ log.Info("Updating repositories 'topics' fields...")
+ for repoID := range touchedRepo {
+ if err := sess.Table("topic").Cols("name").
+ Join("INNER", "repo_topic", "topic.id = repo_topic.topic_id").
+ Where("repo_topic.repo_id = ?", repoID).Find(&topicNames); err != nil {
+ return err
+ }
+ log.Info("Updating 'topics' field for repository with id = %v", repoID)
+ if _, err := sess.ID(repoID).Cols("topics").
+ Update(&models.Repository{Topics: topicNames}); err != nil {
+ return err
+ }
+ }
+ if err := sess.Commit(); err != nil {
+ return err
+ }
+
+ return nil
+}
diff --git a/models/topic.go b/models/topic.go
index 3b1737f8af..247aac5fff 100644
--- a/models/topic.go
+++ b/models/topic.go
@@ -6,6 +6,7 @@ package models
import (
"fmt"
+ "regexp"
"strings"
"code.gitea.io/gitea/modules/util"
@@ -20,6 +21,8 @@ func init() {
)
}
+var topicPattern = regexp.MustCompile(`^[a-z0-9][a-z0-9-]*$`)
+
// Topic represents a topic of repositories
type Topic struct {
ID int64
@@ -51,6 +54,11 @@ func (err ErrTopicNotExist) Error() string {
return fmt.Sprintf("topic is not exist [name: %s]", err.Name)
}
+// ValidateTopic checks topics by length and match pattern rules
+func ValidateTopic(topic string) bool {
+ return len(topic) <= 35 && topicPattern.MatchString(topic)
+}
+
// GetTopicByName retrieves topic by name
func GetTopicByName(name string) (*Topic, error) {
var topic Topic
@@ -182,6 +190,13 @@ func SaveTopics(repoID int64, topicNames ...string) error {
}
}
+ topicNames = topicNames[:0]
+ if err := sess.Table("topic").Cols("name").
+ Join("INNER", "repo_topic", "topic.id = repo_topic.topic_id").
+ Where("repo_topic.repo_id = ?", repoID).Find(&topicNames); err != nil {
+ return err
+ }
+
if _, err := sess.ID(repoID).Cols("topics").Update(&Repository{
Topics: topicNames,
}); err != nil {
diff --git a/models/topic_test.go b/models/topic_test.go
index 472f4e52d9..ef374e557b 100644
--- a/models/topic_test.go
+++ b/models/topic_test.go
@@ -55,3 +55,16 @@ func TestAddTopic(t *testing.T) {
assert.NoError(t, err)
assert.EqualValues(t, 2, len(topics))
}
+
+func TestTopicValidator(t *testing.T) {
+ assert.True(t, ValidateTopic("12345"))
+ assert.True(t, ValidateTopic("2-test"))
+ assert.True(t, ValidateTopic("test-3"))
+ assert.True(t, ValidateTopic("first"))
+ assert.True(t, ValidateTopic("second-test-topic"))
+ assert.True(t, ValidateTopic("third-project-topic-with-max-length"))
+
+ assert.False(t, ValidateTopic("$fourth-test,topic"))
+ assert.False(t, ValidateTopic("-fifth-test-topic"))
+ assert.False(t, ValidateTopic("sixth-go-project-topic-with-excess-length"))
+}