diff options
author | Alexey Terentyev <axifnx@gmail.com> | 2018-06-21 12:09:46 +0300 |
---|---|---|
committer | Lunny Xiao <xiaolunwen@gmail.com> | 2018-06-21 17:09:46 +0800 |
commit | 46d19c4676efe5201c5de790bcb963bfc93a95c7 (patch) | |
tree | b99878b6b1b52bc628254e74e9c966a806f61efe /models | |
parent | 9ae7664df7caa24825cc4cee4e4121e9f1d73e59 (diff) | |
download | gitea-46d19c4676efe5201c5de790bcb963bfc93a95c7.tar.gz gitea-46d19c4676efe5201c5de790bcb963bfc93a95c7.zip |
Fix topics addition (Another solution) (#4031) (#4258)
* Added topics validation, fixed repo topics duplication (#4031)
Signed-off-by: Alexey Terentyev <axifnx@gmail.com>
* Added tests
Signed-off-by: Alexey Terentyev <axifnx@gmail.com>
* Fixed fmt
Signed-off-by: Alexey Terentyev <axifnx@gmail.com>
* Added comments to exported functions
Signed-off-by: Alexey Terentyev <axifnx@gmail.com>
* Deleted RemoveDuplicateTopics function
Signed-off-by: Alexey Terentyev <axifnx@gmail.com>
* Fixed messages
Signed-off-by: Alexey Terentyev <axifnx@gmail.com>
* Added migration
Signed-off-by: Alexey Terentyev <axifnx@gmail.com>
* fmt migration file
Signed-off-by: Alexey Terentyev <axifnx@gmail.com>
* fixed lint
Signed-off-by: Alexey Terentyev <axifnx@gmail.com>
* Added Copyright
Signed-off-by: Alexey Terentyev <axifnx@gmail.com>
* Added query solution for duplicates
Signed-off-by: Alexey Terentyev <axifnx@gmail.com>
* Fixed migration query
Signed-off-by: Alexey Terentyev <axifnx@gmail.com>
* Changed RegExp. Fixed migration
Signed-off-by: Alexey Terentyev <axifnx@gmail.com>
* fmt migration file
Signed-off-by: Alexey Terentyev <axifnx@gmail.com>
* Fixed test for changed regexp
Signed-off-by: Alexey Terentyev <axifnx@gmail.com>
* Removed validation log messages
Signed-off-by: Alexey Terentyev <axifnx@gmail.com>
* Renamed migration file
Signed-off-by: Alexey Terentyev <axifnx@gmail.com>
* Renamed validate function
Signed-off-by: Alexey Terentyev <axifnx@gmail.com>
Diffstat (limited to 'models')
-rw-r--r-- | models/migrations/migrations.go | 2 | ||||
-rw-r--r-- | models/migrations/v68.go | 160 | ||||
-rw-r--r-- | models/topic.go | 15 | ||||
-rw-r--r-- | models/topic_test.go | 13 |
4 files changed, 190 insertions, 0 deletions
diff --git a/models/migrations/migrations.go b/models/migrations/migrations.go index 2537e5712b..7732e17094 100644 --- a/models/migrations/migrations.go +++ b/models/migrations/migrations.go @@ -188,6 +188,8 @@ var migrations = []Migration{ NewMigration("add login source id column for public_key table", addLoginSourceIDToPublicKeyTable), // v67 -> v68 NewMigration("remove stale watches", removeStaleWatches), + // v68 -> V69 + NewMigration("Reformat and remove incorrect topics", reformatAndRemoveIncorrectTopics), } // Migrate database to current version diff --git a/models/migrations/v68.go b/models/migrations/v68.go new file mode 100644 index 0000000000..d6a0d04c53 --- /dev/null +++ b/models/migrations/v68.go @@ -0,0 +1,160 @@ +// Copyright 2018 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package migrations + +import ( + "strings" + + "code.gitea.io/gitea/models" + "code.gitea.io/gitea/modules/log" + + "github.com/go-xorm/xorm" +) + +func reformatAndRemoveIncorrectTopics(x *xorm.Engine) (err error) { + log.Info("This migration could take up to minutes, please be patient.") + type Topic struct { + ID int64 + Name string `xorm:"unique"` + } + + sess := x.NewSession() + defer sess.Close() + + const batchSize = 100 + touchedRepo := make(map[int64]struct{}) + topics := make([]*Topic, 0, batchSize) + delTopicIDs := make([]int64, 0, batchSize) + ids := make([]int64, 0, 30) + + if err := sess.Begin(); err != nil { + return err + } + log.Info("Validating existed topics...") + for start := 0; ; start += batchSize { + topics = topics[:0] + if err := sess.Asc("id").Limit(batchSize, start).Find(&topics); err != nil { + return err + } + if len(topics) == 0 { + break + } + for _, topic := range topics { + if models.ValidateTopic(topic.Name) { + continue + } + topic.Name = strings.Replace(strings.TrimSpace(strings.ToLower(topic.Name)), " ", "-", -1) + + if err := sess.Table("repo_topic").Cols("repo_id"). + Where("topic_id = ?", topic.ID).Find(&ids); err != nil { + return err + } + for _, id := range ids { + touchedRepo[id] = struct{}{} + } + + if models.ValidateTopic(topic.Name) { + log.Info("Updating topic: id = %v, name = %v", topic.ID, topic.Name) + if _, err := sess.Table("topic").ID(topic.ID). + Update(&Topic{Name: topic.Name}); err != nil { + return err + } + } else { + delTopicIDs = append(delTopicIDs, topic.ID) + } + } + } + + log.Info("Deleting incorrect topics...") + for start := 0; ; start += batchSize { + if (start + batchSize) < len(delTopicIDs) { + ids = delTopicIDs[start:(start + batchSize)] + } else { + ids = delTopicIDs[start:] + } + + log.Info("Deleting 'repo_topic' rows for topics with ids = %v", ids) + if _, err := sess.In("topic_id", ids).Delete(&models.RepoTopic{}); err != nil { + return err + } + + log.Info("Deleting topics with id = %v", ids) + if _, err := sess.In("id", ids).Delete(&Topic{}); err != nil { + return err + } + + if len(ids) < batchSize { + break + } + } + + repoTopics := make([]*models.RepoTopic, 0, batchSize) + delRepoTopics := make([]*models.RepoTopic, 0, batchSize) + tmpRepoTopics := make([]*models.RepoTopic, 0, 30) + + log.Info("Checking the number of topics in the repositories...") + for start := 0; ; start += batchSize { + repoTopics = repoTopics[:0] + if err := sess.Cols("repo_id").Asc("repo_id").Limit(batchSize, start). + GroupBy("repo_id").Having("COUNT(*) > 25").Find(&repoTopics); err != nil { + return err + } + if len(repoTopics) == 0 { + break + } + + log.Info("Number of repositories with more than 25 topics: %v", len(repoTopics)) + for _, repoTopic := range repoTopics { + touchedRepo[repoTopic.RepoID] = struct{}{} + + tmpRepoTopics = tmpRepoTopics[:0] + if err := sess.Where("repo_id = ?", repoTopic.RepoID).Find(&tmpRepoTopics); err != nil { + return err + } + + log.Info("Repository with id = %v has %v topics", repoTopic.RepoID, len(tmpRepoTopics)) + + for i := len(tmpRepoTopics) - 1; i > 24; i-- { + delRepoTopics = append(delRepoTopics, tmpRepoTopics[i]) + } + } + } + + log.Info("Deleting superfluous topics for repositories (more than 25 topics)...") + for _, repoTopic := range delRepoTopics { + log.Info("Deleting 'repo_topic' rows for 'repository' with id = %v. Topic id = %v", + repoTopic.RepoID, repoTopic.TopicID) + + if _, err := sess.Where("repo_id = ? AND topic_id = ?", repoTopic.RepoID, + repoTopic.TopicID).Delete(&models.RepoTopic{}); err != nil { + return err + } + if _, err := sess.Exec( + "UPDATE topic SET repo_count = (SELECT repo_count FROM topic WHERE id = ?) - 1 WHERE id = ?", + repoTopic.TopicID, repoTopic.TopicID); err != nil { + return err + } + } + + topicNames := make([]string, 0, 30) + log.Info("Updating repositories 'topics' fields...") + for repoID := range touchedRepo { + if err := sess.Table("topic").Cols("name"). + Join("INNER", "repo_topic", "topic.id = repo_topic.topic_id"). + Where("repo_topic.repo_id = ?", repoID).Find(&topicNames); err != nil { + return err + } + log.Info("Updating 'topics' field for repository with id = %v", repoID) + if _, err := sess.ID(repoID).Cols("topics"). + Update(&models.Repository{Topics: topicNames}); err != nil { + return err + } + } + if err := sess.Commit(); err != nil { + return err + } + + return nil +} diff --git a/models/topic.go b/models/topic.go index 3b1737f8af..247aac5fff 100644 --- a/models/topic.go +++ b/models/topic.go @@ -6,6 +6,7 @@ package models import ( "fmt" + "regexp" "strings" "code.gitea.io/gitea/modules/util" @@ -20,6 +21,8 @@ func init() { ) } +var topicPattern = regexp.MustCompile(`^[a-z0-9][a-z0-9-]*$`) + // Topic represents a topic of repositories type Topic struct { ID int64 @@ -51,6 +54,11 @@ func (err ErrTopicNotExist) Error() string { return fmt.Sprintf("topic is not exist [name: %s]", err.Name) } +// ValidateTopic checks topics by length and match pattern rules +func ValidateTopic(topic string) bool { + return len(topic) <= 35 && topicPattern.MatchString(topic) +} + // GetTopicByName retrieves topic by name func GetTopicByName(name string) (*Topic, error) { var topic Topic @@ -182,6 +190,13 @@ func SaveTopics(repoID int64, topicNames ...string) error { } } + topicNames = topicNames[:0] + if err := sess.Table("topic").Cols("name"). + Join("INNER", "repo_topic", "topic.id = repo_topic.topic_id"). + Where("repo_topic.repo_id = ?", repoID).Find(&topicNames); err != nil { + return err + } + if _, err := sess.ID(repoID).Cols("topics").Update(&Repository{ Topics: topicNames, }); err != nil { diff --git a/models/topic_test.go b/models/topic_test.go index 472f4e52d9..ef374e557b 100644 --- a/models/topic_test.go +++ b/models/topic_test.go @@ -55,3 +55,16 @@ func TestAddTopic(t *testing.T) { assert.NoError(t, err) assert.EqualValues(t, 2, len(topics)) } + +func TestTopicValidator(t *testing.T) { + assert.True(t, ValidateTopic("12345")) + assert.True(t, ValidateTopic("2-test")) + assert.True(t, ValidateTopic("test-3")) + assert.True(t, ValidateTopic("first")) + assert.True(t, ValidateTopic("second-test-topic")) + assert.True(t, ValidateTopic("third-project-topic-with-max-length")) + + assert.False(t, ValidateTopic("$fourth-test,topic")) + assert.False(t, ValidateTopic("-fifth-test-topic")) + assert.False(t, ValidateTopic("sixth-go-project-topic-with-excess-length")) +} |