summaryrefslogtreecommitdiffstats
path: root/models/migrations/v68.go
blob: d6a0d04c537d49b946394fd2b3b549b799ba73d8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
// Copyright 2018 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package migrations

import (
	"strings"

	"code.gitea.io/gitea/models"
	"code.gitea.io/gitea/modules/log"

	"github.com/go-xorm/xorm"
)

// reformatAndRemoveIncorrectTopics cleans up the topic tables in four phases,
// all executed inside a single session transaction:
//
//  1. Every topic whose name fails models.ValidateTopic is normalized
//     (lower-cased, trimmed, spaces replaced by dashes). If the normalized
//     name validates, the topic is renamed; otherwise it is queued for
//     deletion.
//  2. Queued topics are deleted in batches together with their repo_topic
//     rows.
//  3. For every repository with more than 25 topics, the repo_topic rows past
//     the first 25 returned by the query are deleted and the repo_count of
//     each affected topic is decremented.
//  4. The `topics` field of every repository touched by phases 1 and 3 is
//     rewritten from the remaining repo_topic rows.
//
// The first error encountered is returned and the transaction is left
// uncommitted; nil is returned once the commit succeeds.
func reformatAndRemoveIncorrectTopics(x *xorm.Engine) (err error) {
	log.Info("This migration could take up to minutes, please be patient.")
	// Minimal local view of the topic table: only the columns this migration
	// reads or writes.
	type Topic struct {
		ID   int64
		Name string `xorm:"unique"`
	}

	sess := x.NewSession()
	defer sess.Close()

	const batchSize = 100
	// Set of repository ids whose `topics` field must be rebuilt in phase 4.
	touchedRepo := make(map[int64]struct{})
	topics := make([]*Topic, 0, batchSize)
	delTopicIDs := make([]int64, 0, batchSize)
	repoIDs := make([]int64, 0, 30)

	if err := sess.Begin(); err != nil {
		return err
	}

	// Phase 1: validate, normalize or queue for deletion.
	log.Info("Validating existed topics...")
	for start := 0; ; start += batchSize {
		// Find appends to the destination slice, so reset it for each page.
		topics = topics[:0]
		if err := sess.Asc("id").Limit(batchSize, start).Find(&topics); err != nil {
			return err
		}
		if len(topics) == 0 {
			break
		}
		for _, topic := range topics {
			if models.ValidateTopic(topic.Name) {
				continue
			}
			topic.Name = strings.Replace(strings.TrimSpace(strings.ToLower(topic.Name)), " ", "-", -1)

			// Reset before querying: without this the slice keeps growing with
			// the ids fetched for earlier topics and each iteration re-walks
			// all of them.
			repoIDs = repoIDs[:0]
			if err := sess.Table("repo_topic").Cols("repo_id").
				Where("topic_id = ?", topic.ID).Find(&repoIDs); err != nil {
				return err
			}
			for _, id := range repoIDs {
				touchedRepo[id] = struct{}{}
			}

			if !models.ValidateTopic(topic.Name) {
				// Still invalid after normalization: delete it in phase 2.
				delTopicIDs = append(delTopicIDs, topic.ID)
				continue
			}

			// NOTE(review): Name carries a `unique` tag, so renaming to a
			// normalized name that already exists on another topic would
			// presumably fail here — confirm whether a merge step is needed.
			log.Info("Updating topic: id = %v, name = %v", topic.ID, topic.Name)
			if _, err := sess.Table("topic").ID(topic.ID).
				Update(&Topic{Name: topic.Name}); err != nil {
				return err
			}
		}
	}

	// Phase 2: delete the queued topics in batches. The loop bound guarantees
	// that no DELETE is ever issued with an empty id list.
	log.Info("Deleting incorrect topics...")
	for start := 0; start < len(delTopicIDs); start += batchSize {
		end := start + batchSize
		if end > len(delTopicIDs) {
			end = len(delTopicIDs)
		}
		batch := delTopicIDs[start:end]

		log.Info("Deleting 'repo_topic' rows for topics with ids = %v", batch)
		if _, err := sess.In("topic_id", batch).Delete(&models.RepoTopic{}); err != nil {
			return err
		}

		log.Info("Deleting topics with id = %v", batch)
		if _, err := sess.In("id", batch).Delete(&Topic{}); err != nil {
			return err
		}
	}

	repoTopics := make([]*models.RepoTopic, 0, batchSize)
	delRepoTopics := make([]*models.RepoTopic, 0, batchSize)
	tmpRepoTopics := make([]*models.RepoTopic, 0, 30)

	// Phase 3a: collect the superfluous repo_topic rows. Nothing is deleted
	// while paging, so the offset-based pagination stays stable.
	log.Info("Checking the number of topics in the repositories...")
	for start := 0; ; start += batchSize {
		repoTopics = repoTopics[:0]
		if err := sess.Cols("repo_id").Asc("repo_id").Limit(batchSize, start).
			GroupBy("repo_id").Having("COUNT(*) > 25").Find(&repoTopics); err != nil {
			return err
		}
		if len(repoTopics) == 0 {
			break
		}

		log.Info("Number of repositories with more than 25 topics: %v", len(repoTopics))
		for _, repoTopic := range repoTopics {
			touchedRepo[repoTopic.RepoID] = struct{}{}

			tmpRepoTopics = tmpRepoTopics[:0]
			if err := sess.Where("repo_id = ?", repoTopic.RepoID).Find(&tmpRepoTopics); err != nil {
				return err
			}

			log.Info("Repository with id = %v has %v topics", repoTopic.RepoID, len(tmpRepoTopics))

			// Keep the first 25 rows (indexes 0..24) and queue the rest.
			for i := len(tmpRepoTopics) - 1; i > 24; i-- {
				delRepoTopics = append(delRepoTopics, tmpRepoTopics[i])
			}
		}
	}

	// Phase 3b: delete the collected rows and keep repo_count in sync.
	log.Info("Deleting superfluous topics for repositories (more than 25 topics)...")
	for _, repoTopic := range delRepoTopics {
		log.Info("Deleting 'repo_topic' rows for 'repository' with id = %v. Topic id = %v",
			repoTopic.RepoID, repoTopic.TopicID)

		if _, err := sess.Where("repo_id = ? AND topic_id = ?", repoTopic.RepoID,
			repoTopic.TopicID).Delete(&models.RepoTopic{}); err != nil {
			return err
		}
		// Decrement in place. Reading the current value through a subquery on
		// the same table is unnecessary, and MySQL rejects an UPDATE whose
		// subquery selects from the table being updated.
		if _, err := sess.Exec(
			"UPDATE topic SET repo_count = repo_count - 1 WHERE id = ?",
			repoTopic.TopicID); err != nil {
			return err
		}
	}

	// Phase 4: rebuild the `topics` field of every touched repository.
	log.Info("Updating repositories 'topics' fields...")
	for repoID := range touchedRepo {
		// A fresh slice per repository: Find appends to its destination, so a
		// shared slice would carry the names of previously processed
		// repositories into this one.
		topicNames := make([]string, 0, 30)
		if err := sess.Table("topic").Cols("name").
			Join("INNER", "repo_topic", "topic.id = repo_topic.topic_id").
			Where("repo_topic.repo_id = ?", repoID).Find(&topicNames); err != nil {
			return err
		}
		log.Info("Updating 'topics' field for repository with id = %v", repoID)
		if _, err := sess.ID(repoID).Cols("topics").
			Update(&models.Repository{Topics: topicNames}); err != nil {
			return err
		}
	}

	return sess.Commit()
}