summaryrefslogtreecommitdiffstats
path: root/models/migrations/v68.go
blob: d9e80ca80eb4fb9ca1ac14ddec6447f21c5163c3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
// Copyright 2018 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package migrations

import (
	"fmt"
	"regexp"
	"strings"

	"code.gitea.io/gitea/modules/log"

	"github.com/go-xorm/xorm"
)

var topicPattern = regexp.MustCompile(`^[a-z0-9][a-z0-9-]*$`)

func validateTopic(topic string) bool {
	return len(topic) <= 35 && topicPattern.MatchString(topic)
}

func reformatAndRemoveIncorrectTopics(x *xorm.Engine) (err error) {
	log.Info("This migration could take up to minutes, please be patient.")

	type Topic struct {
		ID          int64
		Name        string `xorm:"UNIQUE VARCHAR(25)"`
		RepoCount   int
		CreatedUnix int64 `xorm:"INDEX created"`
		UpdatedUnix int64 `xorm:"INDEX updated"`
	}

	type RepoTopic struct {
		RepoID  int64 `xorm:"UNIQUE(s)"`
		TopicID int64 `xorm:"UNIQUE(s)"`
	}

	type Repository struct {
		ID     int64    `xorm:"pk autoincr"`
		Topics []string `xorm:"TEXT JSON"`
	}

	if err := x.Sync2(new(Topic)); err != nil {
		return fmt.Errorf("Sync2: %v", err)
	}
	if err := x.Sync2(new(RepoTopic)); err != nil {
		return fmt.Errorf("Sync2: %v", err)
	}

	sess := x.NewSession()
	defer sess.Close()

	const batchSize = 100
	touchedRepo := make(map[int64]struct{})
	delTopicIDs := make([]int64, 0, batchSize)

	log.Info("Validating existed topics...")
	if err := sess.Begin(); err != nil {
		return err
	}
	for start := 0; ; start += batchSize {
		topics := make([]*Topic, 0, batchSize)
		if err := x.Cols("id", "name").Asc("id").Limit(batchSize, start).Find(&topics); err != nil {
			return err
		}
		if len(topics) == 0 {
			break
		}
		for _, topic := range topics {
			if validateTopic(topic.Name) {
				continue
			}
			log.Info("Incorrect topic: id = %v, name = %q", topic.ID, topic.Name)

			topic.Name = strings.Replace(strings.TrimSpace(strings.ToLower(topic.Name)), " ", "-", -1)

			ids := make([]int64, 0, 30)
			if err := sess.Table("repo_topic").Cols("repo_id").
				Where("topic_id = ?", topic.ID).Find(&ids); err != nil {
				return err
			}
			log.Info("Touched repo ids: %v", ids)
			for _, id := range ids {
				touchedRepo[id] = struct{}{}
			}

			if validateTopic(topic.Name) {
				unifiedTopic := Topic{Name: topic.Name}
				exists, err := sess.Cols("id", "name").Get(&unifiedTopic)
				log.Info("Exists topic with the name %q? %v, id = %v", topic.Name, exists, unifiedTopic.ID)
				if err != nil {
					return err
				}
				if exists {
					log.Info("Updating repo_topic rows with topic_id = %v to topic_id = %v", topic.ID, unifiedTopic.ID)
					if _, err := sess.Where("topic_id = ? AND repo_id NOT IN "+
						"(SELECT rt1.repo_id FROM repo_topic rt1 INNER JOIN repo_topic rt2 "+
						"ON rt1.repo_id = rt2.repo_id WHERE rt1.topic_id = ? AND rt2.topic_id = ?)",
						topic.ID, topic.ID, unifiedTopic.ID).Update(&RepoTopic{TopicID: unifiedTopic.ID}); err != nil {
						return err
					}
					log.Info("Updating topic `repo_count` field")
					if _, err := sess.Exec(
						"UPDATE topic SET repo_count = (SELECT COUNT(*) FROM repo_topic WHERE topic_id = ? GROUP BY topic_id) WHERE id = ?",
						unifiedTopic.ID, unifiedTopic.ID); err != nil {
						return err
					}
				} else {
					log.Info("Updating topic: id = %v, name = %q", topic.ID, topic.Name)
					if _, err := sess.Table("topic").ID(topic.ID).
						Update(&Topic{Name: topic.Name}); err != nil {
						return err
					}
					continue
				}
			}
			delTopicIDs = append(delTopicIDs, topic.ID)
		}
	}
	if err := sess.Commit(); err != nil {
		return err
	}

	sess.Init()

	log.Info("Deleting incorrect topics...")
	if err := sess.Begin(); err != nil {
		return err
	}
	log.Info("Deleting 'repo_topic' rows for topics with ids = %v", delTopicIDs)
	if _, err := sess.In("topic_id", delTopicIDs).Delete(&RepoTopic{}); err != nil {
		return err
	}
	log.Info("Deleting topics with id = %v", delTopicIDs)
	if _, err := sess.In("id", delTopicIDs).Delete(&Topic{}); err != nil {
		return err
	}
	if err := sess.Commit(); err != nil {
		return err
	}

	delRepoTopics := make([]*RepoTopic, 0, batchSize)

	log.Info("Checking the number of topics in the repositories...")
	for start := 0; ; start += batchSize {
		repoTopics := make([]*RepoTopic, 0, batchSize)
		if err := x.Cols("repo_id").Asc("repo_id").Limit(batchSize, start).
			GroupBy("repo_id").Having("COUNT(*) > 25").Find(&repoTopics); err != nil {
			return err
		}
		if len(repoTopics) == 0 {
			break
		}

		log.Info("Number of repositories with more than 25 topics: %v", len(repoTopics))
		for _, repoTopic := range repoTopics {
			touchedRepo[repoTopic.RepoID] = struct{}{}

			tmpRepoTopics := make([]*RepoTopic, 0, 30)
			if err := x.Where("repo_id = ?", repoTopic.RepoID).Find(&tmpRepoTopics); err != nil {
				return err
			}

			log.Info("Repository with id = %v has %v topics", repoTopic.RepoID, len(tmpRepoTopics))

			for i := len(tmpRepoTopics) - 1; i > 24; i-- {
				delRepoTopics = append(delRepoTopics, tmpRepoTopics[i])
			}
		}
	}

	sess.Init()

	log.Info("Deleting superfluous topics for repositories (more than 25 topics)...")
	if err := sess.Begin(); err != nil {
		return err
	}
	for _, repoTopic := range delRepoTopics {
		log.Info("Deleting 'repo_topic' rows for 'repository' with id = %v. Topic id = %v",
			repoTopic.RepoID, repoTopic.TopicID)

		if _, err := sess.Where("repo_id = ? AND topic_id = ?", repoTopic.RepoID,
			repoTopic.TopicID).Delete(&RepoTopic{}); err != nil {
			return err
		}
		if _, err := sess.Exec(
			"UPDATE topic SET repo_count = (SELECT repo_count FROM topic WHERE id = ?) - 1 WHERE id = ?",
			repoTopic.TopicID, repoTopic.TopicID); err != nil {
			return err
		}
	}

	log.Info("Updating repositories 'topics' fields...")
	for repoID := range touchedRepo {
		topicNames := make([]string, 0, 30)
		if err := sess.Table("topic").Cols("name").
			Join("INNER", "repo_topic", "repo_topic.topic_id = topic.id").
			Where("repo_topic.repo_id = ?", repoID).Desc("topic.repo_count").Find(&topicNames); err != nil {
			return err
		}
		log.Info("Updating 'topics' field for repository with id = %v", repoID)
		if _, err := sess.ID(repoID).Cols("topics").
			Update(&Repository{Topics: topicNames}); err != nil {
			return err
		}
	}

	return sess.Commit()
}