
indexer.go 12KB

// Copyright 2018 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package issues

import (
	"context"
	"fmt"
	"os"
	"runtime/pprof"
	"sync/atomic"
	"time"

	db_model "code.gitea.io/gitea/models/db"
	repo_model "code.gitea.io/gitea/models/repo"
	"code.gitea.io/gitea/modules/container"
	"code.gitea.io/gitea/modules/graceful"
	"code.gitea.io/gitea/modules/indexer/issues/bleve"
	"code.gitea.io/gitea/modules/indexer/issues/db"
	"code.gitea.io/gitea/modules/indexer/issues/elasticsearch"
	"code.gitea.io/gitea/modules/indexer/issues/internal"
	"code.gitea.io/gitea/modules/indexer/issues/meilisearch"
	"code.gitea.io/gitea/modules/log"
	"code.gitea.io/gitea/modules/process"
	"code.gitea.io/gitea/modules/queue"
	"code.gitea.io/gitea/modules/setting"
	"code.gitea.io/gitea/modules/util"
)

// IndexerMetadata is used to send data to the queue, so it contains only the IDs.
// It may look weird, because it has to be compatible with the old queue data format.
// If the IsDelete flag is true, the IDs specify the issues to delete from the index without querying the database.
// If the IsDelete flag is false, the ID specifies the issue to index, so the indexer will query the database to get the issue data.
// Note that if the ID does not exist in the database, its index entry will be deleted too, even if IsDelete is false.
// Valid values:
//   - IsDelete = true, IDs = [1, 2, 3], and ID will be ignored
//   - IsDelete = false, ID = 1, and IDs will be ignored
type IndexerMetadata struct {
	ID       int64   `json:"id"`
	IsDelete bool    `json:"is_delete"`
	IDs      []int64 `json:"ids"`
}
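
// Illustrative examples of the two valid shapes described above (a sketch for
// clarity, not part of the original file; the variable names are made up):
//
//	deleteItem := &IndexerMetadata{IsDelete: true, IDs: []int64{1, 2, 3}} // delete issues 1, 2 and 3 from the index; ID is ignored
//	indexItem := &IndexerMetadata{ID: 1}                                  // (re)index issue 1 from the database; IDs is ignored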

var (
	// issueIndexerQueue is the queue of issue IDs to be updated
	issueIndexerQueue *queue.WorkerPoolQueue[*IndexerMetadata]

	// globalIndexer is the global indexer; it cannot be nil.
	// When the real indexer is not ready, it is a dummy indexer which returns an error explaining that it is not ready.
	// So it is always safe to use it as *globalIndexer.Load() and call its methods.
	globalIndexer atomic.Pointer[internal.Indexer]
	dummyIndexer  *internal.Indexer
)
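
// init installs the dummy indexer, so globalIndexer is never nil even before
// InitIssueIndexer has replaced it with the real indexer.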
func init() {
	i := internal.NewDummyIndexer()
	dummyIndexer = &i
	globalIndexer.Store(dummyIndexer)
}

// InitIssueIndexer initializes the issue indexer; if syncReindex is true, it
// waits until all issues have been indexed before returning.
func InitIssueIndexer(syncReindex bool) {
	ctx, _, finished := process.GetManager().AddTypedContext(context.Background(), "Service: IssueIndexer", process.SystemProcessType, false)

	indexerInitWaitChannel := make(chan time.Duration, 1)

	// Create the Queue
	issueIndexerQueue = queue.CreateUniqueQueue(ctx, "issue_indexer", getIssueIndexerQueueHandler(ctx))

	graceful.GetManager().RunAtTerminate(finished)

	// Create the Indexer
	go func() {
		pprof.SetGoroutineLabels(ctx)
		start := time.Now()
		log.Info("PID %d: Initializing Issue Indexer: %s", os.Getpid(), setting.Indexer.IssueType)
		var (
			issueIndexer internal.Indexer
			existed      bool
			err          error
		)
		switch setting.Indexer.IssueType {
		case "bleve":
			defer func() {
				if err := recover(); err != nil {
					log.Error("PANIC whilst initializing issue indexer: %v\nStacktrace: %s", err, log.Stack(2))
					log.Error("The indexer files are likely corrupted and may need to be deleted")
					log.Error("You can completely remove the %q directory to make Gitea recreate the indexes", setting.Indexer.IssuePath)
					globalIndexer.Store(dummyIndexer)
					log.Fatal("PID: %d Unable to initialize the Bleve Issue Indexer at path: %s Error: %v", os.Getpid(), setting.Indexer.IssuePath, err)
				}
			}()
			issueIndexer = bleve.NewIndexer(setting.Indexer.IssuePath)
			existed, err = issueIndexer.Init(ctx)
			if err != nil {
				log.Fatal("Unable to initialize Bleve Issue Indexer at path: %s Error: %v", setting.Indexer.IssuePath, err)
			}
		case "elasticsearch":
			issueIndexer = elasticsearch.NewIndexer(setting.Indexer.IssueConnStr, setting.Indexer.IssueIndexerName)
			existed, err = issueIndexer.Init(ctx)
			if err != nil {
				log.Fatal("Unable to issueIndexer.Init with connection %s Error: %v", setting.Indexer.IssueConnStr, err)
			}
		case "db":
			issueIndexer = db.NewIndexer()
		case "meilisearch":
			issueIndexer = meilisearch.NewIndexer(setting.Indexer.IssueConnStr, setting.Indexer.IssueConnAuth, setting.Indexer.IssueIndexerName)
			existed, err = issueIndexer.Init(ctx)
			if err != nil {
				log.Fatal("Unable to issueIndexer.Init with connection %s Error: %v", setting.Indexer.IssueConnStr, err)
			}
		default:
			log.Fatal("Unknown issue indexer type: %s", setting.Indexer.IssueType)
		}
		globalIndexer.Store(&issueIndexer)

		graceful.GetManager().RunAtTerminate(func() {
			log.Debug("Closing issue indexer")
			(*globalIndexer.Load()).Close()
			log.Info("PID: %d Issue Indexer closed", os.Getpid())
		})

		// Start processing the queue
		go graceful.GetManager().RunWithCancel(issueIndexerQueue)

		// Populate the index
		if !existed {
			if syncReindex {
				graceful.GetManager().RunWithShutdownContext(populateIssueIndexer)
			} else {
				go graceful.GetManager().RunWithShutdownContext(populateIssueIndexer)
			}
		}

		indexerInitWaitChannel <- time.Since(start)
		close(indexerInitWaitChannel)
	}()
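
	// If syncReindex is set, block until the initialization above has finished
	// (or shutdown begins); otherwise optionally enforce a startup timeout.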
	if syncReindex {
		select {
		case <-indexerInitWaitChannel:
		case <-graceful.GetManager().IsShutdown():
		}
	} else if setting.Indexer.StartupTimeout > 0 {
		go func() {
			pprof.SetGoroutineLabels(ctx)
			timeout := setting.Indexer.StartupTimeout
			if graceful.GetManager().IsChild() && setting.GracefulHammerTime > 0 {
				timeout += setting.GracefulHammerTime
			}
			select {
			case duration := <-indexerInitWaitChannel:
				log.Info("Issue Indexer Initialization took %v", duration)
			case <-graceful.GetManager().IsShutdown():
				log.Warn("Shutdown occurred before issue index initialisation was complete")
			case <-time.After(timeout):
				issueIndexerQueue.ShutdownWait(5 * time.Second)
				log.Fatal("Issue Indexer Initialization timed-out after: %v", timeout)
			}
		}()
	}
}
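
// getIssueIndexerQueueHandler returns the handler for the issue indexer queue:
// for each IndexerMetadata item it deletes or (re)indexes the referenced issues
// via the current global indexer, and returns the items that failed so the
// queue can retry them.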
func getIssueIndexerQueueHandler(ctx context.Context) func(items ...*IndexerMetadata) []*IndexerMetadata {
	return func(items ...*IndexerMetadata) []*IndexerMetadata {
		var unhandled []*IndexerMetadata

		indexer := *globalIndexer.Load()
		for _, item := range items {
			log.Trace("IndexerMetadata Process: %d %v %t", item.ID, item.IDs, item.IsDelete)
			if item.IsDelete {
				if err := indexer.Delete(ctx, item.IDs...); err != nil {
					log.Error("Issue indexer handler: failed to delete %v from index: %v", item.IDs, err)
					unhandled = append(unhandled, item)
				}
				continue
			}
			data, existed, err := getIssueIndexerData(ctx, item.ID)
			if err != nil {
				log.Error("Issue indexer handler: failed to get issue data of %d: %v", item.ID, err)
				unhandled = append(unhandled, item)
				continue
			}
			if !existed {
				if err := indexer.Delete(ctx, item.ID); err != nil {
					log.Error("Issue indexer handler: failed to delete issue %d from index: %v", item.ID, err)
					unhandled = append(unhandled, item)
				}
				continue
			}
			if err := indexer.Index(ctx, data); err != nil {
				log.Error("Issue indexer handler: failed to index issue %d: %v", item.ID, err)
				unhandled = append(unhandled, item)
				continue
			}
		}
		return unhandled
	}
}

// populateIssueIndexer populates the issue indexer with issue data.
func populateIssueIndexer(ctx context.Context) {
	ctx, _, finished := process.GetManager().AddTypedContext(ctx, "Service: PopulateIssueIndexer", process.SystemProcessType, true)
	defer finished()
	ctx = contextWithKeepRetry(ctx) // keep retrying since it's a background task
	if err := PopulateIssueIndexer(ctx); err != nil {
		log.Error("Issue indexer population failed: %v", err)
	}
}
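
// PopulateIssueIndexer walks all repositories page by page and pushes every
// issue of each repository into the indexer; it stops early if the context is
// cancelled.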
func PopulateIssueIndexer(ctx context.Context) error {
	for page := 1; ; page++ {
		select {
		case <-ctx.Done():
			return fmt.Errorf("shutdown before completion: %w", ctx.Err())
		default:
		}
		repos, _, err := repo_model.SearchRepositoryByName(ctx, &repo_model.SearchRepoOptions{
			ListOptions: db_model.ListOptions{Page: page, PageSize: repo_model.RepositoryListDefaultPageSize},
			OrderBy:     db_model.SearchOrderByID,
			Private:     true,
			Collaborate: util.OptionalBoolFalse,
		})
		if err != nil {
			log.Error("SearchRepositoryByName: %v", err)
			continue
		}
		if len(repos) == 0 {
			log.Debug("Issue Indexer population complete")
			return nil
		}

		for _, repo := range repos {
			if err := updateRepoIndexer(ctx, repo.ID); err != nil {
				return fmt.Errorf("populate issue indexer for repo %d: %v", repo.ID, err)
			}
		}
	}
}

// UpdateRepoIndexer adds/updates all issues of the repository in the issue indexer.
func UpdateRepoIndexer(ctx context.Context, repoID int64) {
	if err := updateRepoIndexer(ctx, repoID); err != nil {
		log.Error("Unable to push repo %d to issue indexer: %v", repoID, err)
	}
}

// UpdateIssueIndexer adds/updates an issue in the issue indexer.
func UpdateIssueIndexer(ctx context.Context, issueID int64) {
	if err := updateIssueIndexer(ctx, issueID); err != nil {
		log.Error("Unable to push issue %d to issue indexer: %v", issueID, err)
	}
}

// DeleteRepoIssueIndexer deletes all of the repository's issues from the issue indexer.
func DeleteRepoIssueIndexer(ctx context.Context, repoID int64) {
	if err := deleteRepoIssueIndexer(ctx, repoID); err != nil {
		log.Error("Unable to push deleted repo %d to issue indexer: %v", repoID, err)
	}
}

// IsAvailable checks if the issue indexer is available.
func IsAvailable(ctx context.Context) bool {
	return (*globalIndexer.Load()).Ping(ctx) == nil
}

// SearchOptions indicates the options for searching issues
type SearchOptions = internal.SearchOptions

const (
	SortByCreatedDesc  = internal.SortByCreatedDesc
	SortByUpdatedDesc  = internal.SortByUpdatedDesc
	SortByCommentsDesc = internal.SortByCommentsDesc
	SortByDeadlineDesc = internal.SortByDeadlineDesc
	SortByCreatedAsc   = internal.SortByCreatedAsc
	SortByUpdatedAsc   = internal.SortByUpdatedAsc
	SortByCommentsAsc  = internal.SortByCommentsAsc
	SortByDeadlineAsc  = internal.SortByDeadlineAsc
)

// SearchIssues searches issues by the given options.
func SearchIssues(ctx context.Context, opts *SearchOptions) ([]int64, int64, error) {
	indexer := *globalIndexer.Load()

	if opts.Keyword == "" {
		// This is a conservative shortcut.
		// If the keyword is empty, the db has better (at least not worse) performance when filtering issues.
		// When the keyword is empty, the request tends to be listing rather than searching issues.
		// So if a user creates an issue and lists issues immediately, the issue may not be listed because the indexer needs time to index it.
		// Even worse, an external indexer like Elasticsearch may not be available for a while,
		// and the user would not be able to list issues completely until it becomes available again.
		indexer = db.NewIndexer()
	}

	result, err := indexer.Search(ctx, opts)
	if err != nil {
		return nil, 0, err
	}

	ret := make([]int64, 0, len(result.Hits))
	for _, hit := range result.Hits {
		ret = append(ret, hit.ID)
	}
	return ret, result.Total, nil
}

// CountIssues counts issues by options. It is a shortcut of SearchIssues(ctx, opts) but only returns the total count.
func CountIssues(ctx context.Context, opts *SearchOptions) (int64, error) {
	opts = opts.Copy(func(options *SearchOptions) { options.Paginator = &db_model.ListOptions{PageSize: 0} })

	_, total, err := SearchIssues(ctx, opts)
	return total, err
}

// CountIssuesByRepo counts issues by options, grouped by repo ID.
// It's not a complete implementation, since it requires the caller to provide the repo IDs.
// That means opts.RepoIDs must be specified, and opts.AllPublic must be false.
// It's good enough for the current usage, and it can be improved if needed.
// TODO: use "group by" of the indexer engines to implement it.
func CountIssuesByRepo(ctx context.Context, opts *SearchOptions) (map[int64]int64, error) {
	if len(opts.RepoIDs) == 0 {
		return nil, fmt.Errorf("opts.RepoIDs must be specified")
	}
	if opts.AllPublic {
		return nil, fmt.Errorf("opts.AllPublic must be false")
	}

	repoIDs := container.SetOf(opts.RepoIDs...).Values()
	ret := make(map[int64]int64, len(repoIDs))
	// TODO: it could be faster to do it in parallel for some indexer engines. Improve it if users report it's slow.
	for _, repoID := range repoIDs {
		count, err := CountIssues(ctx, opts.Copy(func(o *internal.SearchOptions) { o.RepoIDs = []int64{repoID} }))
		if err != nil {
			return nil, err
		}
		ret[repoID] = count
	}
	return ret, nil
}
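
// The sketch below illustrates how a caller might combine SearchIssues and
// CountIssuesByRepo. It is not part of the original file, the function and
// variable names are made up, and only SearchOptions fields that appear above
// (Keyword, RepoIDs, Paginator) are used.
func exampleIssueSearchUsage(ctx context.Context) error {
	opts := &SearchOptions{
		Keyword:   "crash",                                      // non-empty keyword, so the configured indexer is used instead of the db fallback
		RepoIDs:   []int64{1, 2},                                // restrict the search to these repositories
		Paginator: &db_model.ListOptions{Page: 1, PageSize: 20}, // first page of 20 hits
	}

	ids, total, err := SearchIssues(ctx, opts)
	if err != nil {
		return err
	}
	log.Info("found %d of %d matching issues: %v", len(ids), total, ids)

	// Per-repository counts for the same options (opts.RepoIDs is set and AllPublic is false, as required).
	counts, err := CountIssuesByRepo(ctx, opts)
	if err != nil {
		return err
	}
	log.Info("per-repo counts: %v", counts)
	return nil
}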