You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

merge.go 11KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364
  1. // Copyright (c) 2017 Couchbase, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package scorch
  15. import (
  16. "encoding/json"
  17. "fmt"
  18. "os"
  19. "strings"
  20. "sync/atomic"
  21. "time"
  22. "github.com/RoaringBitmap/roaring"
  23. "github.com/blevesearch/bleve/index/scorch/mergeplan"
  24. "github.com/blevesearch/bleve/index/scorch/segment"
  25. )
  26. func (s *Scorch) mergerLoop() {
  27. var lastEpochMergePlanned uint64
  28. mergePlannerOptions, err := s.parseMergePlannerOptions()
  29. if err != nil {
  30. s.fireAsyncError(fmt.Errorf("mergePlannerOption json parsing err: %v", err))
  31. s.asyncTasks.Done()
  32. return
  33. }
  34. OUTER:
  35. for {
  36. atomic.AddUint64(&s.stats.TotFileMergeLoopBeg, 1)
  37. select {
  38. case <-s.closeCh:
  39. break OUTER
  40. default:
  41. // check to see if there is a new snapshot to persist
  42. s.rootLock.Lock()
  43. ourSnapshot := s.root
  44. ourSnapshot.AddRef()
  45. atomic.StoreUint64(&s.iStats.mergeSnapshotSize, uint64(ourSnapshot.Size()))
  46. atomic.StoreUint64(&s.iStats.mergeEpoch, ourSnapshot.epoch)
  47. s.rootLock.Unlock()
  48. if ourSnapshot.epoch != lastEpochMergePlanned {
  49. startTime := time.Now()
  50. // lets get started
  51. err := s.planMergeAtSnapshot(ourSnapshot, mergePlannerOptions)
  52. if err != nil {
  53. atomic.StoreUint64(&s.iStats.mergeEpoch, 0)
  54. if err == segment.ErrClosed {
  55. // index has been closed
  56. _ = ourSnapshot.DecRef()
  57. break OUTER
  58. }
  59. s.fireAsyncError(fmt.Errorf("merging err: %v", err))
  60. _ = ourSnapshot.DecRef()
  61. atomic.AddUint64(&s.stats.TotFileMergeLoopErr, 1)
  62. continue OUTER
  63. }
  64. lastEpochMergePlanned = ourSnapshot.epoch
  65. atomic.StoreUint64(&s.stats.LastMergedEpoch, ourSnapshot.epoch)
  66. s.fireEvent(EventKindMergerProgress, time.Since(startTime))
  67. }
  68. _ = ourSnapshot.DecRef()
  69. // tell the persister we're waiting for changes
  70. // first make a epochWatcher chan
  71. ew := &epochWatcher{
  72. epoch: lastEpochMergePlanned,
  73. notifyCh: make(notificationChan, 1),
  74. }
  75. // give it to the persister
  76. select {
  77. case <-s.closeCh:
  78. break OUTER
  79. case s.persisterNotifier <- ew:
  80. }
  81. // now wait for persister (but also detect close)
  82. select {
  83. case <-s.closeCh:
  84. break OUTER
  85. case <-ew.notifyCh:
  86. }
  87. }
  88. atomic.AddUint64(&s.stats.TotFileMergeLoopEnd, 1)
  89. }
  90. s.asyncTasks.Done()
  91. }
  92. func (s *Scorch) parseMergePlannerOptions() (*mergeplan.MergePlanOptions,
  93. error) {
  94. mergePlannerOptions := mergeplan.DefaultMergePlanOptions
  95. if v, ok := s.config["scorchMergePlanOptions"]; ok {
  96. b, err := json.Marshal(v)
  97. if err != nil {
  98. return &mergePlannerOptions, err
  99. }
  100. err = json.Unmarshal(b, &mergePlannerOptions)
  101. if err != nil {
  102. return &mergePlannerOptions, err
  103. }
  104. err = mergeplan.ValidateMergePlannerOptions(&mergePlannerOptions)
  105. if err != nil {
  106. return nil, err
  107. }
  108. }
  109. return &mergePlannerOptions, nil
  110. }
  111. func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
  112. options *mergeplan.MergePlanOptions) error {
  113. // build list of persisted segments in this snapshot
  114. var onlyPersistedSnapshots []mergeplan.Segment
  115. for _, segmentSnapshot := range ourSnapshot.segment {
  116. if _, ok := segmentSnapshot.segment.(segment.PersistedSegment); ok {
  117. onlyPersistedSnapshots = append(onlyPersistedSnapshots, segmentSnapshot)
  118. }
  119. }
  120. atomic.AddUint64(&s.stats.TotFileMergePlan, 1)
  121. // give this list to the planner
  122. resultMergePlan, err := mergeplan.Plan(onlyPersistedSnapshots, options)
  123. if err != nil {
  124. atomic.AddUint64(&s.stats.TotFileMergePlanErr, 1)
  125. return fmt.Errorf("merge planning err: %v", err)
  126. }
  127. if resultMergePlan == nil {
  128. // nothing to do
  129. atomic.AddUint64(&s.stats.TotFileMergePlanNone, 1)
  130. return nil
  131. }
  132. atomic.AddUint64(&s.stats.TotFileMergePlanOk, 1)
  133. atomic.AddUint64(&s.stats.TotFileMergePlanTasks, uint64(len(resultMergePlan.Tasks)))
  134. // process tasks in serial for now
  135. var filenames []string
  136. for _, task := range resultMergePlan.Tasks {
  137. if len(task.Segments) == 0 {
  138. atomic.AddUint64(&s.stats.TotFileMergePlanTasksSegmentsEmpty, 1)
  139. continue
  140. }
  141. atomic.AddUint64(&s.stats.TotFileMergePlanTasksSegments, uint64(len(task.Segments)))
  142. oldMap := make(map[uint64]*SegmentSnapshot)
  143. newSegmentID := atomic.AddUint64(&s.nextSegmentID, 1)
  144. segmentsToMerge := make([]segment.Segment, 0, len(task.Segments))
  145. docsToDrop := make([]*roaring.Bitmap, 0, len(task.Segments))
  146. for _, planSegment := range task.Segments {
  147. if segSnapshot, ok := planSegment.(*SegmentSnapshot); ok {
  148. oldMap[segSnapshot.id] = segSnapshot
  149. if persistedSeg, ok := segSnapshot.segment.(segment.PersistedSegment); ok {
  150. if segSnapshot.LiveSize() == 0 {
  151. atomic.AddUint64(&s.stats.TotFileMergeSegmentsEmpty, 1)
  152. oldMap[segSnapshot.id] = nil
  153. } else {
  154. segmentsToMerge = append(segmentsToMerge, segSnapshot.segment)
  155. docsToDrop = append(docsToDrop, segSnapshot.deleted)
  156. }
  157. // track the files getting merged for unsetting the
  158. // removal ineligibility. This helps to unflip files
  159. // even with fast merger, slow persister work flows.
  160. path := persistedSeg.Path()
  161. filenames = append(filenames,
  162. strings.TrimPrefix(path, s.path+string(os.PathSeparator)))
  163. }
  164. }
  165. }
  166. var oldNewDocNums map[uint64][]uint64
  167. var seg segment.Segment
  168. if len(segmentsToMerge) > 0 {
  169. filename := zapFileName(newSegmentID)
  170. s.markIneligibleForRemoval(filename)
  171. path := s.path + string(os.PathSeparator) + filename
  172. fileMergeZapStartTime := time.Now()
  173. atomic.AddUint64(&s.stats.TotFileMergeZapBeg, 1)
  174. newDocNums, _, err := s.segPlugin.Merge(segmentsToMerge, docsToDrop, path,
  175. s.closeCh, s)
  176. atomic.AddUint64(&s.stats.TotFileMergeZapEnd, 1)
  177. fileMergeZapTime := uint64(time.Since(fileMergeZapStartTime))
  178. atomic.AddUint64(&s.stats.TotFileMergeZapTime, fileMergeZapTime)
  179. if atomic.LoadUint64(&s.stats.MaxFileMergeZapTime) < fileMergeZapTime {
  180. atomic.StoreUint64(&s.stats.MaxFileMergeZapTime, fileMergeZapTime)
  181. }
  182. if err != nil {
  183. s.unmarkIneligibleForRemoval(filename)
  184. atomic.AddUint64(&s.stats.TotFileMergePlanTasksErr, 1)
  185. if err == segment.ErrClosed {
  186. return err
  187. }
  188. return fmt.Errorf("merging failed: %v", err)
  189. }
  190. seg, err = s.segPlugin.Open(path)
  191. if err != nil {
  192. s.unmarkIneligibleForRemoval(filename)
  193. atomic.AddUint64(&s.stats.TotFileMergePlanTasksErr, 1)
  194. return err
  195. }
  196. oldNewDocNums = make(map[uint64][]uint64)
  197. for i, segNewDocNums := range newDocNums {
  198. oldNewDocNums[task.Segments[i].Id()] = segNewDocNums
  199. }
  200. atomic.AddUint64(&s.stats.TotFileMergeSegments, uint64(len(segmentsToMerge)))
  201. }
  202. sm := &segmentMerge{
  203. id: newSegmentID,
  204. old: oldMap,
  205. oldNewDocNums: oldNewDocNums,
  206. new: seg,
  207. notify: make(chan *IndexSnapshot),
  208. }
  209. // give it to the introducer
  210. select {
  211. case <-s.closeCh:
  212. _ = seg.Close()
  213. return segment.ErrClosed
  214. case s.merges <- sm:
  215. atomic.AddUint64(&s.stats.TotFileMergeIntroductions, 1)
  216. }
  217. introStartTime := time.Now()
  218. // it is safe to blockingly wait for the merge introduction
  219. // here as the introducer is bound to handle the notify channel.
  220. newSnapshot := <-sm.notify
  221. introTime := uint64(time.Since(introStartTime))
  222. atomic.AddUint64(&s.stats.TotFileMergeZapIntroductionTime, introTime)
  223. if atomic.LoadUint64(&s.stats.MaxFileMergeZapIntroductionTime) < introTime {
  224. atomic.StoreUint64(&s.stats.MaxFileMergeZapIntroductionTime, introTime)
  225. }
  226. atomic.AddUint64(&s.stats.TotFileMergeIntroductionsDone, 1)
  227. if newSnapshot != nil {
  228. _ = newSnapshot.DecRef()
  229. }
  230. atomic.AddUint64(&s.stats.TotFileMergePlanTasksDone, 1)
  231. }
  232. // once all the newly merged segment introductions are done,
  233. // its safe to unflip the removal ineligibility for the replaced
  234. // older segments
  235. for _, f := range filenames {
  236. s.unmarkIneligibleForRemoval(f)
  237. }
  238. return nil
  239. }
// segmentMerge describes one merge result that is sent on s.merges for
// introduction into the root snapshot.
type segmentMerge struct {
	// id is the segment id allocated for the newly merged segment.
	id uint64
	// old maps the id of each segment being replaced to its snapshot.
	// The file merger stores a nil entry for segments that had no live
	// docs and were therefore left out of the merge input.
	old map[uint64]*SegmentSnapshot
	// oldNewDocNums maps a replaced segment's id to the doc-number slice
	// returned for it by the segment plugin's Merge call.
	oldNewDocNums map[uint64][]uint64
	// new is the opened merged segment; the file merger leaves it nil
	// when no segments were actually merged for a task.
	new segment.Segment
	// notify is the channel the sender blocks on after handing off the
	// merge; the received snapshot may be nil.
	notify chan *IndexSnapshot
}
  247. // perform a merging of the given SegmentBase instances into a new,
  248. // persisted segment, and synchronously introduce that new segment
  249. // into the root
  250. func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot,
  251. sbs []segment.Segment, sbsDrops []*roaring.Bitmap,
  252. sbsIndexes []int) (*IndexSnapshot, uint64, error) {
  253. atomic.AddUint64(&s.stats.TotMemMergeBeg, 1)
  254. memMergeZapStartTime := time.Now()
  255. atomic.AddUint64(&s.stats.TotMemMergeZapBeg, 1)
  256. newSegmentID := atomic.AddUint64(&s.nextSegmentID, 1)
  257. filename := zapFileName(newSegmentID)
  258. path := s.path + string(os.PathSeparator) + filename
  259. newDocNums, _, err :=
  260. s.segPlugin.Merge(sbs, sbsDrops, path, s.closeCh, s)
  261. atomic.AddUint64(&s.stats.TotMemMergeZapEnd, 1)
  262. memMergeZapTime := uint64(time.Since(memMergeZapStartTime))
  263. atomic.AddUint64(&s.stats.TotMemMergeZapTime, memMergeZapTime)
  264. if atomic.LoadUint64(&s.stats.MaxMemMergeZapTime) < memMergeZapTime {
  265. atomic.StoreUint64(&s.stats.MaxMemMergeZapTime, memMergeZapTime)
  266. }
  267. if err != nil {
  268. atomic.AddUint64(&s.stats.TotMemMergeErr, 1)
  269. return nil, 0, err
  270. }
  271. seg, err := s.segPlugin.Open(path)
  272. if err != nil {
  273. atomic.AddUint64(&s.stats.TotMemMergeErr, 1)
  274. return nil, 0, err
  275. }
  276. // update persisted stats
  277. atomic.AddUint64(&s.stats.TotPersistedItems, seg.Count())
  278. atomic.AddUint64(&s.stats.TotPersistedSegments, 1)
  279. sm := &segmentMerge{
  280. id: newSegmentID,
  281. old: make(map[uint64]*SegmentSnapshot),
  282. oldNewDocNums: make(map[uint64][]uint64),
  283. new: seg,
  284. notify: make(chan *IndexSnapshot),
  285. }
  286. for i, idx := range sbsIndexes {
  287. ss := snapshot.segment[idx]
  288. sm.old[ss.id] = ss
  289. sm.oldNewDocNums[ss.id] = newDocNums[i]
  290. }
  291. select { // send to introducer
  292. case <-s.closeCh:
  293. _ = seg.DecRef()
  294. return nil, 0, segment.ErrClosed
  295. case s.merges <- sm:
  296. }
  297. // blockingly wait for the introduction to complete
  298. newSnapshot := <-sm.notify
  299. if newSnapshot != nil {
  300. atomic.AddUint64(&s.stats.TotMemMergeSegments, uint64(len(sbs)))
  301. atomic.AddUint64(&s.stats.TotMemMergeDone, 1)
  302. }
  303. return newSnapshot, newSegmentID, nil
  304. }
// ReportBytesWritten atomically adds bytesWritten to the
// TotFileMergeWrittenBytes stat.
// NOTE(review): s is passed as the last argument to segPlugin.Merge,
// presumably so the plugin can call this while writing — confirm.
func (s *Scorch) ReportBytesWritten(bytesWritten uint64) {
	atomic.AddUint64(&s.stats.TotFileMergeWrittenBytes, bytesWritten)
}