You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

freelist.go 10KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370
  1. package bbolt
  2. import (
  3. "fmt"
  4. "sort"
  5. "unsafe"
  6. )
  // txPending holds the page ids that a transaction has freed but that are
  // not yet available for reuse, along with the txid recorded as the
  // allocator of each page. The two slices are kept in lockstep: alloctx[i]
  // belongs to ids[i].
  type txPending struct {
  	ids              []pgid // page ids waiting to be released
  	alloctx          []txid // txids allocating the ids (0 when no allocator was recorded)
  	lastReleaseBegin txid   // beginning txid of last matching releaseRange
  }
  // pidSet holds the set of starting pgids which have the same span size.
  // The empty-struct value makes the map behave as a set.
  type pidSet map[pgid]struct{}
  // freelist represents a list of all pages that are available for allocation.
  // It also tracks pages that have been freed but are still in use by open transactions.
  //
  // The backend-specific operations (allocate, free_count, mergeSpans,
  // getFreePageIDs, readIDs) are stored as function fields; newFreelist binds
  // them to either the array or the hashmap implementation.
  type freelist struct {
  	freelistType   FreelistType                // freelist type
  	ids            []pgid                      // all free and available free page ids.
  	allocs         map[pgid]txid               // maps the starting pgid of an allocation to the txid that allocated it.
  	pending        map[txid]*txPending         // mapping of soon-to-be free page ids by tx.
  	cache          map[pgid]bool               // fast lookup of all free and pending page ids.
  	freemaps       map[uint64]pidSet           // key is the size of a contiguous page span, value is the set of starting pgids of spans with that size
  	forwardMap     map[pgid]uint64             // key is start pgid, value is its span size
  	backwardMap    map[pgid]uint64             // key is end pgid, value is its span size
  	allocate       func(txid txid, n int) pgid // the freelist allocate func
  	free_count     func() int                  // returns the number of free pages
  	mergeSpans     func(ids pgids)             // merges the given page ids into the free spans
  	getFreePageIDs func() []pgid               // returns the free pgids
  	readIDs        func(pgids []pgid)          // reads a list of page ids and initializes the freelist from it
  }
  33. // newFreelist returns an empty, initialized freelist.
  34. func newFreelist(freelistType FreelistType) *freelist {
  35. f := &freelist{
  36. freelistType: freelistType,
  37. allocs: make(map[pgid]txid),
  38. pending: make(map[txid]*txPending),
  39. cache: make(map[pgid]bool),
  40. freemaps: make(map[uint64]pidSet),
  41. forwardMap: make(map[pgid]uint64),
  42. backwardMap: make(map[pgid]uint64),
  43. }
  44. if freelistType == FreelistMapType {
  45. f.allocate = f.hashmapAllocate
  46. f.free_count = f.hashmapFreeCount
  47. f.mergeSpans = f.hashmapMergeSpans
  48. f.getFreePageIDs = f.hashmapGetFreePageIDs
  49. f.readIDs = f.hashmapReadIDs
  50. } else {
  51. f.allocate = f.arrayAllocate
  52. f.free_count = f.arrayFreeCount
  53. f.mergeSpans = f.arrayMergeSpans
  54. f.getFreePageIDs = f.arrayGetFreePageIDs
  55. f.readIDs = f.arrayReadIDs
  56. }
  57. return f
  58. }
  59. // size returns the size of the page after serialization.
  60. func (f *freelist) size() int {
  61. n := f.count()
  62. if n >= 0xFFFF {
  63. // The first element will be used to store the count. See freelist.write.
  64. n++
  65. }
  66. return pageHeaderSize + (int(unsafe.Sizeof(pgid(0))) * n)
  67. }
  68. // count returns count of pages on the freelist
  69. func (f *freelist) count() int {
  70. return f.free_count() + f.pending_count()
  71. }
  72. // arrayFreeCount returns count of free pages(array version)
  73. func (f *freelist) arrayFreeCount() int {
  74. return len(f.ids)
  75. }
  76. // pending_count returns count of pending pages
  77. func (f *freelist) pending_count() int {
  78. var count int
  79. for _, txp := range f.pending {
  80. count += len(txp.ids)
  81. }
  82. return count
  83. }
  84. // copyall copies into dst a list of all free ids and all pending ids in one sorted list.
  85. // f.count returns the minimum length required for dst.
  86. func (f *freelist) copyall(dst []pgid) {
  87. m := make(pgids, 0, f.pending_count())
  88. for _, txp := range f.pending {
  89. m = append(m, txp.ids...)
  90. }
  91. sort.Sort(m)
  92. mergepgids(dst, f.getFreePageIDs(), m)
  93. }
  94. // arrayAllocate returns the starting page id of a contiguous list of pages of a given size.
  95. // If a contiguous block cannot be found then 0 is returned.
  96. func (f *freelist) arrayAllocate(txid txid, n int) pgid {
  97. if len(f.ids) == 0 {
  98. return 0
  99. }
  100. var initial, previd pgid
  101. for i, id := range f.ids {
  102. if id <= 1 {
  103. panic(fmt.Sprintf("invalid page allocation: %d", id))
  104. }
  105. // Reset initial page if this is not contiguous.
  106. if previd == 0 || id-previd != 1 {
  107. initial = id
  108. }
  109. // If we found a contiguous block then remove it and return it.
  110. if (id-initial)+1 == pgid(n) {
  111. // If we're allocating off the beginning then take the fast path
  112. // and just adjust the existing slice. This will use extra memory
  113. // temporarily but the append() in free() will realloc the slice
  114. // as is necessary.
  115. if (i + 1) == n {
  116. f.ids = f.ids[i+1:]
  117. } else {
  118. copy(f.ids[i-n+1:], f.ids[i+1:])
  119. f.ids = f.ids[:len(f.ids)-n]
  120. }
  121. // Remove from the free cache.
  122. for i := pgid(0); i < pgid(n); i++ {
  123. delete(f.cache, initial+i)
  124. }
  125. f.allocs[initial] = txid
  126. return initial
  127. }
  128. previd = id
  129. }
  130. return 0
  131. }
  132. // free releases a page and its overflow for a given transaction id.
  133. // If the page is already free then a panic will occur.
  134. func (f *freelist) free(txid txid, p *page) {
  135. if p.id <= 1 {
  136. panic(fmt.Sprintf("cannot free page 0 or 1: %d", p.id))
  137. }
  138. // Free page and all its overflow pages.
  139. txp := f.pending[txid]
  140. if txp == nil {
  141. txp = &txPending{}
  142. f.pending[txid] = txp
  143. }
  144. allocTxid, ok := f.allocs[p.id]
  145. if ok {
  146. delete(f.allocs, p.id)
  147. } else if (p.flags & freelistPageFlag) != 0 {
  148. // Freelist is always allocated by prior tx.
  149. allocTxid = txid - 1
  150. }
  151. for id := p.id; id <= p.id+pgid(p.overflow); id++ {
  152. // Verify that page is not already free.
  153. if f.cache[id] {
  154. panic(fmt.Sprintf("page %d already freed", id))
  155. }
  156. // Add to the freelist and cache.
  157. txp.ids = append(txp.ids, id)
  158. txp.alloctx = append(txp.alloctx, allocTxid)
  159. f.cache[id] = true
  160. }
  161. }
  162. // release moves all page ids for a transaction id (or older) to the freelist.
  163. func (f *freelist) release(txid txid) {
  164. m := make(pgids, 0)
  165. for tid, txp := range f.pending {
  166. if tid <= txid {
  167. // Move transaction's pending pages to the available freelist.
  168. // Don't remove from the cache since the page is still free.
  169. m = append(m, txp.ids...)
  170. delete(f.pending, tid)
  171. }
  172. }
  173. f.mergeSpans(m)
  174. }
  175. // releaseRange moves pending pages allocated within an extent [begin,end] to the free list.
  176. func (f *freelist) releaseRange(begin, end txid) {
  177. if begin > end {
  178. return
  179. }
  180. var m pgids
  181. for tid, txp := range f.pending {
  182. if tid < begin || tid > end {
  183. continue
  184. }
  185. // Don't recompute freed pages if ranges haven't updated.
  186. if txp.lastReleaseBegin == begin {
  187. continue
  188. }
  189. for i := 0; i < len(txp.ids); i++ {
  190. if atx := txp.alloctx[i]; atx < begin || atx > end {
  191. continue
  192. }
  193. m = append(m, txp.ids[i])
  194. txp.ids[i] = txp.ids[len(txp.ids)-1]
  195. txp.ids = txp.ids[:len(txp.ids)-1]
  196. txp.alloctx[i] = txp.alloctx[len(txp.alloctx)-1]
  197. txp.alloctx = txp.alloctx[:len(txp.alloctx)-1]
  198. i--
  199. }
  200. txp.lastReleaseBegin = begin
  201. if len(txp.ids) == 0 {
  202. delete(f.pending, tid)
  203. }
  204. }
  205. f.mergeSpans(m)
  206. }
  207. // rollback removes the pages from a given pending tx.
  208. func (f *freelist) rollback(txid txid) {
  209. // Remove page ids from cache.
  210. txp := f.pending[txid]
  211. if txp == nil {
  212. return
  213. }
  214. var m pgids
  215. for i, pgid := range txp.ids {
  216. delete(f.cache, pgid)
  217. tx := txp.alloctx[i]
  218. if tx == 0 {
  219. continue
  220. }
  221. if tx != txid {
  222. // Pending free aborted; restore page back to alloc list.
  223. f.allocs[pgid] = tx
  224. } else {
  225. // Freed page was allocated by this txn; OK to throw away.
  226. m = append(m, pgid)
  227. }
  228. }
  229. // Remove pages from pending list and mark as free if allocated by txid.
  230. delete(f.pending, txid)
  231. f.mergeSpans(m)
  232. }
  233. // freed returns whether a given page is in the free list.
  234. func (f *freelist) freed(pgid pgid) bool {
  235. return f.cache[pgid]
  236. }
  237. // read initializes the freelist from a freelist page.
  238. func (f *freelist) read(p *page) {
  239. if (p.flags & freelistPageFlag) == 0 {
  240. panic(fmt.Sprintf("invalid freelist page: %d, page type is %s", p.id, p.typ()))
  241. }
  242. // If the page.count is at the max uint16 value (64k) then it's considered
  243. // an overflow and the size of the freelist is stored as the first element.
  244. idx, count := 0, int(p.count)
  245. if count == 0xFFFF {
  246. idx = 1
  247. count = int(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[0])
  248. }
  249. // Copy the list of page ids from the freelist.
  250. if count == 0 {
  251. f.ids = nil
  252. } else {
  253. ids := ((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[idx : idx+count]
  254. // copy the ids, so we don't modify on the freelist page directly
  255. idsCopy := make([]pgid, count)
  256. copy(idsCopy, ids)
  257. // Make sure they're sorted.
  258. sort.Sort(pgids(idsCopy))
  259. f.readIDs(idsCopy)
  260. }
  261. }
  262. // arrayReadIDs initializes the freelist from a given list of ids.
  263. func (f *freelist) arrayReadIDs(ids []pgid) {
  264. f.ids = ids
  265. f.reindex()
  266. }
  267. func (f *freelist) arrayGetFreePageIDs() []pgid {
  268. return f.ids
  269. }
  270. // write writes the page ids onto a freelist page. All free and pending ids are
  271. // saved to disk since in the event of a program crash, all pending ids will
  272. // become free.
  273. func (f *freelist) write(p *page) error {
  274. // Combine the old free pgids and pgids waiting on an open transaction.
  275. // Update the header flag.
  276. p.flags |= freelistPageFlag
  277. // The page.count can only hold up to 64k elements so if we overflow that
  278. // number then we handle it by putting the size in the first element.
  279. lenids := f.count()
  280. if lenids == 0 {
  281. p.count = uint16(lenids)
  282. } else if lenids < 0xFFFF {
  283. p.count = uint16(lenids)
  284. f.copyall(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[:])
  285. } else {
  286. p.count = 0xFFFF
  287. ((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[0] = pgid(lenids)
  288. f.copyall(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[1:])
  289. }
  290. return nil
  291. }
  292. // reload reads the freelist from a page and filters out pending items.
  293. func (f *freelist) reload(p *page) {
  294. f.read(p)
  295. // Build a cache of only pending pages.
  296. pcache := make(map[pgid]bool)
  297. for _, txp := range f.pending {
  298. for _, pendingID := range txp.ids {
  299. pcache[pendingID] = true
  300. }
  301. }
  302. // Check each page in the freelist and build a new available freelist
  303. // with any pages not in the pending lists.
  304. var a []pgid
  305. for _, id := range f.getFreePageIDs() {
  306. if !pcache[id] {
  307. a = append(a, id)
  308. }
  309. }
  310. f.readIDs(a)
  311. }
  312. // reindex rebuilds the free cache based on available and pending free lists.
  313. func (f *freelist) reindex() {
  314. ids := f.getFreePageIDs()
  315. f.cache = make(map[pgid]bool, len(ids))
  316. for _, id := range ids {
  317. f.cache[id] = true
  318. }
  319. for _, txp := range f.pending {
  320. for _, pendingID := range txp.ids {
  321. f.cache[pendingID] = true
  322. }
  323. }
  324. }
  325. // arrayMergeSpans try to merge list of pages(represented by pgids) with existing spans but using array
  326. func (f *freelist) arrayMergeSpans(ids pgids) {
  327. sort.Sort(ids)
  328. f.ids = pgids(f.ids).merge(ids)
  329. }