You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

freelist.go 12KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413
  1. package bbolt
  2. import (
  3. "fmt"
  4. "reflect"
  5. "sort"
  6. "unsafe"
  7. )
  8. // txPending holds a list of pgids and corresponding allocation txns
  9. // that are pending to be freed.
  10. type txPending struct {
  11. ids []pgid
  12. alloctx []txid // txids allocating the ids
  13. lastReleaseBegin txid // beginning txid of last matching releaseRange
  14. }
  15. // pidSet holds the set of starting pgids which have the same span size
  16. type pidSet map[pgid]struct{}
  17. // freelist represents a list of all pages that are available for allocation.
  18. // It also tracks pages that have been freed but are still in use by open transactions.
  19. type freelist struct {
  20. freelistType FreelistType // freelist type
  21. ids []pgid // all free and available free page ids.
  22. allocs map[pgid]txid // mapping of txid that allocated a pgid.
  23. pending map[txid]*txPending // mapping of soon-to-be free page ids by tx.
  24. cache map[pgid]bool // fast lookup of all free and pending page ids.
  25. freemaps map[uint64]pidSet // key is the size of continuous pages(span), value is a set which contains the starting pgids of same size
  26. forwardMap map[pgid]uint64 // key is start pgid, value is its span size
  27. backwardMap map[pgid]uint64 // key is end pgid, value is its span size
  28. allocate func(txid txid, n int) pgid // the freelist allocate func
  29. free_count func() int // the function which gives you free page number
  30. mergeSpans func(ids pgids) // the mergeSpan func
  31. getFreePageIDs func() []pgid // get free pgids func
  32. readIDs func(pgids []pgid) // readIDs func reads list of pages and init the freelist
  33. }
  34. // newFreelist returns an empty, initialized freelist.
  35. func newFreelist(freelistType FreelistType) *freelist {
  36. f := &freelist{
  37. freelistType: freelistType,
  38. allocs: make(map[pgid]txid),
  39. pending: make(map[txid]*txPending),
  40. cache: make(map[pgid]bool),
  41. freemaps: make(map[uint64]pidSet),
  42. forwardMap: make(map[pgid]uint64),
  43. backwardMap: make(map[pgid]uint64),
  44. }
  45. if freelistType == FreelistMapType {
  46. f.allocate = f.hashmapAllocate
  47. f.free_count = f.hashmapFreeCount
  48. f.mergeSpans = f.hashmapMergeSpans
  49. f.getFreePageIDs = f.hashmapGetFreePageIDs
  50. f.readIDs = f.hashmapReadIDs
  51. } else {
  52. f.allocate = f.arrayAllocate
  53. f.free_count = f.arrayFreeCount
  54. f.mergeSpans = f.arrayMergeSpans
  55. f.getFreePageIDs = f.arrayGetFreePageIDs
  56. f.readIDs = f.arrayReadIDs
  57. }
  58. return f
  59. }
  60. // size returns the size of the page after serialization.
  61. func (f *freelist) size() int {
  62. n := f.count()
  63. if n >= 0xFFFF {
  64. // The first element will be used to store the count. See freelist.write.
  65. n++
  66. }
  67. return int(pageHeaderSize) + (int(unsafe.Sizeof(pgid(0))) * n)
  68. }
  69. // count returns count of pages on the freelist
  70. func (f *freelist) count() int {
  71. return f.free_count() + f.pending_count()
  72. }
  73. // arrayFreeCount returns count of free pages(array version)
  74. func (f *freelist) arrayFreeCount() int {
  75. return len(f.ids)
  76. }
  77. // pending_count returns count of pending pages
  78. func (f *freelist) pending_count() int {
  79. var count int
  80. for _, txp := range f.pending {
  81. count += len(txp.ids)
  82. }
  83. return count
  84. }
  85. // copyallunsafe copies a list of all free ids and all pending ids in one sorted list.
  86. // f.count returns the minimum length required for dst.
  87. func (f *freelist) copyallunsafe(dstptr unsafe.Pointer) { // dstptr is []pgid data pointer
  88. m := make(pgids, 0, f.pending_count())
  89. for _, txp := range f.pending {
  90. m = append(m, txp.ids...)
  91. }
  92. sort.Sort(m)
  93. fpgids := f.getFreePageIDs()
  94. sz := len(fpgids) + len(m)
  95. dst := *(*[]pgid)(unsafe.Pointer(&reflect.SliceHeader{
  96. Data: uintptr(dstptr),
  97. Len: sz,
  98. Cap: sz,
  99. }))
  100. mergepgids(dst, fpgids, m)
  101. }
  102. func (f *freelist) copyall(dst []pgid) {
  103. m := make(pgids, 0, f.pending_count())
  104. for _, txp := range f.pending {
  105. m = append(m, txp.ids...)
  106. }
  107. sort.Sort(m)
  108. mergepgids(dst, f.getFreePageIDs(), m)
  109. }
  110. // arrayAllocate returns the starting page id of a contiguous list of pages of a given size.
  111. // If a contiguous block cannot be found then 0 is returned.
  112. func (f *freelist) arrayAllocate(txid txid, n int) pgid {
  113. if len(f.ids) == 0 {
  114. return 0
  115. }
  116. var initial, previd pgid
  117. for i, id := range f.ids {
  118. if id <= 1 {
  119. panic(fmt.Sprintf("invalid page allocation: %d", id))
  120. }
  121. // Reset initial page if this is not contiguous.
  122. if previd == 0 || id-previd != 1 {
  123. initial = id
  124. }
  125. // If we found a contiguous block then remove it and return it.
  126. if (id-initial)+1 == pgid(n) {
  127. // If we're allocating off the beginning then take the fast path
  128. // and just adjust the existing slice. This will use extra memory
  129. // temporarily but the append() in free() will realloc the slice
  130. // as is necessary.
  131. if (i + 1) == n {
  132. f.ids = f.ids[i+1:]
  133. } else {
  134. copy(f.ids[i-n+1:], f.ids[i+1:])
  135. f.ids = f.ids[:len(f.ids)-n]
  136. }
  137. // Remove from the free cache.
  138. for i := pgid(0); i < pgid(n); i++ {
  139. delete(f.cache, initial+i)
  140. }
  141. f.allocs[initial] = txid
  142. return initial
  143. }
  144. previd = id
  145. }
  146. return 0
  147. }
  148. // free releases a page and its overflow for a given transaction id.
  149. // If the page is already free then a panic will occur.
  150. func (f *freelist) free(txid txid, p *page) {
  151. if p.id <= 1 {
  152. panic(fmt.Sprintf("cannot free page 0 or 1: %d", p.id))
  153. }
  154. // Free page and all its overflow pages.
  155. txp := f.pending[txid]
  156. if txp == nil {
  157. txp = &txPending{}
  158. f.pending[txid] = txp
  159. }
  160. allocTxid, ok := f.allocs[p.id]
  161. if ok {
  162. delete(f.allocs, p.id)
  163. } else if (p.flags & freelistPageFlag) != 0 {
  164. // Freelist is always allocated by prior tx.
  165. allocTxid = txid - 1
  166. }
  167. for id := p.id; id <= p.id+pgid(p.overflow); id++ {
  168. // Verify that page is not already free.
  169. if f.cache[id] {
  170. panic(fmt.Sprintf("page %d already freed", id))
  171. }
  172. // Add to the freelist and cache.
  173. txp.ids = append(txp.ids, id)
  174. txp.alloctx = append(txp.alloctx, allocTxid)
  175. f.cache[id] = true
  176. }
  177. }
  178. // release moves all page ids for a transaction id (or older) to the freelist.
  179. func (f *freelist) release(txid txid) {
  180. m := make(pgids, 0)
  181. for tid, txp := range f.pending {
  182. if tid <= txid {
  183. // Move transaction's pending pages to the available freelist.
  184. // Don't remove from the cache since the page is still free.
  185. m = append(m, txp.ids...)
  186. delete(f.pending, tid)
  187. }
  188. }
  189. f.mergeSpans(m)
  190. }
  191. // releaseRange moves pending pages allocated within an extent [begin,end] to the free list.
  192. func (f *freelist) releaseRange(begin, end txid) {
  193. if begin > end {
  194. return
  195. }
  196. var m pgids
  197. for tid, txp := range f.pending {
  198. if tid < begin || tid > end {
  199. continue
  200. }
  201. // Don't recompute freed pages if ranges haven't updated.
  202. if txp.lastReleaseBegin == begin {
  203. continue
  204. }
  205. for i := 0; i < len(txp.ids); i++ {
  206. if atx := txp.alloctx[i]; atx < begin || atx > end {
  207. continue
  208. }
  209. m = append(m, txp.ids[i])
  210. txp.ids[i] = txp.ids[len(txp.ids)-1]
  211. txp.ids = txp.ids[:len(txp.ids)-1]
  212. txp.alloctx[i] = txp.alloctx[len(txp.alloctx)-1]
  213. txp.alloctx = txp.alloctx[:len(txp.alloctx)-1]
  214. i--
  215. }
  216. txp.lastReleaseBegin = begin
  217. if len(txp.ids) == 0 {
  218. delete(f.pending, tid)
  219. }
  220. }
  221. f.mergeSpans(m)
  222. }
  223. // rollback removes the pages from a given pending tx.
  224. func (f *freelist) rollback(txid txid) {
  225. // Remove page ids from cache.
  226. txp := f.pending[txid]
  227. if txp == nil {
  228. return
  229. }
  230. var m pgids
  231. for i, pgid := range txp.ids {
  232. delete(f.cache, pgid)
  233. tx := txp.alloctx[i]
  234. if tx == 0 {
  235. continue
  236. }
  237. if tx != txid {
  238. // Pending free aborted; restore page back to alloc list.
  239. f.allocs[pgid] = tx
  240. } else {
  241. // Freed page was allocated by this txn; OK to throw away.
  242. m = append(m, pgid)
  243. }
  244. }
  245. // Remove pages from pending list and mark as free if allocated by txid.
  246. delete(f.pending, txid)
  247. f.mergeSpans(m)
  248. }
  249. // freed returns whether a given page is in the free list.
  250. func (f *freelist) freed(pgid pgid) bool {
  251. return f.cache[pgid]
  252. }
  253. // read initializes the freelist from a freelist page.
  254. func (f *freelist) read(p *page) {
  255. if (p.flags & freelistPageFlag) == 0 {
  256. panic(fmt.Sprintf("invalid freelist page: %d, page type is %s", p.id, p.typ()))
  257. }
  258. // If the page.count is at the max uint16 value (64k) then it's considered
  259. // an overflow and the size of the freelist is stored as the first element.
  260. var idx, count uintptr = 0, uintptr(p.count)
  261. if count == 0xFFFF {
  262. idx = 1
  263. count = uintptr(*(*pgid)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p))))
  264. }
  265. // Copy the list of page ids from the freelist.
  266. if count == 0 {
  267. f.ids = nil
  268. } else {
  269. ids := *(*[]pgid)(unsafe.Pointer(&reflect.SliceHeader{
  270. Data: uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p) + idx*unsafe.Sizeof(pgid(0)),
  271. Len: int(count),
  272. Cap: int(count),
  273. }))
  274. // copy the ids, so we don't modify on the freelist page directly
  275. idsCopy := make([]pgid, count)
  276. copy(idsCopy, ids)
  277. // Make sure they're sorted.
  278. sort.Sort(pgids(idsCopy))
  279. f.readIDs(idsCopy)
  280. }
  281. }
  282. // arrayReadIDs initializes the freelist from a given list of ids.
  283. func (f *freelist) arrayReadIDs(ids []pgid) {
  284. f.ids = ids
  285. f.reindex()
  286. }
  287. func (f *freelist) arrayGetFreePageIDs() []pgid {
  288. return f.ids
  289. }
  290. // write writes the page ids onto a freelist page. All free and pending ids are
  291. // saved to disk since in the event of a program crash, all pending ids will
  292. // become free.
  293. func (f *freelist) write(p *page) error {
  294. // Combine the old free pgids and pgids waiting on an open transaction.
  295. // Update the header flag.
  296. p.flags |= freelistPageFlag
  297. // The page.count can only hold up to 64k elements so if we overflow that
  298. // number then we handle it by putting the size in the first element.
  299. lenids := f.count()
  300. if lenids == 0 {
  301. p.count = uint16(lenids)
  302. } else if lenids < 0xFFFF {
  303. p.count = uint16(lenids)
  304. f.copyallunsafe(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p)))
  305. } else {
  306. p.count = 0xFFFF
  307. *(*pgid)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p))) = pgid(lenids)
  308. f.copyallunsafe(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p) + unsafe.Sizeof(pgid(0))))
  309. }
  310. return nil
  311. }
  312. // reload reads the freelist from a page and filters out pending items.
  313. func (f *freelist) reload(p *page) {
  314. f.read(p)
  315. // Build a cache of only pending pages.
  316. pcache := make(map[pgid]bool)
  317. for _, txp := range f.pending {
  318. for _, pendingID := range txp.ids {
  319. pcache[pendingID] = true
  320. }
  321. }
  322. // Check each page in the freelist and build a new available freelist
  323. // with any pages not in the pending lists.
  324. var a []pgid
  325. for _, id := range f.getFreePageIDs() {
  326. if !pcache[id] {
  327. a = append(a, id)
  328. }
  329. }
  330. f.readIDs(a)
  331. }
  332. // noSyncReload reads the freelist from pgids and filters out pending items.
  333. func (f *freelist) noSyncReload(pgids []pgid) {
  334. // Build a cache of only pending pages.
  335. pcache := make(map[pgid]bool)
  336. for _, txp := range f.pending {
  337. for _, pendingID := range txp.ids {
  338. pcache[pendingID] = true
  339. }
  340. }
  341. // Check each page in the freelist and build a new available freelist
  342. // with any pages not in the pending lists.
  343. var a []pgid
  344. for _, id := range pgids {
  345. if !pcache[id] {
  346. a = append(a, id)
  347. }
  348. }
  349. f.readIDs(a)
  350. }
  351. // reindex rebuilds the free cache based on available and pending free lists.
  352. func (f *freelist) reindex() {
  353. ids := f.getFreePageIDs()
  354. f.cache = make(map[pgid]bool, len(ids))
  355. for _, id := range ids {
  356. f.cache[id] = true
  357. }
  358. for _, txp := range f.pending {
  359. for _, pendingID := range txp.ids {
  360. f.cache[pendingID] = true
  361. }
  362. }
  363. }
  364. // arrayMergeSpans try to merge list of pages(represented by pgids) with existing spans but using array
  365. func (f *freelist) arrayMergeSpans(ids pgids) {
  366. sort.Sort(ids)
  367. f.ids = pgids(f.ids).merge(ids)
  368. }