您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777
  1. package bolt
  2. import (
  3. "bytes"
  4. "fmt"
  5. "unsafe"
  6. )
  7. const (
  8. // MaxKeySize is the maximum length of a key, in bytes.
  9. MaxKeySize = 32768
  10. // MaxValueSize is the maximum length of a value, in bytes.
  11. MaxValueSize = (1 << 31) - 2
  12. )
  13. const (
  14. maxUint = ^uint(0)
  15. minUint = 0
  16. maxInt = int(^uint(0) >> 1)
  17. minInt = -maxInt - 1
  18. )
  19. const bucketHeaderSize = int(unsafe.Sizeof(bucket{}))
  20. const (
  21. minFillPercent = 0.1
  22. maxFillPercent = 1.0
  23. )
  24. // DefaultFillPercent is the percentage that split pages are filled.
  25. // This value can be changed by setting Bucket.FillPercent.
  26. const DefaultFillPercent = 0.5
  27. // Bucket represents a collection of key/value pairs inside the database.
  28. type Bucket struct {
  29. *bucket
  30. tx *Tx // the associated transaction
  31. buckets map[string]*Bucket // subbucket cache
  32. page *page // inline page reference
  33. rootNode *node // materialized node for the root page.
  34. nodes map[pgid]*node // node cache
  35. // Sets the threshold for filling nodes when they split. By default,
  36. // the bucket will fill to 50% but it can be useful to increase this
  37. // amount if you know that your write workloads are mostly append-only.
  38. //
  39. // This is non-persisted across transactions so it must be set in every Tx.
  40. FillPercent float64
  41. }
  42. // bucket represents the on-file representation of a bucket.
  43. // This is stored as the "value" of a bucket key. If the bucket is small enough,
  44. // then its root page can be stored inline in the "value", after the bucket
  45. // header. In the case of inline buckets, the "root" will be 0.
  46. type bucket struct {
  47. root pgid // page id of the bucket's root-level page
  48. sequence uint64 // monotonically incrementing, used by NextSequence()
  49. }
  50. // newBucket returns a new bucket associated with a transaction.
  51. func newBucket(tx *Tx) Bucket {
  52. var b = Bucket{tx: tx, FillPercent: DefaultFillPercent}
  53. if tx.writable {
  54. b.buckets = make(map[string]*Bucket)
  55. b.nodes = make(map[pgid]*node)
  56. }
  57. return b
  58. }
  59. // Tx returns the tx of the bucket.
  60. func (b *Bucket) Tx() *Tx {
  61. return b.tx
  62. }
  63. // Root returns the root of the bucket.
  64. func (b *Bucket) Root() pgid {
  65. return b.root
  66. }
  67. // Writable returns whether the bucket is writable.
  68. func (b *Bucket) Writable() bool {
  69. return b.tx.writable
  70. }
  71. // Cursor creates a cursor associated with the bucket.
  72. // The cursor is only valid as long as the transaction is open.
  73. // Do not use a cursor after the transaction is closed.
  74. func (b *Bucket) Cursor() *Cursor {
  75. // Update transaction statistics.
  76. b.tx.stats.CursorCount++
  77. // Allocate and return a cursor.
  78. return &Cursor{
  79. bucket: b,
  80. stack: make([]elemRef, 0),
  81. }
  82. }
  83. // Bucket retrieves a nested bucket by name.
  84. // Returns nil if the bucket does not exist.
  85. // The bucket instance is only valid for the lifetime of the transaction.
  86. func (b *Bucket) Bucket(name []byte) *Bucket {
  87. if b.buckets != nil {
  88. if child := b.buckets[string(name)]; child != nil {
  89. return child
  90. }
  91. }
  92. // Move cursor to key.
  93. c := b.Cursor()
  94. k, v, flags := c.seek(name)
  95. // Return nil if the key doesn't exist or it is not a bucket.
  96. if !bytes.Equal(name, k) || (flags&bucketLeafFlag) == 0 {
  97. return nil
  98. }
  99. // Otherwise create a bucket and cache it.
  100. var child = b.openBucket(v)
  101. if b.buckets != nil {
  102. b.buckets[string(name)] = child
  103. }
  104. return child
  105. }
  106. // Helper method that re-interprets a sub-bucket value
  107. // from a parent into a Bucket
  108. func (b *Bucket) openBucket(value []byte) *Bucket {
  109. var child = newBucket(b.tx)
  110. // If unaligned load/stores are broken on this arch and value is
  111. // unaligned simply clone to an aligned byte array.
  112. unaligned := brokenUnaligned && uintptr(unsafe.Pointer(&value[0]))&3 != 0
  113. if unaligned {
  114. value = cloneBytes(value)
  115. }
  116. // If this is a writable transaction then we need to copy the bucket entry.
  117. // Read-only transactions can point directly at the mmap entry.
  118. if b.tx.writable && !unaligned {
  119. child.bucket = &bucket{}
  120. *child.bucket = *(*bucket)(unsafe.Pointer(&value[0]))
  121. } else {
  122. child.bucket = (*bucket)(unsafe.Pointer(&value[0]))
  123. }
  124. // Save a reference to the inline page if the bucket is inline.
  125. if child.root == 0 {
  126. child.page = (*page)(unsafe.Pointer(&value[bucketHeaderSize]))
  127. }
  128. return &child
  129. }
  130. // CreateBucket creates a new bucket at the given key and returns the new bucket.
  131. // Returns an error if the key already exists, if the bucket name is blank, or if the bucket name is too long.
  132. // The bucket instance is only valid for the lifetime of the transaction.
  133. func (b *Bucket) CreateBucket(key []byte) (*Bucket, error) {
  134. if b.tx.db == nil {
  135. return nil, ErrTxClosed
  136. } else if !b.tx.writable {
  137. return nil, ErrTxNotWritable
  138. } else if len(key) == 0 {
  139. return nil, ErrBucketNameRequired
  140. }
  141. // Move cursor to correct position.
  142. c := b.Cursor()
  143. k, _, flags := c.seek(key)
  144. // Return an error if there is an existing key.
  145. if bytes.Equal(key, k) {
  146. if (flags & bucketLeafFlag) != 0 {
  147. return nil, ErrBucketExists
  148. }
  149. return nil, ErrIncompatibleValue
  150. }
  151. // Create empty, inline bucket.
  152. var bucket = Bucket{
  153. bucket: &bucket{},
  154. rootNode: &node{isLeaf: true},
  155. FillPercent: DefaultFillPercent,
  156. }
  157. var value = bucket.write()
  158. // Insert into node.
  159. key = cloneBytes(key)
  160. c.node().put(key, key, value, 0, bucketLeafFlag)
  161. // Since subbuckets are not allowed on inline buckets, we need to
  162. // dereference the inline page, if it exists. This will cause the bucket
  163. // to be treated as a regular, non-inline bucket for the rest of the tx.
  164. b.page = nil
  165. return b.Bucket(key), nil
  166. }
  167. // CreateBucketIfNotExists creates a new bucket if it doesn't already exist and returns a reference to it.
  168. // Returns an error if the bucket name is blank, or if the bucket name is too long.
  169. // The bucket instance is only valid for the lifetime of the transaction.
  170. func (b *Bucket) CreateBucketIfNotExists(key []byte) (*Bucket, error) {
  171. child, err := b.CreateBucket(key)
  172. if err == ErrBucketExists {
  173. return b.Bucket(key), nil
  174. } else if err != nil {
  175. return nil, err
  176. }
  177. return child, nil
  178. }
  179. // DeleteBucket deletes a bucket at the given key.
  180. // Returns an error if the bucket does not exists, or if the key represents a non-bucket value.
  181. func (b *Bucket) DeleteBucket(key []byte) error {
  182. if b.tx.db == nil {
  183. return ErrTxClosed
  184. } else if !b.Writable() {
  185. return ErrTxNotWritable
  186. }
  187. // Move cursor to correct position.
  188. c := b.Cursor()
  189. k, _, flags := c.seek(key)
  190. // Return an error if bucket doesn't exist or is not a bucket.
  191. if !bytes.Equal(key, k) {
  192. return ErrBucketNotFound
  193. } else if (flags & bucketLeafFlag) == 0 {
  194. return ErrIncompatibleValue
  195. }
  196. // Recursively delete all child buckets.
  197. child := b.Bucket(key)
  198. err := child.ForEach(func(k, v []byte) error {
  199. if v == nil {
  200. if err := child.DeleteBucket(k); err != nil {
  201. return fmt.Errorf("delete bucket: %s", err)
  202. }
  203. }
  204. return nil
  205. })
  206. if err != nil {
  207. return err
  208. }
  209. // Remove cached copy.
  210. delete(b.buckets, string(key))
  211. // Release all bucket pages to freelist.
  212. child.nodes = nil
  213. child.rootNode = nil
  214. child.free()
  215. // Delete the node if we have a matching key.
  216. c.node().del(key)
  217. return nil
  218. }
  219. // Get retrieves the value for a key in the bucket.
  220. // Returns a nil value if the key does not exist or if the key is a nested bucket.
  221. // The returned value is only valid for the life of the transaction.
  222. func (b *Bucket) Get(key []byte) []byte {
  223. k, v, flags := b.Cursor().seek(key)
  224. // Return nil if this is a bucket.
  225. if (flags & bucketLeafFlag) != 0 {
  226. return nil
  227. }
  228. // If our target node isn't the same key as what's passed in then return nil.
  229. if !bytes.Equal(key, k) {
  230. return nil
  231. }
  232. return v
  233. }
  234. // Put sets the value for a key in the bucket.
  235. // If the key exist then its previous value will be overwritten.
  236. // Supplied value must remain valid for the life of the transaction.
  237. // Returns an error if the bucket was created from a read-only transaction, if the key is blank, if the key is too large, or if the value is too large.
  238. func (b *Bucket) Put(key []byte, value []byte) error {
  239. if b.tx.db == nil {
  240. return ErrTxClosed
  241. } else if !b.Writable() {
  242. return ErrTxNotWritable
  243. } else if len(key) == 0 {
  244. return ErrKeyRequired
  245. } else if len(key) > MaxKeySize {
  246. return ErrKeyTooLarge
  247. } else if int64(len(value)) > MaxValueSize {
  248. return ErrValueTooLarge
  249. }
  250. // Move cursor to correct position.
  251. c := b.Cursor()
  252. k, _, flags := c.seek(key)
  253. // Return an error if there is an existing key with a bucket value.
  254. if bytes.Equal(key, k) && (flags&bucketLeafFlag) != 0 {
  255. return ErrIncompatibleValue
  256. }
  257. // Insert into node.
  258. key = cloneBytes(key)
  259. c.node().put(key, key, value, 0, 0)
  260. return nil
  261. }
  262. // Delete removes a key from the bucket.
  263. // If the key does not exist then nothing is done and a nil error is returned.
  264. // Returns an error if the bucket was created from a read-only transaction.
  265. func (b *Bucket) Delete(key []byte) error {
  266. if b.tx.db == nil {
  267. return ErrTxClosed
  268. } else if !b.Writable() {
  269. return ErrTxNotWritable
  270. }
  271. // Move cursor to correct position.
  272. c := b.Cursor()
  273. _, _, flags := c.seek(key)
  274. // Return an error if there is already existing bucket value.
  275. if (flags & bucketLeafFlag) != 0 {
  276. return ErrIncompatibleValue
  277. }
  278. // Delete the node if we have a matching key.
  279. c.node().del(key)
  280. return nil
  281. }
  282. // Sequence returns the current integer for the bucket without incrementing it.
  283. func (b *Bucket) Sequence() uint64 { return b.bucket.sequence }
  284. // SetSequence updates the sequence number for the bucket.
  285. func (b *Bucket) SetSequence(v uint64) error {
  286. if b.tx.db == nil {
  287. return ErrTxClosed
  288. } else if !b.Writable() {
  289. return ErrTxNotWritable
  290. }
  291. // Materialize the root node if it hasn't been already so that the
  292. // bucket will be saved during commit.
  293. if b.rootNode == nil {
  294. _ = b.node(b.root, nil)
  295. }
  296. // Increment and return the sequence.
  297. b.bucket.sequence = v
  298. return nil
  299. }
  300. // NextSequence returns an autoincrementing integer for the bucket.
  301. func (b *Bucket) NextSequence() (uint64, error) {
  302. if b.tx.db == nil {
  303. return 0, ErrTxClosed
  304. } else if !b.Writable() {
  305. return 0, ErrTxNotWritable
  306. }
  307. // Materialize the root node if it hasn't been already so that the
  308. // bucket will be saved during commit.
  309. if b.rootNode == nil {
  310. _ = b.node(b.root, nil)
  311. }
  312. // Increment and return the sequence.
  313. b.bucket.sequence++
  314. return b.bucket.sequence, nil
  315. }
  316. // ForEach executes a function for each key/value pair in a bucket.
  317. // If the provided function returns an error then the iteration is stopped and
  318. // the error is returned to the caller. The provided function must not modify
  319. // the bucket; this will result in undefined behavior.
  320. func (b *Bucket) ForEach(fn func(k, v []byte) error) error {
  321. if b.tx.db == nil {
  322. return ErrTxClosed
  323. }
  324. c := b.Cursor()
  325. for k, v := c.First(); k != nil; k, v = c.Next() {
  326. if err := fn(k, v); err != nil {
  327. return err
  328. }
  329. }
  330. return nil
  331. }
  332. // Stat returns stats on a bucket.
  333. func (b *Bucket) Stats() BucketStats {
  334. var s, subStats BucketStats
  335. pageSize := b.tx.db.pageSize
  336. s.BucketN += 1
  337. if b.root == 0 {
  338. s.InlineBucketN += 1
  339. }
  340. b.forEachPage(func(p *page, depth int) {
  341. if (p.flags & leafPageFlag) != 0 {
  342. s.KeyN += int(p.count)
  343. // used totals the used bytes for the page
  344. used := pageHeaderSize
  345. if p.count != 0 {
  346. // If page has any elements, add all element headers.
  347. used += leafPageElementSize * int(p.count-1)
  348. // Add all element key, value sizes.
  349. // The computation takes advantage of the fact that the position
  350. // of the last element's key/value equals to the total of the sizes
  351. // of all previous elements' keys and values.
  352. // It also includes the last element's header.
  353. lastElement := p.leafPageElement(p.count - 1)
  354. used += int(lastElement.pos + lastElement.ksize + lastElement.vsize)
  355. }
  356. if b.root == 0 {
  357. // For inlined bucket just update the inline stats
  358. s.InlineBucketInuse += used
  359. } else {
  360. // For non-inlined bucket update all the leaf stats
  361. s.LeafPageN++
  362. s.LeafInuse += used
  363. s.LeafOverflowN += int(p.overflow)
  364. // Collect stats from sub-buckets.
  365. // Do that by iterating over all element headers
  366. // looking for the ones with the bucketLeafFlag.
  367. for i := uint16(0); i < p.count; i++ {
  368. e := p.leafPageElement(i)
  369. if (e.flags & bucketLeafFlag) != 0 {
  370. // For any bucket element, open the element value
  371. // and recursively call Stats on the contained bucket.
  372. subStats.Add(b.openBucket(e.value()).Stats())
  373. }
  374. }
  375. }
  376. } else if (p.flags & branchPageFlag) != 0 {
  377. s.BranchPageN++
  378. lastElement := p.branchPageElement(p.count - 1)
  379. // used totals the used bytes for the page
  380. // Add header and all element headers.
  381. used := pageHeaderSize + (branchPageElementSize * int(p.count-1))
  382. // Add size of all keys and values.
  383. // Again, use the fact that last element's position equals to
  384. // the total of key, value sizes of all previous elements.
  385. used += int(lastElement.pos + lastElement.ksize)
  386. s.BranchInuse += used
  387. s.BranchOverflowN += int(p.overflow)
  388. }
  389. // Keep track of maximum page depth.
  390. if depth+1 > s.Depth {
  391. s.Depth = (depth + 1)
  392. }
  393. })
  394. // Alloc stats can be computed from page counts and pageSize.
  395. s.BranchAlloc = (s.BranchPageN + s.BranchOverflowN) * pageSize
  396. s.LeafAlloc = (s.LeafPageN + s.LeafOverflowN) * pageSize
  397. // Add the max depth of sub-buckets to get total nested depth.
  398. s.Depth += subStats.Depth
  399. // Add the stats for all sub-buckets
  400. s.Add(subStats)
  401. return s
  402. }
  403. // forEachPage iterates over every page in a bucket, including inline pages.
  404. func (b *Bucket) forEachPage(fn func(*page, int)) {
  405. // If we have an inline page then just use that.
  406. if b.page != nil {
  407. fn(b.page, 0)
  408. return
  409. }
  410. // Otherwise traverse the page hierarchy.
  411. b.tx.forEachPage(b.root, 0, fn)
  412. }
  413. // forEachPageNode iterates over every page (or node) in a bucket.
  414. // This also includes inline pages.
  415. func (b *Bucket) forEachPageNode(fn func(*page, *node, int)) {
  416. // If we have an inline page or root node then just use that.
  417. if b.page != nil {
  418. fn(b.page, nil, 0)
  419. return
  420. }
  421. b._forEachPageNode(b.root, 0, fn)
  422. }
  423. func (b *Bucket) _forEachPageNode(pgid pgid, depth int, fn func(*page, *node, int)) {
  424. var p, n = b.pageNode(pgid)
  425. // Execute function.
  426. fn(p, n, depth)
  427. // Recursively loop over children.
  428. if p != nil {
  429. if (p.flags & branchPageFlag) != 0 {
  430. for i := 0; i < int(p.count); i++ {
  431. elem := p.branchPageElement(uint16(i))
  432. b._forEachPageNode(elem.pgid, depth+1, fn)
  433. }
  434. }
  435. } else {
  436. if !n.isLeaf {
  437. for _, inode := range n.inodes {
  438. b._forEachPageNode(inode.pgid, depth+1, fn)
  439. }
  440. }
  441. }
  442. }
  443. // spill writes all the nodes for this bucket to dirty pages.
  444. func (b *Bucket) spill() error {
  445. // Spill all child buckets first.
  446. for name, child := range b.buckets {
  447. // If the child bucket is small enough and it has no child buckets then
  448. // write it inline into the parent bucket's page. Otherwise spill it
  449. // like a normal bucket and make the parent value a pointer to the page.
  450. var value []byte
  451. if child.inlineable() {
  452. child.free()
  453. value = child.write()
  454. } else {
  455. if err := child.spill(); err != nil {
  456. return err
  457. }
  458. // Update the child bucket header in this bucket.
  459. value = make([]byte, unsafe.Sizeof(bucket{}))
  460. var bucket = (*bucket)(unsafe.Pointer(&value[0]))
  461. *bucket = *child.bucket
  462. }
  463. // Skip writing the bucket if there are no materialized nodes.
  464. if child.rootNode == nil {
  465. continue
  466. }
  467. // Update parent node.
  468. var c = b.Cursor()
  469. k, _, flags := c.seek([]byte(name))
  470. if !bytes.Equal([]byte(name), k) {
  471. panic(fmt.Sprintf("misplaced bucket header: %x -> %x", []byte(name), k))
  472. }
  473. if flags&bucketLeafFlag == 0 {
  474. panic(fmt.Sprintf("unexpected bucket header flag: %x", flags))
  475. }
  476. c.node().put([]byte(name), []byte(name), value, 0, bucketLeafFlag)
  477. }
  478. // Ignore if there's not a materialized root node.
  479. if b.rootNode == nil {
  480. return nil
  481. }
  482. // Spill nodes.
  483. if err := b.rootNode.spill(); err != nil {
  484. return err
  485. }
  486. b.rootNode = b.rootNode.root()
  487. // Update the root node for this bucket.
  488. if b.rootNode.pgid >= b.tx.meta.pgid {
  489. panic(fmt.Sprintf("pgid (%d) above high water mark (%d)", b.rootNode.pgid, b.tx.meta.pgid))
  490. }
  491. b.root = b.rootNode.pgid
  492. return nil
  493. }
  494. // inlineable returns true if a bucket is small enough to be written inline
  495. // and if it contains no subbuckets. Otherwise returns false.
  496. func (b *Bucket) inlineable() bool {
  497. var n = b.rootNode
  498. // Bucket must only contain a single leaf node.
  499. if n == nil || !n.isLeaf {
  500. return false
  501. }
  502. // Bucket is not inlineable if it contains subbuckets or if it goes beyond
  503. // our threshold for inline bucket size.
  504. var size = pageHeaderSize
  505. for _, inode := range n.inodes {
  506. size += leafPageElementSize + len(inode.key) + len(inode.value)
  507. if inode.flags&bucketLeafFlag != 0 {
  508. return false
  509. } else if size > b.maxInlineBucketSize() {
  510. return false
  511. }
  512. }
  513. return true
  514. }
  515. // Returns the maximum total size of a bucket to make it a candidate for inlining.
  516. func (b *Bucket) maxInlineBucketSize() int {
  517. return b.tx.db.pageSize / 4
  518. }
  519. // write allocates and writes a bucket to a byte slice.
  520. func (b *Bucket) write() []byte {
  521. // Allocate the appropriate size.
  522. var n = b.rootNode
  523. var value = make([]byte, bucketHeaderSize+n.size())
  524. // Write a bucket header.
  525. var bucket = (*bucket)(unsafe.Pointer(&value[0]))
  526. *bucket = *b.bucket
  527. // Convert byte slice to a fake page and write the root node.
  528. var p = (*page)(unsafe.Pointer(&value[bucketHeaderSize]))
  529. n.write(p)
  530. return value
  531. }
  532. // rebalance attempts to balance all nodes.
  533. func (b *Bucket) rebalance() {
  534. for _, n := range b.nodes {
  535. n.rebalance()
  536. }
  537. for _, child := range b.buckets {
  538. child.rebalance()
  539. }
  540. }
  541. // node creates a node from a page and associates it with a given parent.
  542. func (b *Bucket) node(pgid pgid, parent *node) *node {
  543. _assert(b.nodes != nil, "nodes map expected")
  544. // Retrieve node if it's already been created.
  545. if n := b.nodes[pgid]; n != nil {
  546. return n
  547. }
  548. // Otherwise create a node and cache it.
  549. n := &node{bucket: b, parent: parent}
  550. if parent == nil {
  551. b.rootNode = n
  552. } else {
  553. parent.children = append(parent.children, n)
  554. }
  555. // Use the inline page if this is an inline bucket.
  556. var p = b.page
  557. if p == nil {
  558. p = b.tx.page(pgid)
  559. }
  560. // Read the page into the node and cache it.
  561. n.read(p)
  562. b.nodes[pgid] = n
  563. // Update statistics.
  564. b.tx.stats.NodeCount++
  565. return n
  566. }
  567. // free recursively frees all pages in the bucket.
  568. func (b *Bucket) free() {
  569. if b.root == 0 {
  570. return
  571. }
  572. var tx = b.tx
  573. b.forEachPageNode(func(p *page, n *node, _ int) {
  574. if p != nil {
  575. tx.db.freelist.free(tx.meta.txid, p)
  576. } else {
  577. n.free()
  578. }
  579. })
  580. b.root = 0
  581. }
  582. // dereference removes all references to the old mmap.
  583. func (b *Bucket) dereference() {
  584. if b.rootNode != nil {
  585. b.rootNode.root().dereference()
  586. }
  587. for _, child := range b.buckets {
  588. child.dereference()
  589. }
  590. }
  591. // pageNode returns the in-memory node, if it exists.
  592. // Otherwise returns the underlying page.
  593. func (b *Bucket) pageNode(id pgid) (*page, *node) {
  594. // Inline buckets have a fake page embedded in their value so treat them
  595. // differently. We'll return the rootNode (if available) or the fake page.
  596. if b.root == 0 {
  597. if id != 0 {
  598. panic(fmt.Sprintf("inline bucket non-zero page access(2): %d != 0", id))
  599. }
  600. if b.rootNode != nil {
  601. return nil, b.rootNode
  602. }
  603. return b.page, nil
  604. }
  605. // Check the node cache for non-inline buckets.
  606. if b.nodes != nil {
  607. if n := b.nodes[id]; n != nil {
  608. return nil, n
  609. }
  610. }
  611. // Finally lookup the page from the transaction if no node is materialized.
  612. return b.tx.page(id), nil
  613. }
  614. // BucketStats records statistics about resources used by a bucket.
  615. type BucketStats struct {
  616. // Page count statistics.
  617. BranchPageN int // number of logical branch pages
  618. BranchOverflowN int // number of physical branch overflow pages
  619. LeafPageN int // number of logical leaf pages
  620. LeafOverflowN int // number of physical leaf overflow pages
  621. // Tree statistics.
  622. KeyN int // number of keys/value pairs
  623. Depth int // number of levels in B+tree
  624. // Page size utilization.
  625. BranchAlloc int // bytes allocated for physical branch pages
  626. BranchInuse int // bytes actually used for branch data
  627. LeafAlloc int // bytes allocated for physical leaf pages
  628. LeafInuse int // bytes actually used for leaf data
  629. // Bucket statistics
  630. BucketN int // total number of buckets including the top bucket
  631. InlineBucketN int // total number on inlined buckets
  632. InlineBucketInuse int // bytes used for inlined buckets (also accounted for in LeafInuse)
  633. }
  634. func (s *BucketStats) Add(other BucketStats) {
  635. s.BranchPageN += other.BranchPageN
  636. s.BranchOverflowN += other.BranchOverflowN
  637. s.LeafPageN += other.LeafPageN
  638. s.LeafOverflowN += other.LeafOverflowN
  639. s.KeyN += other.KeyN
  640. if s.Depth < other.Depth {
  641. s.Depth = other.Depth
  642. }
  643. s.BranchAlloc += other.BranchAlloc
  644. s.BranchInuse += other.BranchInuse
  645. s.LeafAlloc += other.LeafAlloc
  646. s.LeafInuse += other.LeafInuse
  647. s.BucketN += other.BucketN
  648. s.InlineBucketN += other.InlineBucketN
  649. s.InlineBucketInuse += other.InlineBucketInuse
  650. }
  651. // cloneBytes returns a copy of a given slice.
  652. func cloneBytes(v []byte) []byte {
  653. var clone = make([]byte, len(v))
  654. copy(clone, v)
  655. return clone
  656. }