27 files changed, 5025 insertions, 0 deletions
diff --git a/vendor/github.com/etcd-io/bbolt/LICENSE b/vendor/github.com/etcd-io/bbolt/LICENSE
new file mode 100644
index 0000000000..004e77fe5d
--- /dev/null
+++ b/vendor/github.com/etcd-io/bbolt/LICENSE
@@ -0,0 +1,20 @@
+The MIT License (MIT)
+
+Copyright (c) 2013 Ben Johnson
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software is furnished to do so,
+subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/vendor/github.com/etcd-io/bbolt/bolt_386.go b/vendor/github.com/etcd-io/bbolt/bolt_386.go
new file mode 100644
index 0000000000..4d35ee7cf3
--- /dev/null
+++ b/vendor/github.com/etcd-io/bbolt/bolt_386.go
@@ -0,0 +1,10 @@
+package bbolt
+
+// maxMapSize represents the largest mmap size supported by Bolt.
+const maxMapSize = 0x7FFFFFFF // 2GB
+
+// maxAllocSize is the size used when creating array pointers.
+const maxAllocSize = 0xFFFFFFF
+
+// Are unaligned load/stores broken on this arch?
+var brokenUnaligned = false
diff --git a/vendor/github.com/etcd-io/bbolt/bolt_amd64.go b/vendor/github.com/etcd-io/bbolt/bolt_amd64.go
new file mode 100644
index 0000000000..60a52dad56
--- /dev/null
+++ b/vendor/github.com/etcd-io/bbolt/bolt_amd64.go
@@ -0,0 +1,10 @@
+package bbolt
+
+// maxMapSize represents the largest mmap size supported by Bolt.
+const maxMapSize = 0xFFFFFFFFFFFF // 256TB
+
+// maxAllocSize is the size used when creating array pointers.
+const maxAllocSize = 0x7FFFFFFF
+
+// Are unaligned load/stores broken on this arch?
+var brokenUnaligned = false
diff --git a/vendor/github.com/etcd-io/bbolt/bolt_arm.go b/vendor/github.com/etcd-io/bbolt/bolt_arm.go
new file mode 100644
index 0000000000..105d27ddb7
--- /dev/null
+++ b/vendor/github.com/etcd-io/bbolt/bolt_arm.go
@@ -0,0 +1,28 @@
+package bbolt
+
+import "unsafe"
+
+// maxMapSize represents the largest mmap size supported by Bolt.
+const maxMapSize = 0x7FFFFFFF // 2GB
+
+// maxAllocSize is the size used when creating array pointers.
+const maxAllocSize = 0xFFFFFFF
+
+// Are unaligned load/stores broken on this arch?
+var brokenUnaligned bool
+
+func init() {
+	// Simple check to see whether this arch handles unaligned load/stores
+	// correctly.
+
+	// ARM9 and older devices require load/stores to be from/to aligned
+	// addresses. If not, the lower 2 bits are cleared and that address is
+	// read in a jumbled up order.
+
+	// See http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.faqs/ka15414.html
+
+	raw := [6]byte{0xfe, 0xef, 0x11, 0x22, 0x22, 0x11}
+	val := *(*uint32)(unsafe.Pointer(uintptr(unsafe.Pointer(&raw)) + 2))
+
+	brokenUnaligned = val != 0x11222211
+}
diff --git a/vendor/github.com/etcd-io/bbolt/bolt_arm64.go b/vendor/github.com/etcd-io/bbolt/bolt_arm64.go
new file mode 100644
index 0000000000..f5aa2a5ee2
--- /dev/null
+++ b/vendor/github.com/etcd-io/bbolt/bolt_arm64.go
@@ -0,0 +1,12 @@
+// +build arm64
+
+package bbolt
+
+// maxMapSize represents the largest mmap size supported by Bolt.
+const maxMapSize = 0xFFFFFFFFFFFF // 256TB
+
+// maxAllocSize is the size used when creating array pointers.
+const maxAllocSize = 0x7FFFFFFF
+
+// Are unaligned load/stores broken on this arch?
+var brokenUnaligned = false
diff --git a/vendor/github.com/etcd-io/bbolt/bolt_linux.go b/vendor/github.com/etcd-io/bbolt/bolt_linux.go
new file mode 100644
index 0000000000..7707bcacf0
--- /dev/null
+++ b/vendor/github.com/etcd-io/bbolt/bolt_linux.go
@@ -0,0 +1,10 @@
+package bbolt
+
+import (
+	"syscall"
+)
+
+// fdatasync flushes written data to a file descriptor.
+func fdatasync(db *DB) error {
+	return syscall.Fdatasync(int(db.file.Fd()))
+}
diff --git a/vendor/github.com/etcd-io/bbolt/bolt_mips64x.go b/vendor/github.com/etcd-io/bbolt/bolt_mips64x.go
new file mode 100644
index 0000000000..baeb289fd9
--- /dev/null
+++ b/vendor/github.com/etcd-io/bbolt/bolt_mips64x.go
@@ -0,0 +1,12 @@
+// +build mips64 mips64le
+
+package bbolt
+
+// maxMapSize represents the largest mmap size supported by Bolt.
+const maxMapSize = 0x8000000000 // 512GB
+
+// maxAllocSize is the size used when creating array pointers.
+const maxAllocSize = 0x7FFFFFFF
+
+// Are unaligned load/stores broken on this arch?
+var brokenUnaligned = false
diff --git a/vendor/github.com/etcd-io/bbolt/bolt_mipsx.go b/vendor/github.com/etcd-io/bbolt/bolt_mipsx.go
new file mode 100644
index 0000000000..2d9b1a91f3
--- /dev/null
+++ b/vendor/github.com/etcd-io/bbolt/bolt_mipsx.go
@@ -0,0 +1,12 @@
+// +build mips mipsle
+
+package bbolt
+
+// maxMapSize represents the largest mmap size supported by Bolt.
+const maxMapSize = 0x40000000 // 1GB
+
+// maxAllocSize is the size used when creating array pointers.
+const maxAllocSize = 0xFFFFFFF
+
+// Are unaligned load/stores broken on this arch?
+var brokenUnaligned = false
diff --git a/vendor/github.com/etcd-io/bbolt/bolt_openbsd.go b/vendor/github.com/etcd-io/bbolt/bolt_openbsd.go
new file mode 100644
index 0000000000..d7f50358ef
--- /dev/null
+++ b/vendor/github.com/etcd-io/bbolt/bolt_openbsd.go
@@ -0,0 +1,27 @@
+package bbolt
+
+import (
+	"syscall"
+	"unsafe"
+)
+
+const (
+	msAsync      = 1 << iota // perform asynchronous writes
+	msSync                   // perform synchronous writes
+	msInvalidate             // invalidate cached data
+)
+
+func msync(db *DB) error {
+	_, _, errno := syscall.Syscall(syscall.SYS_MSYNC, uintptr(unsafe.Pointer(db.data)), uintptr(db.datasz), msInvalidate)
+	if errno != 0 {
+		return errno
+	}
+	return nil
+}
+
+func fdatasync(db *DB) error {
+	if db.data != nil {
+		return msync(db)
+	}
+	return db.file.Sync()
+}
diff --git a/vendor/github.com/etcd-io/bbolt/bolt_ppc.go b/vendor/github.com/etcd-io/bbolt/bolt_ppc.go
new file mode 100644
index 0000000000..69804714aa
--- /dev/null
+++ b/vendor/github.com/etcd-io/bbolt/bolt_ppc.go
@@ -0,0 +1,12 @@
+// +build ppc
+
+package bbolt
+
+// maxMapSize represents the largest mmap size supported by Bolt.
+const maxMapSize = 0x7FFFFFFF // 2GB
+
+// maxAllocSize is the size used when creating array pointers.
+const maxAllocSize = 0xFFFFFFF
+
+// Are unaligned load/stores broken on this arch?
+var brokenUnaligned = false
diff --git a/vendor/github.com/etcd-io/bbolt/bolt_ppc64.go b/vendor/github.com/etcd-io/bbolt/bolt_ppc64.go
new file mode 100644
index 0000000000..3565908576
--- /dev/null
+++ b/vendor/github.com/etcd-io/bbolt/bolt_ppc64.go
@@ -0,0 +1,12 @@
+// +build ppc64
+
+package bbolt
+
+// maxMapSize represents the largest mmap size supported by Bolt.
+const maxMapSize = 0xFFFFFFFFFFFF // 256TB
+
+// maxAllocSize is the size used when creating array pointers.
+const maxAllocSize = 0x7FFFFFFF
+
+// Are unaligned load/stores broken on this arch?
+var brokenUnaligned = false
diff --git a/vendor/github.com/etcd-io/bbolt/bolt_ppc64le.go b/vendor/github.com/etcd-io/bbolt/bolt_ppc64le.go
new file mode 100644
index 0000000000..422c7c69d6
--- /dev/null
+++ b/vendor/github.com/etcd-io/bbolt/bolt_ppc64le.go
@@ -0,0 +1,12 @@
+// +build ppc64le
+
+package bbolt
+
+// maxMapSize represents the largest mmap size supported by Bolt.
+const maxMapSize = 0xFFFFFFFFFFFF // 256TB
+
+// maxAllocSize is the size used when creating array pointers.
+const maxAllocSize = 0x7FFFFFFF
+
+// Are unaligned load/stores broken on this arch?
+var brokenUnaligned = false
diff --git a/vendor/github.com/etcd-io/bbolt/bolt_s390x.go b/vendor/github.com/etcd-io/bbolt/bolt_s390x.go
new file mode 100644
index 0000000000..6d3fcb825d
--- /dev/null
+++ b/vendor/github.com/etcd-io/bbolt/bolt_s390x.go
@@ -0,0 +1,12 @@
+// +build s390x
+
+package bbolt
+
+// maxMapSize represents the largest mmap size supported by Bolt.
+const maxMapSize = 0xFFFFFFFFFFFF // 256TB
+
+// maxAllocSize is the size used when creating array pointers.
+const maxAllocSize = 0x7FFFFFFF
+
+// Are unaligned load/stores broken on this arch?
+var brokenUnaligned = false
diff --git a/vendor/github.com/etcd-io/bbolt/bolt_unix.go b/vendor/github.com/etcd-io/bbolt/bolt_unix.go
new file mode 100644
index 0000000000..5f2bb51451
--- /dev/null
+++ b/vendor/github.com/etcd-io/bbolt/bolt_unix.go
@@ -0,0 +1,93 @@
+// +build !windows,!plan9,!solaris
+
+package bbolt
+
+import (
+	"fmt"
+	"syscall"
+	"time"
+	"unsafe"
+)
+
+// flock acquires an advisory lock on a file descriptor.
+func flock(db *DB, exclusive bool, timeout time.Duration) error {
+	var t time.Time
+	if timeout != 0 {
+		t = time.Now()
+	}
+	fd := db.file.Fd()
+	flag := syscall.LOCK_NB
+	if exclusive {
+		flag |= syscall.LOCK_EX
+	} else {
+		flag |= syscall.LOCK_SH
+	}
+	for {
+		// Attempt to obtain an exclusive lock.
+		err := syscall.Flock(int(fd), flag)
+		if err == nil {
+			return nil
+		} else if err != syscall.EWOULDBLOCK {
+			return err
+		}
+
+		// If we timed out then return an error.
+		if timeout != 0 && time.Since(t) > timeout-flockRetryTimeout {
+			return ErrTimeout
+		}
+
+		// Wait for a bit and try again.
+		time.Sleep(flockRetryTimeout)
+	}
+}
+
+// funlock releases an advisory lock on a file descriptor.
+func funlock(db *DB) error {
+	return syscall.Flock(int(db.file.Fd()), syscall.LOCK_UN)
+}
+
+// mmap memory maps a DB's data file.
+func mmap(db *DB, sz int) error {
+	// Map the data file to memory.
+	b, err := syscall.Mmap(int(db.file.Fd()), 0, sz, syscall.PROT_READ, syscall.MAP_SHARED|db.MmapFlags)
+	if err != nil {
+		return err
+	}
+
+	// Advise the kernel that the mmap is accessed randomly.
+	err = madvise(b, syscall.MADV_RANDOM)
+	if err != nil && err != syscall.ENOSYS {
+		// Ignore not implemented error in kernel because it still works.
+		return fmt.Errorf("madvise: %s", err)
+	}
+
+	// Save the original byte slice and convert to a byte array pointer.
+	db.dataref = b
+	db.data = (*[maxMapSize]byte)(unsafe.Pointer(&b[0]))
+	db.datasz = sz
+	return nil
+}
+
+// munmap unmaps a DB's data file from memory.
+func munmap(db *DB) error {
+	// Ignore the unmap if we have no mapped data.
+	if db.dataref == nil {
+		return nil
+	}
+
+	// Unmap using the original byte slice.
+	err := syscall.Munmap(db.dataref)
+	db.dataref = nil
+	db.data = nil
+	db.datasz = 0
+	return err
+}
+
+// NOTE: This function is copied from stdlib because it is not available on darwin.
+func madvise(b []byte, advice int) (err error) {
+	_, _, e1 := syscall.Syscall(syscall.SYS_MADVISE, uintptr(unsafe.Pointer(&b[0])), uintptr(len(b)), uintptr(advice))
+	if e1 != 0 {
+		err = e1
+	}
+	return
+}
diff --git a/vendor/github.com/etcd-io/bbolt/bolt_unix_solaris.go b/vendor/github.com/etcd-io/bbolt/bolt_unix_solaris.go
new file mode 100644
index 0000000000..babad65786
--- /dev/null
+++ b/vendor/github.com/etcd-io/bbolt/bolt_unix_solaris.go
@@ -0,0 +1,88 @@
+package bbolt
+
+import (
+	"fmt"
+	"syscall"
+	"time"
+	"unsafe"
+
+	"golang.org/x/sys/unix"
+)
+
+// flock acquires an advisory lock on a file descriptor.
+func flock(db *DB, exclusive bool, timeout time.Duration) error {
+	var t time.Time
+	if timeout != 0 {
+		t = time.Now()
+	}
+	fd := db.file.Fd()
+	var lockType int16
+	if exclusive {
+		lockType = syscall.F_WRLCK
+	} else {
+		lockType = syscall.F_RDLCK
+	}
+	for {
+		// Attempt to obtain an exclusive lock.
+		lock := syscall.Flock_t{Type: lockType}
+		err := syscall.FcntlFlock(fd, syscall.F_SETLK, &lock)
+		if err == nil {
+			return nil
+		} else if err != syscall.EAGAIN {
+			return err
+		}
+
+		// If we timed out then return an error.
+		if timeout != 0 && time.Since(t) > timeout-flockRetryTimeout {
+			return ErrTimeout
+		}
+
+		// Wait for a bit and try again.
+		time.Sleep(flockRetryTimeout)
+	}
+}
+
+// funlock releases an advisory lock on a file descriptor.
+func funlock(db *DB) error {
+	var lock syscall.Flock_t
+	lock.Start = 0
+	lock.Len = 0
+	lock.Type = syscall.F_UNLCK
+	lock.Whence = 0
+	return syscall.FcntlFlock(uintptr(db.file.Fd()), syscall.F_SETLK, &lock)
+}
+
+// mmap memory maps a DB's data file.
+func mmap(db *DB, sz int) error {
+	// Map the data file to memory.
+	b, err := unix.Mmap(int(db.file.Fd()), 0, sz, syscall.PROT_READ, syscall.MAP_SHARED|db.MmapFlags)
+	if err != nil {
+		return err
+	}
+
+	// Advise the kernel that the mmap is accessed randomly.
+	if err := unix.Madvise(b, syscall.MADV_RANDOM); err != nil {
+		return fmt.Errorf("madvise: %s", err)
+	}
+
+	// Save the original byte slice and convert to a byte array pointer.
+	db.dataref = b
+	db.data = (*[maxMapSize]byte)(unsafe.Pointer(&b[0]))
+	db.datasz = sz
+	return nil
+}
+
+// munmap unmaps a DB's data file from memory.
+func munmap(db *DB) error {
+	// Ignore the unmap if we have no mapped data.
+	if db.dataref == nil {
+		return nil
+	}
+
+	// Unmap using the original byte slice.
+	err := unix.Munmap(db.dataref)
+	db.dataref = nil
+	db.data = nil
+	db.datasz = 0
+	return err
+}
diff --git a/vendor/github.com/etcd-io/bbolt/bolt_windows.go b/vendor/github.com/etcd-io/bbolt/bolt_windows.go
new file mode 100644
index 0000000000..fca178bd29
--- /dev/null
+++ b/vendor/github.com/etcd-io/bbolt/bolt_windows.go
@@ -0,0 +1,141 @@
+package bbolt
+
+import (
+	"fmt"
+	"os"
+	"syscall"
+	"time"
+	"unsafe"
+)
+
+// LockFileEx code derived from golang build filemutex_windows.go @ v1.5.1
+var (
+	modkernel32      = syscall.NewLazyDLL("kernel32.dll")
+	procLockFileEx   = modkernel32.NewProc("LockFileEx")
+	procUnlockFileEx = modkernel32.NewProc("UnlockFileEx")
+)
+
+const (
+	// see https://msdn.microsoft.com/en-us/library/windows/desktop/aa365203(v=vs.85).aspx
+	flagLockExclusive       = 2
+	flagLockFailImmediately = 1
+
+	// see https://msdn.microsoft.com/en-us/library/windows/desktop/ms681382(v=vs.85).aspx
+	errLockViolation syscall.Errno = 0x21
+)
+
+func lockFileEx(h syscall.Handle, flags, reserved, locklow, lockhigh uint32, ol *syscall.Overlapped) (err error) {
+	r, _, err := procLockFileEx.Call(uintptr(h), uintptr(flags), uintptr(reserved), uintptr(locklow), uintptr(lockhigh), uintptr(unsafe.Pointer(ol)))
+	if r == 0 {
+		return err
+	}
+	return nil
+}
+
+func unlockFileEx(h syscall.Handle, reserved, locklow, lockhigh uint32, ol *syscall.Overlapped) (err error) {
+	r, _, err := procUnlockFileEx.Call(uintptr(h), uintptr(reserved), uintptr(locklow), uintptr(lockhigh), uintptr(unsafe.Pointer(ol)), 0)
+	if r == 0 {
+		return err
+	}
+	return nil
+}
+
+// fdatasync flushes written data to a file descriptor.
+func fdatasync(db *DB) error {
+	return db.file.Sync()
+}
+
+// flock acquires an advisory lock on a file descriptor.
+func flock(db *DB, exclusive bool, timeout time.Duration) error {
+	var t time.Time
+	if timeout != 0 {
+		t = time.Now()
+	}
+	var flag uint32 = flagLockFailImmediately
+	if exclusive {
+		flag |= flagLockExclusive
+	}
+	for {
+		// Fix for https://github.com/etcd-io/bbolt/issues/121. Use byte-range
+		// -1..0 as the lock on the database file.
+		var m1 uint32 = (1 << 32) - 1 // -1 in a uint32
+		err := lockFileEx(syscall.Handle(db.file.Fd()), flag, 0, 1, 0, &syscall.Overlapped{
+			Offset:     m1,
+			OffsetHigh: m1,
+		})
+
+		if err == nil {
+			return nil
+		} else if err != errLockViolation {
+			return err
+		}
+
+		// If we timed oumercit then return an error.
+		if timeout != 0 && time.Since(t) > timeout-flockRetryTimeout {
+			return ErrTimeout
+		}
+
+		// Wait for a bit and try again.
+		time.Sleep(flockRetryTimeout)
+	}
+}
+
+// funlock releases an advisory lock on a file descriptor.
+func funlock(db *DB) error {
+	var m1 uint32 = (1 << 32) - 1 // -1 in a uint32
+	err := unlockFileEx(syscall.Handle(db.file.Fd()), 0, 1, 0, &syscall.Overlapped{
+		Offset:     m1,
+		OffsetHigh: m1,
+	})
+	return err
+}
+
+// mmap memory maps a DB's data file.
+// Based on: https://github.com/edsrzf/mmap-go
+func mmap(db *DB, sz int) error {
+	if !db.readOnly {
+		// Truncate the database to the size of the mmap.
+		if err := db.file.Truncate(int64(sz)); err != nil {
+			return fmt.Errorf("truncate: %s", err)
+		}
+	}
+
+	// Open a file mapping handle.
+	sizelo := uint32(sz >> 32)
+	sizehi := uint32(sz) & 0xffffffff
+	h, errno := syscall.CreateFileMapping(syscall.Handle(db.file.Fd()), nil, syscall.PAGE_READONLY, sizelo, sizehi, nil)
+	if h == 0 {
+		return os.NewSyscallError("CreateFileMapping", errno)
+	}
+
+	// Create the memory map.
+	addr, errno := syscall.MapViewOfFile(h, syscall.FILE_MAP_READ, 0, 0, uintptr(sz))
+	if addr == 0 {
+		return os.NewSyscallError("MapViewOfFile", errno)
+	}
+
+	// Close mapping handle.
+	if err := syscall.CloseHandle(syscall.Handle(h)); err != nil {
+		return os.NewSyscallError("CloseHandle", err)
+	}
+
+	// Convert to a byte array.
+	db.data = ((*[maxMapSize]byte)(unsafe.Pointer(addr)))
+	db.datasz = sz
+
+	return nil
+}
+
+// munmap unmaps a pointer from a file.
+// Based on: https://github.com/edsrzf/mmap-go
+func munmap(db *DB) error {
+	if db.data == nil {
+		return nil
+	}
+
+	addr := (uintptr)(unsafe.Pointer(&db.data[0]))
+	if err := syscall.UnmapViewOfFile(addr); err != nil {
+		return os.NewSyscallError("UnmapViewOfFile", err)
+	}
+	return nil
+}
diff --git a/vendor/github.com/etcd-io/bbolt/boltsync_unix.go b/vendor/github.com/etcd-io/bbolt/boltsync_unix.go
new file mode 100644
index 0000000000..9587afefee
--- /dev/null
+++ b/vendor/github.com/etcd-io/bbolt/boltsync_unix.go
@@ -0,0 +1,8 @@
+// +build !windows,!plan9,!linux,!openbsd
+
+package bbolt
+
+// fdatasync flushes written data to a file descriptor.
+func fdatasync(db *DB) error {
+	return db.file.Sync()
+}
diff --git a/vendor/github.com/etcd-io/bbolt/bucket.go b/vendor/github.com/etcd-io/bbolt/bucket.go
new file mode 100644
index 0000000000..84bfd4d6a2
--- /dev/null
+++ b/vendor/github.com/etcd-io/bbolt/bucket.go
@@ -0,0 +1,775 @@
+package bbolt
+
+import (
+	"bytes"
+	"fmt"
+	"unsafe"
+)
+
+const (
+	// MaxKeySize is the maximum length of a key, in bytes.
+	MaxKeySize = 32768
+
+	// MaxValueSize is the maximum length of a value, in bytes.
+	MaxValueSize = (1 << 31) - 2
+)
+
+const bucketHeaderSize = int(unsafe.Sizeof(bucket{}))
+
+const (
+	minFillPercent = 0.1
+	maxFillPercent = 1.0
+)
+
+// DefaultFillPercent is the percentage that split pages are filled.
+// This value can be changed by setting Bucket.FillPercent.
+const DefaultFillPercent = 0.5
+
+// Bucket represents a collection of key/value pairs inside the database.
+type Bucket struct {
+	*bucket
+	tx       *Tx                // the associated transaction
+	buckets  map[string]*Bucket // subbucket cache
+	page     *page              // inline page reference
+	rootNode *node              // materialized node for the root page.
+	nodes    map[pgid]*node     // node cache
+
+	// Sets the threshold for filling nodes when they split. By default,
+	// the bucket will fill to 50% but it can be useful to increase this
+	// amount if you know that your write workloads are mostly append-only.
+	//
+	// This is non-persisted across transactions so it must be set in every Tx.
+	FillPercent float64
+}
+
+// bucket represents the on-file representation of a bucket.
+// This is stored as the "value" of a bucket key. If the bucket is small enough,
+// then its root page can be stored inline in the "value", after the bucket
+// header. In the case of inline buckets, the "root" will be 0.
+type bucket struct {
+	root     pgid   // page id of the bucket's root-level page
+	sequence uint64 // monotonically incrementing, used by NextSequence()
+}
+
+// newBucket returns a new bucket associated with a transaction.
+func newBucket(tx *Tx) Bucket {
+	var b = Bucket{tx: tx, FillPercent: DefaultFillPercent}
+	if tx.writable {
+		b.buckets = make(map[string]*Bucket)
+		b.nodes = make(map[pgid]*node)
+	}
+	return b
+}
+
+// Tx returns the tx of the bucket.
+func (b *Bucket) Tx() *Tx {
+	return b.tx
+}
+
+// Root returns the root of the bucket.
+func (b *Bucket) Root() pgid {
+	return b.root
+}
+
+// Writable returns whether the bucket is writable.
+func (b *Bucket) Writable() bool {
+	return b.tx.writable
+}
+
+// Cursor creates a cursor associated with the bucket.
+// The cursor is only valid as long as the transaction is open.
+// Do not use a cursor after the transaction is closed.
+func (b *Bucket) Cursor() *Cursor {
+	// Update transaction statistics.
+	b.tx.stats.CursorCount++
+
+	// Allocate and return a cursor.
+	return &Cursor{
+		bucket: b,
+		stack:  make([]elemRef, 0),
+	}
+}
+
+// Bucket retrieves a nested bucket by name.
+// Returns nil if the bucket does not exist.
+// The bucket instance is only valid for the lifetime of the transaction.
+func (b *Bucket) Bucket(name []byte) *Bucket {
+	if b.buckets != nil {
+		if child := b.buckets[string(name)]; child != nil {
+			return child
+		}
+	}
+
+	// Move cursor to key.
+	c := b.Cursor()
+	k, v, flags := c.seek(name)
+
+	// Return nil if the key doesn't exist or it is not a bucket.
+	if !bytes.Equal(name, k) || (flags&bucketLeafFlag) == 0 {
+		return nil
+	}
+
+	// Otherwise create a bucket and cache it.
+	var child = b.openBucket(v)
+	if b.buckets != nil {
+		b.buckets[string(name)] = child
+	}
+
+	return child
+}
+
+// Helper method that re-interprets a sub-bucket value
+// from a parent into a Bucket
+func (b *Bucket) openBucket(value []byte) *Bucket {
+	var child = newBucket(b.tx)
+
+	// If unaligned load/stores are broken on this arch and value is
+	// unaligned simply clone to an aligned byte array.
+	unaligned := brokenUnaligned && uintptr(unsafe.Pointer(&value[0]))&3 != 0
+
+	if unaligned {
+		value = cloneBytes(value)
+	}
+
+	// If this is a writable transaction then we need to copy the bucket entry.
+	// Read-only transactions can point directly at the mmap entry.
+	if b.tx.writable && !unaligned {
+		child.bucket = &bucket{}
+		*child.bucket = *(*bucket)(unsafe.Pointer(&value[0]))
+	} else {
+		child.bucket = (*bucket)(unsafe.Pointer(&value[0]))
+	}
+
+	// Save a reference to the inline page if the bucket is inline.
+	if child.root == 0 {
+		child.page = (*page)(unsafe.Pointer(&value[bucketHeaderSize]))
+	}
+
+	return &child
+}
+
+// CreateBucket creates a new bucket at the given key and returns the new bucket.
+// Returns an error if the key already exists, if the bucket name is blank, or if the bucket name is too long.
+// The bucket instance is only valid for the lifetime of the transaction.
+func (b *Bucket) CreateBucket(key []byte) (*Bucket, error) {
+	if b.tx.db == nil {
+		return nil, ErrTxClosed
+	} else if !b.tx.writable {
+		return nil, ErrTxNotWritable
+	} else if len(key) == 0 {
+		return nil, ErrBucketNameRequired
+	}
+
+	// Move cursor to correct position.
+	c := b.Cursor()
+	k, _, flags := c.seek(key)
+
+	// Return an error if there is an existing key.
+	if bytes.Equal(key, k) {
+		if (flags & bucketLeafFlag) != 0 {
+			return nil, ErrBucketExists
+		}
+		return nil, ErrIncompatibleValue
+	}
+
+	// Create empty, inline bucket.
+	var bucket = Bucket{
+		bucket:      &bucket{},
+		rootNode:    &node{isLeaf: true},
+		FillPercent: DefaultFillPercent,
+	}
+	var value = bucket.write()
+
+	// Insert into node.
+	key = cloneBytes(key)
+	c.node().put(key, key, value, 0, bucketLeafFlag)
+
+	// Since subbuckets are not allowed on inline buckets, we need to
+	// dereference the inline page, if it exists. This will cause the bucket
+	// to be treated as a regular, non-inline bucket for the rest of the tx.
+	b.page = nil
+
+	return b.Bucket(key), nil
+}
+
+// CreateBucketIfNotExists creates a new bucket if it doesn't already exist and returns a reference to it.
+// Returns an error if the bucket name is blank, or if the bucket name is too long.
+// The bucket instance is only valid for the lifetime of the transaction.
+func (b *Bucket) CreateBucketIfNotExists(key []byte) (*Bucket, error) {
+	child, err := b.CreateBucket(key)
+	if err == ErrBucketExists {
+		return b.Bucket(key), nil
+	} else if err != nil {
+		return nil, err
+	}
+	return child, nil
+}
+
+// DeleteBucket deletes a bucket at the given key.
+// Returns an error if the bucket does not exists, or if the key represents a non-bucket value.
+func (b *Bucket) DeleteBucket(key []byte) error {
+	if b.tx.db == nil {
+		return ErrTxClosed
+	} else if !b.Writable() {
+		return ErrTxNotWritable
+	}
+
+	// Move cursor to correct position.
+	c := b.Cursor()
+	k, _, flags := c.seek(key)
+
+	// Return an error if bucket doesn't exist or is not a bucket.
+	if !bytes.Equal(key, k) {
+		return ErrBucketNotFound
+	} else if (flags & bucketLeafFlag) == 0 {
+		return ErrIncompatibleValue
+	}
+
+	// Recursively delete all child buckets.
+	child := b.Bucket(key)
+	err := child.ForEach(func(k, v []byte) error {
+		if v == nil {
+			if err := child.DeleteBucket(k); err != nil {
+				return fmt.Errorf("delete bucket: %s", err)
+			}
+		}
+		return nil
+	})
+	if err != nil {
+		return err
+	}
+
+	// Remove cached copy.
+	delete(b.buckets, string(key))
+
+	// Release all bucket pages to freelist.
+	child.nodes = nil
+	child.rootNode = nil
+	child.free()
+
+	// Delete the node if we have a matching key.
+	c.node().del(key)
+
+	return nil
+}
+
+// Get retrieves the value for a key in the bucket.
+// Returns a nil value if the key does not exist or if the key is a nested bucket.
+// The returned value is only valid for the life of the transaction.
+func (b *Bucket) Get(key []byte) []byte {
+	k, v, flags := b.Cursor().seek(key)
+
+	// Return nil if this is a bucket.
+	if (flags & bucketLeafFlag) != 0 {
+		return nil
+	}
+
+	// If our target node isn't the same key as what's passed in then return nil.
+	if !bytes.Equal(key, k) {
+		return nil
+	}
+	return v
+}
+
+// Put sets the value for a key in the bucket.
+// If the key exist then its previous value will be overwritten.
+// Supplied value must remain valid for the life of the transaction.
+// Returns an error if the bucket was created from a read-only transaction, if the key is blank, if the key is too large, or if the value is too large.
+func (b *Bucket) Put(key []byte, value []byte) error {
+	if b.tx.db == nil {
+		return ErrTxClosed
+	} else if !b.Writable() {
+		return ErrTxNotWritable
+	} else if len(key) == 0 {
+		return ErrKeyRequired
+	} else if len(key) > MaxKeySize {
+		return ErrKeyTooLarge
+	} else if int64(len(value)) > MaxValueSize {
+		return ErrValueTooLarge
+	}
+
+	// Move cursor to correct position.
+	c := b.Cursor()
+	k, _, flags := c.seek(key)
+
+	// Return an error if there is an existing key with a bucket value.
+	if bytes.Equal(key, k) && (flags&bucketLeafFlag) != 0 {
+		return ErrIncompatibleValue
+	}
+
+	// Insert into node.
+	key = cloneBytes(key)
+	c.node().put(key, key, value, 0, 0)
+
+	return nil
+}
+
+// Delete removes a key from the bucket.
+// If the key does not exist then nothing is done and a nil error is returned.
+// Returns an error if the bucket was created from a read-only transaction.
+func (b *Bucket) Delete(key []byte) error {
+	if b.tx.db == nil {
+		return ErrTxClosed
+	} else if !b.Writable() {
+		return ErrTxNotWritable
+	}
+
+	// Move cursor to correct position.
+	c := b.Cursor()
+	k, _, flags := c.seek(key)
+
+	// Return nil if the key doesn't exist.
+	if !bytes.Equal(key, k) {
+		return nil
+	}
+
+	// Return an error if there is already existing bucket value.
+	if (flags & bucketLeafFlag) != 0 {
+		return ErrIncompatibleValue
+	}
+
+	// Delete the node if we have a matching key.
+	c.node().del(key)
+
+	return nil
+}
+
+// Sequence returns the current integer for the bucket without incrementing it.
+func (b *Bucket) Sequence() uint64 { return b.bucket.sequence }
+
+// SetSequence updates the sequence number for the bucket.
+func (b *Bucket) SetSequence(v uint64) error {
+	if b.tx.db == nil {
+		return ErrTxClosed
+	} else if !b.Writable() {
+		return ErrTxNotWritable
+	}
+
+	// Materialize the root node if it hasn't been already so that the
+	// bucket will be saved during commit.
+	if b.rootNode == nil {
+		_ = b.node(b.root, nil)
+	}
+
+	// Increment and return the sequence.
+	b.bucket.sequence = v
+	return nil
+}
+
+// NextSequence returns an autoincrementing integer for the bucket.
+func (b *Bucket) NextSequence() (uint64, error) {
+	if b.tx.db == nil {
+		return 0, ErrTxClosed
+	} else if !b.Writable() {
+		return 0, ErrTxNotWritable
+	}
+
+	// Materialize the root node if it hasn't been already so that the
+	// bucket will be saved during commit.
+	if b.rootNode == nil {
+		_ = b.node(b.root, nil)
+	}
+
+	// Increment and return the sequence.
+	b.bucket.sequence++
+	return b.bucket.sequence, nil
+}
+
+// ForEach executes a function for each key/value pair in a bucket.
+// If the provided function returns an error then the iteration is stopped and
+// the error is returned to the caller. The provided function must not modify
+// the bucket; this will result in undefined behavior.
+func (b *Bucket) ForEach(fn func(k, v []byte) error) error {
+	if b.tx.db == nil {
+		return ErrTxClosed
+	}
+	c := b.Cursor()
+	for k, v := c.First(); k != nil; k, v = c.Next() {
+		if err := fn(k, v); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// Stat returns stats on a bucket.
+func (b *Bucket) Stats() BucketStats {
+	var s, subStats BucketStats
+	pageSize := b.tx.db.pageSize
+	s.BucketN += 1
+	if b.root == 0 {
+		s.InlineBucketN += 1
+	}
+	b.forEachPage(func(p *page, depth int) {
+		if (p.flags & leafPageFlag) != 0 {
+			s.KeyN += int(p.count)
+
+			// used totals the used bytes for the page
+			used := pageHeaderSize
+
+			if p.count != 0 {
+				// If page has any elements, add all element headers.
+				used += leafPageElementSize * int(p.count-1)
+
+				// Add all element key, value sizes.
+				// The computation takes advantage of the fact that the position
+				// of the last element's key/value equals to the total of the sizes
+				// of all previous elements' keys and values.
+				// It also includes the last element's header.
+				lastElement := p.leafPageElement(p.count - 1)
+				used += int(lastElement.pos + lastElement.ksize + lastElement.vsize)
+			}
+
+			if b.root == 0 {
+				// For inlined bucket just update the inline stats
+				s.InlineBucketInuse += used
+			} else {
+				// For non-inlined bucket update all the leaf stats
+				s.LeafPageN++
+				s.LeafInuse += used
+				s.LeafOverflowN += int(p.overflow)
+
+				// Collect stats from sub-buckets.
+				// Do that by iterating over all element headers
+				// looking for the ones with the bucketLeafFlag.
+				for i := uint16(0); i < p.count; i++ {
+					e := p.leafPageElement(i)
+					if (e.flags & bucketLeafFlag) != 0 {
+						// For any bucket element, open the element value
+						// and recursively call Stats on the contained bucket.
+						subStats.Add(b.openBucket(e.value()).Stats())
+					}
+				}
+			}
+		} else if (p.flags & branchPageFlag) != 0 {
+			s.BranchPageN++
+			lastElement := p.branchPageElement(p.count - 1)
+
+			// used totals the used bytes for the page
+			// Add header and all element headers.
+			used := pageHeaderSize + (branchPageElementSize * int(p.count-1))
+
+			// Add size of all keys and values.
+			// Again, use the fact that last element's position equals to
+			// the total of key, value sizes of all previous elements.
+			used += int(lastElement.pos + lastElement.ksize)
+			s.BranchInuse += used
+			s.BranchOverflowN += int(p.overflow)
+		}
+
+		// Keep track of maximum page depth.
+		if depth+1 > s.Depth {
+			s.Depth = (depth + 1)
+		}
+	})
+
+	// Alloc stats can be computed from page counts and pageSize.
+	s.BranchAlloc = (s.BranchPageN + s.BranchOverflowN) * pageSize
+	s.LeafAlloc = (s.LeafPageN + s.LeafOverflowN) * pageSize
+
+	// Add the max depth of sub-buckets to get total nested depth.
+	s.Depth += subStats.Depth
+	// Add the stats for all sub-buckets
+	s.Add(subStats)
+	return s
+}
+
+// forEachPage iterates over every page in a bucket, including inline pages.
+func (b *Bucket) forEachPage(fn func(*page, int)) {
+	// If we have an inline page then just use that.
+	if b.page != nil {
+		fn(b.page, 0)
+		return
+	}
+
+	// Otherwise traverse the page hierarchy.
+	b.tx.forEachPage(b.root, 0, fn)
+}
+
+// forEachPageNode iterates over every page (or node) in a bucket.
+// This also includes inline pages.
+func (b *Bucket) forEachPageNode(fn func(*page, *node, int)) {
+	// If we have an inline page or root node then just use that.
+	if b.page != nil {
+		fn(b.page, nil, 0)
+		return
+	}
+	b._forEachPageNode(b.root, 0, fn)
+}
+
+func (b *Bucket) _forEachPageNode(pgid pgid, depth int, fn func(*page, *node, int)) {
+	var p, n = b.pageNode(pgid)
+
+	// Execute function.
+	fn(p, n, depth)
+
+	// Recursively loop over children.
+	if p != nil {
+		if (p.flags & branchPageFlag) != 0 {
+			for i := 0; i < int(p.count); i++ {
+				elem := p.branchPageElement(uint16(i))
+				b._forEachPageNode(elem.pgid, depth+1, fn)
+			}
+		}
+	} else {
+		if !n.isLeaf {
+			for _, inode := range n.inodes {
+				b._forEachPageNode(inode.pgid, depth+1, fn)
+			}
+		}
+	}
+}
+
+// spill writes all the nodes for this bucket to dirty pages.
+func (b *Bucket) spill() error {
+	// Spill all child buckets first.
+	for name, child := range b.buckets {
+		// If the child bucket is small enough and it has no child buckets then
+		// write it inline into the parent bucket's page. Otherwise spill it
+		// like a normal bucket and make the parent value a pointer to the page.
+		var value []byte
+		if child.inlineable() {
+			child.free()
+			value = child.write()
+		} else {
+			if err := child.spill(); err != nil {
+				return err
+			}
+
+			// Update the child bucket header in this bucket.
+			value = make([]byte, unsafe.Sizeof(bucket{}))
+			var bucket = (*bucket)(unsafe.Pointer(&value[0]))
+			*bucket = *child.bucket
+		}
+
+		// Skip writing the bucket if there are no materialized nodes.
+		if child.rootNode == nil {
+			continue
+		}
+
+		// Update parent node.
+		var c = b.Cursor()
+		k, _, flags := c.seek([]byte(name))
+		if !bytes.Equal([]byte(name), k) {
+			panic(fmt.Sprintf("misplaced bucket header: %x -> %x", []byte(name), k))
+		}
+		if flags&bucketLeafFlag == 0 {
+			panic(fmt.Sprintf("unexpected bucket header flag: %x", flags))
+		}
+		c.node().put([]byte(name), []byte(name), value, 0, bucketLeafFlag)
+	}
+
+	// Ignore if there's not a materialized root node.
+	if b.rootNode == nil {
+		return nil
+	}
+
+	// Spill nodes.
+	if err := b.rootNode.spill(); err != nil {
+		return err
+	}
+	b.rootNode = b.rootNode.root()
+
+	// Update the root node for this bucket.
+	if b.rootNode.pgid >= b.tx.meta.pgid {
+		panic(fmt.Sprintf("pgid (%d) above high water mark (%d)", b.rootNode.pgid, b.tx.meta.pgid))
+	}
+	b.root = b.rootNode.pgid
+
+	return nil
+}
+
+// inlineable returns true if a bucket is small enough to be written inline
+// and if it contains no subbuckets. Otherwise returns false.
+func (b *Bucket) inlineable() bool {
+	var n = b.rootNode
+
+	// Bucket must only contain a single leaf node.
+	if n == nil || !n.isLeaf {
+		return false
+	}
+
+	// Bucket is not inlineable if it contains subbuckets or if it goes beyond
+	// our threshold for inline bucket size.
+	var size = pageHeaderSize
+	for _, inode := range n.inodes {
+		size += leafPageElementSize + len(inode.key) + len(inode.value)
+
+		if inode.flags&bucketLeafFlag != 0 {
+			return false
+		} else if size > b.maxInlineBucketSize() {
+			return false
+		}
+	}
+
+	return true
+}
+
+// Returns the maximum total size of a bucket to make it a candidate for inlining.
+func (b *Bucket) maxInlineBucketSize() int {
+	return b.tx.db.pageSize / 4
+}
+
+// write allocates and writes a bucket to a byte slice.
+func (b *Bucket) write() []byte {
+	// Allocate the appropriate size.
+	var n = b.rootNode
+	var value = make([]byte, bucketHeaderSize+n.size())
+
+	// Write a bucket header.
+	var bucket = (*bucket)(unsafe.Pointer(&value[0]))
+	*bucket = *b.bucket
+
+	// Convert byte slice to a fake page and write the root node.
+	var p = (*page)(unsafe.Pointer(&value[bucketHeaderSize]))
+	n.write(p)
+
+	return value
+}
+
+// rebalance attempts to balance all nodes.
+func (b *Bucket) rebalance() {
+	for _, n := range b.nodes {
+		n.rebalance()
+	}
+	for _, child := range b.buckets {
+		child.rebalance()
+	}
+}
+
+// node creates a node from a page and associates it with a given parent.
+func (b *Bucket) node(pgid pgid, parent *node) *node {
+	_assert(b.nodes != nil, "nodes map expected")
+
+	// Retrieve node if it's already been created.
+	if n := b.nodes[pgid]; n != nil {
+		return n
+	}
+
+	// Otherwise create a node and cache it.
+	n := &node{bucket: b, parent: parent}
+	if parent == nil {
+		b.rootNode = n
+	} else {
+		parent.children = append(parent.children, n)
+	}
+
+	// Use the inline page if this is an inline bucket.
+	var p = b.page
+	if p == nil {
+		p = b.tx.page(pgid)
+	}
+
+	// Read the page into the node and cache it.
+	n.read(p)
+	b.nodes[pgid] = n
+
+	// Update statistics.
+	b.tx.stats.NodeCount++
+
+	return n
+}
+
+// free recursively frees all pages in the bucket.
+func (b *Bucket) free() {
+	if b.root == 0 {
+		return
+	}
+
+	var tx = b.tx
+	b.forEachPageNode(func(p *page, n *node, _ int) {
+		if p != nil {
+			tx.db.freelist.free(tx.meta.txid, p)
+		} else {
+			n.free()
+		}
+	})
+	b.root = 0
+}
+
+// dereference removes all references to the old mmap.
+func (b *Bucket) dereference() {
+	if b.rootNode != nil {
+		b.rootNode.root().dereference()
+	}
+
+	for _, child := range b.buckets {
+		child.dereference()
+	}
+}
+
+// pageNode returns the in-memory node, if it exists.
+// Otherwise returns the underlying page.
+func (b *Bucket) pageNode(id pgid) (*page, *node) {
+	// Inline buckets have a fake page embedded in their value so treat them
+	// differently. We'll return the rootNode (if available) or the fake page.
+	if b.root == 0 {
+		if id != 0 {
+			panic(fmt.Sprintf("inline bucket non-zero page access(2): %d != 0", id))
+		}
+		if b.rootNode != nil {
+			return nil, b.rootNode
+		}
+		return b.page, nil
+	}
+
+	// Check the node cache for non-inline buckets.
+	if b.nodes != nil {
+		if n := b.nodes[id]; n != nil {
+			return nil, n
+		}
+	}
+
+	// Finally lookup the page from the transaction if no node is materialized.
+	return b.tx.page(id), nil
+}
+
+// BucketStats records statistics about resources used by a bucket.
+type BucketStats struct {
+	// Page count statistics.
+	BranchPageN     int // number of logical branch pages
+	BranchOverflowN int // number of physical branch overflow pages
+	LeafPageN       int // number of logical leaf pages
+	LeafOverflowN   int // number of physical leaf overflow pages
+
+	// Tree statistics.
+	KeyN  int // number of keys/value pairs
+	Depth int // number of levels in B+tree
+
+	// Page size utilization.
+	BranchAlloc int // bytes allocated for physical branch pages
+	BranchInuse int // bytes actually used for branch data
+	LeafAlloc   int // bytes allocated for physical leaf pages
+	LeafInuse   int // bytes actually used for leaf data
+
+	// Bucket statistics
+	BucketN           int // total number of buckets including the top bucket
+	InlineBucketN     int // total number on inlined buckets
+	InlineBucketInuse int // bytes used for inlined buckets (also accounted for in LeafInuse)
+}
+
+func (s *BucketStats) Add(other BucketStats) {
+	s.BranchPageN += other.BranchPageN
+	s.BranchOverflowN += other.BranchOverflowN
+	s.LeafPageN += other.LeafPageN
+	s.LeafOverflowN += other.LeafOverflowN
+	s.KeyN += other.KeyN
+	if s.Depth < other.Depth {
+		s.Depth = other.Depth
+	}
+	s.BranchAlloc += other.BranchAlloc
+	s.BranchInuse += other.BranchInuse
+	s.LeafAlloc += other.LeafAlloc
+	s.LeafInuse += other.LeafInuse
+
+	s.BucketN += other.BucketN
+	s.InlineBucketN += other.InlineBucketN
+	s.InlineBucketInuse += other.InlineBucketInuse
+}
+
+// cloneBytes returns a copy of a given slice.
+func cloneBytes(v []byte) []byte {
+	var clone = make([]byte, len(v))
+	copy(clone, v)
+	return clone
+}
diff --git a/vendor/github.com/etcd-io/bbolt/cursor.go b/vendor/github.com/etcd-io/bbolt/cursor.go
new file mode 100644
index 0000000000..3000aced6c
--- /dev/null
+++ b/vendor/github.com/etcd-io/bbolt/cursor.go
@@ -0,0 +1,396 @@
+package bbolt
+
+import (
+	"bytes"
+	"fmt"
+	"sort"
+)
+
+// Cursor represents an iterator that can traverse over all key/value pairs in a bucket in sorted order.
+// Cursors see nested buckets with value == nil.
+// Cursors can be obtained from a transaction and are valid as long as the transaction is open.
+//
+// Keys and values returned from the cursor are only valid for the life of the transaction.
+//
+// Changing data while traversing with a cursor may cause it to be invalidated
+// and return unexpected keys and/or values. You must reposition your cursor
+// after mutating data.
+type Cursor struct {
+	bucket *Bucket
+	stack  []elemRef
+}
+
+// Bucket returns the bucket that this cursor was created from.
+func (c *Cursor) Bucket() *Bucket {
+	return c.bucket
+}
+
+// First moves the cursor to the first item in the bucket and returns its key and value.
+// If the bucket is empty then a nil key and value are returned.
+// The returned key and value are only valid for the life of the transaction.
+func (c *Cursor) First() (key []byte, value []byte) {
+	_assert(c.bucket.tx.db != nil, "tx closed")
+	c.stack = c.stack[:0]
+	p, n := c.bucket.pageNode(c.bucket.root)
+	c.stack = append(c.stack, elemRef{page: p, node: n, index: 0})
+	c.first()
+
+	// If we land on an empty page then move to the next value.
+	// https://github.com/boltdb/bolt/issues/450
+	if c.stack[len(c.stack)-1].count() == 0 {
+		c.next()
+	}
+
+	k, v, flags := c.keyValue()
+	if (flags & uint32(bucketLeafFlag)) != 0 {
+		return k, nil
+	}
+	return k, v
+
+}
+
+// Last moves the cursor to the last item in the bucket and returns its key and value.
+// If the bucket is empty then a nil key and value are returned.
+// The returned key and value are only valid for the life of the transaction.
+func (c *Cursor) Last() (key []byte, value []byte) {
+	_assert(c.bucket.tx.db != nil, "tx closed")
+	c.stack = c.stack[:0]
+	p, n := c.bucket.pageNode(c.bucket.root)
+	ref := elemRef{page: p, node: n}
+	ref.index = ref.count() - 1
+	c.stack = append(c.stack, ref)
+	c.last()
+	k, v, flags := c.keyValue()
+	if (flags & uint32(bucketLeafFlag)) != 0 {
+		return k, nil
+	}
+	return k, v
+}
+
+// Next moves the cursor to the next item in the bucket and returns its key and value.
+// If the cursor is at the end of the bucket then a nil key and value are returned.
+// The returned key and value are only valid for the life of the transaction.
+func (c *Cursor) Next() (key []byte, value []byte) {
+	_assert(c.bucket.tx.db != nil, "tx closed")
+	k, v, flags := c.next()
+	if (flags & uint32(bucketLeafFlag)) != 0 {
+		return k, nil
+	}
+	return k, v
+}
+
+// Prev moves the cursor to the previous item in the bucket and returns its key and value.
+// If the cursor is at the beginning of the bucket then a nil key and value are returned.
+// The returned key and value are only valid for the life of the transaction.
+func (c *Cursor) Prev() (key []byte, value []byte) {
+	_assert(c.bucket.tx.db != nil, "tx closed")
+
+	// Attempt to move back one element until we're successful.
+	// Move up the stack as we hit the beginning of each page in our stack.
+	for i := len(c.stack) - 1; i >= 0; i-- {
+		elem := &c.stack[i]
+		if elem.index > 0 {
+			elem.index--
+			break
+		}
+		c.stack = c.stack[:i]
+	}
+
+	// If we've hit the end then return nil.
+	if len(c.stack) == 0 {
+		return nil, nil
+	}
+
+	// Move down the stack to find the last element of the last leaf under this branch.
+	c.last()
+	k, v, flags := c.keyValue()
+	if (flags & uint32(bucketLeafFlag)) != 0 {
+		return k, nil
+	}
+	return k, v
+}
+
+// Seek moves the cursor to a given key and returns it.
+// If the key does not exist then the next key is used. If no keys
+// follow, a nil key is returned.
+// The returned key and value are only valid for the life of the transaction.
+func (c *Cursor) Seek(seek []byte) (key []byte, value []byte) {
+	k, v, flags := c.seek(seek)
+
+	// If we ended up after the last element of a page then move to the next one.
+	if ref := &c.stack[len(c.stack)-1]; ref.index >= ref.count() {
+		k, v, flags = c.next()
+	}
+
+	if k == nil {
+		return nil, nil
+	} else if (flags & uint32(bucketLeafFlag)) != 0 {
+		return k, nil
+	}
+	return k, v
+}
+
+// Delete removes the current key/value under the cursor from the bucket.
+// Delete fails if current key/value is a bucket or if the transaction is not writable.
+func (c *Cursor) Delete() error {
+	if c.bucket.tx.db == nil {
+		return ErrTxClosed
+	} else if !c.bucket.Writable() {
+		return ErrTxNotWritable
+	}
+
+	key, _, flags := c.keyValue()
+	// Return an error if current value is a bucket.
+	if (flags & bucketLeafFlag) != 0 {
+		return ErrIncompatibleValue
+	}
+	c.node().del(key)
+
+	return nil
+}
+
+// seek moves the cursor to a given key and returns it.
+// If the key does not exist then the next key is used.
+func (c *Cursor) seek(seek []byte) (key []byte, value []byte, flags uint32) {
+	_assert(c.bucket.tx.db != nil, "tx closed")
+
+	// Start from root page/node and traverse to correct page.
+	c.stack = c.stack[:0]
+	c.search(seek, c.bucket.root)
+
+	// If this is a bucket then return a nil value.
+	return c.keyValue()
+}
+
+// first moves the cursor to the first leaf element under the last page in the stack.
+func (c *Cursor) first() {
+	for {
+		// Exit when we hit a leaf page.
+		var ref = &c.stack[len(c.stack)-1]
+		if ref.isLeaf() {
+			break
+		}
+
+		// Keep adding pages pointing to the first element to the stack.
+		var pgid pgid
+		if ref.node != nil {
+			pgid = ref.node.inodes[ref.index].pgid
+		} else {
+			pgid = ref.page.branchPageElement(uint16(ref.index)).pgid
+		}
+		p, n := c.bucket.pageNode(pgid)
+		c.stack = append(c.stack, elemRef{page: p, node: n, index: 0})
+	}
+}
+
+// last moves the cursor to the last leaf element under the last page in the stack.
+func (c *Cursor) last() {
+	for {
+		// Exit when we hit a leaf page.
+		ref := &c.stack[len(c.stack)-1]
+		if ref.isLeaf() {
+			break
+		}
+
+		// Keep adding pages pointing to the last element in the stack.
+		var pgid pgid
+		if ref.node != nil {
+			pgid = ref.node.inodes[ref.index].pgid
+		} else {
+			pgid = ref.page.branchPageElement(uint16(ref.index)).pgid
+		}
+		p, n := c.bucket.pageNode(pgid)
+
+		var nextRef = elemRef{page: p, node: n}
+		nextRef.index = nextRef.count() - 1
+		c.stack = append(c.stack, nextRef)
+	}
+}
+
+// next moves to the next leaf element and returns the key and value.
+// If the cursor is at the last leaf element then it stays there and returns nil.
+func (c *Cursor) next() (key []byte, value []byte, flags uint32) {
+	for {
+		// Attempt to move over one element until we're successful.
+		// Move up the stack as we hit the end of each page in our stack.
+		var i int
+		for i = len(c.stack) - 1; i >= 0; i-- {
+			elem := &c.stack[i]
+			if elem.index < elem.count()-1 {
+				elem.index++
+				break
+			}
+		}
+
+		// If we've hit the root page then stop and return. This will leave the
+		// cursor on the last element of the last page.
+		if i == -1 {
+			return nil, nil, 0
+		}
+
+		// Otherwise start from where we left off in the stack and find the
+		// first element of the first leaf page.
+		c.stack = c.stack[:i+1]
+		c.first()
+
+		// If this is an empty page then restart and move back up the stack.
+		// https://github.com/boltdb/bolt/issues/450
+		if c.stack[len(c.stack)-1].count() == 0 {
+			continue
+		}
+
+		return c.keyValue()
+	}
+}
+
+// search recursively performs a binary search against a given page/node until it finds a given key.
+func (c *Cursor) search(key []byte, pgid pgid) {
+	p, n := c.bucket.pageNode(pgid)
+	if p != nil && (p.flags&(branchPageFlag|leafPageFlag)) == 0 {
+		panic(fmt.Sprintf("invalid page type: %d: %x", p.id, p.flags))
+	}
+	e := elemRef{page: p, node: n}
+	c.stack = append(c.stack, e)
+
+	// If we're on a leaf page/node then find the specific node.
+	if e.isLeaf() {
+		c.nsearch(key)
+		return
+	}
+
+	if n != nil {
+		c.searchNode(key, n)
+		return
+	}
+	c.searchPage(key, p)
+}
+
+func (c *Cursor) searchNode(key []byte, n *node) {
+	var exact bool
+	index := sort.Search(len(n.inodes), func(i int) bool {
+		// TODO(benbjohnson): Optimize this range search. It's a bit hacky right now.
+		// sort.Search() finds the lowest index where f() != -1 but we need the highest index.
+		ret := bytes.Compare(n.inodes[i].key, key)
+		if ret == 0 {
+			exact = true
+		}
+		return ret != -1
+	})
+	if !exact && index > 0 {
+		index--
+	}
+	c.stack[len(c.stack)-1].index = index
+
+	// Recursively search to the next page.
+	c.search(key, n.inodes[index].pgid)
+}
+
+func (c *Cursor) searchPage(key []byte, p *page) {
+	// Binary search for the correct range.
+	inodes := p.branchPageElements()
+
+	var exact bool
+	index := sort.Search(int(p.count), func(i int) bool {
+		// TODO(benbjohnson): Optimize this range search. It's a bit hacky right now.
+		// sort.Search() finds the lowest index where f() != -1 but we need the highest index.
+		ret := bytes.Compare(inodes[i].key(), key)
+		if ret == 0 {
+			exact = true
+		}
+		return ret != -1
+	})
+	if !exact && index > 0 {
+		index--
+	}
+	c.stack[len(c.stack)-1].index = index
+
+	// Recursively search to the next page.
+	c.search(key, inodes[index].pgid)
+}
+
+// nsearch searches the leaf node on the top of the stack for a key.
+func (c *Cursor) nsearch(key []byte) {
+	e := &c.stack[len(c.stack)-1]
+	p, n := e.page, e.node
+
+	// If we have a node then search its inodes.
+	if n != nil {
+		index := sort.Search(len(n.inodes), func(i int) bool {
+			return bytes.Compare(n.inodes[i].key, key) != -1
+		})
+		e.index = index
+		return
+	}
+
+	// If we have a page then search its leaf elements.
+	inodes := p.leafPageElements()
+	index := sort.Search(int(p.count), func(i int) bool {
+		return bytes.Compare(inodes[i].key(), key) != -1
+	})
+	e.index = index
+}
+
+// keyValue returns the key and value of the current leaf element.
+func (c *Cursor) keyValue() ([]byte, []byte, uint32) {
+	ref := &c.stack[len(c.stack)-1]
+
+	// If the cursor is pointing to the end of page/node then return nil.
+	if ref.count() == 0 || ref.index >= ref.count() {
+		return nil, nil, 0
+	}
+
+	// Retrieve value from node.
+	if ref.node != nil {
+		inode := &ref.node.inodes[ref.index]
+		return inode.key, inode.value, inode.flags
+	}
+
+	// Or retrieve value from page.
+	elem := ref.page.leafPageElement(uint16(ref.index))
+	return elem.key(), elem.value(), elem.flags
+}
+
+// node returns the node that the cursor is currently positioned on.
+func (c *Cursor) node() *node {
+	_assert(len(c.stack) > 0, "accessing a node with a zero-length cursor stack")
+
+	// If the top of the stack is a leaf node then just return it.
+	if ref := &c.stack[len(c.stack)-1]; ref.node != nil && ref.isLeaf() {
+		return ref.node
+	}
+
+	// Start from root and traverse down the hierarchy.
+	var n = c.stack[0].node
+	if n == nil {
+		n = c.bucket.node(c.stack[0].page.id, nil)
+	}
+	for _, ref := range c.stack[:len(c.stack)-1] {
+		_assert(!n.isLeaf, "expected branch node")
+		n = n.childAt(int(ref.index))
+	}
+	_assert(n.isLeaf, "expected leaf node")
+	return n
+}
+
+// elemRef represents a reference to an element on a given page/node.
+type elemRef struct {
+	page  *page
+	node  *node
+	index int
+}
+
+// isLeaf returns whether the ref is pointing at a leaf page/node.
+func (r *elemRef) isLeaf() bool {
+	if r.node != nil {
+		return r.node.isLeaf
+	}
+	return (r.page.flags & leafPageFlag) != 0
+}
+
+// count returns the number of inodes or page elements.
+func (r *elemRef) count() int {
+	if r.node != nil {
+		return len(r.node.inodes)
+	}
+	return int(r.page.count)
+}
diff --git a/vendor/github.com/etcd-io/bbolt/db.go b/vendor/github.com/etcd-io/bbolt/db.go
new file mode 100644
index 0000000000..962248c99f
--- /dev/null
+++ b/vendor/github.com/etcd-io/bbolt/db.go
@@ -0,0 +1,1164 @@
+package bbolt
+
+import (
+	"errors"
+	"fmt"
+	"hash/fnv"
+	"log"
+	"os"
+	"runtime"
+	"sort"
+	"sync"
+	"time"
+	"unsafe"
+)
+
+// The largest step that can be taken when remapping the mmap.
+const maxMmapStep = 1 << 30 // 1GB
+
+// The data file format version.
+const version = 2
+
+// Represents a marker value to indicate that a file is a Bolt DB.
+const magic uint32 = 0xED0CDAED
+
+const pgidNoFreelist pgid = 0xffffffffffffffff
+
+// IgnoreNoSync specifies whether the NoSync field of a DB is ignored when
+// syncing changes to a file.  This is required as some operating systems,
+// such as OpenBSD, do not have a unified buffer cache (UBC) and writes
+// must be synchronized using the msync(2) syscall.
+const IgnoreNoSync = runtime.GOOS == "openbsd"
+
+// Default values if not set in a DB instance.
+const (
+	DefaultMaxBatchSize  int = 1000
+	DefaultMaxBatchDelay     = 10 * time.Millisecond
+	DefaultAllocSize         = 16 * 1024 * 1024
+)
+
+// default page size for db is set to the OS page size.
+var defaultPageSize = os.Getpagesize()
+
+// The time elapsed between consecutive file locking attempts.
+const flockRetryTimeout = 50 * time.Millisecond
+
+// FreelistType is the type of the freelist backend
+type FreelistType string
+
+const (
+	// FreelistArrayType indicates backend freelist type is array
+	FreelistArrayType = FreelistType("array")
+	// FreelistMapType indicates backend freelist type is hashmap
+	FreelistMapType = FreelistType("hashmap")
+)
+
+// DB represents a collection of buckets persisted to a file on disk.
+// All data access is performed through transactions which can be obtained through the DB.
+// All the functions on DB will return a ErrDatabaseNotOpen if accessed before Open() is called.
+type DB struct {
+	// When enabled, the database will perform a Check() after every commit.
+	// A panic is issued if the database is in an inconsistent state. This
+	// flag has a large performance impact so it should only be used for
+	// debugging purposes.
+	StrictMode bool
+
+	// Setting the NoSync flag will cause the database to skip fsync()
+	// calls after each commit. This can be useful when bulk loading data
+	// into a database and you can restart the bulk load in the event of
+	// a system failure or database corruption. Do not set this flag for
+	// normal use.
+	//
+	// If the package global IgnoreNoSync constant is true, this value is
+	// ignored.  See the comment on that constant for more details.
+	//
+	// THIS IS UNSAFE. PLEASE USE WITH CAUTION.
+	NoSync bool
+
+	// When true, skips syncing freelist to disk. This improves the database
+	// write performance under normal operation, but requires a full database
+	// re-sync during recovery.
+	NoFreelistSync bool
+
+	// FreelistType sets the backend freelist type. There are two options. Array which is simple but endures
+	// dramatic performance degradation if database is large and framentation in freelist is common.
+	// The alternative one is using hashmap, it is faster in almost all circumstances
+	// but it doesn't guarantee that it offers the smallest page id available. In normal case it is safe.
+	// The default type is array
+	FreelistType FreelistType
+
+	// When true, skips the truncate call when growing the database.
+	// Setting this to true is only safe on non-ext3/ext4 systems.
+	// Skipping truncation avoids preallocation of hard drive space and
+	// bypasses a truncate() and fsync() syscall on remapping.
+	//
+	// https://github.com/boltdb/bolt/issues/284
+	NoGrowSync bool
+
+	// If you want to read the entire database fast, you can set MmapFlag to
+	// syscall.MAP_POPULATE on Linux 2.6.23+ for sequential read-ahead.
+	MmapFlags int
+
+	// MaxBatchSize is the maximum size of a batch. Default value is
+	// copied from DefaultMaxBatchSize in Open.
+	//
+	// If <=0, disables batching.
+	//
+	// Do not change concurrently with calls to Batch.
+	MaxBatchSize int
+
+	// MaxBatchDelay is the maximum delay before a batch starts.
+	// Default value is copied from DefaultMaxBatchDelay in Open.
+	//
+	// If <=0, effectively disables batching.
+	//
+	// Do not change concurrently with calls to Batch.
+	MaxBatchDelay time.Duration
+
+	// AllocSize is the amount of space allocated when the database
+	// needs to create new pages. This is done to amortize the cost
+	// of truncate() and fsync() when growing the data file.
+	AllocSize int
+
+	path     string
+	file     *os.File
+	dataref  []byte // mmap'ed readonly, write throws SEGV
+	data     *[maxMapSize]byte
+	datasz   int
+	filesz   int // current on disk file size
+	meta0    *meta
+	meta1    *meta
+	pageSize int
+	opened   bool
+	rwtx     *Tx
+	txs      []*Tx
+	stats    Stats
+
+	freelist     *freelist
+	freelistLoad sync.Once
+
+	pagePool sync.Pool
+
+	batchMu sync.Mutex
+	batch   *batch
+
+	rwlock   sync.Mutex   // Allows only one writer at a time.
+	metalock sync.Mutex   // Protects meta page access.
+	mmaplock sync.RWMutex // Protects mmap access during remapping.
+	statlock sync.RWMutex // Protects stats access.
+
+	ops struct {
+		writeAt func(b []byte, off int64) (n int, err error)
+	}
+
+	// Read only mode.
+	// When true, Update() and Begin(true) return ErrDatabaseReadOnly immediately.
+	readOnly bool
+}
+
+// Path returns the path to currently open database file.
+func (db *DB) Path() string {
+	return db.path
+}
+
+// GoString returns the Go string representation of the database.
+func (db *DB) GoString() string {
+	return fmt.Sprintf("bolt.DB{path:%q}", db.path)
+}
+
+// String returns the string representation of the database.
+func (db *DB) String() string {
+	return fmt.Sprintf("DB<%q>", db.path)
+}
+
+// Open creates and opens a database at the given path.
+// If the file does not exist then it will be created automatically.
+// Passing in nil options will cause Bolt to open the database with the default options.
+func Open(path string, mode os.FileMode, options *Options) (*DB, error) {
+	db := &DB{
+		opened: true,
+	}
+	// Set default options if no options are provided.
+	if options == nil {
+		options = DefaultOptions
+	}
+	db.NoSync = options.NoSync
+	db.NoGrowSync = options.NoGrowSync
+	db.MmapFlags = options.MmapFlags
+	db.NoFreelistSync = options.NoFreelistSync
+	db.FreelistType = options.FreelistType
+
+	// Set default values for later DB operations.
+	db.MaxBatchSize = DefaultMaxBatchSize
+	db.MaxBatchDelay = DefaultMaxBatchDelay
+	db.AllocSize = DefaultAllocSize
+
+	flag := os.O_RDWR
+	if options.ReadOnly {
+		flag = os.O_RDONLY
+		db.readOnly = true
+	}
+
+	// Open data file and separate sync handler for metadata writes.
+	db.path = path
+	var err error
+	if db.file, err = os.OpenFile(db.path, flag|os.O_CREATE, mode); err != nil {
+		_ = db.close()
+		return nil, err
+	}
+
+	// Lock file so that other processes using Bolt in read-write mode cannot
+	// use the database  at the same time. This would cause corruption since
+	// the two processes would write meta pages and free pages separately.
+	// The database file is locked exclusively (only one process can grab the lock)
+	// if !options.ReadOnly.
+	// The database file is locked using the shared lock (more than one process may
+	// hold a lock at the same time) otherwise (options.ReadOnly is set).
+	if err := flock(db, !db.readOnly, options.Timeout); err != nil {
+		_ = db.close()
+		return nil, err
+	}
+
+	// Default values for test hooks
+	db.ops.writeAt = db.file.WriteAt
+
+	if db.pageSize = options.PageSize; db.pageSize == 0 {
+		// Set the default page size to the OS page size.
+		db.pageSize = defaultPageSize
+	}
+
+	// Initialize the database if it doesn't exist.
+	if info, err := db.file.Stat(); err != nil {
+		_ = db.close()
+		return nil, err
+	} else if info.Size() == 0 {
+		// Initialize new files with meta pages.
+		if err := db.init(); err != nil {
+			// clean up file descriptor on initialization fail
+			_ = db.close()
+			return nil, err
+		}
+	} else {
+		// Read the first meta page to determine the page size.
+		var buf [0x1000]byte
+		// If we can't read the page size, but can read a page, assume
+		// it's the same as the OS or one given -- since that's how the
+		// page size was chosen in the first place.
+		//
+		// If the first page is invalid and this OS uses a different
+		// page size than what the database was created with then we
+		// are out of luck and cannot access the database.
+		//
+		// TODO: scan for next page
+		if bw, err := db.file.ReadAt(buf[:], 0); err == nil && bw == len(buf) {
+			if m := db.pageInBuffer(buf[:], 0).meta(); m.validate() == nil {
+				db.pageSize = int(m.pageSize)
+			}
+		} else {
+			_ = db.close()
+			return nil, ErrInvalid
+		}
+	}
+
+	// Initialize page pool.
+	db.pagePool = sync.Pool{
+		New: func() interface{} {
+			return make([]byte, db.pageSize)
+		},
+	}
+
+	// Memory map the data file.
+	if err := db.mmap(options.InitialMmapSize); err != nil {
+		_ = db.close()
+		return nil, err
+	}
+
+	if db.readOnly {
+		return db, nil
+	}
+
+	db.loadFreelist()
+
+	// Flush freelist when transitioning from no sync to sync so
+	// NoFreelistSync unaware boltdb can open the db later.
+	if !db.NoFreelistSync && !db.hasSyncedFreelist() {
+		tx, err := db.Begin(true)
+		if tx != nil {
+			err = tx.Commit()
+		}
+		if err != nil {
+			_ = db.close()
+			return nil, err
+		}
+	}
+
+	// Mark the database as opened and return.
+	return db, nil
+}
+
+// loadFreelist reads the freelist if it is synced, or reconstructs it
+// by scanning the DB if it is not synced. It assumes there are no
+// concurrent accesses being made to the freelist.
+func (db *DB) loadFreelist() {
+	db.freelistLoad.Do(func() {
+		db.freelist = newFreelist(db.FreelistType)
+		if !db.hasSyncedFreelist() {
+			// Reconstruct free list by scanning the DB.
+			db.freelist.readIDs(db.freepages())
+		} else {
+			// Read free list from freelist page.
+			db.freelist.read(db.page(db.meta().freelist))
+		}
+		db.stats.FreePageN = db.freelist.free_count()
+	})
+}
+
+func (db *DB) hasSyncedFreelist() bool {
+	return db.meta().freelist != pgidNoFreelist
+}
+
+// mmap opens the underlying memory-mapped file and initializes the meta references.
+// minsz is the minimum size that the new mmap can be.
+func (db *DB) mmap(minsz int) error {
+	db.mmaplock.Lock()
+	defer db.mmaplock.Unlock()
+
+	info, err := db.file.Stat()
+	if err != nil {
+		return fmt.Errorf("mmap stat error: %s", err)
+	} else if int(info.Size()) < db.pageSize*2 {
+		return fmt.Errorf("file size too small")
+	}
+
+	// Ensure the size is at least the minimum size.
+	var size = int(info.Size())
+	if size < minsz {
+		size = minsz
+	}
+	size, err = db.mmapSize(size)
+	if err != nil {
+		return err
+	}
+
+	// Dereference all mmap references before unmapping.
+	if db.rwtx != nil {
+		db.rwtx.root.dereference()
+	}
+
+	// Unmap existing data before continuing.
+	if err := db.munmap(); err != nil {
+		return err
+	}
+
+	// Memory-map the data file as a byte slice.
+	if err := mmap(db, size); err != nil {
+		return err
+	}
+
+	// Save references to the meta pages.
+	db.meta0 = db.page(0).meta()
+	db.meta1 = db.page(1).meta()
+
+	// Validate the meta pages. We only return an error if both meta pages fail
+	// validation, since meta0 failing validation means that it wasn't saved
+	// properly -- but we can recover using meta1. And vice-versa.
+	err0 := db.meta0.validate()
+	err1 := db.meta1.validate()
+	if err0 != nil && err1 != nil {
+		return err0
+	}
+
+	return nil
+}
+
+// munmap unmaps the data file from memory.
+func (db *DB) munmap() error {
+	if err := munmap(db); err != nil {
+		return fmt.Errorf("unmap error: " + err.Error())
+	}
+	return nil
+}
+
+// mmapSize determines the appropriate size for the mmap given the current size
+// of the database. The minimum size is 32KB and doubles until it reaches 1GB.
+// Returns an error if the new mmap size is greater than the max allowed.
+func (db *DB) mmapSize(size int) (int, error) {
+	// Double the size from 32KB until 1GB.
+	for i := uint(15); i <= 30; i++ {
+		if size <= 1<<i {
+			return 1 << i, nil
+		}
+	}
+
+	// Verify the requested size is not above the maximum allowed.
+	if size > maxMapSize {
+		return 0, fmt.Errorf("mmap too large")
+	}
+
+	// If larger than 1GB then grow by 1GB at a time.
+	sz := int64(size)
+	if remainder := sz % int64(maxMmapStep); remainder > 0 {
+		sz += int64(maxMmapStep) - remainder
+	}
+
+	// Ensure that the mmap size is a multiple of the page size.
+	// This should always be true since we're incrementing in MBs.
+	pageSize := int64(db.pageSize)
+	if (sz % pageSize) != 0 {
+		sz = ((sz / pageSize) + 1) * pageSize
+	}
+
+	// If we've exceeded the max size then only grow up to the max size.
+	if sz > maxMapSize {
+		sz = maxMapSize
+	}
+
+	return int(sz), nil
+}
+
+// init creates a new database file and initializes its meta pages.
+func (db *DB) init() error {
+	// Create two meta pages on a buffer.
+	buf := make([]byte, db.pageSize*4)
+	for i := 0; i < 2; i++ {
+		p := db.pageInBuffer(buf[:], pgid(i))
+		p.id = pgid(i)
+		p.flags = metaPageFlag
+
+		// Initialize the meta page.
+		m := p.meta()
+		m.magic = magic
+		m.version = version
+		m.pageSize = uint32(db.pageSize)
+		m.freelist = 2
+		m.root = bucket{root: 3}
+		m.pgid = 4
+		m.txid = txid(i)
+		m.checksum = m.sum64()
+	}
+
+	// Write an empty freelist at page 3.
+	p := db.pageInBuffer(buf[:], pgid(2))
+	p.id = pgid(2)
+	p.flags = freelistPageFlag
+	p.count = 0
+
+	// Write an empty leaf page at page 4.
+	p = db.pageInBuffer(buf[:], pgid(3))
+	p.id = pgid(3)
+	p.flags = leafPageFlag
+	p.count = 0
+
+	// Write the buffer to our data file.
+	if _, err := db.ops.writeAt(buf, 0); err != nil {
+		return err
+	}
+	if err := fdatasync(db); err != nil {
+		return err
+	}
+
+	return nil
+}
+
+// Close releases all database resources.
+// It will block waiting for any open transactions to finish
+// before closing the database and returning.
+func (db *DB) Close() error {
+	db.rwlock.Lock()
+	defer db.rwlock.Unlock()
+
+	db.metalock.Lock()
+	defer db.metalock.Unlock()
+
+	db.mmaplock.Lock()
+	defer db.mmaplock.Unlock()
+
+	return db.close()
+}
+
+func (db *DB) close() error {
+	if !db.opened {
+		return nil
+	}
+
+	db.opened = false
+
+	db.freelist = nil
+
+	// Clear ops.
+	db.ops.writeAt = nil
+
+	// Close the mmap.
+	if err := db.munmap(); err != nil {
+		return err
+	}
+
+	// Close file handles.
+	if db.file != nil {
+		// No need to unlock read-only file.
+		if !db.readOnly {
+			// Unlock the file.
+			if err := funlock(db); err != nil {
+				log.Printf("bolt.Close(): funlock error: %s", err)
+			}
+		}
+
+		// Close the file descriptor.
+		if err := db.file.Close(); err != nil {
+			return fmt.Errorf("db file close: %s", err)
+		}
+		db.file = nil
+	}
+
+	db.path = ""
+	return nil
+}
+
+// Begin starts a new transaction.
+// Multiple read-only transactions can be used concurrently but only one
+// write transaction can be used at a time. Starting multiple write transactions
+// will cause the calls to block and be serialized until the current write
+// transaction finishes.
+//
+// Transactions should not be dependent on one another. Opening a read
+// transaction and a write transaction in the same goroutine can cause the
+// writer to deadlock because the database periodically needs to re-mmap itself
+// as it grows and it cannot do that while a read transaction is open.
+//
+// If a long running read transaction (for example, a snapshot transaction) is
+// needed, you might want to set DB.InitialMmapSize to a large enough value
+// to avoid potential blocking of write transaction.
+//
+// IMPORTANT: You must close read-only transactions after you are finished or
+// else the database will not reclaim old pages.
+func (db *DB) Begin(writable bool) (*Tx, error) {
+	if writable {
+		return db.beginRWTx()
+	}
+	return db.beginTx()
+}
+
+func (db *DB) beginTx() (*Tx, error) {
+	// Lock the meta pages while we initialize the transaction. We obtain
+	// the meta lock before the mmap lock because that's the order that the
+	// write transaction will obtain them.
+	db.metalock.Lock()
+
+	// Obtain a read-only lock on the mmap. When the mmap is remapped it will
+	// obtain a write lock so all transactions must finish before it can be
+	// remapped.
+	db.mmaplock.RLock()
+
+	// Exit if the database is not open yet.
+	if !db.opened {
+		db.mmaplock.RUnlock()
+		db.metalock.Unlock()
+		return nil, ErrDatabaseNotOpen
+	}
+
+	// Create a transaction associated with the database.
+	t := &Tx{}
+	t.init(db)
+
+	// Keep track of transaction until it closes.
+	db.txs = append(db.txs, t)
+	n := len(db.txs)
+
+	// Unlock the meta pages.
+	db.metalock.Unlock()
+
+	// Update the transaction stats.
+	db.statlock.Lock()
+	db.stats.TxN++
+	db.stats.OpenTxN = n
+	db.statlock.Unlock()
+
+	return t, nil
+}
+
+func (db *DB) beginRWTx() (*Tx, error) {
+	// If the database was opened with Options.ReadOnly, return an error.
+	if db.readOnly {
+		return nil, ErrDatabaseReadOnly
+	}
+
+	// Obtain writer lock. This is released by the transaction when it closes.
+	// This enforces only one writer transaction at a time.
+	db.rwlock.Lock()
+
+	// Once we have the writer lock then we can lock the meta pages so that
+	// we can set up the transaction.
+	db.metalock.Lock()
+	defer db.metalock.Unlock()
+
+	// Exit if the database is not open yet.
+	if !db.opened {
+		db.rwlock.Unlock()
+		return nil, ErrDatabaseNotOpen
+	}
+
+	// Create a transaction associated with the database.
+	t := &Tx{writable: true}
+	t.init(db)
+	db.rwtx = t
+	db.freePages()
+	return t, nil
+}
+
+// freePages releases any pages associated with closed read-only transactions.
+func (db *DB) freePages() {
+	// Free all pending pages prior to earliest open transaction.
+	sort.Sort(txsById(db.txs))
+	minid := txid(0xFFFFFFFFFFFFFFFF)
+	if len(db.txs) > 0 {
+		minid = db.txs[0].meta.txid
+	}
+	if minid > 0 {
+		db.freelist.release(minid - 1)
+	}
+	// Release unused txid extents.
+	for _, t := range db.txs {
+		db.freelist.releaseRange(minid, t.meta.txid-1)
+		minid = t.meta.txid + 1
+	}
+	db.freelist.releaseRange(minid, txid(0xFFFFFFFFFFFFFFFF))
+	// Any page both allocated and freed in an extent is safe to release.
+}
+
+type txsById []*Tx
+
+func (t txsById) Len() int           { return len(t) }
+func (t txsById) Swap(i, j int)      { t[i], t[j] = t[j], t[i] }
+func (t txsById) Less(i, j int) bool { return t[i].meta.txid < t[j].meta.txid }
+
+// removeTx removes a transaction from the database.
+func (db *DB) removeTx(tx *Tx) {
+	// Release the read lock on the mmap.
+	db.mmaplock.RUnlock()
+
+	// Use the meta lock to restrict access to the DB object.
+	db.metalock.Lock()
+
+	// Remove the transaction.
+	for i, t := range db.txs {
+		if t == tx {
+			last := len(db.txs) - 1
+			db.txs[i] = db.txs[last]
+			db.txs[last] = nil
+			db.txs = db.txs[:last]
+			break
+		}
+	}
+	n := len(db.txs)
+
+	// Unlock the meta pages.
+	db.metalock.Unlock()
+
+	// Merge statistics.
+	db.statlock.Lock()
+	db.stats.OpenTxN = n
+	db.stats.TxStats.add(&tx.stats)
+	db.statlock.Unlock()
+}
+
+// Update executes a function within the context of a read-write managed transaction.
+// If no error is returned from the function then the transaction is committed.
+// If an error is returned then the entire transaction is rolled back.
+// Any error that is returned from the function or returned from the commit is
+// returned from the Update() method.
+//
+// Attempting to manually commit or rollback within the function will cause a panic.
+func (db *DB) Update(fn func(*Tx) error) error {
+	t, err := db.Begin(true)
+	if err != nil {
+		return err
+	}
+
+	// Make sure the transaction rolls back in the event of a panic.
+	defer func() {
+		if t.db != nil {
+			t.rollback()
+		}
+	}()
+
+	// Mark as a managed tx so that the inner function cannot manually commit.
+	t.managed = true
+
+	// If an error is returned from the function then rollback and return error.
+	err = fn(t)
+	t.managed = false
+	if err != nil {
+		_ = t.Rollback()
+		return err
+	}
+
+	return t.Commit()
+}
+
+// View executes a function within the context of a managed read-only transaction.
+// Any error that is returned from the function is returned from the View() method.
+//
+// Attempting to manually rollback within the function will cause a panic.
+func (db *DB) View(fn func(*Tx) error) error {
+	t, err := db.Begin(false)
+	if err != nil {
+		return err
+	}
+
+	// Make sure the transaction rolls back in the event of a panic.
+	defer func() {
+		if t.db != nil {
+			t.rollback()
+		}
+	}()
+
+	// Mark as a managed tx so that the inner function cannot manually rollback.
+	t.managed = true
+
+	// If an error is returned from the function then pass it through.
+	err = fn(t)
+	t.managed = false
+	if err != nil {
+		_ = t.Rollback()
+		return err
+	}
+
+	return t.Rollback()
+}
+
+// Batch calls fn as part of a batch. It behaves similar to Update,
+// except:
+//
+// 1. concurrent Batch calls can be combined into a single Bolt
+// transaction.
+//
+// 2. the function passed to Batch may be called multiple times,
+// regardless of whether it returns error or not.
+//
+// This means that Batch function side effects must be idempotent and
+// take permanent effect only after a successful return is seen in
+// caller.
+//
+// The maximum batch size and delay can be adjusted with DB.MaxBatchSize
+// and DB.MaxBatchDelay, respectively.
+//
+// Batch is only useful when there are multiple goroutines calling it.
+func (db *DB) Batch(fn func(*Tx) error) error {
+	errCh := make(chan error, 1)
+
+	db.batchMu.Lock()
+	if (db.batch == nil) || (db.batch != nil && len(db.batch.calls) >= db.MaxBatchSize) {
+		// There is no existing batch, or the existing batch is full; start a new one.
+		db.batch = &batch{
+			db: db,
+		}
+		db.batch.timer = time.AfterFunc(db.MaxBatchDelay, db.batch.trigger)
+	}
+	db.batch.calls = append(db.batch.calls, call{fn: fn, err: errCh})
+	if len(db.batch.calls) >= db.MaxBatchSize {
+		// wake up batch, it's ready to run
+		go db.batch.trigger()
+	}
+	db.batchMu.Unlock()
+
+	err := <-errCh
+	if err == trySolo {
+		err = db.Update(fn)
+	}
+	return err
+}
+
+type call struct {
+	fn  func(*Tx) error
+	err chan<- error
+}
+
+type batch struct {
+	db    *DB
+	timer *time.Timer
+	start sync.Once
+	calls []call
+}
+
+// trigger runs the batch if it hasn't already been run.
+func (b *batch) trigger() {
+	b.start.Do(b.run)
+}
+
+// run performs the transactions in the batch and communicates results
+// back to DB.Batch.
+func (b *batch) run() {
+	b.db.batchMu.Lock()
+	b.timer.Stop()
+	// Make sure no new work is added to this batch, but don't break
+	// other batches.
+	if b.db.batch == b {
+		b.db.batch = nil
+	}
+	b.db.batchMu.Unlock()
+
+retry:
+	for len(b.calls) > 0 {
+		var failIdx = -1
+		err := b.db.Update(func(tx *Tx) error {
+			for i, c := range b.calls {
+				if err := safelyCall(c.fn, tx); err != nil {
+					failIdx = i
+					return err
+				}
+			}
+			return nil
+		})
+
+		if failIdx >= 0 {
+			// take the failing transaction out of the batch. it's
+			// safe to shorten b.calls here because db.batch no longer
+			// points to us, and we hold the mutex anyway.
+			c := b.calls[failIdx]
+			b.calls[failIdx], b.calls = b.calls[len(b.calls)-1], b.calls[:len(b.calls)-1]
+			// tell the submitter re-run it solo, continue with the rest of the batch
+			c.err <- trySolo
+			continue retry
+		}
+
+		// pass success, or bolt internal errors, to all callers
+		for _, c := range b.calls {
+			c.err <- err
+		}
+		break retry
+	}
+}
+
+// trySolo is a special sentinel error value used for signaling that a
+// transaction function should be re-run. It should never be seen by
+// callers.
+var trySolo = errors.New("batch function returned an error and should be re-run solo")
+
+type panicked struct {
+	reason interface{}
+}
+
+func (p panicked) Error() string {
+	if err, ok := p.reason.(error); ok {
+		return err.Error()
+	}
+	return fmt.Sprintf("panic: %v", p.reason)
+}
+
+func safelyCall(fn func(*Tx) error, tx *Tx) (err error) {
+	defer func() {
+		if p := recover(); p != nil {
+			err = panicked{p}
+		}
+	}()
+	return fn(tx)
+}
+
+// Sync executes fdatasync() against the database file handle.
+//
+// This is not necessary under normal operation, however, if you use NoSync
+// then it allows you to force the database file to sync against the disk.
+func (db *DB) Sync() error { return fdatasync(db) }
+
+// Stats retrieves ongoing performance stats for the database.
+// This is only updated when a transaction closes.
+func (db *DB) Stats() Stats {
+	db.statlock.RLock()
+	defer db.statlock.RUnlock()
+	return db.stats
+}
+
+// This is for internal access to the raw data bytes from the C cursor, use
+// carefully, or not at all.
+func (db *DB) Info() *Info {
+	return &Info{uintptr(unsafe.Pointer(&db.data[0])), db.pageSize}
+}
+
+// page retrieves a page reference from the mmap based on the current page size.
+func (db *DB) page(id pgid) *page {
+	pos := id * pgid(db.pageSize)
+	return (*page)(unsafe.Pointer(&db.data[pos]))
+}
+
+// pageInBuffer retrieves a page reference from a given byte array based on the current page size.
+func (db *DB) pageInBuffer(b []byte, id pgid) *page {
+	return (*page)(unsafe.Pointer(&b[id*pgid(db.pageSize)]))
+}
+
+// meta retrieves the current meta page reference.
+func (db *DB) meta() *meta {
+	// We have to return the meta with the highest txid which doesn't fail
+	// validation. Otherwise, we can cause errors when in fact the database is
+	// in a consistent state. metaA is the one with the higher txid.
+	metaA := db.meta0
+	metaB := db.meta1
+	if db.meta1.txid > db.meta0.txid {
+		metaA = db.meta1
+		metaB = db.meta0
+	}
+
+	// Use higher meta page if valid. Otherwise fallback to previous, if valid.
+	if err := metaA.validate(); err == nil {
+		return metaA
+	} else if err := metaB.validate(); err == nil {
+		return metaB
+	}
+
+	// This should never be reached, because both meta1 and meta0 were validated
+	// on mmap() and we do fsync() on every write.
+	panic("bolt.DB.meta(): invalid meta pages")
+}
+
+// allocate returns a contiguous block of memory starting at a given page.
+func (db *DB) allocate(txid txid, count int) (*page, error) {
+	// Allocate a temporary buffer for the page.
+	var buf []byte
+	if count == 1 {
+		buf = db.pagePool.Get().([]byte)
+	} else {
+		buf = make([]byte, count*db.pageSize)
+	}
+	p := (*page)(unsafe.Pointer(&buf[0]))
+	p.overflow = uint32(count - 1)
+
+	// Use pages from the freelist if they are available.
+	if p.id = db.freelist.allocate(txid, count); p.id != 0 {
+		return p, nil
+	}
+
+	// Resize mmap() if we're at the end.
+	p.id = db.rwtx.meta.pgid
+	var minsz = int((p.id+pgid(count))+1) * db.pageSize
+	if minsz >= db.datasz {
+		if err := db.mmap(minsz); err != nil {
+			return nil, fmt.Errorf("mmap allocate error: %s", err)
+		}
+	}
+
+	// Move the page id high water mark.
+	db.rwtx.meta.pgid += pgid(count)
+
+	return p, nil
+}
+
+// grow grows the size of the database to the given sz.
+func (db *DB) grow(sz int) error {
+	// Ignore if the new size is less than available file size.
+	if sz <= db.filesz {
+		return nil
+	}
+
+	// If the data is smaller than the alloc size then only allocate what's needed.
+	// Once it goes over the allocation size then allocate in chunks.
+	if db.datasz < db.AllocSize {
+		sz = db.datasz
+	} else {
+		sz += db.AllocSize
+	}
+
+	// Truncate and fsync to ensure file size metadata is flushed.
+	// https://github.com/boltdb/bolt/issues/284
+	if !db.NoGrowSync && !db.readOnly {
+		if runtime.GOOS != "windows" {
+			if err := db.file.Truncate(int64(sz)); err != nil {
+				return fmt.Errorf("file resize error: %s", err)
+			}
+		}
+		if err := db.file.Sync(); err != nil {
+			return fmt.Errorf("file sync error: %s", err)
+		}
+	}
+
+	db.filesz = sz
+	return nil
+}
+
+func (db *DB) IsReadOnly() bool {
+	return db.readOnly
+}
+
+func (db *DB) freepages() []pgid {
+	tx, err := db.beginTx()
+	defer func() {
+		err = tx.Rollback()
+		if err != nil {
+			panic("freepages: failed to rollback tx")
+		}
+	}()
+	if err != nil {
+		panic("freepages: failed to open read only tx")
+	}
+
+	reachable := make(map[pgid]*page)
+	nofreed := make(map[pgid]bool)
+	ech := make(chan error)
+	go func() {
+		for e := range ech {
+			panic(fmt.Sprintf("freepages: failed to get all reachable pages (%v)", e))
+		}
+	}()
+	tx.checkBucket(&tx.root, reachable, nofreed, ech)
+	close(ech)
+
+	var fids []pgid
+	for i := pgid(2); i < db.meta().pgid; i++ {
+		if _, ok := reachable[i]; !ok {
+			fids = append(fids, i)
+		}
+	}
+	return fids
+}
+
+// Options represents the options that can be set when opening a database.
+type Options struct {
+	// Timeout is the amount of time to wait to obtain a file lock.
+	// When set to zero it will wait indefinitely. This option is only
+	// available on Darwin and Linux.
+	Timeout time.Duration
+
+	// Sets the DB.NoGrowSync flag before memory mapping the file.
+	NoGrowSync bool
+
+	// Do not sync freelist to disk. This improves the database write performance
+	// under normal operation, but requires a full database re-sync during recovery.
+	NoFreelistSync bool
+
+	// FreelistType sets the backend freelist type. There are two options. Array which is simple but endures
+	// dramatic performance degradation if database is large and framentation in freelist is common.
+	// The alternative one is using hashmap, it is faster in almost all circumstances
+	// but it doesn't guarantee that it offers the smallest page id available. In normal case it is safe.
+	// The default type is array
+	FreelistType FreelistType
+
+	// Open database in read-only mode. Uses flock(..., LOCK_SH |LOCK_NB) to
+	// grab a shared lock (UNIX).
+	ReadOnly bool
+
+	// Sets the DB.MmapFlags flag before memory mapping the file.
+	MmapFlags int
+
+	// InitialMmapSize is the initial mmap size of the database
+	// in bytes. Read transactions won't block write transaction
+	// if the InitialMmapSize is large enough to hold database mmap
+	// size. (See DB.Begin for more information)
+	//
+	// If <=0, the initial map size is 0.
+	// If initialMmapSize is smaller than the previous database size,
+	// it takes no effect.
+	InitialMmapSize int
+
+	// PageSize overrides the default OS page size.
+	PageSize int
+
+	// NoSync sets the initial value of DB.NoSync. Normally this can just be
+	// set directly on the DB itself when returned from Open(), but this option
+	// is useful in APIs which expose Options but not the underlying DB.
+	NoSync bool
+}
+
+// DefaultOptions represent the options used if nil options are passed into Open().
+// No timeout is used which will cause Bolt to wait indefinitely for a lock.
+var DefaultOptions = &Options{
+	Timeout:      0,
+	NoGrowSync:   false,
+	FreelistType: FreelistArrayType,
+}
+
+// Stats represents statistics about the database.
+type Stats struct {
+	// Freelist stats
+	FreePageN     int // total number of free pages on the freelist
+	PendingPageN  int // total number of pending pages on the freelist
+	FreeAlloc     int // total bytes allocated in free pages
+	FreelistInuse int // total bytes used by the freelist
+
+	// Transaction stats
+	TxN     int // total number of started read transactions
+	OpenTxN int // number of currently open read transactions
+
+	TxStats TxStats // global, ongoing stats.
+}
+
+// Sub calculates and returns the difference between two sets of database stats.
+// This is useful when obtaining stats at two different points and time and
+// you need the performance counters that occurred within that time span.
+func (s *Stats) Sub(other *Stats) Stats {
+	if other == nil {
+		return *s
+	}
+	var diff Stats
+	diff.FreePageN = s.FreePageN
+	diff.PendingPageN = s.PendingPageN
+	diff.FreeAlloc = s.FreeAlloc
+	diff.FreelistInuse = s.FreelistInuse
+	diff.TxN = s.TxN - other.TxN
+	diff.TxStats = s.TxStats.Sub(&other.TxStats)
+	return diff
+}
+
+type Info struct {
+	Data     uintptr
+	PageSize int
+}
+
+type meta struct {
+	magic    uint32
+	version  uint32
+	pageSize uint32
+	flags    uint32
+	root     bucket
+	freelist pgid
+	pgid     pgid
+	txid     txid
+	checksum uint64
+}
+
+// validate checks the marker bytes and version of the meta page to ensure it matches this binary.
+func (m *meta) validate() error {
+	if m.magic != magic {
+		return ErrInvalid
+	} else if m.version != version {
+		return ErrVersionMismatch
+	} else if m.checksum != 0 && m.checksum != m.sum64() {
+		return ErrChecksum
+	}
+	return nil
+}
+
+// copy copies one meta object to another.
+func (m *meta) copy(dest *meta) {
+	*dest = *m
+}
+
+// write writes the meta onto a page.
+func (m *meta) write(p *page) {
+	if m.root.root >= m.pgid {
+		panic(fmt.Sprintf("root bucket pgid (%d) above high water mark (%d)", m.root.root, m.pgid))
+	} else if m.freelist >= m.pgid && m.freelist != pgidNoFreelist {
+		// TODO: reject pgidNoFreeList if !NoFreelistSync
+		panic(fmt.Sprintf("freelist pgid (%d) above high water mark (%d)", m.freelist, m.pgid))
+	}
+
+	// Page id is either going to be 0 or 1 which we can determine by the transaction ID.
+	p.id = pgid(m.txid % 2)
+	p.flags |= metaPageFlag
+
+	// Calculate the checksum.
+	m.checksum = m.sum64()
+
+	m.copy(p.meta())
+}
+
+// generates the checksum for the meta.
+func (m *meta) sum64() uint64 {
+	var h = fnv.New64a()
+	_, _ = h.Write((*[unsafe.Offsetof(meta{}.checksum)]byte)(unsafe.Pointer(m))[:])
+	return h.Sum64()
+}
+
+// _assert will panic with a given formatted message if the given condition is false.
+func _assert(condition bool, msg string, v ...interface{}) {
+	if !condition {
+		panic(fmt.Sprintf("assertion failed: "+msg, v...))
+	}
+}
diff --git a/vendor/github.com/etcd-io/bbolt/doc.go b/vendor/github.com/etcd-io/bbolt/doc.go
new file mode 100644
index 0000000000..95f25f01c6
--- /dev/null
+++ b/vendor/github.com/etcd-io/bbolt/doc.go
@@ -0,0 +1,44 @@
+/*
+package bbolt implements a low-level key/value store in pure Go. It supports
+fully serializable transactions, ACID semantics, and lock-free MVCC with
+multiple readers and a single writer. Bolt can be used for projects that
+want a simple data store without the need to add large dependencies such as
+Postgres or MySQL.
+
+Bolt is a single-level, zero-copy, B+tree data store. This means that Bolt is
+optimized for fast read access and does not require recovery in the event of a
+system crash. Transactions which have not finished committing will simply be
+rolled back in the event of a crash.
+
+The design of Bolt is based on Howard Chu's LMDB database project.
+
+Bolt currently works on Windows, Mac OS X, and Linux.
+
+
+Basics
+
+There are only a few types in Bolt: DB, Bucket, Tx, and Cursor. The DB is
+a collection of buckets and is represented by a single file on disk. A bucket is
+a collection of unique keys that are associated with values.
+
+Transactions provide either read-only or read-write access to the database.
+Read-only transactions can retrieve key/value pairs and can use Cursors to
+iterate over the dataset sequentially. Read-write transactions can create and
+delete buckets and can insert and remove keys. Only one read-write transaction
+is allowed at a time.
+
+
+Caveats
+
+The database uses a read-only, memory-mapped data file to ensure that
+applications cannot corrupt the database, however, this means that keys and
+values returned from Bolt cannot be changed. Writing to a read-only byte slice
+will cause Go to panic.
+
+Keys and values retrieved from the database are only valid for the life of
+the transaction. When used outside the transaction, these byte slices can
+point to different data or can point to invalid memory which will cause a panic.
+
+
+*/
+package bbolt
diff --git a/vendor/github.com/etcd-io/bbolt/errors.go b/vendor/github.com/etcd-io/bbolt/errors.go
new file mode 100644
index 0000000000..48758ca577
--- /dev/null
+++ b/vendor/github.com/etcd-io/bbolt/errors.go
@@ -0,0 +1,71 @@
+package bbolt
+
+import "errors"
+
+// These errors can be returned when opening or calling methods on a DB.
+var (
+	// ErrDatabaseNotOpen is returned when a DB instance is accessed before it
+	// is opened or after it is closed.
+	ErrDatabaseNotOpen = errors.New("database not open")
+
+	// ErrDatabaseOpen is returned when opening a database that is
+	// already open.
+	ErrDatabaseOpen = errors.New("database already open")
+
+	// ErrInvalid is returned when both meta pages on a database are invalid.
+	// This typically occurs when a file is not a bolt database.
+	ErrInvalid = errors.New("invalid database")
+
+	// ErrVersionMismatch is returned when the data file was created with a
+	// different version of Bolt.
+	ErrVersionMismatch = errors.New("version mismatch")
+
+	// ErrChecksum is returned when either meta page checksum does not match.
+	ErrChecksum = errors.New("checksum error")
+
+	// ErrTimeout is returned when a database cannot obtain an exclusive lock
+	// on the data file after the timeout passed to Open().
+	ErrTimeout = errors.New("timeout")
+)
+
+// These errors can occur when beginning or committing a Tx.
+var (
+	// ErrTxNotWritable is returned when performing a write operation on a
+	// read-only transaction.
+	ErrTxNotWritable = errors.New("tx not writable")
+
+	// ErrTxClosed is returned when committing or rolling back a transaction
+	// that has already been committed or rolled back.
+	ErrTxClosed = errors.New("tx closed")
+
+	// ErrDatabaseReadOnly is returned when a mutating transaction is started on a
+	// read-only database.
+	ErrDatabaseReadOnly = errors.New("database is in read-only mode")
+)
+
+// These errors can occur when putting or deleting a value or a bucket.
+var (
+	// ErrBucketNotFound is returned when trying to access a bucket that has
+	// not been created yet.
+	ErrBucketNotFound = errors.New("bucket not found")
+
+	// ErrBucketExists is returned when creating a bucket that already exists.
+	ErrBucketExists = errors.New("bucket already exists")
+
+	// ErrBucketNameRequired is returned when creating a bucket with a blank name.
+	ErrBucketNameRequired = errors.New("bucket name required")
+
+	// ErrKeyRequired is returned when inserting a zero-length key.
+	ErrKeyRequired = errors.New("key required")
+
+	// ErrKeyTooLarge is returned when inserting a key that is larger than MaxKeySize.
+	ErrKeyTooLarge = errors.New("key too large")
+
+	// ErrValueTooLarge is returned when inserting a value that is larger than MaxValueSize.
+	ErrValueTooLarge = errors.New("value too large")
+
+	// ErrIncompatibleValue is returned when trying create or delete a bucket
+	// on an existing non-bucket key or when trying to create or delete a
+	// non-bucket key on an existing bucket key.
+	ErrIncompatibleValue = errors.New("incompatible value")
+)
diff --git a/vendor/github.com/etcd-io/bbolt/freelist.go b/vendor/github.com/etcd-io/bbolt/freelist.go
new file mode 100644
index 0000000000..93fd85d504
--- /dev/null
+++ b/vendor/github.com/etcd-io/bbolt/freelist.go
@@ -0,0 +1,370 @@
+package bbolt
+
+import (
+	"fmt"
+	"sort"
+	"unsafe"
+)
+
+// txPending holds a list of pgids and corresponding allocation txns
+// that are pending to be freed.
+type txPending struct {
+	ids              []pgid
+	alloctx          []txid // txids allocating the ids
+	lastReleaseBegin txid   // beginning txid of last matching releaseRange
+}
+
+// pidSet holds the set of starting pgids which have the same span size
+type pidSet map[pgid]struct{}
+
+// freelist represents a list of all pages that are available for allocation.
+// It also tracks pages that have been freed but are still in use by open transactions.
+type freelist struct {
+	freelistType   FreelistType                // freelist type
+	ids            []pgid                      // all free and available free page ids.
+	allocs         map[pgid]txid               // mapping of txid that allocated a pgid.
+	pending        map[txid]*txPending         // mapping of soon-to-be free page ids by tx.
+	cache          map[pgid]bool               // fast lookup of all free and pending page ids.
+	freemaps       map[uint64]pidSet           // key is the size of continuous pages(span), value is a set which contains the starting pgids of same size
+	forwardMap     map[pgid]uint64             // key is start pgid, value is its span size
+	backwardMap    map[pgid]uint64             // key is end pgid, value is its span size
+	allocate       func(txid txid, n int) pgid // the freelist allocate func
+	free_count     func() int                  // the function which gives you free page number
+	mergeSpans     func(ids pgids)             // the mergeSpan func
+	getFreePageIDs func() []pgid               // get free pgids func
+	readIDs        func(pgids []pgid)          // readIDs func reads list of pages and init the freelist
+}
+
+// newFreelist returns an empty, initialized freelist.
+func newFreelist(freelistType FreelistType) *freelist {
+	f := &freelist{
+		freelistType: freelistType,
+		allocs:       make(map[pgid]txid),
+		pending:      make(map[txid]*txPending),
+		cache:        make(map[pgid]bool),
+		freemaps:     make(map[uint64]pidSet),
+		forwardMap:   make(map[pgid]uint64),
+		backwardMap:  make(map[pgid]uint64),
+	}
+
+	if freelistType == FreelistMapType {
+		f.allocate = f.hashmapAllocate
+		f.free_count = f.hashmapFreeCount
+		f.mergeSpans = f.hashmapMergeSpans
+		f.getFreePageIDs = f.hashmapGetFreePageIDs
+		f.readIDs = f.hashmapReadIDs
+	} else {
+		f.allocate = f.arrayAllocate
+		f.free_count = f.arrayFreeCount
+		f.mergeSpans = f.arrayMergeSpans
+		f.getFreePageIDs = f.arrayGetFreePageIDs
+		f.readIDs = f.arrayReadIDs
+	}
+
+	return f
+}
+
+// size returns the size of the page after serialization.
+func (f *freelist) size() int {
+	n := f.count()
+	if n >= 0xFFFF {
+		// The first element will be used to store the count. See freelist.write.
+		n++
+	}
+	return pageHeaderSize + (int(unsafe.Sizeof(pgid(0))) * n)
+}
+
+// count returns count of pages on the freelist
+func (f *freelist) count() int {
+	return f.free_count() + f.pending_count()
+}
+
+// arrayFreeCount returns count of free pages(array version)
+func (f *freelist) arrayFreeCount() int {
+	return len(f.ids)
+}
+
+// pending_count returns count of pending pages
+func (f *freelist) pending_count() int {
+	var count int
+	for _, txp := range f.pending {
+		count += len(txp.ids)
+	}
+	return count
+}
+
+// copyall copies into dst a list of all free ids and all pending ids in one sorted list.
+// f.count returns the minimum length required for dst.
+func (f *freelist) copyall(dst []pgid) {
+	m := make(pgids, 0, f.pending_count())
+	for _, txp := range f.pending {
+		m = append(m, txp.ids...)
+	}
+	sort.Sort(m)
+	mergepgids(dst, f.getFreePageIDs(), m)
+}
+
+// arrayAllocate returns the starting page id of a contiguous list of pages of a given size.
+// If a contiguous block cannot be found then 0 is returned.
+func (f *freelist) arrayAllocate(txid txid, n int) pgid {
+	if len(f.ids) == 0 {
+		return 0
+	}
+
+	var initial, previd pgid
+	for i, id := range f.ids {
+		if id <= 1 {
+			panic(fmt.Sprintf("invalid page allocation: %d", id))
+		}
+
+		// Reset initial page if this is not contiguous.
+		if previd == 0 || id-previd != 1 {
+			initial = id
+		}
+
+		// If we found a contiguous block then remove it and return it.
+		if (id-initial)+1 == pgid(n) {
+			// If we're allocating off the beginning then take the fast path
+			// and just adjust the existing slice. This will use extra memory
+			// temporarily but the append() in free() will realloc the slice
+			// as is necessary.
+			if (i + 1) == n {
+				f.ids = f.ids[i+1:]
+			} else {
+				copy(f.ids[i-n+1:], f.ids[i+1:])
+				f.ids = f.ids[:len(f.ids)-n]
+			}
+
+			// Remove from the free cache.
+			for i := pgid(0); i < pgid(n); i++ {
+				delete(f.cache, initial+i)
+			}
+			f.allocs[initial] = txid
+			return initial
+		}
+
+		previd = id
+	}
+	return 0
+}
+
+// free releases a page and its overflow for a given transaction id.
+// If the page is already free then a panic will occur.
+func (f *freelist) free(txid txid, p *page) {
+	if p.id <= 1 {
+		panic(fmt.Sprintf("cannot free page 0 or 1: %d", p.id))
+	}
+
+	// Free page and all its overflow pages.
+	txp := f.pending[txid]
+	if txp == nil {
+		txp = &txPending{}
+		f.pending[txid] = txp
+	}
+	allocTxid, ok := f.allocs[p.id]
+	if ok {
+		delete(f.allocs, p.id)
+	} else if (p.flags & freelistPageFlag) != 0 {
+		// Freelist is always allocated by prior tx.
+		allocTxid = txid - 1
+	}
+
+	for id := p.id; id <= p.id+pgid(p.overflow); id++ {
+		// Verify that page is not already free.
+		if f.cache[id] {
+			panic(fmt.Sprintf("page %d already freed", id))
+		}
+		// Add to the freelist and cache.
+		txp.ids = append(txp.ids, id)
+		txp.alloctx = append(txp.alloctx, allocTxid)
+		f.cache[id] = true
+	}
+}
+
+// release moves all page ids for a transaction id (or older) to the freelist.
+func (f *freelist) release(txid txid) {
+	m := make(pgids, 0)
+	for tid, txp := range f.pending {
+		if tid <= txid {
+			// Move transaction's pending pages to the available freelist.
+			// Don't remove from the cache since the page is still free.
+			m = append(m, txp.ids...)
+			delete(f.pending, tid)
+		}
+	}
+	f.mergeSpans(m)
+}
+
+// releaseRange moves pending pages allocated within an extent [begin,end] to the free list.
+func (f *freelist) releaseRange(begin, end txid) {
+	if begin > end {
+		return
+	}
+	var m pgids
+	for tid, txp := range f.pending {
+		if tid < begin || tid > end {
+			continue
+		}
+		// Don't recompute freed pages if ranges haven't updated.
+		if txp.lastReleaseBegin == begin {
+			continue
+		}
+		for i := 0; i < len(txp.ids); i++ {
+			if atx := txp.alloctx[i]; atx < begin || atx > end {
+				continue
+			}
+			m = append(m, txp.ids[i])
+			txp.ids[i] = txp.ids[len(txp.ids)-1]
+			txp.ids = txp.ids[:len(txp.ids)-1]
+			txp.alloctx[i] = txp.alloctx[len(txp.alloctx)-1]
+			txp.alloctx = txp.alloctx[:len(txp.alloctx)-1]
+			i--
+		}
+		txp.lastReleaseBegin = begin
+		if len(txp.ids) == 0 {
+			delete(f.pending, tid)
+		}
+	}
+	f.mergeSpans(m)
+}
+
+// rollback removes the pages from a given pending tx.
+func (f *freelist) rollback(txid txid) {
+	// Remove page ids from cache.
+	txp := f.pending[txid]
+	if txp == nil {
+		return
+	}
+	var m pgids
+	for i, pgid := range txp.ids {
+		delete(f.cache, pgid)
+		tx := txp.alloctx[i]
+		if tx == 0 {
+			continue
+		}
+		if tx != txid {
+			// Pending free aborted; restore page back to alloc list.
+			f.allocs[pgid] = tx
+		} else {
+			// Freed page was allocated by this txn; OK to throw away.
+			m = append(m, pgid)
+		}
+	}
+	// Remove pages from pending list and mark as free if allocated by txid.
+	delete(f.pending, txid)
+	f.mergeSpans(m)
+}
+
+// freed returns whether a given page is in the free list.
+func (f *freelist) freed(pgid pgid) bool {
+	return f.cache[pgid]
+}
+
+// read initializes the freelist from a freelist page.
+func (f *freelist) read(p *page) {
+	if (p.flags & freelistPageFlag) == 0 {
+		panic(fmt.Sprintf("invalid freelist page: %d, page type is %s", p.id, p.typ()))
+	}
+	// If the page.count is at the max uint16 value (64k) then it's considered
+	// an overflow and the size of the freelist is stored as the first element.
+	idx, count := 0, int(p.count)
+	if count == 0xFFFF {
+		idx = 1
+		count = int(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[0])
+	}
+
+	// Copy the list of page ids from the freelist.
+	if count == 0 {
+		f.ids = nil
+	} else {
+		ids := ((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[idx : idx+count]
+
+		// copy the ids, so we don't modify on the freelist page directly
+		idsCopy := make([]pgid, count)
+		copy(idsCopy, ids)
+		// Make sure they're sorted.
+		sort.Sort(pgids(idsCopy))
+
+		f.readIDs(idsCopy)
+	}
+}
+
+// arrayReadIDs initializes the freelist from a given list of ids.
+func (f *freelist) arrayReadIDs(ids []pgid) {
+	f.ids = ids
+	f.reindex()
+}
+
+func (f *freelist) arrayGetFreePageIDs() []pgid {
+	return f.ids
+}
+
+// write writes the page ids onto a freelist page. All free and pending ids are
+// saved to disk since in the event of a program crash, all pending ids will
+// become free.
+func (f *freelist) write(p *page) error {
+	// Combine the old free pgids and pgids waiting on an open transaction.
+
+	// Update the header flag.
+	p.flags |= freelistPageFlag
+
+	// The page.count can only hold up to 64k elements so if we overflow that
+	// number then we handle it by putting the size in the first element.
+	lenids := f.count()
+	if lenids == 0 {
+		p.count = uint16(lenids)
+	} else if lenids < 0xFFFF {
+		p.count = uint16(lenids)
+		f.copyall(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[:])
+	} else {
+		p.count = 0xFFFF
+		((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[0] = pgid(lenids)
+		f.copyall(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[1:])
+	}
+
+	return nil
+}
+
+// reload reads the freelist from a page and filters out pending items.
+func (f *freelist) reload(p *page) {
+	f.read(p)
+
+	// Build a cache of only pending pages.
+	pcache := make(map[pgid]bool)
+	for _, txp := range f.pending {
+		for _, pendingID := range txp.ids {
+			pcache[pendingID] = true
+		}
+	}
+
+	// Check each page in the freelist and build a new available freelist
+	// with any pages not in the pending lists.
+	var a []pgid
+	for _, id := range f.getFreePageIDs() {
+		if !pcache[id] {
+			a = append(a, id)
+		}
+	}
+
+	f.readIDs(a)
+}
+
+// reindex rebuilds the free cache based on available and pending free lists.
+func (f *freelist) reindex() {
+	ids := f.getFreePageIDs()
+	f.cache = make(map[pgid]bool, len(ids))
+	for _, id := range ids {
+		f.cache[id] = true
+	}
+	for _, txp := range f.pending {
+		for _, pendingID := range txp.ids {
+			f.cache[pendingID] = true
+		}
+	}
+}
+
+// arrayMergeSpans try to merge list of pages(represented by pgids) with existing spans but using array
+func (f *freelist) arrayMergeSpans(ids pgids) {
+	sort.Sort(ids)
+	f.ids = pgids(f.ids).merge(ids)
+}
diff --git a/vendor/github.com/etcd-io/bbolt/freelist_hmap.go b/vendor/github.com/etcd-io/bbolt/freelist_hmap.go
new file mode 100644
index 0000000000..6a03a6c3c8
--- /dev/null
+++ b/vendor/github.com/etcd-io/bbolt/freelist_hmap.go
@@ -0,0 +1,178 @@
+package bbolt
+
+import "sort"
+
+// hashmapFreeCount returns count of free pages(hashmap version)
+func (f *freelist) hashmapFreeCount() int {
+	// use the forwardmap to get the total count
+	count := 0
+	for _, size := range f.forwardMap {
+		count += int(size)
+	}
+	return count
+}
+
+// hashmapAllocate serves the same purpose as arrayAllocate, but use hashmap as backend
+func (f *freelist) hashmapAllocate(txid txid, n int) pgid {
+	if n == 0 {
+		return 0
+	}
+
+	// if we have a exact size match just return short path
+	if bm, ok := f.freemaps[uint64(n)]; ok {
+		for pid := range bm {
+			// remove the span
+			f.delSpan(pid, uint64(n))
+
+			f.allocs[pid] = txid
+
+			for i := pgid(0); i < pgid(n); i++ {
+				delete(f.cache, pid+pgid(i))
+			}
+			return pid
+		}
+	}
+
+	// lookup the map to find larger span
+	for size, bm := range f.freemaps {
+		if size < uint64(n) {
+			continue
+		}
+
+		for pid := range bm {
+			// remove the initial
+			f.delSpan(pid, uint64(size))
+
+			f.allocs[pid] = txid
+
+			remain := size - uint64(n)
+
+			// add remain span
+			f.addSpan(pid+pgid(n), remain)
+
+			for i := pgid(0); i < pgid(n); i++ {
+				delete(f.cache, pid+pgid(i))
+			}
+			return pid
+		}
+	}
+
+	return 0
+}
+
+// hashmapReadIDs reads pgids as input an initial the freelist(hashmap version)
+func (f *freelist) hashmapReadIDs(pgids []pgid) {
+	f.init(pgids)
+
+	// Rebuild the page cache.
+	f.reindex()
+}
+
+// hashmapGetFreePageIDs returns the sorted free page ids
+func (f *freelist) hashmapGetFreePageIDs() []pgid {
+	count := f.free_count()
+	if count == 0 {
+		return nil
+	}
+
+	m := make([]pgid, 0, count)
+	for start, size := range f.forwardMap {
+		for i := 0; i < int(size); i++ {
+			m = append(m, start+pgid(i))
+		}
+	}
+	sort.Sort(pgids(m))
+
+	return m
+}
+
+// hashmapMergeSpans try to merge list of pages(represented by pgids) with existing spans
+func (f *freelist) hashmapMergeSpans(ids pgids) {
+	for _, id := range ids {
+		// try to see if we can merge and update
+		f.mergeWithExistingSpan(id)
+	}
+}
+
+// mergeWithExistingSpan merges pid to the existing free spans, try to merge it backward and forward
+func (f *freelist) mergeWithExistingSpan(pid pgid) {
+	prev := pid - 1
+	next := pid + 1
+
+	preSize, mergeWithPrev := f.backwardMap[prev]
+	nextSize, mergeWithNext := f.forwardMap[next]
+	newStart := pid
+	newSize := uint64(1)
+
+	if mergeWithPrev {
+		//merge with previous span
+		start := prev + 1 - pgid(preSize)
+		f.delSpan(start, preSize)
+
+		newStart -= pgid(preSize)
+		newSize += preSize
+	}
+
+	if mergeWithNext {
+		// merge with next span
+		f.delSpan(next, nextSize)
+		newSize += nextSize
+	}
+
+	f.addSpan(newStart, newSize)
+}
+
+func (f *freelist) addSpan(start pgid, size uint64) {
+	f.backwardMap[start-1+pgid(size)] = size
+	f.forwardMap[start] = size
+	if _, ok := f.freemaps[size]; !ok {
+		f.freemaps[size] = make(map[pgid]struct{})
+	}
+
+	f.freemaps[size][start] = struct{}{}
+}
+
+func (f *freelist) delSpan(start pgid, size uint64) {
+	delete(f.forwardMap, start)
+	delete(f.backwardMap, start+pgid(size-1))
+	delete(f.freemaps[size], start)
+	if len(f.freemaps[size]) == 0 {
+		delete(f.freemaps, size)
+	}
+}
+
+// initial from pgids using when use hashmap version
+// pgids must be sorted
+func (f *freelist) init(pgids []pgid) {
+	if len(pgids) == 0 {
+		return
+	}
+
+	size := uint64(1)
+	start := pgids[0]
+
+	if !sort.SliceIsSorted([]pgid(pgids), func(i, j int) bool { return pgids[i] < pgids[j] }) {
+		panic("pgids not sorted")
+	}
+
+	f.freemaps = make(map[uint64]pidSet)
+	f.forwardMap = make(map[pgid]uint64)
+	f.backwardMap = make(map[pgid]uint64)
+
+	for i := 1; i < len(pgids); i++ {
+		// continuous page
+		if pgids[i] == pgids[i-1]+1 {
+			size++
+		} else {
+			f.addSpan(start, size)
+
+			size = 1
+			start = pgids[i]
+		}
+	}
+
+	// init the tail
+	if size != 0 && start != 0 {
+		f.addSpan(start, size)
+	}
+}
diff --git a/vendor/github.com/etcd-io/bbolt/node.go b/vendor/github.com/etcd-io/bbolt/node.go
new file mode 100644
index 0000000000..6c3fa553ea
--- /dev/null
+++ b/vendor/github.com/etcd-io/bbolt/node.go
@@ -0,0 +1,604 @@
+package bbolt
+
+import (
+	"bytes"
+	"fmt"
+	"sort"
+	"unsafe"
+)
+
+// node represents an in-memory, deserialized page.
+type node struct {
+	bucket     *Bucket
+	isLeaf     bool
+	unbalanced bool
+	spilled    bool
+	key        []byte
+	pgid       pgid
+	parent     *node
+	children   nodes
+	inodes     inodes
+}
+
+// root returns the top-level node this node is attached to.
+func (n *node) root() *node {
+	if n.parent == nil {
+		return n
+	}
+	return n.parent.root()
+}
+
+// minKeys returns the minimum number of inodes this node should have.
+func (n *node) minKeys() int {
+	if n.isLeaf {
+		return 1
+	}
+	return 2
+}
+
+// size returns the size of the node after serialization.
+func (n *node) size() int {
+	sz, elsz := pageHeaderSize, n.pageElementSize()
+	for i := 0; i < len(n.inodes); i++ {
+		item := &n.inodes[i]
+		sz += elsz + len(item.key) + len(item.value)
+	}
+	return sz
+}
+
+// sizeLessThan returns true if the node is less than a given size.
+// This is an optimization to avoid calculating a large node when we only need
+// to know if it fits inside a certain page size.
+func (n *node) sizeLessThan(v int) bool {
+	sz, elsz := pageHeaderSize, n.pageElementSize()
+	for i := 0; i < len(n.inodes); i++ {
+		item := &n.inodes[i]
+		sz += elsz + len(item.key) + len(item.value)
+		if sz >= v {
+			return false
+		}
+	}
+	return true
+}
+
+// pageElementSize returns the size of each page element based on the type of node.
+func (n *node) pageElementSize() int {
+	if n.isLeaf {
+		return leafPageElementSize
+	}
+	return branchPageElementSize
+}
+
+// childAt returns the child node at a given index.
+func (n *node) childAt(index int) *node {
+	if n.isLeaf {
+		panic(fmt.Sprintf("invalid childAt(%d) on a leaf node", index))
+	}
+	return n.bucket.node(n.inodes[index].pgid, n)
+}
+
+// childIndex returns the index of a given child node.
+func (n *node) childIndex(child *node) int {
+	index := sort.Search(len(n.inodes), func(i int) bool { return bytes.Compare(n.inodes[i].key, child.key) != -1 })
+	return index
+}
+
+// numChildren returns the number of children.
+func (n *node) numChildren() int {
+	return len(n.inodes)
+}
+
+// nextSibling returns the next node with the same parent.
+func (n *node) nextSibling() *node {
+	if n.parent == nil {
+		return nil
+	}
+	index := n.parent.childIndex(n)
+	if index >= n.parent.numChildren()-1 {
+		return nil
+	}
+	return n.parent.childAt(index + 1)
+}
+
+// prevSibling returns the previous node with the same parent.
+func (n *node) prevSibling() *node {
+	if n.parent == nil {
+		return nil
+	}
+	index := n.parent.childIndex(n)
+	if index == 0 {
+		return nil
+	}
+	return n.parent.childAt(index - 1)
+}
+
+// put inserts a key/value.
+func (n *node) put(oldKey, newKey, value []byte, pgid pgid, flags uint32) {
+	if pgid >= n.bucket.tx.meta.pgid {
+		panic(fmt.Sprintf("pgid (%d) above high water mark (%d)", pgid, n.bucket.tx.meta.pgid))
+	} else if len(oldKey) <= 0 {
+		panic("put: zero-length old key")
+	} else if len(newKey) <= 0 {
+		panic("put: zero-length new key")
+	}
+
+	// Find insertion index.
+	index := sort.Search(len(n.inodes), func(i int) bool { return bytes.Compare(n.inodes[i].key, oldKey) != -1 })
+
+	// Add capacity and shift nodes if we don't have an exact match and need to insert.
+	exact := (len(n.inodes) > 0 && index < len(n.inodes) && bytes.Equal(n.inodes[index].key, oldKey))
+	if !exact {
+		n.inodes = append(n.inodes, inode{})
+		copy(n.inodes[index+1:], n.inodes[index:])
+	}
+
+	inode := &n.inodes[index]
+	inode.flags = flags
+	inode.key = newKey
+	inode.value = value
+	inode.pgid = pgid
+	_assert(len(inode.key) > 0, "put: zero-length inode key")
+}
+
+// del removes a key from the node.
+func (n *node) del(key []byte) {
+	// Find index of key.
+	index := sort.Search(len(n.inodes), func(i int) bool { return bytes.Compare(n.inodes[i].key, key) != -1 })
+
+	// Exit if the key isn't found.
+	if index >= len(n.inodes) || !bytes.Equal(n.inodes[index].key, key) {
+		return
+	}
+
+	// Delete inode from the node.
+	n.inodes = append(n.inodes[:index], n.inodes[index+1:]...)
+
+	// Mark the node as needing rebalancing.
+	n.unbalanced = true
+}
+
+// read initializes the node from a page.
+func (n *node) read(p *page) {
+	n.pgid = p.id
+	n.isLeaf = ((p.flags & leafPageFlag) != 0)
+	n.inodes = make(inodes, int(p.count))
+
+	for i := 0; i < int(p.count); i++ {
+		inode := &n.inodes[i]
+		if n.isLeaf {
+			elem := p.leafPageElement(uint16(i))
+			inode.flags = elem.flags
+			inode.key = elem.key()
+			inode.value = elem.value()
+		} else {
+			elem := p.branchPageElement(uint16(i))
+			inode.pgid = elem.pgid
+			inode.key = elem.key()
+		}
+		_assert(len(inode.key) > 0, "read: zero-length inode key")
+	}
+
+	// Save first key so we can find the node in the parent when we spill.
+	if len(n.inodes) > 0 {
+		n.key = n.inodes[0].key
+		_assert(len(n.key) > 0, "read: zero-length node key")
+	} else {
+		n.key = nil
+	}
+}
+
+// write writes the items onto one or more pages.
+func (n *node) write(p *page) {
+	// Initialize page.
+	if n.isLeaf {
+		p.flags |= leafPageFlag
+	} else {
+		p.flags |= branchPageFlag
+	}
+
+	if len(n.inodes) >= 0xFFFF {
+		panic(fmt.Sprintf("inode overflow: %d (pgid=%d)", len(n.inodes), p.id))
+	}
+	p.count = uint16(len(n.inodes))
+
+	// Stop here if there are no items to write.
+	if p.count == 0 {
+		return
+	}
+
+	// Loop over each item and write it to the page.
+	b := (*[maxAllocSize]byte)(unsafe.Pointer(&p.ptr))[n.pageElementSize()*len(n.inodes):]
+	for i, item := range n.inodes {
+		_assert(len(item.key) > 0, "write: zero-length inode key")
+
+		// Write the page element.
+		if n.isLeaf {
+			elem := p.leafPageElement(uint16(i))
+			elem.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem)))
+			elem.flags = item.flags
+			elem.ksize = uint32(len(item.key))
+			elem.vsize = uint32(len(item.value))
+		} else {
+			elem := p.branchPageElement(uint16(i))
+			elem.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem)))
+			elem.ksize = uint32(len(item.key))
+			elem.pgid = item.pgid
+			_assert(elem.pgid != p.id, "write: circular dependency occurred")
+		}
+
+		// If the length of key+value is larger than the max allocation size
+		// then we need to reallocate the byte array pointer.
+		//
+		// See: https://github.com/boltdb/bolt/pull/335
+		klen, vlen := len(item.key), len(item.value)
+		if len(b) < klen+vlen {
+			b = (*[maxAllocSize]byte)(unsafe.Pointer(&b[0]))[:]
+		}
+
+		// Write data for the element to the end of the page.
+		copy(b[0:], item.key)
+		b = b[klen:]
+		copy(b[0:], item.value)
+		b = b[vlen:]
+	}
+
+	// DEBUG ONLY: n.dump()
+}
+
+// split breaks up a node into multiple smaller nodes, if appropriate.
+// This should only be called from the spill() function.
+func (n *node) split(pageSize int) []*node {
+	var nodes []*node
+
+	node := n
+	for {
+		// Split node into two.
+		a, b := node.splitTwo(pageSize)
+		nodes = append(nodes, a)
+
+		// If we can't split then exit the loop.
+		if b == nil {
+			break
+		}
+
+		// Set node to b so it gets split on the next iteration.
+		node = b
+	}
+
+	return nodes
+}
+
+// splitTwo breaks up a node into two smaller nodes, if appropriate.
+// This should only be called from the split() function.
+func (n *node) splitTwo(pageSize int) (*node, *node) {
+	// Ignore the split if the page doesn't have at least enough nodes for
+	// two pages or if the nodes can fit in a single page.
+	if len(n.inodes) <= (minKeysPerPage*2) || n.sizeLessThan(pageSize) {
+		return n, nil
+	}
+
+	// Determine the threshold before starting a new node.
+	var fillPercent = n.bucket.FillPercent
+	if fillPercent < minFillPercent {
+		fillPercent = minFillPercent
+	} else if fillPercent > maxFillPercent {
+		fillPercent = maxFillPercent
+	}
+	threshold := int(float64(pageSize) * fillPercent)
+
+	// Determine split position and sizes of the two pages.
+	splitIndex, _ := n.splitIndex(threshold)
+
+	// Split node into two separate nodes.
+	// If there's no parent then we'll need to create one.
+	if n.parent == nil {
+		n.parent = &node{bucket: n.bucket, children: []*node{n}}
+	}
+
+	// Create a new node and add it to the parent.
+	next := &node{bucket: n.bucket, isLeaf: n.isLeaf, parent: n.parent}
+	n.parent.children = append(n.parent.children, next)
+
+	// Split inodes across two nodes.
+	next.inodes = n.inodes[splitIndex:]
+	n.inodes = n.inodes[:splitIndex]
+
+	// Update the statistics.
+	n.bucket.tx.stats.Split++
+
+	return n, next
+}
+
+// splitIndex finds the position where a page will fill a given threshold.
+// It returns the index as well as the size of the first page.
+// This is only be called from split().
+func (n *node) splitIndex(threshold int) (index, sz int) {
+	sz = pageHeaderSize
+
+	// Loop until we only have the minimum number of keys required for the second page.
+	for i := 0; i < len(n.inodes)-minKeysPerPage; i++ {
+		index = i
+		inode := n.inodes[i]
+		elsize := n.pageElementSize() + len(inode.key) + len(inode.value)
+
+		// If we have at least the minimum number of keys and adding another
+		// node would put us over the threshold then exit and return.
+		if i >= minKeysPerPage && sz+elsize > threshold {
+			break
+		}
+
+		// Add the element size to the total size.
+		sz += elsize
+	}
+
+	return
+}
+
+// spill writes the nodes to dirty pages and splits nodes as it goes.
+// Returns an error if dirty pages cannot be allocated.
+func (n *node) spill() error {
+	var tx = n.bucket.tx
+	if n.spilled {
+		return nil
+	}
+
+	// Spill child nodes first. Child nodes can materialize sibling nodes in
+	// the case of split-merge so we cannot use a range loop. We have to check
+	// the children size on every loop iteration.
+	sort.Sort(n.children)
+	for i := 0; i < len(n.children); i++ {
+		if err := n.children[i].spill(); err != nil {
+			return err
+		}
+	}
+
+	// We no longer need the child list because it's only used for spill tracking.
+	n.children = nil
+
+	// Split nodes into appropriate sizes. The first node will always be n.
+	var nodes = n.split(tx.db.pageSize)
+	for _, node := range nodes {
+		// Add node's page to the freelist if it's not new.
+		if node.pgid > 0 {
+			tx.db.freelist.free(tx.meta.txid, tx.page(node.pgid))
+			node.pgid = 0
+		}
+
+		// Allocate contiguous space for the node.
+		p, err := tx.allocate((node.size() + tx.db.pageSize - 1) / tx.db.pageSize)
+		if err != nil {
+			return err
+		}
+
+		// Write the node.
+		if p.id >= tx.meta.pgid {
+			panic(fmt.Sprintf("pgid (%d) above high water mark (%d)", p.id, tx.meta.pgid))
+		}
+		node.pgid = p.id
+		node.write(p)
+		node.spilled = true
+
+		// Insert into parent inodes.
+		if node.parent != nil {
+			var key = node.key
+			if key == nil {
+				key = node.inodes[0].key
+			}
+
+			node.parent.put(key, node.inodes[0].key, nil, node.pgid, 0)
+			node.key = node.inodes[0].key
+			_assert(len(node.key) > 0, "spill: zero-length node key")
+		}
+
+		// Update the statistics.
+		tx.stats.Spill++
+	}
+
+	// If the root node split and created a new root then we need to spill that
+	// as well. We'll clear out the children to make sure it doesn't try to respill.
+	if n.parent != nil && n.parent.pgid == 0 {
+		n.children = nil
+		return n.parent.spill()
+	}
+
+	return nil
+}
+
+// rebalance attempts to combine the node with sibling nodes if the node fill
+// size is below a threshold or if there are not enough keys.
+func (n *node) rebalance() {
+	if !n.unbalanced {
+		return
+	}
+	n.unbalanced = false
+
+	// Update statistics.
+	n.bucket.tx.stats.Rebalance++
+
+	// Ignore if node is above threshold (25%) and has enough keys.
+	var threshold = n.bucket.tx.db.pageSize / 4
+	if n.size() > threshold && len(n.inodes) > n.minKeys() {
+		return
+	}
+
+	// Root node has special handling.
+	if n.parent == nil {
+		// If root node is a branch and only has one node then collapse it.
+		if !n.isLeaf && len(n.inodes) == 1 {
+			// Move root's child up.
+			child := n.bucket.node(n.inodes[0].pgid, n)
+			n.isLeaf = child.isLeaf
+			n.inodes = child.inodes[:]
+			n.children = child.children
+
+			// Reparent all child nodes being moved.
+			for _, inode := range n.inodes {
+				if child, ok := n.bucket.nodes[inode.pgid]; ok {
+					child.parent = n
+				}
+			}
+
+			// Remove old child.
+			child.parent = nil
+			delete(n.bucket.nodes, child.pgid)
+			child.free()
+		}
+
+		return
+	}
+
+	// If node has no keys then just remove it.
+	if n.numChildren() == 0 {
+		n.parent.del(n.key)
+		n.parent.removeChild(n)
+		delete(n.bucket.nodes, n.pgid)
+		n.free()
+		n.parent.rebalance()
+		return
+	}
+
+	_assert(n.parent.numChildren() > 1, "parent must have at least 2 children")
+
+	// Destination node is right sibling if idx == 0, otherwise left sibling.
+	var target *node
+	var useNextSibling = (n.parent.childIndex(n) == 0)
+	if useNextSibling {
+		target = n.nextSibling()
+	} else {
+		target = n.prevSibling()
+	}
+
+	// If both this node and the target node are too small then merge them.
+	if useNextSibling {
+		// Reparent all child nodes being moved.
+		for _, inode := range target.inodes {
+			if child, ok := n.bucket.nodes[inode.pgid]; ok {
+				child.parent.removeChild(child)
+				child.parent = n
+				child.parent.children = append(child.parent.children, child)
+			}
+		}
+
+		// Copy over inodes from target and remove target.
+		n.inodes = append(n.inodes, target.inodes...)
+		n.parent.del(target.key)
+		n.parent.removeChild(target)
+		delete(n.bucket.nodes, target.pgid)
+		target.free()
+	} else {
+		// Reparent all child nodes being moved.
+		for _, inode := range n.inodes {
+			if child, ok := n.bucket.nodes[inode.pgid]; ok {
+				child.parent.removeChild(child)
+				child.parent = target
+				child.parent.children = append(child.parent.children, child)
+			}
+		}
+
+		// Copy over inodes to target and remove node.
+		target.inodes = append(target.inodes, n.inodes...)
+		n.parent.del(n.key)
+		n.parent.removeChild(n)
+		delete(n.bucket.nodes, n.pgid)
+		n.free()
+	}
+
+	// Either this node or the target node was deleted from the parent so rebalance it.
+	n.parent.rebalance()
+}
+
+// removes a node from the list of in-memory children.
+// This does not affect the inodes.
+func (n *node) removeChild(target *node) {
+	for i, child := range n.children {
+		if child == target {
+			n.children = append(n.children[:i], n.children[i+1:]...)
+			return
+		}
+	}
+}
+
+// dereference causes the node to copy all its inode key/value references to heap memory.
+// This is required when the mmap is reallocated so inodes are not pointing to stale data.
+func (n *node) dereference() {
+	if n.key != nil {
+		key := make([]byte, len(n.key))
+		copy(key, n.key)
+		n.key = key
+		_assert(n.pgid == 0 || len(n.key) > 0, "dereference: zero-length node key on existing node")
+	}
+
+	for i := range n.inodes {
+		inode := &n.inodes[i]
+
+		key := make([]byte, len(inode.key))
+		copy(key, inode.key)
+		inode.key = key
+		_assert(len(inode.key) > 0, "dereference: zero-length inode key")
+
+		value := make([]byte, len(inode.value))
+		copy(value, inode.value)
+		inode.value = value
+	}
+
+	// Recursively dereference children.
+	for _, child := range n.children {
+		child.dereference()
+	}
+
+	// Update statistics.
+	n.bucket.tx.stats.NodeDeref++
+}
+
+// free adds the node's underlying page to the freelist.
+func (n *node) free() {
+	if n.pgid != 0 {
+		n.bucket.tx.db.freelist.free(n.bucket.tx.meta.txid, n.bucket.tx.page(n.pgid))
+		n.pgid = 0
+	}
+}
+
+// dump writes the contents of the node to STDERR for debugging purposes.
+/*
+func (n *node) dump() {
+	// Write node header.
+	var typ = "branch"
+	if n.isLeaf {
+		typ = "leaf"
+	}
+	warnf("[NODE %d {type=%s count=%d}]", n.pgid, typ, len(n.inodes))
+
+	// Write out abbreviated version of each item.
+	for _, item := range n.inodes {
+		if n.isLeaf {
+			if item.flags&bucketLeafFlag != 0 {
+				bucket := (*bucket)(unsafe.Pointer(&item.value[0]))
+				warnf("+L %08x -> (bucket root=%d)", trunc(item.key, 4), bucket.root)
+			} else {
+				warnf("+L %08x -> %08x", trunc(item.key, 4), trunc(item.value, 4))
+			}
+		} else {
+			warnf("+B %08x -> pgid=%d", trunc(item.key, 4), item.pgid)
+		}
+	}
+	warn("")
+}
+*/
+
+type nodes []*node
+
+func (s nodes) Len() int           { return len(s) }
+func (s nodes) Swap(i, j int)      { s[i], s[j] = s[j], s[i] }
+func (s nodes) Less(i, j int) bool { return bytes.Compare(s[i].inodes[0].key, s[j].inodes[0].key) == -1 }
+
+// inode represents an internal node inside of a node.
+// It can be used to point to elements in a page or point
+// to an element which hasn't been added to a page yet.
+type inode struct {
+	flags uint32
+	pgid  pgid
+	key   []byte
+	value []byte
+}
+
+type inodes []inode
diff --git a/vendor/github.com/etcd-io/bbolt/page.go b/vendor/github.com/etcd-io/bbolt/page.go
new file mode 100644
index 0000000000..bca9615f0f
--- /dev/null
+++ b/vendor/github.com/etcd-io/bbolt/page.go
@@ -0,0 +1,197 @@
+package bbolt
+
+import (
+	"fmt"
+	"os"
+	"sort"
+	"unsafe"
+)
+
+const pageHeaderSize = int(unsafe.Offsetof(((*page)(nil)).ptr))
+
+const minKeysPerPage = 2
+
+const branchPageElementSize = int(unsafe.Sizeof(branchPageElement{}))
+const leafPageElementSize = int(unsafe.Sizeof(leafPageElement{}))
+
+const (
+	branchPageFlag   = 0x01
+	leafPageFlag     = 0x02
+	metaPageFlag     = 0x04
+	freelistPageFlag = 0x10
+)
+
+const (
+	bucketLeafFlag = 0x01
+)
+
+type pgid uint64
+
+type page struct {
+	id       pgid
+	flags    uint16
+	count    uint16
+	overflow uint32
+	ptr      uintptr
+}
+
+// typ returns a human readable page type string used for debugging.
+func (p *page) typ() string {
+	if (p.flags & branchPageFlag) != 0 {
+		return "branch"
+	} else if (p.flags & leafPageFlag) != 0 {
+		return "leaf"
+	} else if (p.flags & metaPageFlag) != 0 {
+		return "meta"
+	} else if (p.flags & freelistPageFlag) != 0 {
+		return "freelist"
+	}
+	return fmt.Sprintf("unknown<%02x>", p.flags)
+}
+
+// meta returns a pointer to the metadata section of the page.
+func (p *page) meta() *meta {
+	return (*meta)(unsafe.Pointer(&p.ptr))
+}
+
+// leafPageElement retrieves the leaf node by index
+func (p *page) leafPageElement(index uint16) *leafPageElement {
+	n := &((*[0x7FFFFFF]leafPageElement)(unsafe.Pointer(&p.ptr)))[index]
+	return n
+}
+
+// leafPageElements retrieves a list of leaf nodes.
+func (p *page) leafPageElements() []leafPageElement {
+	if p.count == 0 {
+		return nil
+	}
+	return ((*[0x7FFFFFF]leafPageElement)(unsafe.Pointer(&p.ptr)))[:]
+}
+
+// branchPageElement retrieves the branch node by index
+func (p *page) branchPageElement(index uint16) *branchPageElement {
+	return &((*[0x7FFFFFF]branchPageElement)(unsafe.Pointer(&p.ptr)))[index]
+}
+
+// branchPageElements retrieves a list of branch nodes.
+func (p *page) branchPageElements() []branchPageElement {
+	if p.count == 0 {
+		return nil
+	}
+	return ((*[0x7FFFFFF]branchPageElement)(unsafe.Pointer(&p.ptr)))[:]
+}
+
+// dump writes n bytes of the page to STDERR as hex output.
+func (p *page) hexdump(n int) {
+	buf := (*[maxAllocSize]byte)(unsafe.Pointer(p))[:n]
+	fmt.Fprintf(os.Stderr, "%x\n", buf)
+}
+
+type pages []*page
+
+func (s pages) Len() int           { return len(s) }
+func (s pages) Swap(i, j int)      { s[i], s[j] = s[j], s[i] }
+func (s pages) Less(i, j int) bool { return s[i].id < s[j].id }
+
+// branchPageElement represents a node on a branch page.
+type branchPageElement struct {
+	pos   uint32
+	ksize uint32
+	pgid  pgid
+}
+
+// key returns a byte slice of the node key.
+func (n *branchPageElement) key() []byte {
+	buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
+	return (*[maxAllocSize]byte)(unsafe.Pointer(&buf[n.pos]))[:n.ksize]
+}
+
+// leafPageElement represents a node on a leaf page.
+type leafPageElement struct {
+	flags uint32
+	pos   uint32
+	ksize uint32
+	vsize uint32
+}
+
+// key returns a byte slice of the node key.
+func (n *leafPageElement) key() []byte {
+	buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
+	return (*[maxAllocSize]byte)(unsafe.Pointer(&buf[n.pos]))[:n.ksize:n.ksize]
+}
+
+// value returns a byte slice of the node value.
+func (n *leafPageElement) value() []byte {
+	buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
+	return (*[maxAllocSize]byte)(unsafe.Pointer(&buf[n.pos+n.ksize]))[:n.vsize:n.vsize]
+}
+
+// PageInfo represents human readable information about a page.
+type PageInfo struct {
+	ID            int
+	Type          string
+	Count         int
+	OverflowCount int
+}
+
+type pgids []pgid
+
+func (s pgids) Len() int           { return len(s) }
+func (s pgids) Swap(i, j int)      { s[i], s[j] = s[j], s[i] }
+func (s pgids) Less(i, j int) bool { return s[i] < s[j] }
+
+// merge returns the sorted union of a and b.
+func (a pgids) merge(b pgids) pgids {
+	// Return the opposite slice if one is nil.
+	if len(a) == 0 {
+		return b
+	}
+	if len(b) == 0 {
+		return a
+	}
+	merged := make(pgids, len(a)+len(b))
+	mergepgids(merged, a, b)
+	return merged
+}
+
+// mergepgids copies the sorted union of a and b into dst.
+// If dst is too small, it panics.
+func mergepgids(dst, a, b pgids) {
+	if len(dst) < len(a)+len(b) {
+		panic(fmt.Errorf("mergepgids bad len %d < %d + %d", len(dst), len(a), len(b)))
+	}
+	// Copy in the opposite slice if one is nil.
+	if len(a) == 0 {
+		copy(dst, b)
+		return
+	}
+	if len(b) == 0 {
+		copy(dst, a)
+		return
+	}
+
+	// Merged will hold all elements from both lists.
+	merged := dst[:0]
+
+	// Assign lead to the slice with a lower starting value, follow to the higher value.
+	lead, follow := a, b
+	if b[0] < a[0] {
+		lead, follow = b, a
+	}
+
+	// Continue while there are elements in the lead.
+	for len(lead) > 0 {
+		// Merge largest prefix of lead that is ahead of follow[0].
+		n := sort.Search(len(lead), func(i int) bool { return lead[i] > follow[0] })
+		merged = append(merged, lead[:n]...)
+		if n >= len(lead) {
+			break
+		}
+
+		// Swap lead and follow.
+		lead, follow = follow, lead[n:]
+	}
+
+	// Append what's left in follow.
+	_ = append(merged, follow...)
+}
diff --git a/vendor/github.com/etcd-io/bbolt/tx.go b/vendor/github.com/etcd-io/bbolt/tx.go
new file mode 100644
index 0000000000..f508641427
--- /dev/null
+++ b/vendor/github.com/etcd-io/bbolt/tx.go
@@ -0,0 +1,707 @@
+package bbolt
+
+import (
+	"fmt"
+	"io"
+	"os"
+	"sort"
+	"strings"
+	"time"
+	"unsafe"
+)
+
+// txid represents the internal transaction identifier.
+type txid uint64
+
+// Tx represents a read-only or read/write transaction on the database.
+// Read-only transactions can be used for retrieving values for keys and creating cursors.
+// Read/write transactions can create and remove buckets and create and remove keys.
+//
+// IMPORTANT: You must commit or rollback transactions when you are done with
+// them. Pages can not be reclaimed by the writer until no more transactions
+// are using them. A long running read transaction can cause the database to
+// quickly grow.
+type Tx struct {
+	writable       bool
+	managed        bool
+	db             *DB
+	meta           *meta
+	root           Bucket
+	pages          map[pgid]*page
+	stats          TxStats
+	commitHandlers []func()
+
+	// WriteFlag specifies the flag for write-related methods like WriteTo().
+	// Tx opens the database file with the specified flag to copy the data.
+	//
+	// By default, the flag is unset, which works well for mostly in-memory
+	// workloads. For databases that are much larger than available RAM,
+	// set the flag to syscall.O_DIRECT to avoid trashing the page cache.
+	WriteFlag int
+}
+
+// init initializes the transaction.
+func (tx *Tx) init(db *DB) {
+	tx.db = db
+	tx.pages = nil
+
+	// Copy the meta page since it can be changed by the writer.
+	tx.meta = &meta{}
+	db.meta().copy(tx.meta)
+
+	// Copy over the root bucket.
+	tx.root = newBucket(tx)
+	tx.root.bucket = &bucket{}
+	*tx.root.bucket = tx.meta.root
+
+	// Increment the transaction id and add a page cache for writable transactions.
+	if tx.writable {
+		tx.pages = make(map[pgid]*page)
+		tx.meta.txid += txid(1)
+	}
+}
+
+// ID returns the transaction id.
+func (tx *Tx) ID() int {
+	return int(tx.meta.txid)
+}
+
+// DB returns a reference to the database that created the transaction.
+func (tx *Tx) DB() *DB {
+	return tx.db
+}
+
+// Size returns current database size in bytes as seen by this transaction.
+func (tx *Tx) Size() int64 {
+	return int64(tx.meta.pgid) * int64(tx.db.pageSize)
+}
+
+// Writable returns whether the transaction can perform write operations.
+func (tx *Tx) Writable() bool {
+	return tx.writable
+}
+
+// Cursor creates a cursor associated with the root bucket.
+// All items in the cursor will return a nil value because all root bucket keys point to buckets.
+// The cursor is only valid as long as the transaction is open.
+// Do not use a cursor after the transaction is closed.
+func (tx *Tx) Cursor() *Cursor {
+	return tx.root.Cursor()
+}
+
+// Stats retrieves a copy of the current transaction statistics.
+func (tx *Tx) Stats() TxStats {
+	return tx.stats
+}
+
+// Bucket retrieves a bucket by name.
+// Returns nil if the bucket does not exist.
+// The bucket instance is only valid for the lifetime of the transaction.
+func (tx *Tx) Bucket(name []byte) *Bucket {
+	return tx.root.Bucket(name)
+}
+
+// CreateBucket creates a new bucket.
+// Returns an error if the bucket already exists, if the bucket name is blank, or if the bucket name is too long.
+// The bucket instance is only valid for the lifetime of the transaction.
+func (tx *Tx) CreateBucket(name []byte) (*Bucket, error) {
+	return tx.root.CreateBucket(name)
+}
+
+// CreateBucketIfNotExists creates a new bucket if it doesn't already exist.
+// Returns an error if the bucket name is blank, or if the bucket name is too long.
+// The bucket instance is only valid for the lifetime of the transaction.
+func (tx *Tx) CreateBucketIfNotExists(name []byte) (*Bucket, error) {
+	return tx.root.CreateBucketIfNotExists(name)
+}
+
+// DeleteBucket deletes a bucket.
+// Returns an error if the bucket cannot be found or if the key represents a non-bucket value.
+func (tx *Tx) DeleteBucket(name []byte) error {
+	return tx.root.DeleteBucket(name)
+}
+
+// ForEach executes a function for each bucket in the root.
+// If the provided function returns an error then the iteration is stopped and
+// the error is returned to the caller.
+func (tx *Tx) ForEach(fn func(name []byte, b *Bucket) error) error {
+	return tx.root.ForEach(func(k, v []byte) error {
+		return fn(k, tx.root.Bucket(k))
+	})
+}
+
+// OnCommit adds a handler function to be executed after the transaction successfully commits.
+func (tx *Tx) OnCommit(fn func()) {
+	tx.commitHandlers = append(tx.commitHandlers, fn)
+}
+
+// Commit writes all changes to disk and updates the meta page.
+// Returns an error if a disk write error occurs, or if Commit is
+// called on a read-only transaction.
+func (tx *Tx) Commit() error {
+	_assert(!tx.managed, "managed tx commit not allowed")
+	if tx.db == nil {
+		return ErrTxClosed
+	} else if !tx.writable {
+		return ErrTxNotWritable
+	}
+
+	// TODO(benbjohnson): Use vectorized I/O to write out dirty pages.
+
+	// Rebalance nodes which have had deletions.
+	var startTime = time.Now()
+	tx.root.rebalance()
+	if tx.stats.Rebalance > 0 {
+		tx.stats.RebalanceTime += time.Since(startTime)
+	}
+
+	// spill data onto dirty pages.
+	startTime = time.Now()
+	if err := tx.root.spill(); err != nil {
+		tx.rollback()
+		return err
+	}
+	tx.stats.SpillTime += time.Since(startTime)
+
+	// Free the old root bucket.
+	tx.meta.root.root = tx.root.root
+
+	// Free the old freelist because commit writes out a fresh freelist.
+	if tx.meta.freelist != pgidNoFreelist {
+		tx.db.freelist.free(tx.meta.txid, tx.db.page(tx.meta.freelist))
+	}
+
+	if !tx.db.NoFreelistSync {
+		err := tx.commitFreelist()
+		if err != nil {
+			return err
+		}
+	} else {
+		tx.meta.freelist = pgidNoFreelist
+	}
+
+	// Write dirty pages to disk.
+	startTime = time.Now()
+	if err := tx.write(); err != nil {
+		tx.rollback()
+		return err
+	}
+
+	// If strict mode is enabled then perform a consistency check.
+	// Only the first consistency error is reported in the panic.
+	if tx.db.StrictMode {
+		ch := tx.Check()
+		var errs []string
+		for {
+			err, ok := <-ch
+			if !ok {
+				break
+			}
+			errs = append(errs, err.Error())
+		}
+		if len(errs) > 0 {
+			panic("check fail: " + strings.Join(errs, "\n"))
+		}
+	}
+
+	// Write meta to disk.
+	if err := tx.writeMeta(); err != nil {
+		tx.rollback()
+		return err
+	}
+	tx.stats.WriteTime += time.Since(startTime)
+
+	// Finalize the transaction.
+	tx.close()
+
+	// Execute commit handlers now that the locks have been removed.
+	for _, fn := range tx.commitHandlers {
+		fn()
+	}
+
+	return nil
+}
+
+func (tx *Tx) commitFreelist() error {
+	// Allocate new pages for the new free list. This will overestimate
+	// the size of the freelist but not underestimate the size (which would be bad).
+	opgid := tx.meta.pgid
+	p, err := tx.allocate((tx.db.freelist.size() / tx.db.pageSize) + 1)
+	if err != nil {
+		tx.rollback()
+		return err
+	}
+	if err := tx.db.freelist.write(p); err != nil {
+		tx.rollback()
+		return err
+	}
+	tx.meta.freelist = p.id
+	// If the high water mark has moved up then attempt to grow the database.
+	if tx.meta.pgid > opgid {
+		if err := tx.db.grow(int(tx.meta.pgid+1) * tx.db.pageSize); err != nil {
+			tx.rollback()
+			return err
+		}
+	}
+
+	return nil
+}
+
+// Rollback closes the transaction and ignores all previous updates. Read-only
+// transactions must be rolled back and not committed.
+func (tx *Tx) Rollback() error {
+	_assert(!tx.managed, "managed tx rollback not allowed")
+	if tx.db == nil {
+		return ErrTxClosed
+	}
+	tx.rollback()
+	return nil
+}
+
+func (tx *Tx) rollback() {
+	if tx.db == nil {
+		return
+	}
+	if tx.writable {
+		tx.db.freelist.rollback(tx.meta.txid)
+		tx.db.freelist.reload(tx.db.page(tx.db.meta().freelist))
+	}
+	tx.close()
+}
+
+func (tx *Tx) close() {
+	if tx.db == nil {
+		return
+	}
+	if tx.writable {
+		// Grab freelist stats.
+		var freelistFreeN = tx.db.freelist.free_count()
+		var freelistPendingN = tx.db.freelist.pending_count()
+		var freelistAlloc = tx.db.freelist.size()
+
+		// Remove transaction ref & writer lock.
+		tx.db.rwtx = nil
+		tx.db.rwlock.Unlock()
+
+		// Merge statistics.
+		tx.db.statlock.Lock()
+		tx.db.stats.FreePageN = freelistFreeN
+		tx.db.stats.PendingPageN = freelistPendingN
+		tx.db.stats.FreeAlloc = (freelistFreeN + freelistPendingN) * tx.db.pageSize
+		tx.db.stats.FreelistInuse = freelistAlloc
+		tx.db.stats.TxStats.add(&tx.stats)
+		tx.db.statlock.Unlock()
+	} else {
+		tx.db.removeTx(tx)
+	}
+
+	// Clear all references.
+	tx.db = nil
+	tx.meta = nil
+	tx.root = Bucket{tx: tx}
+	tx.pages = nil
+}
+
+// Copy writes the entire database to a writer.
+// This function exists for backwards compatibility.
+//
+// Deprecated; Use WriteTo() instead.
+func (tx *Tx) Copy(w io.Writer) error {
+	_, err := tx.WriteTo(w)
+	return err
+}
+
+// WriteTo writes the entire database to a writer.
+// If err == nil then exactly tx.Size() bytes will be written into the writer.
+func (tx *Tx) WriteTo(w io.Writer) (n int64, err error) {
+	// Attempt to open reader with WriteFlag
+	f, err := os.OpenFile(tx.db.path, os.O_RDONLY|tx.WriteFlag, 0)
+	if err != nil {
+		return 0, err
+	}
+	defer func() {
+		if cerr := f.Close(); err == nil {
+			err = cerr
+		}
+	}()
+
+	// Generate a meta page. We use the same page data for both meta pages.
+	buf := make([]byte, tx.db.pageSize)
+	page := (*page)(unsafe.Pointer(&buf[0]))
+	page.flags = metaPageFlag
+	*page.meta() = *tx.meta
+
+	// Write meta 0.
+	page.id = 0
+	page.meta().checksum = page.meta().sum64()
+	nn, err := w.Write(buf)
+	n += int64(nn)
+	if err != nil {
+		return n, fmt.Errorf("meta 0 copy: %s", err)
+	}
+
+	// Write meta 1 with a lower transaction id.
+	page.id = 1
+	page.meta().txid -= 1
+	page.meta().checksum = page.meta().sum64()
+	nn, err = w.Write(buf)
+	n += int64(nn)
+	if err != nil {
+		return n, fmt.Errorf("meta 1 copy: %s", err)
+	}
+
+	// Move past the meta pages in the file.
+	if _, err := f.Seek(int64(tx.db.pageSize*2), io.SeekStart); err != nil {
+		return n, fmt.Errorf("seek: %s", err)
+	}
+
+	// Copy data pages.
+	wn, err := io.CopyN(w, f, tx.Size()-int64(tx.db.pageSize*2))
+	n += wn
+	if err != nil {
+		return n, err
+	}
+
+	return n, nil
+}
+
+// CopyFile copies the entire database to file at the given path.
+// A reader transaction is maintained during the copy so it is safe to continue
+// using the database while a copy is in progress.
+func (tx *Tx) CopyFile(path string, mode os.FileMode) error {
+	f, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE|os.O_TRUNC, mode)
+	if err != nil {
+		return err
+	}
+
+	err = tx.Copy(f)
+	if err != nil {
+		_ = f.Close()
+		return err
+	}
+	return f.Close()
+}
+
+// Check performs several consistency checks on the database for this transaction.
+// An error is returned if any inconsistency is found.
+//
+// It can be safely run concurrently on a writable transaction. However, this
+// incurs a high cost for large databases and databases with a lot of subbuckets
+// because of caching. This overhead can be removed if running on a read-only
+// transaction, however, it is not safe to execute other writer transactions at
+// the same time.
+func (tx *Tx) Check() <-chan error {
+	ch := make(chan error)
+	go tx.check(ch)
+	return ch
+}
+
+func (tx *Tx) check(ch chan error) {
+	// Force loading free list if opened in ReadOnly mode.
+	tx.db.loadFreelist()
+
+	// Check if any pages are double freed.
+	freed := make(map[pgid]bool)
+	all := make([]pgid, tx.db.freelist.count())
+	tx.db.freelist.copyall(all)
+	for _, id := range all {
+		if freed[id] {
+			ch <- fmt.Errorf("page %d: already freed", id)
+		}
+		freed[id] = true
+	}
+
+	// Track every reachable page.
+	reachable := make(map[pgid]*page)
+	reachable[0] = tx.page(0) // meta0
+	reachable[1] = tx.page(1) // meta1
+	if tx.meta.freelist != pgidNoFreelist {
+		for i := uint32(0); i <= tx.page(tx.meta.freelist).overflow; i++ {
+			reachable[tx.meta.freelist+pgid(i)] = tx.page(tx.meta.freelist)
+		}
+	}
+
+	// Recursively check buckets.
+	tx.checkBucket(&tx.root, reachable, freed, ch)
+
+	// Ensure all pages below high water mark are either reachable or freed.
+	for i := pgid(0); i < tx.meta.pgid; i++ {
+		_, isReachable := reachable[i]
+		if !isReachable && !freed[i] {
+			ch <- fmt.Errorf("page %d: unreachable unfreed", int(i))
+		}
+	}
+
+	// Close the channel to signal completion.
+	close(ch)
+}
+
+func (tx *Tx) checkBucket(b *Bucket, reachable map[pgid]*page, freed map[pgid]bool, ch chan error) {
+	// Ignore inline buckets.
+	if b.root == 0 {
+		return
+	}
+
+	// Check every page used by this bucket.
+	b.tx.forEachPage(b.root, 0, func(p *page, _ int) {
+		if p.id > tx.meta.pgid {
+			ch <- fmt.Errorf("page %d: out of bounds: %d", int(p.id), int(b.tx.meta.pgid))
+		}
+
+		// Ensure each page is only referenced once.
+		for i := pgid(0); i <= pgid(p.overflow); i++ {
+			var id = p.id + i
+			if _, ok := reachable[id]; ok {
+				ch <- fmt.Errorf("page %d: multiple references", int(id))
+			}
+			reachable[id] = p
+		}
+
+		// We should only encounter un-freed leaf and branch pages.
+		if freed[p.id] {
+			ch <- fmt.Errorf("page %d: reachable freed", int(p.id))
+		} else if (p.flags&branchPageFlag) == 0 && (p.flags&leafPageFlag) == 0 {
+			ch <- fmt.Errorf("page %d: invalid type: %s", int(p.id), p.typ())
+		}
+	})
+
+	// Check each bucket within this bucket.
+	_ = b.ForEach(func(k, v []byte) error {
+		if child := b.Bucket(k); child != nil {
+			tx.checkBucket(child, reachable, freed, ch)
+		}
+		return nil
+	})
+}
+
+// allocate returns a contiguous block of memory starting at a given page.
+func (tx *Tx) allocate(count int) (*page, error) {
+	p, err := tx.db.allocate(tx.meta.txid, count)
+	if err != nil {
+		return nil, err
+	}
+
+	// Save to our page cache.
+	tx.pages[p.id] = p
+
+	// Update statistics.
+	tx.stats.PageCount += count
+	tx.stats.PageAlloc += count * tx.db.pageSize
+
+	return p, nil
+}
+
+// write writes any dirty pages to disk.
+func (tx *Tx) write() error {
+	// Sort pages by id.
+	pages := make(pages, 0, len(tx.pages))
+	for _, p := range tx.pages {
+		pages = append(pages, p)
+	}
+	// Clear out page cache early.
+	tx.pages = make(map[pgid]*page)
+	sort.Sort(pages)
+
+	// Write pages to disk in order.
+	for _, p := range pages {
+		size := (int(p.overflow) + 1) * tx.db.pageSize
+		offset := int64(p.id) * int64(tx.db.pageSize)
+
+		// Write out page in "max allocation" sized chunks.
+		ptr := (*[maxAllocSize]byte)(unsafe.Pointer(p))
+		for {
+			// Limit our write to our max allocation size.
+			sz := size
+			if sz > maxAllocSize-1 {
+				sz = maxAllocSize - 1
+			}
+
+			// Write chunk to disk.
+			buf := ptr[:sz]
+			if _, err := tx.db.ops.writeAt(buf, offset); err != nil {
+				return err
+			}
+
+			// Update statistics.
+			tx.stats.Write++
+
+			// Exit inner for loop if we've written all the chunks.
+			size -= sz
+			if size == 0 {
+				break
+			}
+
+			// Otherwise move offset forward and move pointer to next chunk.
+			offset += int64(sz)
+			ptr = (*[maxAllocSize]byte)(unsafe.Pointer(&ptr[sz]))
+		}
+	}
+
+	// Ignore file sync if flag is set on DB.
+	if !tx.db.NoSync || IgnoreNoSync {
+		if err := fdatasync(tx.db); err != nil {
+			return err
+		}
+	}
+
+	// Put small pages back to page pool.
+	for _, p := range pages {
+		// Ignore page sizes over 1 page.
+		// These are allocated using make() instead of the page pool.
+		if int(p.overflow) != 0 {
+			continue
+		}
+
+		buf := (*[maxAllocSize]byte)(unsafe.Pointer(p))[:tx.db.pageSize]
+
+		// See https://go.googlesource.com/go/+/f03c9202c43e0abb130669852082117ca50aa9b1
+		for i := range buf {
+			buf[i] = 0
+		}
+		tx.db.pagePool.Put(buf)
+	}
+
+	return nil
+}
+
+// writeMeta writes the meta to the disk.
+func (tx *Tx) writeMeta() error {
+	// Create a temporary buffer for the meta page.
+	buf := make([]byte, tx.db.pageSize)
+	p := tx.db.pageInBuffer(buf, 0)
+	tx.meta.write(p)
+
+	// Write the meta page to file.
+	if _, err := tx.db.ops.writeAt(buf, int64(p.id)*int64(tx.db.pageSize)); err != nil {
+		return err
+	}
+	if !tx.db.NoSync || IgnoreNoSync {
+		if err := fdatasync(tx.db); err != nil {
+			return err
+		}
+	}
+
+	// Update statistics.
+	tx.stats.Write++
+
+	return nil
+}
+
+// page returns a reference to the page with a given id.
+// If page has been written to then a temporary buffered page is returned.
+func (tx *Tx) page(id pgid) *page {
+	// Check the dirty pages first.
+	if tx.pages != nil {
+		if p, ok := tx.pages[id]; ok {
+			return p
+		}
+	}
+
+	// Otherwise return directly from the mmap.
+	return tx.db.page(id)
+}
+
+// forEachPage iterates over every page within a given page and executes a function.
+func (tx *Tx) forEachPage(pgid pgid, depth int, fn func(*page, int)) {
+	p := tx.page(pgid)
+
+	// Execute function.
+	fn(p, depth)
+
+	// Recursively loop over children.
+	if (p.flags & branchPageFlag) != 0 {
+		for i := 0; i < int(p.count); i++ {
+			elem := p.branchPageElement(uint16(i))
+			tx.forEachPage(elem.pgid, depth+1, fn)
+		}
+	}
+}
+
+// Page returns page information for a given page number.
+// This is only safe for concurrent use when used by a writable transaction.
+func (tx *Tx) Page(id int) (*PageInfo, error) {
+	if tx.db == nil {
+		return nil, ErrTxClosed
+	} else if pgid(id) >= tx.meta.pgid {
+		return nil, nil
+	}
+
+	// Build the page info.
+	p := tx.db.page(pgid(id))
+	info := &PageInfo{
+		ID:            id,
+		Count:         int(p.count),
+		OverflowCount: int(p.overflow),
+	}
+
+	// Determine the type (or if it's free).
+	if tx.db.freelist.freed(pgid(id)) {
+		info.Type = "free"
+	} else {
+		info.Type = p.typ()
+	}
+
+	return info, nil
+}
+
+// TxStats represents statistics about the actions performed by the transaction.
+type TxStats struct {
+	// Page statistics.
+	PageCount int // number of page allocations
+	PageAlloc int // total bytes allocated
+
+	// Cursor statistics.
+	CursorCount int // number of cursors created
+
+	// Node statistics
+	NodeCount int // number of node allocations
+	NodeDeref int // number of node dereferences
+
+	// Rebalance statistics.
+	Rebalance     int           // number of node rebalances
+	RebalanceTime time.Duration // total time spent rebalancing
+
+	// Split/Spill statistics.
+	Split     int           // number of nodes split
+	Spill     int           // number of nodes spilled
+	SpillTime time.Duration // total time spent spilling
+
+	// Write statistics.
+	Write     int           // number of writes performed
+	WriteTime time.Duration // total time spent writing to disk
+}
+
+func (s *TxStats) add(other *TxStats) {
+	s.PageCount += other.PageCount
+	s.PageAlloc += other.PageAlloc
+	s.CursorCount += other.CursorCount
+	s.NodeCount += other.NodeCount
+	s.NodeDeref += other.NodeDeref
+	s.Rebalance += other.Rebalance
+	s.RebalanceTime += other.RebalanceTime
+	s.Split += other.Split
+	s.Spill += other.Spill
+	s.SpillTime += other.SpillTime
+	s.Write += other.Write
+	s.WriteTime += other.WriteTime
+}
+
+// Sub calculates and returns the difference between two sets of transaction stats.
+// This is useful when obtaining stats at two different points and time and
+// you need the performance counters that occurred within that time span.
+func (s *TxStats) Sub(other *TxStats) TxStats {
+	var diff TxStats
+	diff.PageCount = s.PageCount - other.PageCount
+	diff.PageAlloc = s.PageAlloc - other.PageAlloc
+	diff.CursorCount = s.CursorCount - other.CursorCount
+	diff.NodeCount = s.NodeCount - other.NodeCount
+	diff.NodeDeref = s.NodeDeref - other.NodeDeref
+	diff.Rebalance = s.Rebalance - other.Rebalance
+	diff.RebalanceTime = s.RebalanceTime - other.RebalanceTime
+	diff.Split = s.Split - other.Split
+	diff.Spill = s.Spill - other.Spill
+	diff.SpillTime = s.SpillTime - other.SpillTime
+	diff.Write = s.Write - other.Write
+	diff.WriteTime = s.WriteTime - other.WriteTime
+	return diff
+}