You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

db.go 32KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164
  1. package bbolt
  2. import (
  3. "errors"
  4. "fmt"
  5. "hash/fnv"
  6. "log"
  7. "os"
  8. "runtime"
  9. "sort"
  10. "sync"
  11. "time"
  12. "unsafe"
  13. )
// maxMmapStep is the largest step that can be taken when remapping the mmap.
const maxMmapStep = 1 << 30 // 1GB

// version is the data file format version written into new meta pages.
const version = 2

// magic is the marker value that identifies a file as a Bolt DB.
const magic uint32 = 0xED0CDAED

// pgidNoFreelist is the sentinel compared against a meta page's freelist
// field: when the field equals this value the freelist is treated as not
// synced to disk (see hasSyncedFreelist).
const pgidNoFreelist pgid = 0xffffffffffffffff

// IgnoreNoSync specifies whether the NoSync field of a DB is ignored when
// syncing changes to a file. This is required as some operating systems,
// such as OpenBSD, do not have a unified buffer cache (UBC) and writes
// must be synchronized using the msync(2) syscall.
const IgnoreNoSync = runtime.GOOS == "openbsd"

// Default values if not set in a DB instance.
const (
	// DefaultMaxBatchSize is copied into DB.MaxBatchSize by Open.
	DefaultMaxBatchSize int = 1000
	// DefaultMaxBatchDelay is copied into DB.MaxBatchDelay by Open.
	DefaultMaxBatchDelay = 10 * time.Millisecond
	// DefaultAllocSize is copied into DB.AllocSize by Open (16 MiB).
	DefaultAllocSize = 16 * 1024 * 1024
)

// defaultPageSize is the page size used when Options.PageSize is zero; it is
// set to the OS page size.
var defaultPageSize = os.Getpagesize()

// flockRetryTimeout is the time elapsed between consecutive file locking
// attempts.
const flockRetryTimeout = 50 * time.Millisecond
  36. // FreelistType is the type of the freelist backend
  37. type FreelistType string
  38. const (
  39. // FreelistArrayType indicates backend freelist type is array
  40. FreelistArrayType = FreelistType("array")
  41. // FreelistMapType indicates backend freelist type is hashmap
  42. FreelistMapType = FreelistType("hashmap")
  43. )
// DB represents a collection of buckets persisted to a file on disk.
// All data access is performed through transactions which can be obtained through the DB.
// All the functions on DB will return an ErrDatabaseNotOpen if accessed before Open() is called.
type DB struct {
	// When enabled, the database will perform a Check() after every commit.
	// A panic is issued if the database is in an inconsistent state. This
	// flag has a large performance impact so it should only be used for
	// debugging purposes.
	StrictMode bool

	// Setting the NoSync flag will cause the database to skip fsync()
	// calls after each commit. This can be useful when bulk loading data
	// into a database and you can restart the bulk load in the event of
	// a system failure or database corruption. Do not set this flag for
	// normal use.
	//
	// If the package global IgnoreNoSync constant is true, this value is
	// ignored. See the comment on that constant for more details.
	//
	// THIS IS UNSAFE. PLEASE USE WITH CAUTION.
	NoSync bool

	// When true, skips syncing freelist to disk. This improves the database
	// write performance under normal operation, but requires a full database
	// re-sync during recovery.
	NoFreelistSync bool

	// FreelistType sets the backend freelist type. There are two options. Array which is simple but endures
	// dramatic performance degradation if database is large and fragmentation in freelist is common.
	// The alternative one is using hashmap, it is faster in almost all circumstances
	// but it doesn't guarantee that it offers the smallest page id available. In normal case it is safe.
	// The default type is array
	FreelistType FreelistType

	// When true, skips the truncate call when growing the database.
	// Setting this to true is only safe on non-ext3/ext4 systems.
	// Skipping truncation avoids preallocation of hard drive space and
	// bypasses a truncate() and fsync() syscall on remapping.
	//
	// https://github.com/boltdb/bolt/issues/284
	NoGrowSync bool

	// If you want to read the entire database fast, you can set MmapFlags to
	// syscall.MAP_POPULATE on Linux 2.6.23+ for sequential read-ahead.
	MmapFlags int

	// MaxBatchSize is the maximum size of a batch. Default value is
	// copied from DefaultMaxBatchSize in Open.
	//
	// If <=0, disables batching.
	//
	// Do not change concurrently with calls to Batch.
	MaxBatchSize int

	// MaxBatchDelay is the maximum delay before a batch starts.
	// Default value is copied from DefaultMaxBatchDelay in Open.
	//
	// If <=0, effectively disables batching.
	//
	// Do not change concurrently with calls to Batch.
	MaxBatchDelay time.Duration

	// AllocSize is the amount of space allocated when the database
	// needs to create new pages. This is done to amortize the cost
	// of truncate() and fsync() when growing the data file.
	AllocSize int

	path     string                 // path passed to Open; cleared by close
	file     *os.File               // handle of the data file opened in Open
	dataref  []byte                 // mmap'ed readonly, write throws SEGV
	data     *[maxMapSize]byte      // array view of the mapping, indexed by page() and Info()
	datasz   int                    // presumably the size of the current mapping; compared against in allocate
	filesz   int                    // current on disk file size
	meta0    *meta                  // meta page 0, refreshed on every mmap
	meta1    *meta                  // meta page 1, refreshed on every mmap
	pageSize int                    // page size in bytes, chosen or detected in Open
	opened   bool                   // set in Open, cleared by close
	rwtx     *Tx                    // read-write transaction most recently started by beginRWTx
	txs      []*Tx                  // read-only transactions registered by beginTx, removed by removeTx
	stats    Stats                  // guarded by statlock
	freelist *freelist              // created by loadFreelist, cleared by close

	freelistLoad sync.Once // ensures the body of loadFreelist runs once

	pagePool sync.Pool // pool of pageSize-byte buffers, initialized in Open

	batchMu sync.Mutex // guards batch
	batch   *batch     // batch currently accepting calls, or nil

	rwlock   sync.Mutex   // Allows only one writer at a time.
	metalock sync.Mutex   // Protects meta page access.
	mmaplock sync.RWMutex // Protects mmap access during remapping.
	statlock sync.RWMutex // Protects stats access.

	// ops holds I/O hooks; writeAt defaults to file.WriteAt in Open and is
	// cleared by close.
	ops struct {
		writeAt func(b []byte, off int64) (n int, err error)
	}

	// Read only mode.
	// When true, Update() and Begin(true) return ErrDatabaseReadOnly immediately.
	readOnly bool
}
  131. // Path returns the path to currently open database file.
  132. func (db *DB) Path() string {
  133. return db.path
  134. }
  135. // GoString returns the Go string representation of the database.
  136. func (db *DB) GoString() string {
  137. return fmt.Sprintf("bolt.DB{path:%q}", db.path)
  138. }
  139. // String returns the string representation of the database.
  140. func (db *DB) String() string {
  141. return fmt.Sprintf("DB<%q>", db.path)
  142. }
  143. // Open creates and opens a database at the given path.
  144. // If the file does not exist then it will be created automatically.
  145. // Passing in nil options will cause Bolt to open the database with the default options.
  146. func Open(path string, mode os.FileMode, options *Options) (*DB, error) {
  147. db := &DB{
  148. opened: true,
  149. }
  150. // Set default options if no options are provided.
  151. if options == nil {
  152. options = DefaultOptions
  153. }
  154. db.NoSync = options.NoSync
  155. db.NoGrowSync = options.NoGrowSync
  156. db.MmapFlags = options.MmapFlags
  157. db.NoFreelistSync = options.NoFreelistSync
  158. db.FreelistType = options.FreelistType
  159. // Set default values for later DB operations.
  160. db.MaxBatchSize = DefaultMaxBatchSize
  161. db.MaxBatchDelay = DefaultMaxBatchDelay
  162. db.AllocSize = DefaultAllocSize
  163. flag := os.O_RDWR
  164. if options.ReadOnly {
  165. flag = os.O_RDONLY
  166. db.readOnly = true
  167. }
  168. // Open data file and separate sync handler for metadata writes.
  169. db.path = path
  170. var err error
  171. if db.file, err = os.OpenFile(db.path, flag|os.O_CREATE, mode); err != nil {
  172. _ = db.close()
  173. return nil, err
  174. }
  175. // Lock file so that other processes using Bolt in read-write mode cannot
  176. // use the database at the same time. This would cause corruption since
  177. // the two processes would write meta pages and free pages separately.
  178. // The database file is locked exclusively (only one process can grab the lock)
  179. // if !options.ReadOnly.
  180. // The database file is locked using the shared lock (more than one process may
  181. // hold a lock at the same time) otherwise (options.ReadOnly is set).
  182. if err := flock(db, !db.readOnly, options.Timeout); err != nil {
  183. _ = db.close()
  184. return nil, err
  185. }
  186. // Default values for test hooks
  187. db.ops.writeAt = db.file.WriteAt
  188. if db.pageSize = options.PageSize; db.pageSize == 0 {
  189. // Set the default page size to the OS page size.
  190. db.pageSize = defaultPageSize
  191. }
  192. // Initialize the database if it doesn't exist.
  193. if info, err := db.file.Stat(); err != nil {
  194. _ = db.close()
  195. return nil, err
  196. } else if info.Size() == 0 {
  197. // Initialize new files with meta pages.
  198. if err := db.init(); err != nil {
  199. // clean up file descriptor on initialization fail
  200. _ = db.close()
  201. return nil, err
  202. }
  203. } else {
  204. // Read the first meta page to determine the page size.
  205. var buf [0x1000]byte
  206. // If we can't read the page size, but can read a page, assume
  207. // it's the same as the OS or one given -- since that's how the
  208. // page size was chosen in the first place.
  209. //
  210. // If the first page is invalid and this OS uses a different
  211. // page size than what the database was created with then we
  212. // are out of luck and cannot access the database.
  213. //
  214. // TODO: scan for next page
  215. if bw, err := db.file.ReadAt(buf[:], 0); err == nil && bw == len(buf) {
  216. if m := db.pageInBuffer(buf[:], 0).meta(); m.validate() == nil {
  217. db.pageSize = int(m.pageSize)
  218. }
  219. } else {
  220. _ = db.close()
  221. return nil, ErrInvalid
  222. }
  223. }
  224. // Initialize page pool.
  225. db.pagePool = sync.Pool{
  226. New: func() interface{} {
  227. return make([]byte, db.pageSize)
  228. },
  229. }
  230. // Memory map the data file.
  231. if err := db.mmap(options.InitialMmapSize); err != nil {
  232. _ = db.close()
  233. return nil, err
  234. }
  235. if db.readOnly {
  236. return db, nil
  237. }
  238. db.loadFreelist()
  239. // Flush freelist when transitioning from no sync to sync so
  240. // NoFreelistSync unaware boltdb can open the db later.
  241. if !db.NoFreelistSync && !db.hasSyncedFreelist() {
  242. tx, err := db.Begin(true)
  243. if tx != nil {
  244. err = tx.Commit()
  245. }
  246. if err != nil {
  247. _ = db.close()
  248. return nil, err
  249. }
  250. }
  251. // Mark the database as opened and return.
  252. return db, nil
  253. }
  254. // loadFreelist reads the freelist if it is synced, or reconstructs it
  255. // by scanning the DB if it is not synced. It assumes there are no
  256. // concurrent accesses being made to the freelist.
  257. func (db *DB) loadFreelist() {
  258. db.freelistLoad.Do(func() {
  259. db.freelist = newFreelist(db.FreelistType)
  260. if !db.hasSyncedFreelist() {
  261. // Reconstruct free list by scanning the DB.
  262. db.freelist.readIDs(db.freepages())
  263. } else {
  264. // Read free list from freelist page.
  265. db.freelist.read(db.page(db.meta().freelist))
  266. }
  267. db.stats.FreePageN = db.freelist.free_count()
  268. })
  269. }
  270. func (db *DB) hasSyncedFreelist() bool {
  271. return db.meta().freelist != pgidNoFreelist
  272. }
  273. // mmap opens the underlying memory-mapped file and initializes the meta references.
  274. // minsz is the minimum size that the new mmap can be.
  275. func (db *DB) mmap(minsz int) error {
  276. db.mmaplock.Lock()
  277. defer db.mmaplock.Unlock()
  278. info, err := db.file.Stat()
  279. if err != nil {
  280. return fmt.Errorf("mmap stat error: %s", err)
  281. } else if int(info.Size()) < db.pageSize*2 {
  282. return fmt.Errorf("file size too small")
  283. }
  284. // Ensure the size is at least the minimum size.
  285. var size = int(info.Size())
  286. if size < minsz {
  287. size = minsz
  288. }
  289. size, err = db.mmapSize(size)
  290. if err != nil {
  291. return err
  292. }
  293. // Dereference all mmap references before unmapping.
  294. if db.rwtx != nil {
  295. db.rwtx.root.dereference()
  296. }
  297. // Unmap existing data before continuing.
  298. if err := db.munmap(); err != nil {
  299. return err
  300. }
  301. // Memory-map the data file as a byte slice.
  302. if err := mmap(db, size); err != nil {
  303. return err
  304. }
  305. // Save references to the meta pages.
  306. db.meta0 = db.page(0).meta()
  307. db.meta1 = db.page(1).meta()
  308. // Validate the meta pages. We only return an error if both meta pages fail
  309. // validation, since meta0 failing validation means that it wasn't saved
  310. // properly -- but we can recover using meta1. And vice-versa.
  311. err0 := db.meta0.validate()
  312. err1 := db.meta1.validate()
  313. if err0 != nil && err1 != nil {
  314. return err0
  315. }
  316. return nil
  317. }
  318. // munmap unmaps the data file from memory.
  319. func (db *DB) munmap() error {
  320. if err := munmap(db); err != nil {
  321. return fmt.Errorf("unmap error: " + err.Error())
  322. }
  323. return nil
  324. }
  325. // mmapSize determines the appropriate size for the mmap given the current size
  326. // of the database. The minimum size is 32KB and doubles until it reaches 1GB.
  327. // Returns an error if the new mmap size is greater than the max allowed.
  328. func (db *DB) mmapSize(size int) (int, error) {
  329. // Double the size from 32KB until 1GB.
  330. for i := uint(15); i <= 30; i++ {
  331. if size <= 1<<i {
  332. return 1 << i, nil
  333. }
  334. }
  335. // Verify the requested size is not above the maximum allowed.
  336. if size > maxMapSize {
  337. return 0, fmt.Errorf("mmap too large")
  338. }
  339. // If larger than 1GB then grow by 1GB at a time.
  340. sz := int64(size)
  341. if remainder := sz % int64(maxMmapStep); remainder > 0 {
  342. sz += int64(maxMmapStep) - remainder
  343. }
  344. // Ensure that the mmap size is a multiple of the page size.
  345. // This should always be true since we're incrementing in MBs.
  346. pageSize := int64(db.pageSize)
  347. if (sz % pageSize) != 0 {
  348. sz = ((sz / pageSize) + 1) * pageSize
  349. }
  350. // If we've exceeded the max size then only grow up to the max size.
  351. if sz > maxMapSize {
  352. sz = maxMapSize
  353. }
  354. return int(sz), nil
  355. }
  356. // init creates a new database file and initializes its meta pages.
  357. func (db *DB) init() error {
  358. // Create two meta pages on a buffer.
  359. buf := make([]byte, db.pageSize*4)
  360. for i := 0; i < 2; i++ {
  361. p := db.pageInBuffer(buf[:], pgid(i))
  362. p.id = pgid(i)
  363. p.flags = metaPageFlag
  364. // Initialize the meta page.
  365. m := p.meta()
  366. m.magic = magic
  367. m.version = version
  368. m.pageSize = uint32(db.pageSize)
  369. m.freelist = 2
  370. m.root = bucket{root: 3}
  371. m.pgid = 4
  372. m.txid = txid(i)
  373. m.checksum = m.sum64()
  374. }
  375. // Write an empty freelist at page 3.
  376. p := db.pageInBuffer(buf[:], pgid(2))
  377. p.id = pgid(2)
  378. p.flags = freelistPageFlag
  379. p.count = 0
  380. // Write an empty leaf page at page 4.
  381. p = db.pageInBuffer(buf[:], pgid(3))
  382. p.id = pgid(3)
  383. p.flags = leafPageFlag
  384. p.count = 0
  385. // Write the buffer to our data file.
  386. if _, err := db.ops.writeAt(buf, 0); err != nil {
  387. return err
  388. }
  389. if err := fdatasync(db); err != nil {
  390. return err
  391. }
  392. return nil
  393. }
// Close releases all database resources.
// It will block waiting for any open transactions to finish
// before closing the database and returning.
//
// It acquires rwlock, metalock and mmaplock (exclusively), in that order,
// before delegating to close; all three are released on return.
func (db *DB) Close() error {
	// Writer lock first: this is the same order beginRWTx uses.
	db.rwlock.Lock()
	defer db.rwlock.Unlock()

	db.metalock.Lock()
	defer db.metalock.Unlock()

	// Read transactions hold mmaplock for reading (see beginTx/removeTx), so
	// the exclusive lock is only granted once they have been removed.
	db.mmaplock.Lock()
	defer db.mmaplock.Unlock()

	return db.close()
}
  406. func (db *DB) close() error {
  407. if !db.opened {
  408. return nil
  409. }
  410. db.opened = false
  411. db.freelist = nil
  412. // Clear ops.
  413. db.ops.writeAt = nil
  414. // Close the mmap.
  415. if err := db.munmap(); err != nil {
  416. return err
  417. }
  418. // Close file handles.
  419. if db.file != nil {
  420. // No need to unlock read-only file.
  421. if !db.readOnly {
  422. // Unlock the file.
  423. if err := funlock(db); err != nil {
  424. log.Printf("bolt.Close(): funlock error: %s", err)
  425. }
  426. }
  427. // Close the file descriptor.
  428. if err := db.file.Close(); err != nil {
  429. return fmt.Errorf("db file close: %s", err)
  430. }
  431. db.file = nil
  432. }
  433. db.path = ""
  434. return nil
  435. }
  436. // Begin starts a new transaction.
  437. // Multiple read-only transactions can be used concurrently but only one
  438. // write transaction can be used at a time. Starting multiple write transactions
  439. // will cause the calls to block and be serialized until the current write
  440. // transaction finishes.
  441. //
  442. // Transactions should not be dependent on one another. Opening a read
  443. // transaction and a write transaction in the same goroutine can cause the
  444. // writer to deadlock because the database periodically needs to re-mmap itself
  445. // as it grows and it cannot do that while a read transaction is open.
  446. //
  447. // If a long running read transaction (for example, a snapshot transaction) is
  448. // needed, you might want to set DB.InitialMmapSize to a large enough value
  449. // to avoid potential blocking of write transaction.
  450. //
  451. // IMPORTANT: You must close read-only transactions after you are finished or
  452. // else the database will not reclaim old pages.
  453. func (db *DB) Begin(writable bool) (*Tx, error) {
  454. if writable {
  455. return db.beginRWTx()
  456. }
  457. return db.beginTx()
  458. }
  459. func (db *DB) beginTx() (*Tx, error) {
  460. // Lock the meta pages while we initialize the transaction. We obtain
  461. // the meta lock before the mmap lock because that's the order that the
  462. // write transaction will obtain them.
  463. db.metalock.Lock()
  464. // Obtain a read-only lock on the mmap. When the mmap is remapped it will
  465. // obtain a write lock so all transactions must finish before it can be
  466. // remapped.
  467. db.mmaplock.RLock()
  468. // Exit if the database is not open yet.
  469. if !db.opened {
  470. db.mmaplock.RUnlock()
  471. db.metalock.Unlock()
  472. return nil, ErrDatabaseNotOpen
  473. }
  474. // Create a transaction associated with the database.
  475. t := &Tx{}
  476. t.init(db)
  477. // Keep track of transaction until it closes.
  478. db.txs = append(db.txs, t)
  479. n := len(db.txs)
  480. // Unlock the meta pages.
  481. db.metalock.Unlock()
  482. // Update the transaction stats.
  483. db.statlock.Lock()
  484. db.stats.TxN++
  485. db.stats.OpenTxN = n
  486. db.statlock.Unlock()
  487. return t, nil
  488. }
  489. func (db *DB) beginRWTx() (*Tx, error) {
  490. // If the database was opened with Options.ReadOnly, return an error.
  491. if db.readOnly {
  492. return nil, ErrDatabaseReadOnly
  493. }
  494. // Obtain writer lock. This is released by the transaction when it closes.
  495. // This enforces only one writer transaction at a time.
  496. db.rwlock.Lock()
  497. // Once we have the writer lock then we can lock the meta pages so that
  498. // we can set up the transaction.
  499. db.metalock.Lock()
  500. defer db.metalock.Unlock()
  501. // Exit if the database is not open yet.
  502. if !db.opened {
  503. db.rwlock.Unlock()
  504. return nil, ErrDatabaseNotOpen
  505. }
  506. // Create a transaction associated with the database.
  507. t := &Tx{writable: true}
  508. t.init(db)
  509. db.rwtx = t
  510. db.freePages()
  511. return t, nil
  512. }
  513. // freePages releases any pages associated with closed read-only transactions.
  514. func (db *DB) freePages() {
  515. // Free all pending pages prior to earliest open transaction.
  516. sort.Sort(txsById(db.txs))
  517. minid := txid(0xFFFFFFFFFFFFFFFF)
  518. if len(db.txs) > 0 {
  519. minid = db.txs[0].meta.txid
  520. }
  521. if minid > 0 {
  522. db.freelist.release(minid - 1)
  523. }
  524. // Release unused txid extents.
  525. for _, t := range db.txs {
  526. db.freelist.releaseRange(minid, t.meta.txid-1)
  527. minid = t.meta.txid + 1
  528. }
  529. db.freelist.releaseRange(minid, txid(0xFFFFFFFFFFFFFFFF))
  530. // Any page both allocated and freed in an extent is safe to release.
  531. }
  532. type txsById []*Tx
  533. func (t txsById) Len() int { return len(t) }
  534. func (t txsById) Swap(i, j int) { t[i], t[j] = t[j], t[i] }
  535. func (t txsById) Less(i, j int) bool { return t[i].meta.txid < t[j].meta.txid }
  536. // removeTx removes a transaction from the database.
  537. func (db *DB) removeTx(tx *Tx) {
  538. // Release the read lock on the mmap.
  539. db.mmaplock.RUnlock()
  540. // Use the meta lock to restrict access to the DB object.
  541. db.metalock.Lock()
  542. // Remove the transaction.
  543. for i, t := range db.txs {
  544. if t == tx {
  545. last := len(db.txs) - 1
  546. db.txs[i] = db.txs[last]
  547. db.txs[last] = nil
  548. db.txs = db.txs[:last]
  549. break
  550. }
  551. }
  552. n := len(db.txs)
  553. // Unlock the meta pages.
  554. db.metalock.Unlock()
  555. // Merge statistics.
  556. db.statlock.Lock()
  557. db.stats.OpenTxN = n
  558. db.stats.TxStats.add(&tx.stats)
  559. db.statlock.Unlock()
  560. }
  561. // Update executes a function within the context of a read-write managed transaction.
  562. // If no error is returned from the function then the transaction is committed.
  563. // If an error is returned then the entire transaction is rolled back.
  564. // Any error that is returned from the function or returned from the commit is
  565. // returned from the Update() method.
  566. //
  567. // Attempting to manually commit or rollback within the function will cause a panic.
  568. func (db *DB) Update(fn func(*Tx) error) error {
  569. t, err := db.Begin(true)
  570. if err != nil {
  571. return err
  572. }
  573. // Make sure the transaction rolls back in the event of a panic.
  574. defer func() {
  575. if t.db != nil {
  576. t.rollback()
  577. }
  578. }()
  579. // Mark as a managed tx so that the inner function cannot manually commit.
  580. t.managed = true
  581. // If an error is returned from the function then rollback and return error.
  582. err = fn(t)
  583. t.managed = false
  584. if err != nil {
  585. _ = t.Rollback()
  586. return err
  587. }
  588. return t.Commit()
  589. }
  590. // View executes a function within the context of a managed read-only transaction.
  591. // Any error that is returned from the function is returned from the View() method.
  592. //
  593. // Attempting to manually rollback within the function will cause a panic.
  594. func (db *DB) View(fn func(*Tx) error) error {
  595. t, err := db.Begin(false)
  596. if err != nil {
  597. return err
  598. }
  599. // Make sure the transaction rolls back in the event of a panic.
  600. defer func() {
  601. if t.db != nil {
  602. t.rollback()
  603. }
  604. }()
  605. // Mark as a managed tx so that the inner function cannot manually rollback.
  606. t.managed = true
  607. // If an error is returned from the function then pass it through.
  608. err = fn(t)
  609. t.managed = false
  610. if err != nil {
  611. _ = t.Rollback()
  612. return err
  613. }
  614. return t.Rollback()
  615. }
  616. // Batch calls fn as part of a batch. It behaves similar to Update,
  617. // except:
  618. //
  619. // 1. concurrent Batch calls can be combined into a single Bolt
  620. // transaction.
  621. //
  622. // 2. the function passed to Batch may be called multiple times,
  623. // regardless of whether it returns error or not.
  624. //
  625. // This means that Batch function side effects must be idempotent and
  626. // take permanent effect only after a successful return is seen in
  627. // caller.
  628. //
  629. // The maximum batch size and delay can be adjusted with DB.MaxBatchSize
  630. // and DB.MaxBatchDelay, respectively.
  631. //
  632. // Batch is only useful when there are multiple goroutines calling it.
  633. func (db *DB) Batch(fn func(*Tx) error) error {
  634. errCh := make(chan error, 1)
  635. db.batchMu.Lock()
  636. if (db.batch == nil) || (db.batch != nil && len(db.batch.calls) >= db.MaxBatchSize) {
  637. // There is no existing batch, or the existing batch is full; start a new one.
  638. db.batch = &batch{
  639. db: db,
  640. }
  641. db.batch.timer = time.AfterFunc(db.MaxBatchDelay, db.batch.trigger)
  642. }
  643. db.batch.calls = append(db.batch.calls, call{fn: fn, err: errCh})
  644. if len(db.batch.calls) >= db.MaxBatchSize {
  645. // wake up batch, it's ready to run
  646. go db.batch.trigger()
  647. }
  648. db.batchMu.Unlock()
  649. err := <-errCh
  650. if err == trySolo {
  651. err = db.Update(fn)
  652. }
  653. return err
  654. }
// call is one function queued by DB.Batch together with the channel its
// result is delivered on.
type call struct {
	fn  func(*Tx) error // function to run inside the batch transaction
	err chan<- error    // receives the batch result, or trySolo
}
// batch collects calls queued by DB.Batch so they can be executed in a
// single Update transaction.
type batch struct {
	db    *DB         // database the batch runs against
	timer *time.Timer // fires trigger after DB.MaxBatchDelay
	start sync.Once   // ensures run executes at most once
	calls []call      // queued calls, appended under DB.batchMu
}
  665. // trigger runs the batch if it hasn't already been run.
  666. func (b *batch) trigger() {
  667. b.start.Do(b.run)
  668. }
  669. // run performs the transactions in the batch and communicates results
  670. // back to DB.Batch.
  671. func (b *batch) run() {
  672. b.db.batchMu.Lock()
  673. b.timer.Stop()
  674. // Make sure no new work is added to this batch, but don't break
  675. // other batches.
  676. if b.db.batch == b {
  677. b.db.batch = nil
  678. }
  679. b.db.batchMu.Unlock()
  680. retry:
  681. for len(b.calls) > 0 {
  682. var failIdx = -1
  683. err := b.db.Update(func(tx *Tx) error {
  684. for i, c := range b.calls {
  685. if err := safelyCall(c.fn, tx); err != nil {
  686. failIdx = i
  687. return err
  688. }
  689. }
  690. return nil
  691. })
  692. if failIdx >= 0 {
  693. // take the failing transaction out of the batch. it's
  694. // safe to shorten b.calls here because db.batch no longer
  695. // points to us, and we hold the mutex anyway.
  696. c := b.calls[failIdx]
  697. b.calls[failIdx], b.calls = b.calls[len(b.calls)-1], b.calls[:len(b.calls)-1]
  698. // tell the submitter re-run it solo, continue with the rest of the batch
  699. c.err <- trySolo
  700. continue retry
  701. }
  702. // pass success, or bolt internal errors, to all callers
  703. for _, c := range b.calls {
  704. c.err <- err
  705. }
  706. break retry
  707. }
  708. }
// trySolo is a special sentinel error value used for signaling that a
// transaction function should be re-run. It should never be seen by
// callers: DB.Batch intercepts it and re-runs the function through
// DB.Update.
var trySolo = errors.New("batch function returned an error and should be re-run solo")
  713. type panicked struct {
  714. reason interface{}
  715. }
  716. func (p panicked) Error() string {
  717. if err, ok := p.reason.(error); ok {
  718. return err.Error()
  719. }
  720. return fmt.Sprintf("panic: %v", p.reason)
  721. }
  722. func safelyCall(fn func(*Tx) error, tx *Tx) (err error) {
  723. defer func() {
  724. if p := recover(); p != nil {
  725. err = panicked{p}
  726. }
  727. }()
  728. return fn(tx)
  729. }
  730. // Sync executes fdatasync() against the database file handle.
  731. //
  732. // This is not necessary under normal operation, however, if you use NoSync
  733. // then it allows you to force the database file to sync against the disk.
  734. func (db *DB) Sync() error { return fdatasync(db) }
  735. // Stats retrieves ongoing performance stats for the database.
  736. // This is only updated when a transaction closes.
  737. func (db *DB) Stats() Stats {
  738. db.statlock.RLock()
  739. defer db.statlock.RUnlock()
  740. return db.stats
  741. }
  742. // This is for internal access to the raw data bytes from the C cursor, use
  743. // carefully, or not at all.
  744. func (db *DB) Info() *Info {
  745. return &Info{uintptr(unsafe.Pointer(&db.data[0])), db.pageSize}
  746. }
  747. // page retrieves a page reference from the mmap based on the current page size.
  748. func (db *DB) page(id pgid) *page {
  749. pos := id * pgid(db.pageSize)
  750. return (*page)(unsafe.Pointer(&db.data[pos]))
  751. }
  752. // pageInBuffer retrieves a page reference from a given byte array based on the current page size.
  753. func (db *DB) pageInBuffer(b []byte, id pgid) *page {
  754. return (*page)(unsafe.Pointer(&b[id*pgid(db.pageSize)]))
  755. }
  756. // meta retrieves the current meta page reference.
  757. func (db *DB) meta() *meta {
  758. // We have to return the meta with the highest txid which doesn't fail
  759. // validation. Otherwise, we can cause errors when in fact the database is
  760. // in a consistent state. metaA is the one with the higher txid.
  761. metaA := db.meta0
  762. metaB := db.meta1
  763. if db.meta1.txid > db.meta0.txid {
  764. metaA = db.meta1
  765. metaB = db.meta0
  766. }
  767. // Use higher meta page if valid. Otherwise fallback to previous, if valid.
  768. if err := metaA.validate(); err == nil {
  769. return metaA
  770. } else if err := metaB.validate(); err == nil {
  771. return metaB
  772. }
  773. // This should never be reached, because both meta1 and meta0 were validated
  774. // on mmap() and we do fsync() on every write.
  775. panic("bolt.DB.meta(): invalid meta pages")
  776. }
  777. // allocate returns a contiguous block of memory starting at a given page.
  778. func (db *DB) allocate(txid txid, count int) (*page, error) {
  779. // Allocate a temporary buffer for the page.
  780. var buf []byte
  781. if count == 1 {
  782. buf = db.pagePool.Get().([]byte)
  783. } else {
  784. buf = make([]byte, count*db.pageSize)
  785. }
  786. p := (*page)(unsafe.Pointer(&buf[0]))
  787. p.overflow = uint32(count - 1)
  788. // Use pages from the freelist if they are available.
  789. if p.id = db.freelist.allocate(txid, count); p.id != 0 {
  790. return p, nil
  791. }
  792. // Resize mmap() if we're at the end.
  793. p.id = db.rwtx.meta.pgid
  794. var minsz = int((p.id+pgid(count))+1) * db.pageSize
  795. if minsz >= db.datasz {
  796. if err := db.mmap(minsz); err != nil {
  797. return nil, fmt.Errorf("mmap allocate error: %s", err)
  798. }
  799. }
  800. // Move the page id high water mark.
  801. db.rwtx.meta.pgid += pgid(count)
  802. return p, nil
  803. }
  804. // grow grows the size of the database to the given sz.
  805. func (db *DB) grow(sz int) error {
  806. // Ignore if the new size is less than available file size.
  807. if sz <= db.filesz {
  808. return nil
  809. }
  810. // If the data is smaller than the alloc size then only allocate what's needed.
  811. // Once it goes over the allocation size then allocate in chunks.
  812. if db.datasz < db.AllocSize {
  813. sz = db.datasz
  814. } else {
  815. sz += db.AllocSize
  816. }
  817. // Truncate and fsync to ensure file size metadata is flushed.
  818. // https://github.com/boltdb/bolt/issues/284
  819. if !db.NoGrowSync && !db.readOnly {
  820. if runtime.GOOS != "windows" {
  821. if err := db.file.Truncate(int64(sz)); err != nil {
  822. return fmt.Errorf("file resize error: %s", err)
  823. }
  824. }
  825. if err := db.file.Sync(); err != nil {
  826. return fmt.Errorf("file sync error: %s", err)
  827. }
  828. }
  829. db.filesz = sz
  830. return nil
  831. }
  832. func (db *DB) IsReadOnly() bool {
  833. return db.readOnly
  834. }
  835. func (db *DB) freepages() []pgid {
  836. tx, err := db.beginTx()
  837. defer func() {
  838. err = tx.Rollback()
  839. if err != nil {
  840. panic("freepages: failed to rollback tx")
  841. }
  842. }()
  843. if err != nil {
  844. panic("freepages: failed to open read only tx")
  845. }
  846. reachable := make(map[pgid]*page)
  847. nofreed := make(map[pgid]bool)
  848. ech := make(chan error)
  849. go func() {
  850. for e := range ech {
  851. panic(fmt.Sprintf("freepages: failed to get all reachable pages (%v)", e))
  852. }
  853. }()
  854. tx.checkBucket(&tx.root, reachable, nofreed, ech)
  855. close(ech)
  856. var fids []pgid
  857. for i := pgid(2); i < db.meta().pgid; i++ {
  858. if _, ok := reachable[i]; !ok {
  859. fids = append(fids, i)
  860. }
  861. }
  862. return fids
  863. }
  // Options represents the options that can be set when opening a database.
  type Options struct {
  	// Timeout is the amount of time to wait to obtain a file lock.
  	// When set to zero it will wait indefinitely. This option is only
  	// available on Darwin and Linux.
  	Timeout time.Duration

  	// NoGrowSync sets the DB.NoGrowSync flag before memory mapping the file.
  	NoGrowSync bool

  	// NoFreelistSync disables syncing the freelist to disk. This improves the
  	// database write performance under normal operation, but requires a full
  	// database re-sync during recovery.
  	NoFreelistSync bool

  	// FreelistType sets the backend freelist type. There are two options.
  	// Array is simple but suffers dramatic performance degradation if the
  	// database is large and fragmentation in the freelist is common.
  	// The alternative is hashmap, which is faster in almost all circumstances
  	// but does not guarantee that it offers the smallest page id available.
  	// In the normal case it is safe.
  	// The default type is array.
  	FreelistType FreelistType

  	// ReadOnly opens the database in read-only mode. Uses
  	// flock(..., LOCK_SH |LOCK_NB) to grab a shared lock (UNIX).
  	ReadOnly bool

  	// MmapFlags sets the DB.MmapFlags flag before memory mapping the file.
  	MmapFlags int

  	// InitialMmapSize is the initial mmap size of the database
  	// in bytes. Read transactions won't block write transactions
  	// if the InitialMmapSize is large enough to hold the database mmap
  	// size. (See DB.Begin for more information)
  	//
  	// If <=0, the initial map size is 0.
  	// If InitialMmapSize is smaller than the previous database size,
  	// it has no effect.
  	InitialMmapSize int

  	// PageSize overrides the default OS page size.
  	PageSize int

  	// NoSync sets the initial value of DB.NoSync. Normally this can just be
  	// set directly on the DB itself when returned from Open(), but this option
  	// is useful in APIs which expose Options but not the underlying DB.
  	NoSync bool
  }
  // DefaultOptions represents the options used if nil options are passed into Open().
  // No timeout is used, which will cause Bolt to wait indefinitely for a lock.
  var DefaultOptions = &Options{
  	Timeout:      0, // zero: wait indefinitely for the file lock
  	NoGrowSync:   false,
  	FreelistType: FreelistArrayType, // the array-backed freelist is the default
  }
  // Stats represents statistics about the database.
  type Stats struct {
  	// Freelist stats
  	FreePageN     int // total number of free pages on the freelist
  	PendingPageN  int // total number of pending pages on the freelist
  	FreeAlloc     int // total bytes allocated in free pages
  	FreelistInuse int // total bytes used by the freelist

  	// Transaction stats
  	TxN     int     // total number of started read transactions
  	OpenTxN int     // number of currently open read transactions
  	TxStats TxStats // global, ongoing stats.
  }
  921. // Sub calculates and returns the difference between two sets of database stats.
  922. // This is useful when obtaining stats at two different points and time and
  923. // you need the performance counters that occurred within that time span.
  924. func (s *Stats) Sub(other *Stats) Stats {
  925. if other == nil {
  926. return *s
  927. }
  928. var diff Stats
  929. diff.FreePageN = s.FreePageN
  930. diff.PendingPageN = s.PendingPageN
  931. diff.FreeAlloc = s.FreeAlloc
  932. diff.FreelistInuse = s.FreelistInuse
  933. diff.TxN = s.TxN - other.TxN
  934. diff.TxStats = s.TxStats.Sub(&other.TxStats)
  935. return diff
  936. }
  // Info carries the values returned by DB.Info.
  type Info struct {
  	Data     uintptr // address of the first byte of the DB's data
  	PageSize int     // the DB's page size
  }
  // meta is the content stored on a meta page.
  //
  // Field order and sizes are significant: sum64 hashes the raw bytes of every
  // field that precedes checksum, and write copies the struct as-is onto a page.
  type meta struct {
  	magic    uint32 // marker; validate requires it to equal the magic constant
  	version  uint32 // validate requires it to equal the version constant
  	pageSize uint32 // NOTE(review): presumably the page size the file was created with; confirm
  	flags    uint32
  	root     bucket // root bucket; write panics unless root.root is below pgid
  	freelist pgid   // freelist page id, or pgidNoFreelist
  	pgid     pgid   // page id high water mark
  	txid     txid   // transaction id; txid % 2 selects the meta page id in write
  	checksum uint64 // sum64 of the preceding fields; zero skips the check in validate
  }
  952. // validate checks the marker bytes and version of the meta page to ensure it matches this binary.
  953. func (m *meta) validate() error {
  954. if m.magic != magic {
  955. return ErrInvalid
  956. } else if m.version != version {
  957. return ErrVersionMismatch
  958. } else if m.checksum != 0 && m.checksum != m.sum64() {
  959. return ErrChecksum
  960. }
  961. return nil
  962. }
  // copy copies one meta object to another: every field of m is assigned to
  // *dest by value, overwriting whatever dest held before.
  func (m *meta) copy(dest *meta) {
  	*dest = *m
  }
  967. // write writes the meta onto a page.
  968. func (m *meta) write(p *page) {
  969. if m.root.root >= m.pgid {
  970. panic(fmt.Sprintf("root bucket pgid (%d) above high water mark (%d)", m.root.root, m.pgid))
  971. } else if m.freelist >= m.pgid && m.freelist != pgidNoFreelist {
  972. // TODO: reject pgidNoFreeList if !NoFreelistSync
  973. panic(fmt.Sprintf("freelist pgid (%d) above high water mark (%d)", m.freelist, m.pgid))
  974. }
  975. // Page id is either going to be 0 or 1 which we can determine by the transaction ID.
  976. p.id = pgid(m.txid % 2)
  977. p.flags |= metaPageFlag
  978. // Calculate the checksum.
  979. m.checksum = m.sum64()
  980. m.copy(p.meta())
  981. }
  982. // generates the checksum for the meta.
  983. func (m *meta) sum64() uint64 {
  984. var h = fnv.New64a()
  985. _, _ = h.Write((*[unsafe.Offsetof(meta{}.checksum)]byte)(unsafe.Pointer(m))[:])
  986. return h.Sum64()
  987. }
  // _assert will panic with a given formatted message if the given condition
  // is false. The panic value is the string "assertion failed: " followed by
  // msg formatted with v.
  func _assert(condition bool, msg string, v ...interface{}) {
  	if condition {
  		return
  	}
  	panic(fmt.Sprintf("assertion failed: "+msg, v...))
  }