contentcoder.go

// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//	http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package zap

import (
	"bytes"
	"encoding/binary"
	"io"
	"reflect"

	"github.com/golang/snappy"
)

var reflectStaticSizeMetaData int

func init() {
	var md MetaData
	reflectStaticSizeMetaData = int(reflect.TypeOf(md).Size())
}

var termSeparator byte = 0xff
var termSeparatorSplitSlice = []byte{termSeparator}

type chunkedContentCoder struct {
	final     []byte   // accumulated encoded chunks (when not writing progressively)
	chunkSize uint64   // number of docNums mapped to each chunk
	currChunk uint64   // index of the chunk currently being buffered
	chunkLens []uint64 // encoded length of each chunk

	w                io.Writer
	progressiveWrite bool // if true, each chunk is written to w as it is flushed

	chunkMetaBuf bytes.Buffer // metadata for the current chunk
	chunkBuf     bytes.Buffer // uncompressed data for the current chunk

	chunkMeta []MetaData

	compressed []byte // temp buf for snappy compression
}

// MetaData represents the data information inside a
// chunk.
type MetaData struct {
	DocNum      uint64 // docNum of the data inside the chunk
	DocDvOffset uint64 // end offset of this doc's data inside the chunk; the start is the previous entry's DocDvOffset
}

// newChunkedContentCoder returns a new chunked content coder which
// packs data into chunks based on the provided chunkSize
func newChunkedContentCoder(chunkSize uint64, maxDocNum uint64,
	w io.Writer, progressiveWrite bool) *chunkedContentCoder {
	total := maxDocNum/chunkSize + 1
	rv := &chunkedContentCoder{
		chunkSize:        chunkSize,
		chunkLens:        make([]uint64, total),
		chunkMeta:        make([]MetaData, 0, total),
		w:                w,
		progressiveWrite: progressiveWrite,
	}
	return rv
}

// Reset lets you reuse this chunked content coder. Buffers are reset
// and reused. You cannot change the chunk size.
func (c *chunkedContentCoder) Reset() {
	c.currChunk = 0
	c.final = c.final[:0]
	c.chunkBuf.Reset()
	c.chunkMetaBuf.Reset()
	for i := range c.chunkLens {
		c.chunkLens[i] = 0
	}
	c.chunkMeta = c.chunkMeta[:0]
}

// Close indicates you are done calling Add(); this allows
// the final chunk to be encoded.
func (c *chunkedContentCoder) Close() error {
	return c.flushContents()
}
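
// flushContents encodes the chunk buffered so far. As the code below shows,
// each encoded chunk consists of:
//
//	a uvarint count of metadata entries,
//	one (uvarint DocNum, uvarint DocDvOffset) pair per entry,
//	the snappy-compressed chunk data.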
func (c *chunkedContentCoder) flushContents() error {
	// flush the contents, with meta information at first
	buf := make([]byte, binary.MaxVarintLen64)
	n := binary.PutUvarint(buf, uint64(len(c.chunkMeta)))
	_, err := c.chunkMetaBuf.Write(buf[:n])
	if err != nil {
		return err
	}

	// write out the metaData slice
	for _, meta := range c.chunkMeta {
		_, err := writeUvarints(&c.chunkMetaBuf, meta.DocNum, meta.DocDvOffset)
		if err != nil {
			return err
		}
	}

	// write the metadata to the final data
	metaData := c.chunkMetaBuf.Bytes()
	c.final = append(c.final, metaData...)

	// write the compressed data to the final data, reusing c.compressed
	// as the destination buffer to avoid an allocation per chunk
	c.compressed = snappy.Encode(c.compressed[:cap(c.compressed)], c.chunkBuf.Bytes())
	c.final = append(c.final, c.compressed...)

	c.chunkLens[c.currChunk] = uint64(len(c.compressed) + len(metaData))

	if c.progressiveWrite {
		_, err := c.w.Write(c.final)
		if err != nil {
			return err
		}
		c.final = c.final[:0]
	}
	return nil
}

// Add encodes the provided byte slice into the correct chunk for the provided
// doc num. You MUST call Add() with increasing docNums.
func (c *chunkedContentCoder) Add(docNum uint64, vals []byte) error {
	chunk := docNum / c.chunkSize
	if chunk != c.currChunk {
		// flush out the previous chunk's details
		err := c.flushContents()
		if err != nil {
			return err
		}
		// clear the chunk-specific meta for the next chunk
		c.chunkBuf.Reset()
		c.chunkMetaBuf.Reset()
		c.chunkMeta = c.chunkMeta[:0]
		c.currChunk = chunk
	}

	// get the starting offset for this doc
	dvOffset := c.chunkBuf.Len()
	dvSize, err := c.chunkBuf.Write(vals)
	if err != nil {
		return err
	}

	c.chunkMeta = append(c.chunkMeta, MetaData{
		DocNum:      docNum,
		DocDvOffset: uint64(dvOffset + dvSize),
	})
	return nil
}
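
// For example, with chunkSize = 4, docNums 0-3 land in chunk 0 and docNums
// 4-7 land in chunk 1; the first Add for chunk 1 flushes chunk 0 before any
// of chunk 1's data is buffered.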

// Write commits all the encoded chunked contents to the provided writer.
//
// | ..... data ..... | chunk offsets (varints) |
// | length of chunk offsets (uint64) | number of chunks (uint64) |
//
func (c *chunkedContentCoder) Write() (int, error) {
	var tw int

	if c.final != nil {
		// write out the data section first
		nw, err := c.w.Write(c.final)
		tw += nw
		if err != nil {
			return tw, err
		}
	}

	chunkOffsetsStart := uint64(tw)
	if cap(c.final) < binary.MaxVarintLen64 {
		c.final = make([]byte, binary.MaxVarintLen64)
	} else {
		c.final = c.final[0:binary.MaxVarintLen64]
	}
	chunkOffsets := modifyLengthsToEndOffsets(c.chunkLens)
	// write out the chunk offsets
	for _, chunkOffset := range chunkOffsets {
		n := binary.PutUvarint(c.final, chunkOffset)
		nw, err := c.w.Write(c.final[:n])
		tw += nw
		if err != nil {
			return tw, err
		}
	}

	chunkOffsetsLen := uint64(tw) - chunkOffsetsStart
	c.final = c.final[0:8]
	// write out the length of chunk offsets
	binary.BigEndian.PutUint64(c.final, chunkOffsetsLen)
	nw, err := c.w.Write(c.final)
	tw += nw
	if err != nil {
		return tw, err
	}

	// write out the number of chunks
	binary.BigEndian.PutUint64(c.final, uint64(len(c.chunkLens)))
	nw, err = c.w.Write(c.final)
	tw += nw
	if err != nil {
		return tw, err
	}

	c.final = c.final[:0]

	return tw, nil
}
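
// modifyLengthsToEndOffsets is defined elsewhere in this package; judging by
// its use above, it converts the per-chunk lengths into cumulative end
// offsets, e.g. lengths [3, 5, 4] become end offsets [3, 8, 12].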

// ReadDocValueBoundary returns the start and end offsets of a doc's data
// from a metaData header slice
func ReadDocValueBoundary(chunk int, metaHeaders []MetaData) (uint64, uint64) {
	var start uint64
	if chunk > 0 {
		start = metaHeaders[chunk-1].DocDvOffset
	}
	return start, metaHeaders[chunk].DocDvOffset
}
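
As a rough illustration of the intended call sequence, here is a minimal
usage sketch (not part of the original file; the function name
exampleContentCoderUsage is invented for illustration):

func exampleContentCoderUsage() error {
	var out bytes.Buffer

	// maxDocNum/chunkSize + 1 == 1, so everything lands in a single chunk
	coder := newChunkedContentCoder(1024, 1, &out, false)

	// docNums must be passed to Add in increasing order
	if err := coder.Add(0, []byte("doc zero values")); err != nil {
		return err
	}
	if err := coder.Add(1, []byte("doc one values")); err != nil {
		return err
	}

	// Close flushes the final (here, the only) chunk
	if err := coder.Close(); err != nil {
		return err
	}

	// Write emits the data section, the chunk end offsets, and the
	// two uint64 footer fields described above
	_, err := coder.Write()
	return err
}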