You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

decoder_v1.go 7.1KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314
  1. // Copyright (c) 2017 Couchbase, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package vellum
  15. import (
  16. "bytes"
  17. "encoding/binary"
  18. "fmt"
  19. "strconv"
  20. )
  21. func init() {
  22. registerDecoder(versionV1, func(data []byte) decoder {
  23. return newDecoderV1(data)
  24. })
  25. }
  // decoderV1 reads states out of a byte slice laid out in the V1 format.
  type decoderV1 struct {
  	// data is the complete encoded byte slice; getRoot and getLen read a
  	// footer from its final footerSizeV1 bytes.
  	data []byte
  }
  29. func newDecoderV1(data []byte) *decoderV1 {
  30. return &decoderV1{
  31. data: data,
  32. }
  33. }
  34. func (d *decoderV1) getRoot() int {
  35. if len(d.data) < footerSizeV1 {
  36. return noneAddr
  37. }
  38. footer := d.data[len(d.data)-footerSizeV1:]
  39. root := binary.LittleEndian.Uint64(footer[8:])
  40. return int(root)
  41. }
  42. func (d *decoderV1) getLen() int {
  43. if len(d.data) < footerSizeV1 {
  44. return 0
  45. }
  46. footer := d.data[len(d.data)-footerSizeV1:]
  47. dlen := binary.LittleEndian.Uint64(footer)
  48. return int(dlen)
  49. }
  50. func (d *decoderV1) stateAt(addr int, prealloc fstState) (fstState, error) {
  51. state, ok := prealloc.(*fstStateV1)
  52. if ok && state != nil {
  53. *state = fstStateV1{} // clear the struct
  54. } else {
  55. state = &fstStateV1{}
  56. }
  57. err := state.at(d.data, addr)
  58. if err != nil {
  59. return nil, err
  60. }
  61. return state, nil
  62. }
  // fstStateV1 is a decoded view of one state inside a V1-encoded byte slice.
  // A state is read backwards: top is the index of its first byte and bottom
  // moves toward lower indexes as each section is consumed.
  type fstStateV1 struct {
  	data        []byte // the full encoded byte slice being decoded
  	top, bottom int    // highest and lowest data index occupied by this state
  	numTrans    int    // number of outgoing transitions

  	// Populated only when the state is encoded as a single transition.
  	singleTransChar                 byte
  	singleTransNext                 bool
  	singleTransAddr, singleTransOut uint64

  	// Pack sizes in bytes, used by both encodings.
  	transSize, outSize int

  	// Populated only when the state is encoded with multiple transitions.
  	// Each top/bottom pair bounds a section as data[bottom:top].
  	final                 bool
  	transTop, transBottom int // transition key bytes
  	destTop, destBottom   int // packed destination deltas
  	outTop, outBottom     int // packed transition outputs
  	outFinal              int // index of the packed final output
  }
  86. func (f *fstStateV1) isEncodedSingle() bool {
  87. if f.data[f.top]>>7 > 0 {
  88. return true
  89. }
  90. return false
  91. }
  92. func (f *fstStateV1) at(data []byte, addr int) error {
  93. f.data = data
  94. if addr == emptyAddr {
  95. return f.atZero()
  96. } else if addr == noneAddr {
  97. return f.atNone()
  98. }
  99. if addr > len(data) || addr < 16 {
  100. return fmt.Errorf("invalid address %d/%d", addr, len(data))
  101. }
  102. f.top = addr
  103. f.bottom = addr
  104. if f.isEncodedSingle() {
  105. return f.atSingle(data, addr)
  106. }
  107. return f.atMulti(data, addr)
  108. }
  109. func (f *fstStateV1) atZero() error {
  110. f.top = 0
  111. f.bottom = 1
  112. f.numTrans = 0
  113. f.final = true
  114. f.outFinal = 0
  115. return nil
  116. }
  117. func (f *fstStateV1) atNone() error {
  118. f.top = 0
  119. f.bottom = 1
  120. f.numTrans = 0
  121. f.final = false
  122. f.outFinal = 0
  123. return nil
  124. }
  125. func (f *fstStateV1) atSingle(data []byte, addr int) error {
  126. // handle single transition case
  127. f.numTrans = 1
  128. f.singleTransNext = data[f.top]&transitionNext > 0
  129. f.singleTransChar = data[f.top] & maxCommon
  130. if f.singleTransChar == 0 {
  131. f.bottom-- // extra byte for uncommon
  132. f.singleTransChar = data[f.bottom]
  133. } else {
  134. f.singleTransChar = decodeCommon(f.singleTransChar)
  135. }
  136. if f.singleTransNext {
  137. // now we know the bottom, can compute next addr
  138. f.singleTransAddr = uint64(f.bottom - 1)
  139. f.singleTransOut = 0
  140. } else {
  141. f.bottom-- // extra byte with pack sizes
  142. f.transSize, f.outSize = decodePackSize(data[f.bottom])
  143. f.bottom -= f.transSize // exactly one trans
  144. f.singleTransAddr = readPackedUint(data[f.bottom : f.bottom+f.transSize])
  145. if f.outSize > 0 {
  146. f.bottom -= f.outSize // exactly one out (could be length 0 though)
  147. f.singleTransOut = readPackedUint(data[f.bottom : f.bottom+f.outSize])
  148. } else {
  149. f.singleTransOut = 0
  150. }
  151. // need to wait till we know bottom
  152. if f.singleTransAddr != 0 {
  153. f.singleTransAddr = uint64(f.bottom) - f.singleTransAddr
  154. }
  155. }
  156. return nil
  157. }
  158. func (f *fstStateV1) atMulti(data []byte, addr int) error {
  159. // handle multiple transitions case
  160. f.final = data[f.top]&stateFinal > 0
  161. f.numTrans = int(data[f.top] & maxNumTrans)
  162. if f.numTrans == 0 {
  163. f.bottom-- // extra byte for number of trans
  164. f.numTrans = int(data[f.bottom])
  165. if f.numTrans == 1 {
  166. // can't really be 1 here, this is special case that means 256
  167. f.numTrans = 256
  168. }
  169. }
  170. f.bottom-- // extra byte with pack sizes
  171. f.transSize, f.outSize = decodePackSize(data[f.bottom])
  172. f.transTop = f.bottom
  173. f.bottom -= f.numTrans // one byte for each transition
  174. f.transBottom = f.bottom
  175. f.destTop = f.bottom
  176. f.bottom -= f.numTrans * f.transSize
  177. f.destBottom = f.bottom
  178. if f.outSize > 0 {
  179. f.outTop = f.bottom
  180. f.bottom -= f.numTrans * f.outSize
  181. f.outBottom = f.bottom
  182. if f.final {
  183. f.bottom -= f.outSize
  184. f.outFinal = f.bottom
  185. }
  186. }
  187. return nil
  188. }
  189. func (f *fstStateV1) Address() int {
  190. return f.top
  191. }
  192. func (f *fstStateV1) Final() bool {
  193. return f.final
  194. }
  195. func (f *fstStateV1) FinalOutput() uint64 {
  196. if f.final && f.outSize > 0 {
  197. return readPackedUint(f.data[f.outFinal : f.outFinal+f.outSize])
  198. }
  199. return 0
  200. }
  201. func (f *fstStateV1) NumTransitions() int {
  202. return f.numTrans
  203. }
  204. func (f *fstStateV1) TransitionAt(i int) byte {
  205. if f.isEncodedSingle() {
  206. return f.singleTransChar
  207. }
  208. transitionKeys := f.data[f.transBottom:f.transTop]
  209. return transitionKeys[f.numTrans-i-1]
  210. }
  211. func (f *fstStateV1) TransitionFor(b byte) (int, int, uint64) {
  212. if f.isEncodedSingle() {
  213. if f.singleTransChar == b {
  214. return 0, int(f.singleTransAddr), f.singleTransOut
  215. }
  216. return -1, noneAddr, 0
  217. }
  218. transitionKeys := f.data[f.transBottom:f.transTop]
  219. pos := bytes.IndexByte(transitionKeys, b)
  220. if pos < 0 {
  221. return -1, noneAddr, 0
  222. }
  223. transDests := f.data[f.destBottom:f.destTop]
  224. dest := int(readPackedUint(transDests[pos*f.transSize : pos*f.transSize+f.transSize]))
  225. if dest > 0 {
  226. // convert delta
  227. dest = f.bottom - dest
  228. }
  229. transVals := f.data[f.outBottom:f.outTop]
  230. var out uint64
  231. if f.outSize > 0 {
  232. out = readPackedUint(transVals[pos*f.outSize : pos*f.outSize+f.outSize])
  233. }
  234. return f.numTrans - pos - 1, dest, out
  235. }
  236. func (f *fstStateV1) String() string {
  237. rv := ""
  238. rv += fmt.Sprintf("State: %d (%#x)", f.top, f.top)
  239. if f.final {
  240. rv += " final"
  241. fout := f.FinalOutput()
  242. if fout != 0 {
  243. rv += fmt.Sprintf(" (%d)", fout)
  244. }
  245. }
  246. rv += "\n"
  247. rv += fmt.Sprintf("Data: % x\n", f.data[f.bottom:f.top+1])
  248. for i := 0; i < f.numTrans; i++ {
  249. transChar := f.TransitionAt(i)
  250. _, transDest, transOut := f.TransitionFor(transChar)
  251. rv += fmt.Sprintf(" - %d (%#x) '%s' ---> %d (%#x) with output: %d", transChar, transChar, string(transChar), transDest, transDest, transOut)
  252. rv += "\n"
  253. }
  254. if f.numTrans == 0 {
  255. rv += "\n"
  256. }
  257. return rv
  258. }
  259. func (f *fstStateV1) DotString(num int) string {
  260. rv := ""
  261. label := fmt.Sprintf("%d", num)
  262. final := ""
  263. if f.final {
  264. final = ",peripheries=2"
  265. }
  266. rv += fmt.Sprintf(" %d [label=\"%s\"%s];\n", f.top, label, final)
  267. for i := 0; i < f.numTrans; i++ {
  268. transChar := f.TransitionAt(i)
  269. _, transDest, transOut := f.TransitionFor(transChar)
  270. out := ""
  271. if transOut != 0 {
  272. out = fmt.Sprintf("/%d", transOut)
  273. }
  274. rv += fmt.Sprintf(" %d -> %d [label=\"%s%s\"];\n", f.top, transDest, escapeInput(transChar), out)
  275. }
  276. return rv
  277. }
  // escapeInput returns a printable form of b for embedding in a double-quoted
  // DOT label. The byte is treated as a rune and quoted with
  // strconv.AppendQuoteRune, and the surrounding single quotes are stripped,
  // so control and non-printable values come out as Go escape sequences.
  //
  // Rune quoting leaves a double quote unescaped, which would end the enclosing
  // DOT label early, so that one byte is returned as \" instead.
  func escapeInput(b byte) string {
  	if b == '"' {
  		return `\"`
  	}
  	quoted := strconv.AppendQuoteRune(nil, rune(b))
  	return string(quoted[1 : len(quoted)-1])
  }